[PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for global percpu data

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for global percpu data
  2025-01-27 16:21 [PATCH bpf-next 0/4] bpf: Introduce " Leon Hwang
@ 2025-01-27 16:21 ` Leon Hwang
  2025-02-06  0:09   ` Andrii Nakryiko
  0 siblings, 1 reply; 8+ messages in thread
From: Leon Hwang @ 2025-01-27 16:21 UTC (permalink / raw)
  To: bpf
  Cc: ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
	leon.hwang, kernel-patches-bot

This patch enhances bpftool to generate skeletons for global percpu
variables. The generated skeleton includes a dedicated structure for
percpu data, allowing users to initialize and access percpu variables
efficiently.

Changes:

1. skeleton structure:

For global percpu variables, the skeleton now includes a nested
structure, e.g.:

struct test_global_percpu_data {
	struct bpf_object_skeleton *skeleton;
	struct bpf_object *obj;
	struct {
		struct bpf_map *bss;
		struct bpf_map *percpu;
	} maps;
	// ...
	struct test_global_percpu_data__percpu {
		int percpu_data;
	} *percpu;

	// ...
};

  * The "struct test_global_percpu_data__percpu" points to initialized
    data, which is actually "maps.percpu->data".
  * Before loading the skeleton, updating the
    "struct test_global_percpu_data__percpu" modifies the initial value
    of the corresponding global percpu variables.
  * After loading the skeleton, accessing or updating this struct is no
    longer meaningful. Instead, users must interact with the global
    percpu variables via the "maps.percpu" map.

2. code changes:

  * Added support for ".percpu" sections in bpftool's map identification
    logic.
  * Modified skeleton generation to handle percpu data maps
    appropriately.
  * Updated libbpf to make "percpu" pointing to "maps.percpu->data".

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 tools/bpf/bpftool/gen.c | 13 +++++++++----
 tools/lib/bpf/libbpf.c  |  3 +++
 tools/lib/bpf/libbpf.h  |  1 +
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 5a4d3240689ed..975775683ca12 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -92,7 +92,7 @@ static void get_header_guard(char *guard, const char *obj_name, const char *suff
 
 static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
 {
-	static const char *sfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
+	static const char *sfxs[] = { ".data", ".rodata", ".percpu", ".bss", ".kconfig" };
 	const char *name = bpf_map__name(map);
 	int i, n;
 
@@ -117,7 +117,7 @@ static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
 
 static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
 {
-	static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
+	static const char *pfxs[] = { ".data", ".rodata", ".percpu", ".bss", ".kconfig" };
 	int i, n;
 
 	/* recognize hard coded LLVM section name */
@@ -263,7 +263,9 @@ static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz)
 		return true;
 	}
 
-	if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
+	if (!bpf_map__is_internal(map) ||
+	    (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE) &&
+	     bpf_map__type(map) != BPF_MAP_TYPE_PERCPU_ARRAY))
 		return false;
 
 	if (!get_map_ident(map, buf, sz))
@@ -903,7 +905,10 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped, bool
 			i, bpf_map__name(map), ident);
 		/* memory-mapped internal maps */
 		if (mmaped && is_mmapable_map(map, ident, sizeof(ident))) {
-			printf("\tmap->mmaped = (void **)&obj->%s;\n", ident);
+			if (bpf_map__type(map) == BPF_MAP_TYPE_PERCPU_ARRAY)
+				printf("\tmap->data = (void **)&obj->%s;\n", ident);
+			else
+				printf("\tmap->mmaped = (void **)&obj->%s;\n", ident);
 		}
 
 		if (populate_links && bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) {
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6da6004c5c84d..dafb419bc5b86 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -13915,6 +13915,7 @@ static int populate_skeleton_maps(const struct bpf_object *obj,
 		struct bpf_map **map = map_skel->map;
 		const char *name = map_skel->name;
 		void **mmaped = map_skel->mmaped;
+		void **data = map_skel->data;
 
 		*map = bpf_object__find_map_by_name(obj, name);
 		if (!*map) {
@@ -13925,6 +13926,8 @@ static int populate_skeleton_maps(const struct bpf_object *obj,
 		/* externs shouldn't be pre-setup from user code */
 		if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
 			*mmaped = (*map)->mmaped;
+		if (data && (*map)->libbpf_type == LIBBPF_MAP_PERCPU_DATA)
+			*data = (*map)->data;
 	}
 	return 0;
 }
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3020ee45303a0..c49d6e44b5630 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1701,6 +1701,7 @@ struct bpf_map_skeleton {
 	const char *name;
 	struct bpf_map **map;
 	void **mmaped;
+	void **data;
 	struct bpf_link **link;
 };
 
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for global percpu data
  2025-01-27 16:21 ` [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for " Leon Hwang
@ 2025-02-06  0:09   ` Andrii Nakryiko
  2025-02-07  9:52     ` Leon Hwang
  0 siblings, 1 reply; 8+ messages in thread
From: Andrii Nakryiko @ 2025-02-06  0:09 UTC (permalink / raw)
  To: Leon Hwang
  Cc: bpf, ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
	kernel-patches-bot

On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>
> This patch enhances bpftool to generate skeletons for global percpu
> variables. The generated skeleton includes a dedicated structure for
> percpu data, allowing users to initialize and access percpu variables
> efficiently.
>
> Changes:
>
> 1. skeleton structure:
>
> For global percpu variables, the skeleton now includes a nested
> structure, e.g.:
>
> struct test_global_percpu_data {
>         struct bpf_object_skeleton *skeleton;
>         struct bpf_object *obj;
>         struct {
>                 struct bpf_map *bss;
>                 struct bpf_map *percpu;
>         } maps;
>         // ...
>         struct test_global_percpu_data__percpu {
>                 int percpu_data;
>         } *percpu;
>
>         // ...
> };
>
>   * The "struct test_global_percpu_data__percpu" points to initialized
>     data, which is actually "maps.percpu->data".
>   * Before loading the skeleton, updating the
>     "struct test_global_percpu_data__percpu" modifies the initial value
>     of the corresponding global percpu variables.
>   * After loading the skeleton, accessing or updating this struct is no
>     longer meaningful. Instead, users must interact with the global
>     percpu variables via the "maps.percpu" map.

can we set this pointer to NULL after load to avoid accidental
(successful) reads/writes to it?

>
> 2. code changes:
>
>   * Added support for ".percpu" sections in bpftool's map identification
>     logic.
>   * Modified skeleton generation to handle percpu data maps
>     appropriately.
>   * Updated libbpf to make "percpu" pointing to "maps.percpu->data".
>
> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
> ---
>  tools/bpf/bpftool/gen.c | 13 +++++++++----
>  tools/lib/bpf/libbpf.c  |  3 +++
>  tools/lib/bpf/libbpf.h  |  1 +
>  3 files changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
> index 5a4d3240689ed..975775683ca12 100644
> --- a/tools/bpf/bpftool/gen.c
> +++ b/tools/bpf/bpftool/gen.c
> @@ -92,7 +92,7 @@ static void get_header_guard(char *guard, const char *obj_name, const char *suff
>
>  static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
>  {
> -       static const char *sfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
> +       static const char *sfxs[] = { ".data", ".rodata", ".percpu", ".bss", ".kconfig" };
>         const char *name = bpf_map__name(map);
>         int i, n;
>
> @@ -117,7 +117,7 @@ static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
>
>  static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
>  {
> -       static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
> +       static const char *pfxs[] = { ".data", ".rodata", ".percpu", ".bss", ".kconfig" };
>         int i, n;
>
>         /* recognize hard coded LLVM section name */
> @@ -263,7 +263,9 @@ static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz)
>                 return true;
>         }
>
> -       if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
> +       if (!bpf_map__is_internal(map) ||
> +           (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE) &&
> +            bpf_map__type(map) != BPF_MAP_TYPE_PERCPU_ARRAY))

there will be no BPF_F_MMAPABLE set for PERCPU_ARRAY, why adding these
unnecessary extra conditionals?

>                 return false;
>
>         if (!get_map_ident(map, buf, sz))
> @@ -903,7 +905,10 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped, bool
>                         i, bpf_map__name(map), ident);
>                 /* memory-mapped internal maps */
>                 if (mmaped && is_mmapable_map(map, ident, sizeof(ident))) {
> -                       printf("\tmap->mmaped = (void **)&obj->%s;\n", ident);
> +                       if (bpf_map__type(map) == BPF_MAP_TYPE_PERCPU_ARRAY)
> +                               printf("\tmap->data = (void **)&obj->%s;\n", ident);
> +                       else
> +                               printf("\tmap->mmaped = (void **)&obj->%s;\n", ident);
>                 }
>
>                 if (populate_links && bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) {
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 6da6004c5c84d..dafb419bc5b86 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -13915,6 +13915,7 @@ static int populate_skeleton_maps(const struct bpf_object *obj,
>                 struct bpf_map **map = map_skel->map;
>                 const char *name = map_skel->name;
>                 void **mmaped = map_skel->mmaped;
> +               void **data = map_skel->data;
>
>                 *map = bpf_object__find_map_by_name(obj, name);
>                 if (!*map) {
> @@ -13925,6 +13926,8 @@ static int populate_skeleton_maps(const struct bpf_object *obj,
>                 /* externs shouldn't be pre-setup from user code */
>                 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
>                         *mmaped = (*map)->mmaped;
> +               if (data && (*map)->libbpf_type == LIBBPF_MAP_PERCPU_DATA)
> +                       *data = (*map)->data;
>         }
>         return 0;
>  }
> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
> index 3020ee45303a0..c49d6e44b5630 100644
> --- a/tools/lib/bpf/libbpf.h
> +++ b/tools/lib/bpf/libbpf.h
> @@ -1701,6 +1701,7 @@ struct bpf_map_skeleton {
>         const char *name;
>         struct bpf_map **map;
>         void **mmaped;
> +       void **data;

this is breaking backwards/forward compatibility. let's try to reuse mmaped

>         struct bpf_link **link;
>  };
>
> --
> 2.47.1
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for global percpu data
  2025-02-06  0:09   ` Andrii Nakryiko
@ 2025-02-07  9:52     ` Leon Hwang
  0 siblings, 0 replies; 8+ messages in thread
From: Leon Hwang @ 2025-02-07  9:52 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: bpf, ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
	kernel-patches-bot



On 6/2/25 08:09, Andrii Nakryiko wrote:
> On Mon, Jan 27, 2025 at 8:22 AM Leon Hwang <leon.hwang@linux.dev> wrote:
>>
>> This patch enhances bpftool to generate skeletons for global percpu
>> variables. The generated skeleton includes a dedicated structure for
>> percpu data, allowing users to initialize and access percpu variables
>> efficiently.
>>
>> Changes:
>>
>> 1. skeleton structure:
>>
>> For global percpu variables, the skeleton now includes a nested
>> structure, e.g.:
>>
>> struct test_global_percpu_data {
>>         struct bpf_object_skeleton *skeleton;
>>         struct bpf_object *obj;
>>         struct {
>>                 struct bpf_map *bss;
>>                 struct bpf_map *percpu;
>>         } maps;
>>         // ...
>>         struct test_global_percpu_data__percpu {
>>                 int percpu_data;
>>         } *percpu;
>>
>>         // ...
>> };
>>
>>   * The "struct test_global_percpu_data__percpu" points to initialized
>>     data, which is actually "maps.percpu->data".
>>   * Before loading the skeleton, updating the
>>     "struct test_global_percpu_data__percpu" modifies the initial value
>>     of the corresponding global percpu variables.
>>   * After loading the skeleton, accessing or updating this struct is no
>>     longer meaningful. Instead, users must interact with the global
>>     percpu variables via the "maps.percpu" map.
> 
> can we set this pointer to NULL after load to avoid accidental
> (successful) reads/writes to it?
> 

Good idea. I'll try to achieve it.

>>
>> 2. code changes:
>>
>>   * Added support for ".percpu" sections in bpftool's map identification
>>     logic.
>>   * Modified skeleton generation to handle percpu data maps
>>     appropriately.
>>   * Updated libbpf to make "percpu" pointing to "maps.percpu->data".
>>
>> Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
>> ---
>>  tools/bpf/bpftool/gen.c | 13 +++++++++----
>>  tools/lib/bpf/libbpf.c  |  3 +++
>>  tools/lib/bpf/libbpf.h  |  1 +
>>  3 files changed, 13 insertions(+), 4 deletions(-)
>>
>> diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
>> index 5a4d3240689ed..975775683ca12 100644
>> --- a/tools/bpf/bpftool/gen.c
>> +++ b/tools/bpf/bpftool/gen.c
>> @@ -92,7 +92,7 @@ static void get_header_guard(char *guard, const char *obj_name, const char *suff
>>
>>  static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
>>  {
>> -       static const char *sfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
>> +       static const char *sfxs[] = { ".data", ".rodata", ".percpu", ".bss", ".kconfig" };
>>         const char *name = bpf_map__name(map);
>>         int i, n;
>>
>> @@ -117,7 +117,7 @@ static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
>>
>>  static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
>>  {
>> -       static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
>> +       static const char *pfxs[] = { ".data", ".rodata", ".percpu", ".bss", ".kconfig" };
>>         int i, n;
>>
>>         /* recognize hard coded LLVM section name */
>> @@ -263,7 +263,9 @@ static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz)
>>                 return true;
>>         }
>>
>> -       if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
>> +       if (!bpf_map__is_internal(map) ||
>> +           (!(bpf_map__map_flags(map) & BPF_F_MMAPABLE) &&
>> +            bpf_map__type(map) != BPF_MAP_TYPE_PERCPU_ARRAY))
> 
> there will be no BPF_F_MMAPABLE set for PERCPU_ARRAY, why adding these
> unnecessary extra conditionals?
> 

Ack. It's unnecessary.

>>                 return false;
>>
>>         if (!get_map_ident(map, buf, sz))
>> @@ -903,7 +905,10 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped, bool
>>                         i, bpf_map__name(map), ident);
>>                 /* memory-mapped internal maps */
>>                 if (mmaped && is_mmapable_map(map, ident, sizeof(ident))) {
>> -                       printf("\tmap->mmaped = (void **)&obj->%s;\n", ident);
>> +                       if (bpf_map__type(map) == BPF_MAP_TYPE_PERCPU_ARRAY)
>> +                               printf("\tmap->data = (void **)&obj->%s;\n", ident);
>> +                       else
>> +                               printf("\tmap->mmaped = (void **)&obj->%s;\n", ident);
>>                 }
>>
>>                 if (populate_links && bpf_map__type(map) == BPF_MAP_TYPE_STRUCT_OPS) {
>> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
>> index 6da6004c5c84d..dafb419bc5b86 100644
>> --- a/tools/lib/bpf/libbpf.c
>> +++ b/tools/lib/bpf/libbpf.c
>> @@ -13915,6 +13915,7 @@ static int populate_skeleton_maps(const struct bpf_object *obj,
>>                 struct bpf_map **map = map_skel->map;
>>                 const char *name = map_skel->name;
>>                 void **mmaped = map_skel->mmaped;
>> +               void **data = map_skel->data;
>>
>>                 *map = bpf_object__find_map_by_name(obj, name);
>>                 if (!*map) {
>> @@ -13925,6 +13926,8 @@ static int populate_skeleton_maps(const struct bpf_object *obj,
>>                 /* externs shouldn't be pre-setup from user code */
>>                 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
>>                         *mmaped = (*map)->mmaped;
>> +               if (data && (*map)->libbpf_type == LIBBPF_MAP_PERCPU_DATA)
>> +                       *data = (*map)->data;
>>         }
>>         return 0;
>>  }
>> diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
>> index 3020ee45303a0..c49d6e44b5630 100644
>> --- a/tools/lib/bpf/libbpf.h
>> +++ b/tools/lib/bpf/libbpf.h
>> @@ -1701,6 +1701,7 @@ struct bpf_map_skeleton {
>>         const char *name;
>>         struct bpf_map **map;
>>         void **mmaped;
>> +       void **data;
> 
> this is breaking backwards/forward compatibility. let's try to reuse mmaped
> 

Indeed. If reuse map->mmaped for global percpu variables, it's
unnecessary to add this "data".

>>         struct bpf_link **link;
>>  };
>>
>> --
>> 2.47.1
>>

Thanks,
Leon



^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH bpf-next 0/4] bpf: Introduce global percpu data
@ 2025-02-13 16:06 Leon Hwang
  2025-02-13 16:06 ` [PATCH bpf-next 1/4] " Leon Hwang
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: Leon Hwang @ 2025-02-13 16:06 UTC (permalink / raw)
  To: bpf
  Cc: ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
	leon.hwang, kernel-patches-bot

This patch set introduces global percpu data, similar to commit
6316f78306c1 ("Merge branch 'support-global-data'"), to reduce restrictions
in C for BPF programs.

With this enhancement, it becomes possible to define and use global percpu
variables, much like the DEFINE_PER_CPU() macro in the kernel[0].

The idea stems from the bpflbr project[1], which itself was inspired by
retsnoop[2]. During testing of bpflbr on the v6.6 kernel, two LBR
(Last Branch Record) entries were observed related to the
bpf_get_smp_processor_id() helper.

Since commit 1ae6921009e5 ("bpf: inline bpf_get_smp_processor_id() helper"),
the bpf_get_smp_processor_id() helper has been inlined on x86_64, reducing
the overhead and consequently minimizing these two LBR records.

However, the introduction of global percpu data offers a more robust
solution. By leveraging the percpu_array map and percpu instructions,
global percpu data can be implemented intrinsically.

This feature also facilitates sharing percpu information between tail
callers and callees or between freplace callers and callees through a
shared global percpu variable. Previously, this was achieved using a
1-entry percpu_array map, which this patch set aims to improve upon.

Links:
[0] https://github.com/torvalds/linux/blob/fbfd64d25c7af3b8695201ebc85efe90be28c5a3/include/linux/percpu-defs.h#L114
[1] https://github.com/Asphaltt/bpflbr
[2] https://github.com/anakryiko/retsnoop

Changes:
v1 -> v2:
  * Address comments from Andrii:
    * Use LIBBPF_MAP_PERCPU and SEC_PERCPU.
    * Reuse mmaped of libbpf's struct bpf_map for .percpu map data.
    * Set .percpu struct pointer to NULL after loading skeleton.
    * Make sure value size of .percpu map is __aligned(8).
    * Use raw_tp and opts.cpu to test global percpu variables on all CPUs.
  * Address comments from Alexei:
    * Test non-zero offset of global percpu variable.
    * Test case about BPF_PSEUDO_MAP_IDX_VALUE.

rfc -> v1:
  * Address comments from Andrii:
    * Keep one image of global percpu variable for all CPUs.
    * Reject non-ARRAY map in bpf_map_direct_read(), check_reg_const_str(),
      and check_bpf_snprintf_call() in verifier.
    * Split out libbpf changes from kernel-side changes.
    * Use ".percpu" as PERCPU_DATA_SEC.
    * Use enum libbpf_map_type to distinguish BSS, DATA, RODATA and
      PERCPU_DATA.
    * Avoid using errno for checking err from libbpf_num_possible_cpus().
    * Use "map '%s': " prefix for error message.

Leon Hwang (4):
  bpf: Introduce global percpu data
  bpf, libbpf: Support global percpu data
  bpf, bpftool: Generate skeleton for global percpu data
  selftests/bpf: Add cases to test global percpu data

 kernel/bpf/arraymap.c                         |  41 +++-
 kernel/bpf/verifier.c                         |  45 ++++
 tools/bpf/bpftool/gen.c                       |  47 +++-
 tools/lib/bpf/libbpf.c                        | 101 ++++++--
 tools/lib/bpf/libbpf.h                        |   9 +
 tools/lib/bpf/libbpf.map                      |   1 +
 tools/testing/selftests/bpf/Makefile          |   2 +-
 .../bpf/prog_tests/global_data_init.c         | 218 +++++++++++++++++-
 .../bpf/progs/test_global_percpu_data.c       |  20 ++
 9 files changed, 449 insertions(+), 35 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_global_percpu_data.c

-- 
2.47.1

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH bpf-next 1/4] bpf: Introduce global percpu data
  2025-02-13 16:06 [PATCH bpf-next 0/4] bpf: Introduce global percpu data Leon Hwang
@ 2025-02-13 16:06 ` Leon Hwang
  2025-02-13 16:06 ` [PATCH bpf-next 2/4] bpf, libbpf: Support " Leon Hwang
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: Leon Hwang @ 2025-02-13 16:06 UTC (permalink / raw)
  To: bpf
  Cc: ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
	leon.hwang, kernel-patches-bot

This patch introduces global percpu data, inspired by commit
6316f78306c1 ("Merge branch 'support-global-data'"). It enables the
definition of global percpu variables in BPF, similar to the
DEFINE_PER_CPU() macro in the kernel[0].

For example, in BPF, it is able to define a global percpu variable like:

int data SEC(".percpu");

With this patch, tools like retsnoop[1] and bpflbr[2] can simplify their
BPF code for handling LBRs. The code can be updated from

static struct perf_branch_entry lbrs[1][MAX_LBR_ENTRIES] SEC(".data.lbrs");

to

static struct perf_branch_entry lbrs[MAX_LBR_ENTRIES] SEC(".percpu.lbrs");

This eliminates the need to retrieve the CPU ID using the
bpf_get_smp_processor_id() helper.

Additionally, by reusing global percpu data map, sharing information
between tail callers and callees or freplace callers and callees becomes
simpler compared to reusing percpu_array maps.

Links:
[0] https://github.com/torvalds/linux/blob/fbfd64d25c7af3b8695201ebc85efe90be28c5a3/include/linux/percpu-defs.h#L114
[1] https://github.com/anakryiko/retsnoop
[2] https://github.com/Asphaltt/bpflbr

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 kernel/bpf/arraymap.c | 41 +++++++++++++++++++++++++++++++++++++--
 kernel/bpf/verifier.c | 45 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index eb28c0f219ee4..322bd42c0edcb 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -249,6 +249,40 @@ static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
 	return this_cpu_ptr(array->pptrs[index & array->index_mask]);
 }
 
+static int percpu_array_map_direct_value_addr(const struct bpf_map *map,
+					      u64 *imm, u32 off)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+	if (map->max_entries != 1)
+		return -EOPNOTSUPP;
+	if (off >= map->value_size)
+		return -EINVAL;
+	if (!bpf_jit_supports_percpu_insn())
+		return -EOPNOTSUPP;
+
+	*imm = (u64) array->pptrs[0];
+	return 0;
+}
+
+static int percpu_array_map_direct_value_meta(const struct bpf_map *map,
+					      u64 imm, u32 *off)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	u64 base = (u64) array->pptrs[0];
+	u64 range = array->elem_size;
+
+	if (map->max_entries != 1)
+		return -EOPNOTSUPP;
+	if (imm < base || imm >= base + range)
+		return -ENOENT;
+	if (!bpf_jit_supports_percpu_insn())
+		return -EOPNOTSUPP;
+
+	*off = imm - base;
+	return 0;
+}
+
 /* emit BPF instructions equivalent to C code of percpu_array_map_lookup_elem() */
 static int percpu_array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
@@ -532,9 +566,10 @@ static int array_map_check_btf(const struct bpf_map *map,
 {
 	u32 int_data;
 
-	/* One exception for keyless BTF: .bss/.data/.rodata map */
+	/* One exception for keyless BTF: .bss/.data/.rodata/.percpu map */
 	if (btf_type_is_void(key_type)) {
-		if (map->map_type != BPF_MAP_TYPE_ARRAY ||
+		if ((map->map_type != BPF_MAP_TYPE_ARRAY &&
+		     map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY) ||
 		    map->max_entries != 1)
 			return -EINVAL;
 
@@ -815,6 +850,8 @@ const struct bpf_map_ops percpu_array_map_ops = {
 	.map_get_next_key = array_map_get_next_key,
 	.map_lookup_elem = percpu_array_map_lookup_elem,
 	.map_gen_lookup = percpu_array_map_gen_lookup,
+	.map_direct_value_addr = percpu_array_map_direct_value_addr,
+	.map_direct_value_meta = percpu_array_map_direct_value_meta,
 	.map_update_elem = array_map_update_elem,
 	.map_delete_elem = array_map_delete_elem,
 	.map_lookup_percpu_elem = percpu_array_map_lookup_percpu_elem,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9971c03adfd5d..5682546b1193e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6810,6 +6810,8 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
 	u64 addr;
 	int err;
 
+	if (map->map_type != BPF_MAP_TYPE_ARRAY)
+		return -EINVAL;
 	err = map->ops->map_direct_value_addr(map, &addr, off);
 	if (err)
 		return err;
@@ -7322,6 +7324,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 			/* if map is read-only, track its contents as scalars */
 			if (tnum_is_const(reg->var_off) &&
 			    bpf_map_is_rdonly(map) &&
+			    map->map_type == BPF_MAP_TYPE_ARRAY &&
 			    map->ops->map_direct_value_addr) {
 				int map_off = off + reg->var_off.value;
 				u64 val = 0;
@@ -9128,6 +9131,11 @@ static int check_reg_const_str(struct bpf_verifier_env *env,
 		return -EACCES;
 	}
 
+	if (map->map_type != BPF_MAP_TYPE_ARRAY) {
+		verbose(env, "only array map supports direct string value access\n");
+		return -EINVAL;
+	}
+
 	err = check_map_access(env, regno, reg->off,
 			       map->value_size - reg->off, false,
 			       ACCESS_HELPER);
@@ -10802,6 +10810,11 @@ static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
 		return -EINVAL;
 	num_args = data_len_reg->var_off.value / 8;
 
+	if (fmt_map->map_type != BPF_MAP_TYPE_ARRAY) {
+		verbose(env, "only array map supports snprintf\n");
+		return -EINVAL;
+	}
+
 	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
 	 * and map_direct_value_addr is set.
 	 */
@@ -21381,6 +21394,38 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			goto next_insn;
 		}
 
+#ifdef CONFIG_SMP
+		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
+		    (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
+		     insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE)) {
+			struct bpf_map *map;
+
+			aux = &env->insn_aux_data[i + delta];
+			map = env->used_maps[aux->map_index];
+			if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY)
+				goto next_insn;
+
+			/* Reuse the original ld_imm64 insn. And add one
+			 * mov64_percpu_reg insn.
+			 */
+
+			insn_buf[0] = insn[1];
+			insn_buf[1] = BPF_MOV64_PERCPU_REG(insn->dst_reg, insn->dst_reg);
+			cnt = 2;
+
+			i++;
+			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
+			if (!new_prog)
+				return -ENOMEM;
+
+			delta    += cnt - 1;
+			env->prog = prog = new_prog;
+			insn      = new_prog->insnsi + i + delta;
+
+			goto next_insn;
+		}
+#endif
+
 		if (insn->code != (BPF_JMP | BPF_CALL))
 			goto next_insn;
 		if (insn->src_reg == BPF_PSEUDO_CALL)
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH bpf-next 2/4] bpf, libbpf: Support global percpu data
  2025-02-13 16:06 [PATCH bpf-next 0/4] bpf: Introduce global percpu data Leon Hwang
  2025-02-13 16:06 ` [PATCH bpf-next 1/4] " Leon Hwang
@ 2025-02-13 16:06 ` Leon Hwang
  2025-02-13 16:06 ` [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for " Leon Hwang
  2025-02-13 16:06 ` [PATCH bpf-next 4/4] selftests/bpf: Add cases to test " Leon Hwang
  3 siblings, 0 replies; 8+ messages in thread
From: Leon Hwang @ 2025-02-13 16:06 UTC (permalink / raw)
  To: bpf
  Cc: ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
	leon.hwang, kernel-patches-bot

This patch introduces support for global percpu data in libbpf by adding a
new ".percpu" section, similar to ".data". It enables efficient handling of
percpu global variables in bpf programs.

Key changes:

* Introduces the ".percpu" section in libbpf.
* Correct value size to __aligned(8) of ".percpu" map definition and btf.
* Creates internal maps for percpu data.
* Initializes and populates these maps accordingly.

This enhancement improves performance for workloads that benefit from
percpu storage.

Meanwhile, add bpf_map__is_internal_percpu() API to check whether the map
is an internal map used for global percpu variables.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 tools/lib/bpf/libbpf.c   | 101 +++++++++++++++++++++++++++++++--------
 tools/lib/bpf/libbpf.h   |   9 ++++
 tools/lib/bpf/libbpf.map |   1 +
 3 files changed, 90 insertions(+), 21 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 194809da51725..736a902a667e9 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -516,6 +516,7 @@ struct bpf_struct_ops {
 };
 
 #define DATA_SEC ".data"
+#define PERCPU_SEC ".percpu"
 #define BSS_SEC ".bss"
 #define RODATA_SEC ".rodata"
 #define KCONFIG_SEC ".kconfig"
@@ -530,6 +531,7 @@ enum libbpf_map_type {
 	LIBBPF_MAP_BSS,
 	LIBBPF_MAP_RODATA,
 	LIBBPF_MAP_KCONFIG,
+	LIBBPF_MAP_PERCPU,
 };
 
 struct bpf_map_def {
@@ -640,6 +642,7 @@ enum sec_type {
 	SEC_DATA,
 	SEC_RODATA,
 	SEC_ST_OPS,
+	SEC_PERCPU,
 };
 
 struct elf_sec_desc {
@@ -1903,7 +1906,7 @@ static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
 	struct btf_var_secinfo *vsi;
 	int i, n;
 
-	if (!map->btf_value_type_id)
+	if (!map->btf_value_type_id || map->libbpf_type == LIBBPF_MAP_PERCPU)
 		return false;
 
 	t = btf__type_by_id(obj->btf, map->btf_value_type_id);
@@ -1927,6 +1930,7 @@ static int
 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 			      const char *real_name, int sec_idx, void *data, size_t data_sz)
 {
+	bool is_percpu = type == LIBBPF_MAP_PERCPU;
 	struct bpf_map_def *def;
 	struct bpf_map *map;
 	size_t mmap_sz;
@@ -1948,9 +1952,9 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 	}
 
 	def = &map->def;
-	def->type = BPF_MAP_TYPE_ARRAY;
+	def->type = is_percpu ? BPF_MAP_TYPE_PERCPU_ARRAY : BPF_MAP_TYPE_ARRAY;
 	def->key_size = sizeof(int);
-	def->value_size = data_sz;
+	def->value_size = is_percpu ? roundup(data_sz, 8) : data_sz;
 	def->max_entries = 1;
 	def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
 		? BPF_F_RDONLY_PROG : 0;
@@ -1961,10 +1965,11 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
 	if (map_is_mmapable(obj, map))
 		def->map_flags |= BPF_F_MMAPABLE;
 
-	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
-		 map->name, map->sec_idx, map->sec_offset, def->map_flags);
+	pr_debug("map '%s' (global %sdata): at sec_idx %d, offset %zu, flags %x.\n",
+		 map->name, is_percpu ? "percpu " : "", map->sec_idx,
+		 map->sec_offset, def->map_flags);
 
-	mmap_sz = bpf_map_mmap_sz(map);
+	mmap_sz = is_percpu ? def->value_size : bpf_map_mmap_sz(map);
 	map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
 			   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 	if (map->mmaped == MAP_FAILED) {
@@ -2015,6 +2020,13 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
 							    sec_desc->data->d_buf,
 							    sec_desc->data->d_size);
 			break;
+		case SEC_PERCPU:
+			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_PERCPU,
+							    sec_name, sec_idx,
+							    sec_desc->data->d_buf,
+							    sec_desc->data->d_size);
+			break;
 		case SEC_BSS:
 			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
 			err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
@@ -3364,6 +3376,10 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
 		fixup_offsets = true;
 	}
 
+	/* .percpu DATASEC must has __aligned(8) size. */
+	if (strcmp(sec_name, PERCPU_SEC) == 0 || str_has_pfx(sec_name, PERCPU_SEC))
+		t->size = roundup(t->size, 8);
+
 	for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
 		const struct btf_type *t_var;
 		struct btf_var *var;
@@ -3934,6 +3950,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 				sec_desc->sec_type = SEC_RODATA;
 				sec_desc->shdr = sh;
 				sec_desc->data = data;
+			} else if (strcmp(name, PERCPU_SEC) == 0 ||
+				   str_has_pfx(name, PERCPU_SEC)) {
+				sec_desc->sec_type = SEC_PERCPU;
+				sec_desc->shdr = sh;
+				sec_desc->data = data;
 			} else if (strcmp(name, STRUCT_OPS_SEC) == 0 ||
 				   strcmp(name, STRUCT_OPS_LINK_SEC) == 0 ||
 				   strcmp(name, "?" STRUCT_OPS_SEC) == 0 ||
@@ -4453,6 +4474,7 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
 	case SEC_BSS:
 	case SEC_DATA:
 	case SEC_RODATA:
+	case SEC_PERCPU:
 		return true;
 	default:
 		return false;
@@ -4478,6 +4500,8 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
 		return LIBBPF_MAP_DATA;
 	case SEC_RODATA:
 		return LIBBPF_MAP_RODATA;
+	case SEC_PERCPU:
+		return LIBBPF_MAP_PERCPU;
 	default:
 		return LIBBPF_MAP_UNSPEC;
 	}
@@ -4795,7 +4819,7 @@ static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
 
 	/*
 	 * LLVM annotates global data differently in BTF, that is,
-	 * only as '.data', '.bss' or '.rodata'.
+	 * only as '.data', '.bss', '.percpu' or '.rodata'.
 	 */
 	if (!bpf_map__is_internal(map))
 		return -ENOENT;
@@ -5125,23 +5149,47 @@ static int
 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 {
 	enum libbpf_map_type map_type = map->libbpf_type;
-	int err, zero = 0;
-	size_t mmap_sz;
+	bool is_percpu = map_type == LIBBPF_MAP_PERCPU;
+	int err = 0, zero = 0, num_cpus, i;
+	size_t data_sz, elem_sz, mmap_sz;
+	void *data = NULL;
+
+	data_sz = map->def.value_size;
+	if (is_percpu) {
+		num_cpus = libbpf_num_possible_cpus();
+		if (num_cpus < 0) {
+			err = num_cpus;
+			return err;
+		}
+
+		data_sz = data_sz * num_cpus;
+		data = malloc(data_sz);
+		if (!data) {
+			err = -ENOMEM;
+			return err;
+		}
+
+		elem_sz = map->def.value_size;
+		for (i = 0; i < num_cpus; i++)
+			memcpy(data + i * elem_sz, map->mmaped, elem_sz);
+	} else {
+		data = map->mmaped;
+	}
 
 	if (obj->gen_loader) {
 		bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
-					 map->mmaped, map->def.value_size);
+					 data, data_sz);
 		if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
 			bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
-		return 0;
+		goto free_data;
 	}
 
-	err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
+	err = bpf_map_update_elem(map->fd, &zero, data, 0);
 	if (err) {
 		err = -errno;
 		pr_warn("map '%s': failed to set initial contents: %s\n",
 			bpf_map__name(map), errstr(err));
-		return err;
+		goto free_data;
 	}
 
 	/* Freeze .rodata and .kconfig map as read-only from syscall side. */
@@ -5151,7 +5199,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 			err = -errno;
 			pr_warn("map '%s': failed to freeze as read-only: %s\n",
 				bpf_map__name(map), errstr(err));
-			return err;
+			goto free_data;
 		}
 	}
 
@@ -5178,7 +5226,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 			err = -errno;
 			pr_warn("map '%s': failed to re-mmap() contents: %s\n",
 				bpf_map__name(map), errstr(err));
-			return err;
+			goto free_data;
 		}
 		map->mmaped = mmaped;
 	} else if (map->mmaped) {
@@ -5186,7 +5234,10 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 		map->mmaped = NULL;
 	}
 
-	return 0;
+free_data:
+	if (is_percpu)
+		free(data);
+	return err;
 }
 
 static void bpf_map__destroy(struct bpf_map *map);
@@ -10132,14 +10183,17 @@ int bpf_map__fd(const struct bpf_map *map)
 
 static bool map_uses_real_name(const struct bpf_map *map)
 {
-	/* Since libbpf started to support custom .data.* and .rodata.* maps,
-	 * their user-visible name differs from kernel-visible name. Users see
-	 * such map's corresponding ELF section name as a map name.
-	 * This check distinguishes .data/.rodata from .data.* and .rodata.*
-	 * maps to know which name has to be returned to the user.
+	/* Since libbpf started to support custom .data.*, .percpu.* and
+	 * .rodata.* maps, their user-visible name differs from kernel-visible
+	 * name. Users see such map's corresponding ELF section name as a map
+	 * name. This check distinguishes .data/.percpu/.rodata from .data.*,
+	 * .percpu.* and .rodata.* maps to know which name has to be returned to
+	 * the user.
 	 */
 	if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
 		return true;
+	if (map->libbpf_type == LIBBPF_MAP_PERCPU && strcmp(map->real_name, PERCPU_SEC) != 0)
+		return true;
 	if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
 		return true;
 	return false;
@@ -10386,6 +10440,11 @@ bool bpf_map__is_internal(const struct bpf_map *map)
 	return map->libbpf_type != LIBBPF_MAP_UNSPEC;
 }
 
+bool bpf_map__is_internal_percpu(const struct bpf_map *map)
+{
+	return map->libbpf_type == LIBBPF_MAP_PERCPU;
+}
+
 __u32 bpf_map__ifindex(const struct bpf_map *map)
 {
 	return map->map_ifindex;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3020ee45303a0..1d8ca33d370d1 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1072,6 +1072,15 @@ LIBBPF_API void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize
  */
 LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
 
+/**
+ * @brief **bpf_map__is_internal_percpu()** tells the caller whether or not
+ * the passed map is an internal map used for global percpu variables.
+ * @param map the bpf_map
+ * @return true, if the map is an internal map used for global percpu
+ * variables; false, otherwise
+ */
+LIBBPF_API bool bpf_map__is_internal_percpu(const struct bpf_map *map);
+
 /**
  * @brief **bpf_map__set_pin_path()** sets the path attribute that tells where the
  * BPF map should be pinned. This does not actually create the 'pin'.
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index b5a838de6f47c..09cdbd6e32218 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -438,4 +438,5 @@ LIBBPF_1.6.0 {
 		bpf_linker__new_fd;
 		btf__add_decl_attr;
 		btf__add_type_attr;
+		bpf_map__is_internal_percpu;
 } LIBBPF_1.5.0;
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for global percpu data
  2025-02-13 16:06 [PATCH bpf-next 0/4] bpf: Introduce global percpu data Leon Hwang
  2025-02-13 16:06 ` [PATCH bpf-next 1/4] " Leon Hwang
  2025-02-13 16:06 ` [PATCH bpf-next 2/4] bpf, libbpf: Support " Leon Hwang
@ 2025-02-13 16:06 ` Leon Hwang
  2025-02-13 16:06 ` [PATCH bpf-next 4/4] selftests/bpf: Add cases to test " Leon Hwang
  3 siblings, 0 replies; 8+ messages in thread
From: Leon Hwang @ 2025-02-13 16:06 UTC (permalink / raw)
  To: bpf
  Cc: ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
	leon.hwang, kernel-patches-bot

This patch enhances bpftool to generate skeletons that properly handle
global percpu variables. The generated skeleton now includes a dedicated
structure for percpu data, allowing users to initialize and access percpu
variables more efficiently.

Changes:

1. skeleton structure:

For global percpu variables, the skeleton now includes a nested
structure, e.g.:

struct test_global_percpu_data {
	struct bpf_object_skeleton *skeleton;
	struct bpf_object *obj;
	struct {
		struct bpf_map *percpu;
	} maps;
	// ...
	struct test_global_percpu_data__percpu {
		int data;
		int run;
		int data2;
	} __aligned(8) *percpu;

	// ...
};

  * The "struct test_global_percpu_data__percpu" points to initialized
    data, which is actually "maps.percpu->mmaped".
  * Before loading the skeleton, updating the
    "struct test_global_percpu_data__percpu" modifies the initial value
    of the corresponding global percpu variables.
  * After loading the skeleton, accessing or updating this struct is not
    allowed because this struct pointer has been reset to NULL. Instead,
    users must interact with the global percpu variables via the
    "maps.percpu" map.

2. code changes:

  * Added support for ".percpu" sections in bpftool's map identification
    logic.
  * Modified skeleton generation to handle percpu data maps
    appropriately.
  * Updated libbpf to make "percpu" pointing to "maps.percpu->mmaped".
  * Set ".percpu" struct points to NULL after loading the skeleton.

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 tools/bpf/bpftool/gen.c | 47 ++++++++++++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 10 deletions(-)

diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 67a60114368f5..f2bf509248718 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -92,7 +92,7 @@ static void get_header_guard(char *guard, const char *obj_name, const char *suff
 
 static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
 {
-	static const char *sfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
+	static const char *sfxs[] = { ".data", ".rodata", ".percpu", ".bss", ".kconfig" };
 	const char *name = bpf_map__name(map);
 	int i, n;
 
@@ -117,7 +117,7 @@ static bool get_map_ident(const struct bpf_map *map, char *buf, size_t buf_sz)
 
 static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
 {
-	static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
+	static const char *pfxs[] = { ".data", ".rodata", ".percpu", ".bss", ".kconfig" };
 	int i, n;
 
 	/* recognize hard coded LLVM section name */
@@ -148,7 +148,8 @@ static int codegen_datasec_def(struct bpf_object *obj,
 			       struct btf *btf,
 			       struct btf_dump *d,
 			       const struct btf_type *sec,
-			       const char *obj_name)
+			       const char *obj_name,
+			       bool is_percpu)
 {
 	const char *sec_name = btf__name_by_offset(btf, sec->name_off);
 	const struct btf_var_secinfo *sec_var = btf_var_secinfos(sec);
@@ -228,7 +229,7 @@ static int codegen_datasec_def(struct bpf_object *obj,
 
 		off = sec_var->offset + sec_var->size;
 	}
-	printf("	} *%s;\n", sec_ident);
+	printf("	}%s *%s;\n", is_percpu ? " __aligned(8)" : "", sec_ident);
 	return 0;
 }
 
@@ -279,6 +280,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
 	struct bpf_map *map;
 	const struct btf_type *sec;
 	char map_ident[256];
+	bool is_percpu;
 	int err = 0;
 
 	d = btf_dump__new(btf, codegen_btf_dump_printf, NULL, NULL);
@@ -286,8 +288,11 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
 		return -errno;
 
 	bpf_object__for_each_map(map, obj) {
-		/* only generate definitions for memory-mapped internal maps */
-		if (!is_mmapable_map(map, map_ident, sizeof(map_ident)))
+		/* only generate definitions for memory-mapped or .percpu internal maps */
+		is_percpu = bpf_map__is_internal_percpu(map);
+		if (!is_mmapable_map(map, map_ident, sizeof(map_ident)) && !is_percpu)
+			continue;
+		if (is_percpu && (use_loader || !get_map_ident(map, map_ident, sizeof(map_ident))))
 			continue;
 
 		sec = find_type_for_map(btf, map_ident);
@@ -303,7 +308,7 @@ static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
 			printf("	struct %s__%s {\n", obj_name, map_ident);
 			printf("	} *%s;\n", map_ident);
 		} else {
-			err = codegen_datasec_def(obj, btf, d, sec, obj_name);
+			err = codegen_datasec_def(obj, btf, d, sec, obj_name, is_percpu);
 			if (err)
 				goto out;
 		}
@@ -901,8 +906,9 @@ codegen_maps_skeleton(struct bpf_object *obj, size_t map_cnt, bool mmaped, bool
 				map->map = &obj->maps.%s;	    \n\
 			",
 			i, bpf_map__name(map), ident);
-		/* memory-mapped internal maps */
-		if (mmaped && is_mmapable_map(map, ident, sizeof(ident))) {
+		/* memory-mapped or .percpu internal maps */
+		if (mmaped && (is_mmapable_map(map, ident, sizeof(ident)) ||
+			       bpf_map__is_internal_percpu(map))) {
 			printf("\tmap->mmaped = (void **)&obj->%s;\n", ident);
 		}
 
@@ -1434,7 +1440,28 @@ static int do_skeleton(int argc, char **argv)
 		static inline int					    \n\
 		%1$s__load(struct %1$s *obj)				    \n\
 		{							    \n\
-			return bpf_object__load_skeleton(obj->skeleton);    \n\
+			int err;					    \n\
+									    \n\
+			err = bpf_object__load_skeleton(obj->skeleton);	    \n\
+			if (err)					    \n\
+				return err;				    \n\
+									    \n\
+		", obj_name);
+
+	if (map_cnt) {
+		bpf_object__for_each_map(map, obj) {
+			if (!get_map_ident(map, ident, sizeof(ident)))
+				continue;
+			if (bpf_map__is_internal(map) &&
+			    bpf_map__type(map) == BPF_MAP_TYPE_PERCPU_ARRAY) {
+				printf("\tobj->%s = NULL;\n", ident);
+			}
+		}
+	}
+
+	codegen("\
+		\n\
+			return 0;					    \n\
 		}							    \n\
 									    \n\
 		static inline struct %1$s *				    \n\
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH bpf-next 4/4] selftests/bpf: Add cases to test global percpu data
  2025-02-13 16:06 [PATCH bpf-next 0/4] bpf: Introduce global percpu data Leon Hwang
                   ` (2 preceding siblings ...)
  2025-02-13 16:06 ` [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for " Leon Hwang
@ 2025-02-13 16:06 ` Leon Hwang
  3 siblings, 0 replies; 8+ messages in thread
From: Leon Hwang @ 2025-02-13 16:06 UTC (permalink / raw)
  To: bpf
  Cc: ast, daniel, andrii, yonghong.song, song, eddyz87, qmo, dxu,
	leon.hwang, kernel-patches-bot

If the arch, like s390x, does not support percpu insn, this case won't
test global percpu data by checking -EOPNOTSUPP after loading prog.

The following APIs have been tested for global percpu data:
1. bpf_map__set_initial_value()
2. bpf_map__initial_value()
3. generated percpu struct pointer pointing to internal map's mmaped
4. bpf_map__lookup_elem() for global percpu data map
5. bpf_map__is_internal_percpu()

At the same time, the case is also tested with 'bpftool gen skeleton -L'.

125     global_percpu_data_init:OK
126     global_percpu_data_lskel:OK
Summary: 2/0 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Leon Hwang <leon.hwang@linux.dev>
---
 tools/testing/selftests/bpf/Makefile          |   2 +-
 .../bpf/prog_tests/global_data_init.c         | 218 +++++++++++++++++-
 .../bpf/progs/test_global_percpu_data.c       |  20 ++
 3 files changed, 238 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_global_percpu_data.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 0d552bfcfe7da..7991de79d55c5 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -503,7 +503,7 @@ LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c 		\
 
 # Generate both light skeleton and libbpf skeleton for these
 LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
-	kfunc_call_test_subprog.c
+	kfunc_call_test_subprog.c test_global_percpu_data.c
 SKEL_BLACKLIST += $$(LSKELS)
 
 test_static_linked.skel.h-deps := test_static_linked1.bpf.o test_static_linked2.bpf.o
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
index 8466332d7406f..f4cb97b7bd63b 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -1,5 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <sched.h>
 #include <test_progs.h>
+#include "bpf/libbpf_internal.h"
+#include "test_global_percpu_data.skel.h"
+#include "test_global_percpu_data.lskel.h"
 
 void test_global_data_init(void)
 {
@@ -8,7 +12,7 @@ void test_global_data_init(void)
 	__u8 *buff = NULL, *newval = NULL;
 	struct bpf_object *obj;
 	struct bpf_map *map;
-        __u32 duration = 0;
+	__u32 duration = 0;
 	size_t sz;
 
 	obj = bpf_object__open_file(file, NULL);
@@ -60,3 +64,215 @@ void test_global_data_init(void)
 	free(newval);
 	bpf_object__close(obj);
 }
+
+void test_global_percpu_data_init(void)
+{
+	struct test_global_percpu_data__percpu init_value, *init_data, *data, *percpu_data;
+	int key, prog_fd, err, num_cpus, num_online, comm_fd = -1, i;
+	struct test_global_percpu_data *skel = NULL;
+	__u64 args[2] = {0x1234ULL, 0x5678ULL};
+	size_t elem_sz, init_data_sz;
+	char buf[] = "new_name";
+	struct bpf_map *map;
+	bool *online;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		    .ctx_in = args,
+		    .ctx_size_in = sizeof(args),
+		    .flags = BPF_F_TEST_RUN_ON_CPU,
+	);
+
+	num_cpus = libbpf_num_possible_cpus();
+	if (!ASSERT_GT(num_cpus, 0, "libbpf_num_possible_cpus"))
+		return;
+
+	err = parse_cpu_mask_file("/sys/devices/system/cpu/online",
+				  &online, &num_online);
+	if (!ASSERT_OK(err, "parse_cpu_mask_file"))
+		return;
+
+	elem_sz = sizeof(*percpu_data);
+	percpu_data = calloc(num_cpus, elem_sz);
+	if (!ASSERT_OK_PTR(percpu_data, "calloc percpu_data"))
+		goto out;
+
+	skel = test_global_percpu_data__open();
+	if (!ASSERT_OK_PTR(skel, "test_global_percpu_data__open"))
+		goto out;
+	if (!ASSERT_OK_PTR(skel->percpu, "skel->percpu"))
+		goto out;
+
+	ASSERT_EQ(skel->percpu->data, -1, "skel->percpu->data");
+	ASSERT_FALSE(skel->percpu->run, "skel->percpu->run");
+	ASSERT_EQ(skel->percpu->data2, 0, "skel->percpu->data2");
+
+	map = skel->maps.percpu;
+	if (!ASSERT_EQ(bpf_map__type(map), BPF_MAP_TYPE_PERCPU_ARRAY, "bpf_map__type"))
+		goto out;
+	if (!ASSERT_TRUE(bpf_map__is_internal_percpu(map), "bpf_map__is_internal_percpu"))
+		goto out;
+
+	init_value.data = 2;
+	init_value.run = false;
+	err = bpf_map__set_initial_value(map, &init_value, sizeof(init_value));
+	if (!ASSERT_OK(err, "bpf_map__set_initial_value"))
+		goto out;
+
+	init_data = bpf_map__initial_value(map, &init_data_sz);
+	if (!ASSERT_OK_PTR(init_data, "bpf_map__initial_value"))
+		goto out;
+
+	ASSERT_EQ(init_data->data, init_value.data, "initial_value data");
+	ASSERT_EQ(init_data->run, init_value.run, "initial_value run");
+	ASSERT_EQ(init_data_sz, sizeof(init_value), "initial_value size");
+	ASSERT_EQ((void *) init_data, (void *) skel->percpu, "skel->percpu eq init_data");
+	ASSERT_EQ(skel->percpu->data, init_value.data, "skel->percpu->data");
+	ASSERT_EQ(skel->percpu->run, init_value.run, "skel->percpu->run");
+
+	err = test_global_percpu_data__load(skel);
+	if (err == -EOPNOTSUPP) {
+		test__skip();
+		goto out;
+	}
+	if (!ASSERT_OK(err, "test_global_percpu_data__load"))
+		goto out;
+
+	ASSERT_NULL(skel->percpu, "NULL skel->percpu");
+
+	err = test_global_percpu_data__attach(skel);
+	if (!ASSERT_OK(err, "test_global_percpu_data__attach"))
+		goto out;
+
+	comm_fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+	if (!ASSERT_GE(comm_fd, 0, "open /proc/self/comm"))
+		goto out;
+
+	err = write(comm_fd, buf, sizeof(buf));
+	if (!ASSERT_GE(err, 0, "task rename"))
+		goto out;
+
+	prog_fd = bpf_program__fd(skel->progs.update_percpu_data);
+
+	/* run on every CPU */
+	for (i = 0; i < num_online; i++) {
+		if (!online[i])
+			continue;
+
+		topts.cpu = i;
+		topts.retval = 0;
+		err = bpf_prog_test_run_opts(prog_fd, &topts);
+		ASSERT_OK(err, "bpf_prog_test_run_opts");
+		ASSERT_EQ(topts.retval, 0, "bpf_prog_test_run_opts retval");
+	}
+
+	key = 0;
+	err = bpf_map__lookup_elem(map, &key, sizeof(key), percpu_data,
+				   elem_sz * num_cpus, 0);
+	if (!ASSERT_OK(err, "bpf_map__lookup_elem"))
+		goto out;
+
+	for (i = 0; i < num_online; i++) {
+		if (!online[i])
+			continue;
+
+		data = percpu_data + i;
+		ASSERT_EQ(data->data, 1, "percpu_data->data");
+		ASSERT_TRUE(data->run, "percpu_data->run");
+		ASSERT_EQ(data->data2, 0xc0de, "percpu_data->data2");
+	}
+
+out:
+	close(comm_fd);
+	test_global_percpu_data__destroy(skel);
+	if (percpu_data)
+		free(percpu_data);
+	free(online);
+}
+
+void test_global_percpu_data_lskel(void)
+{
+	int key, prog_fd, map_fd, err, num_cpus, num_online, comm_fd = -1, i;
+	struct test_global_percpu_data__percpu *data, *percpu_data;
+	struct test_global_percpu_data_lskel *lskel = NULL;
+	__u64 args[2] = {0x1234ULL, 0x5678ULL};
+	char buf[] = "new_name";
+	bool *online;
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		    .ctx_in = args,
+		    .ctx_size_in = sizeof(args),
+		    .flags = BPF_F_TEST_RUN_ON_CPU,
+	);
+
+	num_cpus = libbpf_num_possible_cpus();
+	if (!ASSERT_GT(num_cpus, 0, "libbpf_num_possible_cpus"))
+		return;
+
+	err = parse_cpu_mask_file("/sys/devices/system/cpu/online",
+				  &online, &num_online);
+	if (!ASSERT_OK(err, "parse_cpu_mask_file"))
+		return;
+
+	percpu_data = calloc(num_cpus, sizeof(*percpu_data));
+	if (!ASSERT_OK_PTR(percpu_data, "calloc percpu_data"))
+		goto out;
+
+	lskel = test_global_percpu_data_lskel__open();
+	if (!ASSERT_OK_PTR(lskel, "test_global_percpu_data_lskel__open"))
+		goto out;
+
+	err = test_global_percpu_data_lskel__load(lskel);
+	if (err == -EOPNOTSUPP) {
+		test__skip();
+		goto out;
+	}
+	if (!ASSERT_OK(err, "test_global_percpu_data_lskel__load"))
+		goto out;
+
+	err = test_global_percpu_data_lskel__attach(lskel);
+	if (!ASSERT_OK(err, "test_global_percpu_data_lskel__attach"))
+		goto out;
+
+	comm_fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+	if (!ASSERT_GE(comm_fd, 0, "open /proc/self/comm"))
+		goto out;
+
+	err = write(comm_fd, buf, sizeof(buf));
+	if (!ASSERT_GE(err, 0, "task rename"))
+		goto out;
+
+	prog_fd = lskel->progs.update_percpu_data.prog_fd;
+
+	/* run on every CPU */
+	for (i = 0; i < num_online; i++) {
+		if (!online[i])
+			continue;
+
+		topts.cpu = i;
+		topts.retval = 0;
+		err = bpf_prog_test_run_opts(prog_fd, &topts);
+		ASSERT_OK(err, "bpf_prog_test_run_opts");
+		ASSERT_EQ(topts.retval, 0, "bpf_prog_test_run_opts retval");
+	}
+
+	key = 0;
+	map_fd = lskel->maps.percpu.map_fd;
+	err = bpf_map_lookup_elem(map_fd, &key, percpu_data);
+	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+		goto out;
+
+	for (i = 0; i < num_online; i++) {
+		if (!online[i])
+			continue;
+
+		data = percpu_data + i;
+		ASSERT_EQ(data->data, 1, "percpu_data->data");
+		ASSERT_TRUE(data->run, "percpu_data->run");
+		ASSERT_EQ(data->data2, 0xc0de, "percpu_data->data2");
+	}
+
+out:
+	close(comm_fd);
+	test_global_percpu_data_lskel__destroy(lskel);
+	if (percpu_data)
+		free(percpu_data);
+	free(online);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_percpu_data.c b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
new file mode 100644
index 0000000000000..ada292d3a164c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_percpu_data.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Leon Hwang */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int data SEC(".percpu") = -1;
+int run SEC(".percpu") = 0;
+int data2 SEC(".percpu");
+
+SEC("raw_tp/task_rename")
+int update_percpu_data(struct __sk_buff *skb)
+{
+	data = 1;
+	run = 1;
+	data2 = 0xc0de;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
2.47.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2025-02-13 16:11 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-02-13 16:06 [PATCH bpf-next 0/4] bpf: Introduce global percpu data Leon Hwang
2025-02-13 16:06 ` [PATCH bpf-next 1/4] " Leon Hwang
2025-02-13 16:06 ` [PATCH bpf-next 2/4] bpf, libbpf: Support " Leon Hwang
2025-02-13 16:06 ` [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for " Leon Hwang
2025-02-13 16:06 ` [PATCH bpf-next 4/4] selftests/bpf: Add cases to test " Leon Hwang
  -- strict thread matches above, loose matches on Subject: below --
2025-01-27 16:21 [PATCH bpf-next 0/4] bpf: Introduce " Leon Hwang
2025-01-27 16:21 ` [PATCH bpf-next 3/4] bpf, bpftool: Generate skeleton for " Leon Hwang
2025-02-06  0:09   ` Andrii Nakryiko
2025-02-07  9:52     ` Leon Hwang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.