From: Yafang Shao <laoar.shao@gmail.com>
To: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
kafai@fb.com, songliubraving@fb.com, yhs@fb.com,
john.fastabend@gmail.com, kpsingh@kernel.org, sdf@google.com,
haoluo@google.com, jolsa@kernel.org, hannes@cmpxchg.org,
mhocko@kernel.org, roman.gushchin@linux.dev, shakeelb@google.com,
songmuchun@bytedance.com, akpm@linux-foundation.org
Cc: netdev@vger.kernel.org, bpf@vger.kernel.org, linux-mm@kvack.org,
Yafang Shao <laoar.shao@gmail.com>
Subject: [RFC PATCH bpf-next 15/15] bpf: Introduce selectable memcg for bpf map
Date: Fri, 29 Jul 2022 15:23:16 +0000 [thread overview]
Message-ID: <20220729152316.58205-16-laoar.shao@gmail.com> (raw)
In-Reply-To: <20220729152316.58205-1-laoar.shao@gmail.com>
A new member, memcg_fd, is added to the bpf_attr of the BPF_MAP_CREATE
command; it is the fd of an opened cgroup directory. The memory
subsystem must be enabled in this cgroup. This value is valid only when
BPF_F_SELECTABLE_MEMCG is set in map_flags. Once the kernel gets the
memory cgroup from this fd, it stores that memcg in the bpf map, and
all subsequent memory allocations for this map are charged to that
memcg.
The map creation paths in libbpf are changed accordingly.
Currently, only cgroup2 directories are supported.
This new member is used as follows:
struct bpf_map_create_opts map_opts = {
.sz = sizeof(map_opts),
.map_flags = BPF_F_SELECTABLE_MEMCG,
};
int memcg_fd, map_fd;
int key, value;
memcg_fd = open("/cgroup2", O_DIRECTORY);
if (memcg_fd < 0) {
perror("memcg dir open");
return -1;
}
map_opts.memcg_fd = memcg_fd;
map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "map_for_memcg",
sizeof(key), sizeof(value),
1024, &map_opts);
if (map_fd <= 0) {
perror("map create");
return -1;
}
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
include/uapi/linux/bpf.h | 2 ++
kernel/bpf/syscall.c | 47 ++++++++++++++++++++++++++--------
tools/include/uapi/linux/bpf.h | 2 ++
tools/lib/bpf/bpf.c | 1 +
tools/lib/bpf/bpf.h | 3 ++-
tools/lib/bpf/libbpf.c | 2 ++
6 files changed, 46 insertions(+), 11 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d5fc1ea70b59..a6e02c8be924 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1296,6 +1296,8 @@ union bpf_attr {
* struct stored as the
* map value
*/
+ __s32 memcg_fd; /* selectable memcg */
+ __s32 :32; /* hole */
/* Any per-map-type extra fields
*
* BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6401cc417fa9..9900e2b87315 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -402,14 +402,30 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock)
}
#ifdef CONFIG_MEMCG_KMEM
-static void bpf_map_save_memcg(struct bpf_map *map)
+static int bpf_map_save_memcg(struct bpf_map *map, union bpf_attr *attr)
{
- /* Currently if a map is created by a process belonging to the root
- * memory cgroup, get_obj_cgroup_from_current() will return NULL.
- * So we have to check map->objcg for being NULL each time it's
- * being used.
- */
- map->objcg = get_obj_cgroup_from_current();
+ struct obj_cgroup *objcg;
+ struct cgroup *cgrp;
+
+ if (attr->map_flags & BPF_F_SELECTABLE_MEMCG) {
+ cgrp = cgroup_get_from_fd(attr->memcg_fd);
+ if (IS_ERR(cgrp))
+ return -EINVAL;
+
+ objcg = get_obj_cgroup_from_cgroup(cgrp);
+ if (IS_ERR(objcg))
+ return PTR_ERR(objcg);
+ } else {
+ /* Currently if a map is created by a process belonging to the root
+ * memory cgroup, get_obj_cgroup_from_current() will return NULL.
+ * So we have to check map->objcg for being NULL each time it's
+ * being used.
+ */
+ objcg = get_obj_cgroup_from_current();
+ }
+
+ map->objcg = objcg;
+ return 0;
}
static void bpf_map_release_memcg(struct bpf_map *map)
@@ -485,8 +501,9 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size,
}
#else
-static void bpf_map_save_memcg(struct bpf_map *map)
+static int bpf_map_save_memcg(struct bpf_map *map, union bpf_attr *attr)
{
+ return 0;
}
static void bpf_map_release_memcg(struct bpf_map *map)
@@ -530,13 +547,18 @@ void *bpf_map_container_alloc(union bpf_attr *attr, u64 size, int numa_node)
{
struct bpf_map *map;
void *container;
+ int ret;
container = __bpf_map_area_alloc(size, numa_node, false);
if (!container)
return ERR_PTR(-ENOMEM);
map = (struct bpf_map *)container;
- bpf_map_save_memcg(map);
+ ret = bpf_map_save_memcg(map, attr);
+ if (ret) {
+ bpf_map_area_free(container);
+ return ERR_PTR(ret);
+ }
return container;
}
@@ -547,6 +569,7 @@ void *bpf_map_container_mmapable_alloc(union bpf_attr *attr, u64 size,
struct bpf_map *map;
void *container;
void *ptr;
+ int ret;
/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
ptr = __bpf_map_area_alloc(size, numa_node, true);
@@ -555,7 +578,11 @@ void *bpf_map_container_mmapable_alloc(union bpf_attr *attr, u64 size,
container = ptr + align - offset;
map = (struct bpf_map *)container;
- bpf_map_save_memcg(map);
+ ret = bpf_map_save_memcg(map, attr);
+ if (ret) {
+ bpf_map_area_free(ptr);
+ return ERR_PTR(ret);
+ }
return ptr;
}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d5fc1ea70b59..a6e02c8be924 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1296,6 +1296,8 @@ union bpf_attr {
* struct stored as the
* map value
*/
+ __s32 memcg_fd; /* selectable memcg */
+ __s32 :32; /* hole */
/* Any per-map-type extra fields
*
* BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 5eb0df90eb2b..662ce5808386 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -199,6 +199,7 @@ int bpf_map_create(enum bpf_map_type map_type,
attr.map_extra = OPTS_GET(opts, map_extra, 0);
attr.numa_node = OPTS_GET(opts, numa_node, 0);
attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
+ attr.memcg_fd = OPTS_GET(opts, memcg_fd, 0);
fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
return libbpf_err_errno(fd);
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 88a7cc4bd76f..481aad49422b 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -51,8 +51,9 @@ struct bpf_map_create_opts {
__u32 numa_node;
__u32 map_ifindex;
+ __u32 memcg_fd;
};
-#define bpf_map_create_opts__last_field map_ifindex
+#define bpf_map_create_opts__last_field memcg_fd
LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
const char *map_name,
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 50d41815f431..86916d550031 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -505,6 +505,7 @@ struct bpf_map {
bool pinned;
bool reused;
bool autocreate;
+ __s32 memcg_fd;
__u64 map_extra;
};
@@ -4928,6 +4929,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
create_attr.map_ifindex = map->map_ifindex;
create_attr.map_flags = def->map_flags;
create_attr.numa_node = map->numa_node;
+ create_attr.memcg_fd = map->memcg_fd;
create_attr.map_extra = map->map_extra;
if (bpf_map__is_struct_ops(map))
--
2.17.1
next prev parent reply other threads:[~2022-07-29 15:23 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-29 15:23 [RFC PATCH bpf-next 00/15] bpf: Introduce selectable memcg for bpf map Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 01/15] bpf: Remove unneeded memset in queue_stack_map creation Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 02/15] bpf: Use bpf_map_area_free instread of kvfree Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 03/15] bpf: Make __GFP_NOWARN consistent in bpf map creation Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 04/15] bpf: Use bpf_map_area_alloc consistently on " Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 05/15] bpf: Introduce helpers for container of struct bpf_map Yafang Shao
2022-08-02 4:58 ` Alexei Starovoitov
2022-08-02 13:47 ` Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 06/15] bpf: Use bpf_map_container_alloc helpers in various bpf maps Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 07/15] bpf: Define bpf_map_get_memcg for !CONFIG_MEMCG_KMEM Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 08/15] bpf: Use scope-based charge for bpf_map_area_alloc Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 09/15] bpf: Use bpf_map_kzalloc in arraymap Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 10/15] bpf: Use bpf_map_pages_alloc in ringbuf Yafang Shao
2022-08-01 23:16 ` Andrii Nakryiko
2022-08-02 13:31 ` Yafang Shao
2022-08-02 18:00 ` Andrii Nakryiko
2022-08-03 13:27 ` Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 11/15] bpf: Use bpf_map_kvcalloc in bpf_local_storage Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 12/15] mm, memcg: Add new helper get_obj_cgroup_from_cgroup Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 13/15] bpf: Add new parameter into bpf_map_container_alloc Yafang Shao
2022-07-29 15:23 ` [RFC PATCH bpf-next 14/15] bpf: Add new map flag BPF_F_SELECTABLE_MEMCG Yafang Shao
2022-07-29 15:23 ` Yafang Shao [this message]
2022-08-02 4:55 ` [RFC PATCH bpf-next 15/15] bpf: Introduce selectable memcg for bpf map Alexei Starovoitov
2022-08-02 13:47 ` Yafang Shao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220729152316.58205-16-laoar.shao@gmail.com \
--to=laoar.shao@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=hannes@cmpxchg.org \
--cc=haoluo@google.com \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kafai@fb.com \
--cc=kpsingh@kernel.org \
--cc=linux-mm@kvack.org \
--cc=mhocko@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=roman.gushchin@linux.dev \
--cc=sdf@google.com \
--cc=shakeelb@google.com \
--cc=songliubraving@fb.com \
--cc=songmuchun@bytedance.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).