* [RFC bpf-next v3 1/7] bpf: define BPF_UPTR a new enumerator of btf_field_type.
2024-08-14 3:30 [RFC bpf-next v3 0/7] Share user memory to BPF program through task storage map Kui-Feng Lee
@ 2024-08-14 3:30 ` Kui-Feng Lee
2024-08-14 3:30 ` [RFC bpf-next v3 2/7] bpf: Parse and support "uptr" tag Kui-Feng Lee
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Kui-Feng Lee @ 2024-08-14 3:30 UTC (permalink / raw)
To: bpf, ast, martin.lau, song, kernel-team, andrii
Cc: sinquersw, kuifeng, Kui-Feng Lee
Define BPF_UPTR, and modify functions that describe attributes of a field
type.
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
include/linux/bpf.h | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9f35df07e86d..954e476b5605 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -203,6 +203,7 @@ enum btf_field_type {
BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD,
BPF_REFCOUNT = (1 << 9),
BPF_WORKQUEUE = (1 << 10),
+ BPF_UPTR = (1 << 11),
};
typedef void (*btf_dtor_kfunc_t)(void *);
@@ -322,6 +323,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
return "kptr";
case BPF_KPTR_PERCPU:
return "percpu_kptr";
+ case BPF_UPTR:
+ return "uptr";
case BPF_LIST_HEAD:
return "bpf_list_head";
case BPF_LIST_NODE:
@@ -350,6 +353,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
+ case BPF_UPTR:
return sizeof(u64);
case BPF_LIST_HEAD:
return sizeof(struct bpf_list_head);
@@ -379,6 +383,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
+ case BPF_UPTR:
return __alignof__(u64);
case BPF_LIST_HEAD:
return __alignof__(struct bpf_list_head);
@@ -419,6 +424,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
+ case BPF_UPTR:
break;
default:
WARN_ON_ONCE(1);
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC bpf-next v3 2/7] bpf: Parse and support "uptr" tag.
2024-08-14 3:30 [RFC bpf-next v3 0/7] Share user memory to BPF program through task storage map Kui-Feng Lee
2024-08-14 3:30 ` [RFC bpf-next v3 1/7] bpf: define BPF_UPTR a new enumerator of btf_field_type Kui-Feng Lee
@ 2024-08-14 3:30 ` Kui-Feng Lee
2024-08-14 3:30 ` [RFC bpf-next v3 3/7] bpf: Handle BPF_UPTR in verifier Kui-Feng Lee
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Kui-Feng Lee @ 2024-08-14 3:30 UTC (permalink / raw)
To: bpf, ast, martin.lau, song, kernel-team, andrii
Cc: sinquersw, kuifeng, Kui-Feng Lee
Parse "uptr" tag from BTF, map it to BPF_UPTR, and support it in related
functions. The "uptr" tag annotates a field in a struct type as a uptr,
which is used to share a block of memory between user programs and BPF
programs.
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
kernel/bpf/btf.c | 5 +++++
kernel/bpf/syscall.c | 2 ++
2 files changed, 7 insertions(+)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index c4506d788c85..9db3e7d2fa66 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3361,6 +3361,8 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
type = BPF_KPTR_REF;
else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off)))
type = BPF_KPTR_PERCPU;
+ else if (!strcmp("uptr", __btf_name_by_offset(btf, t->name_off)))
+ type = BPF_UPTR;
else
return -EINVAL;
@@ -3538,6 +3540,7 @@ static int btf_repeat_fields(struct btf_field_info *info,
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
+ case BPF_UPTR:
case BPF_LIST_HEAD:
case BPF_RB_ROOT:
break;
@@ -3664,6 +3667,7 @@ static int btf_find_field_one(const struct btf *btf,
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
+ case BPF_UPTR:
ret = btf_find_kptr(btf, var_type, off, sz,
info_cnt ? &info[0] : &tmp);
if (ret < 0)
@@ -3988,6 +3992,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
+ case BPF_UPTR:
ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]);
if (ret < 0)
goto end;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 65dcd92d0b2c..fed4a2145f81 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -548,6 +548,7 @@ void btf_record_free(struct btf_record *rec)
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
+ case BPF_UPTR:
if (rec->fields[i].kptr.module)
module_put(rec->fields[i].kptr.module);
btf_put(rec->fields[i].kptr.btf);
@@ -596,6 +597,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
+ case BPF_UPTR:
btf_get(fields[i].kptr.btf);
if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
ret = -ENXIO;
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC bpf-next v3 3/7] bpf: Handle BPF_UPTR in verifier.
2024-08-14 3:30 [RFC bpf-next v3 0/7] Share user memory to BPF program through task storage map Kui-Feng Lee
2024-08-14 3:30 ` [RFC bpf-next v3 1/7] bpf: define BPF_UPTR a new enumerator of btf_field_type Kui-Feng Lee
2024-08-14 3:30 ` [RFC bpf-next v3 2/7] bpf: Parse and support "uptr" tag Kui-Feng Lee
@ 2024-08-14 3:30 ` Kui-Feng Lee
2024-08-14 3:30 ` [RFC bpf-next v3 4/7] bpf: add helper functions of pinning and converting BPF_UPTR Kui-Feng Lee
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Kui-Feng Lee @ 2024-08-14 3:30 UTC (permalink / raw)
To: bpf, ast, martin.lau, song, kernel-team, andrii
Cc: sinquersw, kuifeng, Kui-Feng Lee
Mark the register loaded from a uptr field as PTR_TO_MEM | PTR_MAYBE_NULL,
with the size of the pointed-to type, to make the memory readable and writable.
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
kernel/bpf/verifier.c | 37 ++++++++++++++++++++++++++++++++++++-
1 file changed, 36 insertions(+), 1 deletion(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e3932f8ce10a..5bc5b37b63cc 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5340,6 +5340,10 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
int perm_flags;
const char *reg_name = "";
+ if (kptr_field->type == BPF_UPTR)
+ /* BPF programs should not change any user kptr */
+ return -EACCES;
+
if (btf_is_kernel(reg->btf)) {
perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
@@ -5488,6 +5492,29 @@ static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr
return ret;
}
+/* Mark the register that receives a value loaded from a uptr map field.
+ *
+ * @env:   verifier environment
+ * @regno: destination register of the load
+ * @field: the BPF_UPTR field being loaded; field->kptr.btf and
+ *         field->kptr.btf_id identify the pointed-to type
+ *
+ * The register becomes PTR_TO_MEM | PTR_MAYBE_NULL with mem_size set to
+ * the size of the pointed-to type, and gets a new id from env->id_gen.
+ *
+ * Returns 0 on success, or -EACCES when the size of the pointed-to type
+ * cannot be resolved.
+ */
+static int mark_uptr_ld_reg(struct bpf_verifier_env *env, u32 regno,
+			    struct btf_field *field)
+{
+	struct bpf_reg_state *val_reg;
+	const struct btf_type *t;
+	u32 type_id, tsz;
+
+	val_reg = reg_state(env, regno);
+	type_id = field->kptr.btf_id;
+	t = btf_type_id_size(field->kptr.btf, &type_id, &tsz);
+	if (!t) {
+		/* Newline-terminated like the other verifier messages. */
+		verbose(env, "The type of uptr is invalid\n");
+		return -EACCES;
+	}
+
+	mark_reg_known_zero(env, cur_regs(env), regno);
+	val_reg->type = PTR_TO_MEM | PTR_MAYBE_NULL;
+	val_reg->mem_size = tsz;
+	val_reg->id = ++env->id_gen;
+
+	return 0;
+}
+
static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
int value_regno, int insn_idx,
struct btf_field *kptr_field)
@@ -5516,9 +5543,16 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
verbose(env, "store to referenced kptr disallowed\n");
return -EACCES;
}
+ if (class != BPF_LDX && kptr_field->type == BPF_UPTR) {
+ verbose(env, "store to uptr disallowed\n");
+ return -EACCES;
+ }
if (class == BPF_LDX) {
val_reg = reg_state(env, value_regno);
+ if (kptr_field->type == BPF_UPTR)
+ return mark_uptr_ld_reg(env, value_regno, kptr_field);
+
/* We can simply mark the value_regno receiving the pointer
* value from map as PTR_TO_BTF_ID, with the correct type.
*/
@@ -5576,6 +5610,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
+ case BPF_UPTR:
if (src != ACCESS_DIRECT) {
verbose(env, "kptr cannot be accessed indirectly by helper\n");
return -EACCES;
@@ -6956,7 +6991,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
return err;
if (tnum_is_const(reg->var_off))
kptr_field = btf_record_find(reg->map_ptr->record,
- off + reg->var_off.value, BPF_KPTR);
+ off + reg->var_off.value, BPF_KPTR | BPF_UPTR);
if (kptr_field) {
err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
} else if (t == BPF_READ && value_regno >= 0) {
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC bpf-next v3 4/7] bpf: add helper functions of pinning and converting BPF_UPTR.
2024-08-14 3:30 [RFC bpf-next v3 0/7] Share user memory to BPF program through task storage map Kui-Feng Lee
` (2 preceding siblings ...)
2024-08-14 3:30 ` [RFC bpf-next v3 3/7] bpf: Handle BPF_UPTR in verifier Kui-Feng Lee
@ 2024-08-14 3:30 ` Kui-Feng Lee
2024-08-14 3:30 ` [RFC bpf-next v3 5/7] bpf: pin, translate, and unpin __uptr from syscalls Kui-Feng Lee
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: Kui-Feng Lee @ 2024-08-14 3:30 UTC (permalink / raw)
To: bpf, ast, martin.lau, song, kernel-team, andrii
Cc: sinquersw, kuifeng, Kui-Feng Lee, linux-mm
The helper functions manage uptrs in BPF map values from userspace. These
uptrs point to user memory, so we must pin and convert them into kernel
address space for BPF programs to access them in the kernel. These helper
functions will be utilized by the upcoming patches.
To access uptrs in BPF programs, they are pinned using
pin_user_pages_fast(), but the conversion to kernel addresses is actually
done by page_address(). The uptrs can be unpinned using unpin_user_pages().
Currently, the memory block pointed to by a uptr must reside in a single
memory page, as crossing multiple pages is not supported.
Cc: linux-mm@kvack.org
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
include/linux/bpf.h | 30 ++++++++++
kernel/bpf/helpers.c | 20 +++++++
kernel/bpf/syscall.c | 132 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 182 insertions(+)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 954e476b5605..886c818ff555 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -477,6 +477,8 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
data_race(*ldst++ = *lsrc++);
}
+void bpf_obj_unpin_uptr(const struct btf_field *field, void *addr);
+
/* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */
static inline void bpf_obj_memcpy(struct btf_record *rec,
void *dst, void *src, u32 size,
@@ -503,6 +505,34 @@ static inline void bpf_obj_memcpy(struct btf_record *rec,
memcpy(dst + curr_off, src + curr_off, size - curr_off);
}
+/* Copy every BPF_UPTR field of @src into @dst.
+ *
+ * For each uptr field described by @rec, a non-NULL pointer currently
+ * stored in @dst is passed to bpf_obj_unpin_uptr() before the slot is
+ * overwritten with the pointer found at the same offset in @src.
+ * Nothing is done when @rec is NULL or an error pointer.
+ */
+static inline void bpf_obj_uptrcpy(struct btf_record *rec,
+				   void *dst, void *src)
+{
+	const struct btf_field *field;
+	void **dst_slot, **src_slot;
+	int idx;
+
+	if (IS_ERR_OR_NULL(rec))
+		return;
+
+	for (idx = 0; idx < rec->cnt; idx++) {
+		field = &rec->fields[idx];
+		if (field->type != BPF_UPTR)
+			continue;
+
+		/* Alignments are guaranteed by btf_find_field_one(). */
+		dst_slot = dst + field->offset;
+		src_slot = src + field->offset;
+
+		/* Release the address being replaced, if there is one. */
+		if (*dst_slot)
+			bpf_obj_unpin_uptr(field, *dst_slot);
+
+		*dst_slot = *src_slot;
+	}
+}
+
+void copy_map_uptr_locked(struct bpf_map *map, void *dst, void *src, bool lock_src);
+
static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
{
bpf_obj_memcpy(map->record, dst, src, map->value_size, false);
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index d02ae323996b..d588b52605b9 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -388,6 +388,26 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
preempt_enable();
}
+/* Copy the map value and its uptr fields from src to dst while holding the
+ * bpf_spin_lock embedded in one of the two values.
+ *
+ * @map:      map whose record describes the value layout
+ * @dst:      destination map value
+ * @src:      source map value
+ * @lock_src: true to take the lock located in @src, false to take the one
+ *            located in @dst
+ *
+ * The parameter order is (dst, src), matching the prototype in
+ * include/linux/bpf.h and the way callers pass their arguments; declaring
+ * them the other way round here would copy in the wrong direction and
+ * take the lock in the wrong value.
+ */
+void copy_map_uptr_locked(struct bpf_map *map, void *dst, void *src,
+			  bool lock_src)
+{
+	struct bpf_spin_lock *lock;
+
+	if (lock_src)
+		lock = src + map->record->spin_lock_off;
+	else
+		lock = dst + map->record->spin_lock_off;
+	preempt_disable();
+	__bpf_spin_lock_irqsave(lock);
+	/* Regular fields first, then the uptr slots (unpinning old ones). */
+	copy_map_value(map, dst, src);
+	bpf_obj_uptrcpy(map->record, dst, src);
+	__bpf_spin_unlock_irqrestore(lock);
+	preempt_enable();
+}
+
BPF_CALL_0(bpf_jiffies64)
{
return get_jiffies_64();
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index fed4a2145f81..d504f5eb955a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -155,6 +155,138 @@ static void maybe_wait_bpf_programs(struct bpf_map *map)
synchronize_rcu();
}
+/* Unpin the page behind a uptr that has been translated to a kernel address.
+ *
+ * @field: the BPF_UPTR field; field->kptr.btf and field->kptr.btf_id
+ *         identify the pointed-to type, whose size bounds the object
+ * @addr:  the kernel address stored in the map value
+ *
+ * Nothing is unpinned when the pointed-to type cannot be sized or has size
+ * zero. An object spanning more than one page triggers a one-time warning
+ * and is left alone, since only single-page objects are supported.
+ */
+void bpf_obj_unpin_uptr(const struct btf_field *field, void *addr)
+{
+	struct page *pages[1];
+	u32 size, type_id;
+	int npages;
+	void *ptr;
+
+	type_id = field->kptr.btf_id;
+	/* size is only meaningful when the lookup succeeds, so check the
+	 * return value the same way the pin path does before reading it.
+	 */
+	if (!btf_type_id_size(field->kptr.btf, &type_id, &size))
+		return;
+	if (size == 0)
+		return;
+
+	/* Start of the page containing addr. */
+	ptr = (void *)((intptr_t)addr & PAGE_MASK);
+
+	/* Number of pages covered by [addr, addr + size). */
+	npages = (((intptr_t)addr + size + ~PAGE_MASK) - (intptr_t)ptr) >> PAGE_SHIFT;
+	if (WARN_ON_ONCE(npages > 1))
+		return;
+
+	pages[0] = virt_to_page(ptr);
+	unpin_user_pages(pages, 1);
+}
+
+/* Unpin and clear the uptr fields found among the first @cnt fields of
+ * @rec in the map value at @src. Fields that are already NULL are skipped.
+ */
+static void bpf_obj_unpin_uptrs_cnt(struct btf_record *rec, int cnt, void *src)
+{
+	const struct btf_field *field;
+	void **slot;
+	int idx;
+
+	for (idx = 0; idx < cnt; idx++) {
+		field = &rec->fields[idx];
+		if (field->type != BPF_UPTR)
+			continue;
+
+		slot = src + field->offset;
+		if (!*slot)
+			continue;
+
+		bpf_obj_unpin_uptr(field, *slot);
+		*slot = NULL;
+	}
+}
+
+/* Find all BPF_UPTR fields in the record, pin the user memory, map it
+ * to kernel space, and update the addresses in the source memory.
+ *
+ * The map value passed in from userspace may contain uptrs pointing to
+ * user memory. This function pins the user memory and translates each
+ * uptr into a kernel address so that BPF programs can access it.
+ *
+ * @rec:  record describing the fields of the map value; a NULL or error
+ *        record, or one without any BPF_UPTR field, makes this a no-op
+ * @src:  map value whose uptr fields are rewritten in place
+ * @size: size in bytes of the map value at @src
+ *
+ * Returns 0 on success. On failure a negative error is returned and the
+ * uptr fields that precede the failing one are unpinned and reset to
+ * NULL. -EFAULT is used when a field does not fit in @size, when the
+ * pointed-to type cannot be sized, when the object would span more than
+ * one page, or when the pinned page has no kernel address; otherwise the
+ * error comes from pin_user_pages_fast().
+ */
+static int bpf_obj_trans_pin_uptrs(struct btf_record *rec, void *src, u32 size)
+{
+ u32 type_id, tsz, npages, next_off;
+ void *uaddr, *kaddr, **uaddr_ptr;
+ const struct btf_type *t;
+ struct page *pages[1];
+ int i, err;
+
+ if (IS_ERR_OR_NULL(rec))
+ return 0;
+
+ if (!btf_record_has_field(rec, BPF_UPTR))
+ return 0;
+
+ for (i = 0; i < rec->cnt; i++) {
+ if (rec->fields[i].type != BPF_UPTR)
+ continue;
+
+ next_off = rec->fields[i].offset;
+ if (next_off + sizeof(void *) > size) {
+ err = -EFAULT;
+ goto rollback;
+ }
+ uaddr_ptr = src + next_off;
+ uaddr = *uaddr_ptr;
+ if (!uaddr)
+ continue;
+
+ /* Make sure the user memory takes up at most one page.
+ *
+ * The size of the pointed-to type is resolved first; a type of
+ * size zero pins nothing and the field is reset to NULL.
+ */
+ type_id = rec->fields[i].kptr.btf_id;
+ t = btf_type_id_size(rec->fields[i].kptr.btf, &type_id, &tsz);
+ if (!t) {
+ err = -EFAULT;
+ goto rollback;
+ }
+ if (tsz == 0) {
+ *uaddr_ptr = NULL;
+ continue;
+ }
+ npages = (((intptr_t)uaddr + tsz + ~PAGE_MASK) -
+ ((intptr_t)uaddr & PAGE_MASK)) >> PAGE_SHIFT;
+ if (npages > 1) {
+ /* Allow only one page: an object that crosses a page
+ * boundary is rejected.
+ */
+ err = -EFAULT;
+ goto rollback;
+ }
+
+ /* Pin the single user page that contains the object, using
+ * FOLL_LONGTERM | FOLL_WRITE.
+ */
+ err = pin_user_pages_fast((intptr_t)uaddr, 1, FOLL_LONGTERM | FOLL_WRITE, pages);
+ if (err < 0)
+ goto rollback;
+
+ /* Map to kernel space: take the kernel address of the pinned
+ * page, then add back the offset of uaddr within its page.
+ * If the page has no kernel address, drop the pin just taken
+ * before rolling back the earlier fields.
+ */
+ kaddr = page_address(pages[0]);
+ if (unlikely(!kaddr)) {
+ WARN_ON_ONCE(1);
+ unpin_user_pages(pages, 1);
+ err = -EFAULT;
+ goto rollback;
+ }
+ *uaddr_ptr = kaddr + ((intptr_t)uaddr & ~PAGE_MASK);
+ }
+
+ return 0;
+
+rollback:
+ /* Unpin the user memory of earlier fields (indices below i) and
+ * reset them to NULL.
+ */
+ bpf_obj_unpin_uptrs_cnt(rec, i, src);
+
+ return err;
+}
+
+/* Unpin and clear every uptr field of the map value at @src. This is a
+ * no-op when @rec is NULL, an error pointer, or has no BPF_UPTR field.
+ */
+static void bpf_obj_unpin_uptrs(struct btf_record *rec, void *src)
+{
+	if (!IS_ERR_OR_NULL(rec) && btf_record_has_field(rec, BPF_UPTR))
+		bpf_obj_unpin_uptrs_cnt(rec, rec->cnt, src);
+}
+
static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
void *key, void *value, __u64 flags)
{
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC bpf-next v3 5/7] bpf: pin, translate, and unpin __uptr from syscalls.
2024-08-14 3:30 [RFC bpf-next v3 0/7] Share user memory to BPF program through task storage map Kui-Feng Lee
` (3 preceding siblings ...)
2024-08-14 3:30 ` [RFC bpf-next v3 4/7] bpf: add helper functions of pinning and converting BPF_UPTR Kui-Feng Lee
@ 2024-08-14 3:30 ` Kui-Feng Lee
2024-08-14 3:30 ` [RFC bpf-next v3 6/7] libbpf: define __uptr Kui-Feng Lee
2024-08-14 3:30 ` [RFC bpf-next v3 7/7] selftests/bpf: test __uptr on the value of a task storage map Kui-Feng Lee
6 siblings, 0 replies; 8+ messages in thread
From: Kui-Feng Lee @ 2024-08-14 3:30 UTC (permalink / raw)
To: bpf, ast, martin.lau, song, kernel-team, andrii
Cc: sinquersw, kuifeng, Kui-Feng Lee
When a user program updates a map value, every uptr will be pinned and
translated to an address in the kernel. This process is initiated by
calling bpf_map_update_elem() from user programs.
Currently, uptr is only supported by task storage maps and can only be set
by user programs through syscalls.
When the value of an uptr is overwritten or destroyed, the memory pointed
to by the old value must be unpinned. This is ensured by calling
bpf_obj_uptrcpy() and copy_map_uptr_locked() when updating map value and by
bpf_obj_free_fields() when destroying map value.
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
kernel/bpf/bpf_local_storage.c | 23 ++++++++++++++-----
kernel/bpf/syscall.c | 40 +++++++++++++++++++++++++++++++---
2 files changed, 55 insertions(+), 8 deletions(-)
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index c938dea5ddbf..2fafad53b9d9 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -99,8 +99,11 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
}
if (selem) {
- if (value)
+ if (value) {
copy_map_value(&smap->map, SDATA(selem)->data, value);
+ if (smap->map.map_type == BPF_MAP_TYPE_TASK_STORAGE)
+ bpf_obj_uptrcpy(smap->map.record, SDATA(selem)->data, value);
+ }
/* No need to call check_and_init_map_value as memory is zero init */
return selem;
}
@@ -575,8 +578,13 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
if (err)
return ERR_PTR(err);
if (old_sdata && selem_linked_to_storage_lockless(SELEM(old_sdata))) {
- copy_map_value_locked(&smap->map, old_sdata->data,
- value, false);
+ if (smap->map.map_type == BPF_MAP_TYPE_TASK_STORAGE &&
+ btf_record_has_field(smap->map.record, BPF_UPTR))
+ copy_map_uptr_locked(&smap->map, old_sdata->data,
+ value, false);
+ else
+ copy_map_value_locked(&smap->map, old_sdata->data,
+ value, false);
return old_sdata;
}
}
@@ -607,8 +615,13 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
goto unlock;
if (old_sdata && (map_flags & BPF_F_LOCK)) {
- copy_map_value_locked(&smap->map, old_sdata->data, value,
- false);
+ if (smap->map.map_type == BPF_MAP_TYPE_TASK_STORAGE &&
+ btf_record_has_field(smap->map.record, BPF_UPTR))
+ copy_map_uptr_locked(&smap->map, old_sdata->data,
+ value, false);
+ else
+ copy_map_value_locked(&smap->map, old_sdata->data,
+ value, false);
selem = SELEM(old_sdata);
goto unlock;
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d504f5eb955a..1854aeb13ff7 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -287,8 +287,8 @@ static void bpf_obj_unpin_uptrs(struct btf_record *rec, void *src)
bpf_obj_unpin_uptrs_cnt(rec, rec->cnt, src);
}
-static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
- void *key, void *value, __u64 flags)
+static int bpf_map_update_value_inner(struct bpf_map *map, struct file *map_file,
+ void *key, void *value, __u64 flags)
{
int err;
@@ -340,6 +340,29 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
return err;
}
+/* Update a map element on behalf of a syscall.
+ *
+ * For task storage maps the uptr fields of @value are pinned and
+ * translated before the update is attempted, and unpinned again if the
+ * update fails. All other map types go straight to
+ * bpf_map_update_value_inner().
+ *
+ * Returns 0 on success or a negative error.
+ */
+static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
+				void *key, void *value, __u64 flags)
+{
+	bool task_storage = map->map_type == BPF_MAP_TYPE_TASK_STORAGE;
+	int ret;
+
+	if (task_storage) {
+		/* Pinning user memory can lead to a context switch, so it
+		 * has to happen before any RCU lock may be taken.
+		 */
+		ret = bpf_obj_trans_pin_uptrs(map->record, value,
+					      bpf_map_value_size(map));
+		if (ret)
+			return ret;
+	}
+
+	ret = bpf_map_update_value_inner(map, map_file, key, value, flags);
+	if (ret && task_storage)
+		bpf_obj_unpin_uptrs(map->record, value);
+
+	return ret;
+}
+
static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
__u64 flags)
{
@@ -846,6 +869,11 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
field->kptr.dtor(xchgd_field);
}
break;
+ case BPF_UPTR:
+ if (*(void **)field_ptr)
+ bpf_obj_unpin_uptr(field, *(void **)field_ptr);
+ *(void **)field_ptr = NULL;
+ break;
case BPF_LIST_HEAD:
if (WARN_ON_ONCE(rec->spin_lock_off < 0))
continue;
@@ -1231,7 +1259,7 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
map->record = btf_parse_fields(btf, value_type,
BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
- BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE,
+ BPF_RB_ROOT | BPF_REFCOUNT | BPF_WORKQUEUE | BPF_UPTR,
map->value_size);
if (!IS_ERR_OR_NULL(map->record)) {
int i;
@@ -1287,6 +1315,12 @@ static int map_check_btf(struct bpf_map *map, struct bpf_token *token,
goto free_map_tab;
}
break;
+ case BPF_UPTR:
+ if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) {
+ ret = -EOPNOTSUPP;
+ goto free_map_tab;
+ }
+ break;
case BPF_LIST_HEAD:
case BPF_RB_ROOT:
if (map->map_type != BPF_MAP_TYPE_HASH &&
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC bpf-next v3 6/7] libbpf: define __uptr.
2024-08-14 3:30 [RFC bpf-next v3 0/7] Share user memory to BPF program through task storage map Kui-Feng Lee
` (4 preceding siblings ...)
2024-08-14 3:30 ` [RFC bpf-next v3 5/7] bpf: pin, translate, and unpin __uptr from syscalls Kui-Feng Lee
@ 2024-08-14 3:30 ` Kui-Feng Lee
2024-08-14 3:30 ` [RFC bpf-next v3 7/7] selftests/bpf: test __uptr on the value of a task storage map Kui-Feng Lee
6 siblings, 0 replies; 8+ messages in thread
From: Kui-Feng Lee @ 2024-08-14 3:30 UTC (permalink / raw)
To: bpf, ast, martin.lau, song, kernel-team, andrii
Cc: sinquersw, kuifeng, Kui-Feng Lee
Make __uptr available to BPF programs to enable them to define uptrs.
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
tools/lib/bpf/bpf_helpers.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 305c62817dd3..7ff9d947b976 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -185,6 +185,7 @@ enum libbpf_tristate {
#define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted")))
#define __kptr __attribute__((btf_type_tag("kptr")))
#define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr")))
+#define __uptr __attribute__((btf_type_tag("uptr")))
#if defined (__clang__)
#define bpf_ksym_exists(sym) ({ \
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [RFC bpf-next v3 7/7] selftests/bpf: test __uptr on the value of a task storage map.
2024-08-14 3:30 [RFC bpf-next v3 0/7] Share user memory to BPF program through task storage map Kui-Feng Lee
` (5 preceding siblings ...)
2024-08-14 3:30 ` [RFC bpf-next v3 6/7] libbpf: define __uptr Kui-Feng Lee
@ 2024-08-14 3:30 ` Kui-Feng Lee
6 siblings, 0 replies; 8+ messages in thread
From: Kui-Feng Lee @ 2024-08-14 3:30 UTC (permalink / raw)
To: bpf, ast, martin.lau, song, kernel-team, andrii
Cc: sinquersw, kuifeng, Kui-Feng Lee
Make sure the memory of uptrs has been mapped to the kernel properly. Also
ensure the values of uptrs in the kernel haven't been copied to userspace.
Signed-off-by: Kui-Feng Lee <thinker.li@gmail.com>
---
.../bpf/prog_tests/task_local_storage.c | 106 ++++++++++++++++++
.../selftests/bpf/progs/task_ls_uptr.c | 65 +++++++++++
2 files changed, 171 insertions(+)
create mode 100644 tools/testing/selftests/bpf/progs/task_ls_uptr.c
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
index c33c05161a9e..5709b083021c 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -5,6 +5,7 @@
#include <unistd.h>
#include <sched.h>
#include <pthread.h>
+#include <sys/eventfd.h>
#include <sys/syscall.h> /* For SYS_xxx definitions */
#include <sys/types.h>
#include <test_progs.h>
@@ -14,6 +15,20 @@
#include "task_ls_recursion.skel.h"
#include "task_storage_nodeadlock.skel.h"
+struct user_data {
+ int a;
+ int b;
+ int result;
+};
+
+struct value_type {
+ struct user_data *udata;
+};
+
+#define MAGIC_VALUE 0xabcd1234
+
+#include "task_ls_uptr.skel.h"
+
static void test_sys_enter_exit(void)
{
struct task_local_storage *skel;
@@ -40,6 +55,95 @@ static void test_sys_enter_exit(void)
task_local_storage__destroy(skel);
}
+static struct user_data user_data __attribute__((aligned(16))) = {
+ .a = 1,
+ .b = 2,
+};
+
+/* Check that a uptr stored in a task storage map value lets a BPF program
+ * read and write the parent's user memory, and that the kernel address of
+ * the uptr is not handed back by bpf_map_lookup_elem().
+ *
+ * Every failure path goes through the common cleanup at "out" so that the
+ * skeleton and the file descriptors are released and the test runner keeps
+ * running.
+ */
+static void test_uptr(void)
+{
+	struct task_ls_uptr *skel = NULL;
+	int task_fd = -1, ev_fd = -1;
+	struct value_type value;
+	int err, wstatus;
+	__u64 dummy = 1;
+	pid_t pid;
+
+	value.udata = &user_data;
+
+	task_fd = sys_pidfd_open(getpid(), 0);
+	if (!ASSERT_NEQ(task_fd, -1, "sys_pidfd_open"))
+		goto out;
+
+	ev_fd = eventfd(0, 0);
+	if (!ASSERT_NEQ(ev_fd, -1, "eventfd"))
+		goto out;
+
+	skel = task_ls_uptr__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		goto out;
+
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.datamap), &task_fd, &value, 0);
+	if (!ASSERT_OK(err, "update_datamap"))
+		goto out;
+
+	err = task_ls_uptr__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto out;
+
+	/* Flush before forking so buffered output is not emitted twice. */
+	fflush(stdout);
+	fflush(stderr);
+
+	pid = fork();
+	if (!ASSERT_GE(pid, 0, "fork"))
+		goto out;
+
+	/* Call syscall in the child process, but access the map value of
+	 * the parent process in the BPF program to check if the uptr is
+	 * translated/mapped correctly.
+	 */
+	if (pid == 0) {
+		/* child */
+
+		/* Overwrite the user_data in the child process to check if
+		 * the BPF program accesses the user_data of the parent.
+		 */
+		user_data.a = 0;
+		user_data.b = 0;
+
+		/* Wait for the parent to set child_pid */
+		read(ev_fd, &dummy, sizeof(dummy));
+
+		exit(0);
+	}
+
+	skel->bss->parent_pid = syscall(SYS_gettid);
+	skel->bss->child_pid = pid;
+
+	/* Release the child now that the BPF program knows both pids. */
+	write(ev_fd, &dummy, sizeof(dummy));
+
+	err = waitpid(pid, &wstatus, 0);
+	ASSERT_EQ(err, pid, "waitpid");
+	skel->bss->child_pid = 0;
+
+	ASSERT_EQ(MAGIC_VALUE + user_data.a + user_data.b,
+		  user_data.result, "result");
+
+	/* Check if user programs can access the value of uptrs through
+	 * bpf_map_lookup_elem(). Make sure the kernel value is not
+	 * leaked.
+	 */
+	err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.datamap), &task_fd, &value);
+	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+		goto out;
+	ASSERT_EQ(value.udata, NULL, "lookup_udata");
+
+out:
+	task_ls_uptr__destroy(skel);
+	/* Only close descriptors that were actually opened. */
+	if (ev_fd >= 0)
+		close(ev_fd);
+	if (task_fd >= 0)
+		close(task_fd);
+}
+
static void test_exit_creds(void)
{
struct task_local_storage_exit_creds *skel;
@@ -237,4 +341,6 @@ void test_task_local_storage(void)
test_recursion();
if (test__start_subtest("nodeadlock"))
test_nodeadlock();
+ if (test__start_subtest("uptr"))
+ test_uptr();
}
diff --git a/tools/testing/selftests/bpf/progs/task_ls_uptr.c b/tools/testing/selftests/bpf/progs/task_ls_uptr.c
new file mode 100644
index 000000000000..473e6890d522
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_ls_uptr.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct user_data {
+ int a;
+ int b;
+ int result;
+};
+
+struct value_type {
+ struct user_data __uptr *udata;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct value_type);
+} datamap SEC(".maps");
+
+#define MAGIC_VALUE 0xabcd1234
+
+/* This is a workaround to avoid clang generating a forward reference for
+ * struct user_data. This is a known issue and will be fixed in the future.
+ */
+struct user_data __dummy;
+
+pid_t child_pid = 0;
+pid_t parent_pid = 0;
+
+/* On every syscall entry made by the child task, look up the parent's
+ * task storage and, when its uptr is set, write
+ * MAGIC_VALUE + a + b into the result member of the shared user_data.
+ */
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+	struct task_struct *cur, *owner;
+	struct user_data *shared;
+	struct value_type *val;
+
+	/* Only syscalls issued by the child are of interest. */
+	cur = bpf_get_current_task_btf();
+	if (cur->pid != child_pid)
+		return 0;
+
+	owner = bpf_task_from_pid(parent_pid);
+	if (!owner)
+		return 0;
+
+	val = bpf_task_storage_get(&datamap, owner, 0,
+				   BPF_LOCAL_STORAGE_GET_F_CREATE);
+	/* The task reference is dropped right after the lookup. */
+	bpf_task_release(owner);
+	if (!val)
+		return 0;
+
+	shared = val->udata;
+	if (shared)
+		shared->result = MAGIC_VALUE + shared->a + shared->b;
+
+	return 0;
+}
--
2.34.1
^ permalink raw reply related [flat|nested] 8+ messages in thread