From: Menglong Dong <menglong.dong@linux.dev>
To: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
martin.lau@linux.dev, eddyz87@gmail.com, memxor@gmail.com,
song@kernel.org, yonghong.song@linux.dev, jolsa@kernel.org,
john.fastabend@gmail.com, kpsingh@kernel.org,
mattbobrowski@google.com, jiayuan.chen@linux.dev,
Feng Yang <yangfeng59949@163.com>
Cc: bpf@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-kselftest@vger.kernel.org
Subject: Re: [PATCH v2 bpf-next 1/2] bpf: Fix Null-Pointer Dereference in kernel_clone() via BPF fmod_ret on security_task_alloc
Date: Fri, 10 Apr 2026 17:20:28 +0800 [thread overview]
Message-ID: <13985062.uLZWGnKmhe@7940hx> (raw)
In-Reply-To: <20260410061037.149532-2-yangfeng59949@163.com>
On 2026/4/10 14:10 Feng Yang <yangfeng59949@163.com> wrote:
> From: Feng Yang <yangfeng@kylinos.cn>
>
> Using the following BPF program will cause a kernel panic:
> SEC("fmod_ret/security_task_alloc")
> int fmod_task_alloc(void *ctx)
> {
> return 1;
> }
>
[...]
> +
> +static int check_attach_modify_return(unsigned long addr, const char *func_name)
> +{
> + if (within_error_injection_list(addr) ||
> + !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
> + return 0;
> +
> + return -EINVAL;
> +}
> +
> +static int modify_return_get_retval_range(const struct bpf_prog *prog,
> + struct bpf_retval_range *retval_range)
> +{
> + unsigned long addr = (unsigned long)prog->aux->dst_trampoline->func.addr;
> +
> + if (within_error_injection_list(addr)) {
> + switch (get_injectable_error_type(addr)) {
> + case EI_ETYPE_NULL:
> + retval_range->minval = 0;
> + retval_range->maxval = 0;
> + break;
> + case EI_ETYPE_ERRNO:
> + retval_range->minval = -MAX_ERRNO;
> + retval_range->maxval = -1;
> + break;
> + case EI_ETYPE_ERRNO_NULL:
> + retval_range->minval = -MAX_ERRNO;
> + retval_range->maxval = 0;
> + break;
> + case EI_ETYPE_TRUE:
> + retval_range->minval = 1;
> + retval_range->maxval = 1;
> + break;
> + }
A MODIFY_RETURN program should always be able to return 0. Returning 0
means "do not modify the return value; call the target function as
usual". So the range computed here must not exclude 0.
I think this is a limitation of MODIFY_RETURN: it cannot override the
target's return value with 0/false. Maybe we could introduce a kfunc,
such as bpf_set_return_zero(), but that would be complex, as we would
need to make some adjustments to the trampoline, which I suspect
Alexei won't like.
However, that is a separate problem. For this patch, I think we need
to make sure retval_range always covers 0.
BTW, I see you've moved some code upwards, which makes the patch
harder to review and also makes it relatively large. I suggest you
forward-declare modify_return_get_retval_range() and
bpf_security_get_retval_range() near the top of the file instead.
Thanks!
Menglong Dong
> + retval_range->return_32bit = true;
> +
> + return 0;
> + }
> +
> + return -EINVAL;
> +}
> +
> +#else
> +
> +/* Unfortunately, the arch-specific prefixes are hard-coded in arch syscall code
> + * so we need to hard-code them, too. Ftrace has arch_syscall_match_sym_name()
> + * but that just compares two concrete function names.
> + */
> +static bool has_arch_syscall_prefix(const char *func_name)
> +{
> +#if defined(__x86_64__)
> + return !strncmp(func_name, "__x64_", 6);
> +#elif defined(__i386__)
> + return !strncmp(func_name, "__ia32_", 7);
> +#elif defined(__s390x__)
> + return !strncmp(func_name, "__s390x_", 8);
> +#elif defined(__aarch64__)
> + return !strncmp(func_name, "__arm64_", 8);
> +#elif defined(__riscv)
> + return !strncmp(func_name, "__riscv_", 8);
> +#elif defined(__powerpc__) || defined(__powerpc64__)
> + return !strncmp(func_name, "sys_", 4);
> +#elif defined(__loongarch__)
> + return !strncmp(func_name, "sys_", 4);
> +#else
> + return false;
> +#endif
> +}
> +
> +/* Without error injection, allow sleepable and fmod_ret progs on syscalls. */
> +
> +static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
> +{
> + if (has_arch_syscall_prefix(func_name))
> + return 0;
> +
> + return -EINVAL;
> +}
> +
> +static int check_attach_modify_return(unsigned long addr, const char *func_name)
> +{
> + if (has_arch_syscall_prefix(func_name) ||
> + !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
> + return 0;
> +
> + return -EINVAL;
> +}
> +
> +/* The system call return value is allowed to be an arbitrary value. */
> +static int modify_return_get_retval_range(const struct bpf_prog *prog,
> + struct bpf_retval_range *retval_range)
> +{
> + return -EINVAL;
> +}
> +
> +#endif /* CONFIG_FUNCTION_ERROR_INJECTION */
> +
> +/* hooks return 0 or 1 */
> +BTF_SET_START(bool_security_hooks)
> +BTF_ID(func, security_xfrm_state_pol_flow_match)
> +BTF_ID(func, security_audit_rule_known)
> +BTF_ID(func, security_inode_xattr_skipcap)
> +BTF_SET_END(bool_security_hooks)
> +
> +/* Similar to bpf_lsm_get_retval_range,
> + * ensure that the return values of fmod_ret are valid.
> + */
> +static int bpf_security_get_retval_range(const struct bpf_prog *prog,
> + struct bpf_retval_range *retval_range)
> +{
> + if (strncmp(SECURITY_PREFIX, prog->aux->attach_func_name,
> + sizeof(SECURITY_PREFIX) - 1))
> + return -EINVAL;
> +
> + if (btf_id_set_contains(&bool_security_hooks, prog->aux->attach_btf_id)) {
> + retval_range->minval = 0;
> + retval_range->maxval = 1;
> + } else {
> + retval_range->minval = -MAX_ERRNO;
> + retval_range->maxval = 0;
> + }
> + retval_range->return_32bit = true;
> +
> + return 0;
> +}
>
> static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_range *range)
> {
> @@ -18444,8 +18605,13 @@ static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_
> *range = retval_range(0, 0);
> break;
> case BPF_TRACE_RAW_TP:
> - case BPF_MODIFY_RETURN:
> return false;
> + case BPF_MODIFY_RETURN:
> + if (!bpf_security_get_retval_range(env->prog, range))
> + break;
> + if (modify_return_get_retval_range(env->prog, range))
> + return false;
> + break;
> case BPF_TRACE_ITER:
> default:
> break;
> @@ -25487,99 +25653,6 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
> return bpf_prog_ctx_arg_info_init(prog, st_ops_desc->arg_info[member_idx].info,
> st_ops_desc->arg_info[member_idx].cnt);
> }
> -#define SECURITY_PREFIX "security_"
> -
> -#ifdef CONFIG_FUNCTION_ERROR_INJECTION
> -
> -/* list of non-sleepable functions that are otherwise on
> - * ALLOW_ERROR_INJECTION list
> - */
> -BTF_SET_START(btf_non_sleepable_error_inject)
> -/* Three functions below can be called from sleepable and non-sleepable context.
> - * Assume non-sleepable from bpf safety point of view.
> - */
> -BTF_ID(func, __filemap_add_folio)
> -#ifdef CONFIG_FAIL_PAGE_ALLOC
> -BTF_ID(func, should_fail_alloc_page)
> -#endif
> -#ifdef CONFIG_FAILSLAB
> -BTF_ID(func, should_failslab)
> -#endif
> -BTF_SET_END(btf_non_sleepable_error_inject)
> -
> -static int check_non_sleepable_error_inject(u32 btf_id)
> -{
> - return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
> -}
> -
> -static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
> -{
> - /* fentry/fexit/fmod_ret progs can be sleepable if they are
> - * attached to ALLOW_ERROR_INJECTION and are not in denylist.
> - */
> - if (!check_non_sleepable_error_inject(btf_id) &&
> - within_error_injection_list(addr))
> - return 0;
> -
> - return -EINVAL;
> -}
> -
> -static int check_attach_modify_return(unsigned long addr, const char *func_name)
> -{
> - if (within_error_injection_list(addr) ||
> - !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
> - return 0;
> -
> - return -EINVAL;
> -}
> -
> -#else
> -
> -/* Unfortunately, the arch-specific prefixes are hard-coded in arch syscall code
> - * so we need to hard-code them, too. Ftrace has arch_syscall_match_sym_name()
> - * but that just compares two concrete function names.
> - */
> -static bool has_arch_syscall_prefix(const char *func_name)
> -{
> -#if defined(__x86_64__)
> - return !strncmp(func_name, "__x64_", 6);
> -#elif defined(__i386__)
> - return !strncmp(func_name, "__ia32_", 7);
> -#elif defined(__s390x__)
> - return !strncmp(func_name, "__s390x_", 8);
> -#elif defined(__aarch64__)
> - return !strncmp(func_name, "__arm64_", 8);
> -#elif defined(__riscv)
> - return !strncmp(func_name, "__riscv_", 8);
> -#elif defined(__powerpc__) || defined(__powerpc64__)
> - return !strncmp(func_name, "sys_", 4);
> -#elif defined(__loongarch__)
> - return !strncmp(func_name, "sys_", 4);
> -#else
> - return false;
> -#endif
> -}
> -
> -/* Without error injection, allow sleepable and fmod_ret progs on syscalls. */
> -
> -static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
> -{
> - if (has_arch_syscall_prefix(func_name))
> - return 0;
> -
> - return -EINVAL;
> -}
> -
> -static int check_attach_modify_return(unsigned long addr, const char *func_name)
> -{
> - if (has_arch_syscall_prefix(func_name) ||
> - !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
> - return 0;
> -
> - return -EINVAL;
> -}
> -
> -#endif /* CONFIG_FUNCTION_ERROR_INJECTION */
>
> int bpf_check_attach_target(struct bpf_verifier_log *log,
> const struct bpf_prog *prog,
> --
> 2.43.0
>
>
>
next prev parent reply other threads:[~2026-04-10 9:20 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-10 6:10 [PATCH v2 bpf-next 0/2] bpf: Fix Null-Pointer Dereference in kernel_clone() via BPF fmod_ret on security_task_alloc Feng Yang
2026-04-10 6:10 ` [PATCH v2 bpf-next 1/2] " Feng Yang
2026-04-10 7:00 ` bot+bpf-ci
2026-04-10 8:24 ` Feng Yang
2026-04-10 7:21 ` Leon Hwang
2026-04-10 7:36 ` Leon Hwang
2026-04-10 7:40 ` Feng Yang
2026-04-10 7:49 ` Leon Hwang
2026-04-10 8:07 ` Feng Yang
2026-04-10 8:03 ` Feng Yang
2026-04-10 8:27 ` Leon Hwang
2026-04-10 9:20 ` Menglong Dong [this message]
2026-04-10 6:10 ` [PATCH v2 bpf-next 2/2] selftests/bpf: Add selftests for verifying return values of fmod_ret Feng Yang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=13985062.uLZWGnKmhe@7940hx \
--to=menglong.dong@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=jiayuan.chen@linux.dev \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-kselftest@vger.kernel.org \
--cc=martin.lau@linux.dev \
--cc=mattbobrowski@google.com \
--cc=memxor@gmail.com \
--cc=song@kernel.org \
--cc=yangfeng59949@163.com \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox