Re: [PATCH v2 bpf-next 1/2] bpf: Fix Null-Pointer Dereference in kernel_clone() via BPF fmod_ret on security_task_alloc

public inbox for bpf@vger.kernel.org
 help / color / mirror / Atom feed

From: Menglong Dong <menglong.dong@linux.dev>
To: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	martin.lau@linux.dev, eddyz87@gmail.com, memxor@gmail.com,
	song@kernel.org, yonghong.song@linux.dev, jolsa@kernel.org,
	john.fastabend@gmail.com, kpsingh@kernel.org,
	mattbobrowski@google.com, jiayuan.chen@linux.dev,
	Feng Yang <yangfeng59949@163.com>
Cc: bpf@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-kselftest@vger.kernel.org
Subject: Re: [PATCH v2 bpf-next 1/2] bpf: Fix Null-Pointer Dereference in kernel_clone() via BPF fmod_ret on security_task_alloc
Date: Fri, 10 Apr 2026 17:20:28 +0800	[thread overview]
Message-ID: <13985062.uLZWGnKmhe@7940hx> (raw)
In-Reply-To: <20260410061037.149532-2-yangfeng59949@163.com>

On 2026/4/10 14:10 Feng Yang <yangfeng59949@163.com> wrote:
> From: Feng Yang <yangfeng@kylinos.cn>
> 
> Using the following BPF program will cause a kernel panic:
> SEC("fmod_ret/security_task_alloc")
> int fmod_task_alloc(void *ctx)
> {
>         return 1;
> }
> 
[...]
> +
> +static int check_attach_modify_return(unsigned long addr, const char *func_name)
> +{
> +	if (within_error_injection_list(addr) ||
> +	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
> +		return 0;
> +
> +	return -EINVAL;
> +}
> +
> +static int modify_return_get_retval_range(const struct bpf_prog *prog,
> +					  struct bpf_retval_range *retval_range)
> +{
> +	unsigned long addr = (unsigned long)prog->aux->dst_trampoline->func.addr;
> +
> +	if (within_error_injection_list(addr)) {
> +		switch (get_injectable_error_type(addr)) {
> +		case EI_ETYPE_NULL:
> +			retval_range->minval = 0;
> +			retval_range->maxval = 0;
> +			break;
> +		case EI_ETYPE_ERRNO:
> +			retval_range->minval = -MAX_ERRNO;
> +			retval_range->maxval = -1;
> +			break;
> +		case EI_ETYPE_ERRNO_NULL:
> +			retval_range->minval = -MAX_ERRNO;
> +			retval_range->maxval = 0;
> +			break;
> +		case EI_ETYPE_TRUE:
> +			retval_range->minval = 1;
> +			retval_range->maxval = 1;
> +			break;
> +		}

A MODIFY_RETURN program should always be able to return 0: 0 means
"do not modify the return value, and call the target function". So
the return value range here should not exclude it.

I think this is a limitation of MODIFY_RETURN, as it can't change
the return value to 0/false. Maybe we could introduce a kfunc, such
as bpf_set_return_zero(), but it would be complex, as we would need
to make some adjustments to the trampoline, which I suspect Alexei
won't like.

However, that's a separate problem. So, for this patch, I think we
need to make retval_range always cover "0".

BTW, I see you've moved some code upwards, which is not very
friendly for review and also makes the patch relatively large.
I suggest you forward-declare modify_return_get_retval_range() and
bpf_security_get_retval_range() at the beginning of the file instead.

Thanks!
Menglong Dong

> +		retval_range->return_32bit = true;
> +
> +		return 0;
> +	}
> +
> +	return -EINVAL;
> +}
> +
> +#else
> +
> +/* Unfortunately, the arch-specific prefixes are hard-coded in arch syscall code
> + * so we need to hard-code them, too. Ftrace has arch_syscall_match_sym_name()
> + * but that just compares two concrete function names.
> + */
> +static bool has_arch_syscall_prefix(const char *func_name)
> +{
> +#if defined(__x86_64__)
> +	return !strncmp(func_name, "__x64_", 6);
> +#elif defined(__i386__)
> +	return !strncmp(func_name, "__ia32_", 7);
> +#elif defined(__s390x__)
> +	return !strncmp(func_name, "__s390x_", 8);
> +#elif defined(__aarch64__)
> +	return !strncmp(func_name, "__arm64_", 8);
> +#elif defined(__riscv)
> +	return !strncmp(func_name, "__riscv_", 8);
> +#elif defined(__powerpc__) || defined(__powerpc64__)
> +	return !strncmp(func_name, "sys_", 4);
> +#elif defined(__loongarch__)
> +	return !strncmp(func_name, "sys_", 4);
> +#else
> +	return false;
> +#endif
> +}
> +
> +/* Without error injection, allow sleepable and fmod_ret progs on syscalls. */
> +
> +static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
> +{
> +	if (has_arch_syscall_prefix(func_name))
> +		return 0;
> +
> +	return -EINVAL;
> +}
> +
> +static int check_attach_modify_return(unsigned long addr, const char *func_name)
> +{
> +	if (has_arch_syscall_prefix(func_name) ||
> +	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
> +		return 0;
> +
> +	return -EINVAL;
> +}
> +
> +/* The system call return value is allowed to be an arbitrary value. */
> +static int modify_return_get_retval_range(const struct bpf_prog *prog,
> +					  struct bpf_retval_range *retval_range)
> +{
> +	return -EINVAL;
> +}
> +
> +#endif /* CONFIG_FUNCTION_ERROR_INJECTION */
> +
> +/* hooks return 0 or 1 */
> +BTF_SET_START(bool_security_hooks)
> +BTF_ID(func, security_xfrm_state_pol_flow_match)
> +BTF_ID(func, security_audit_rule_known)
> +BTF_ID(func, security_inode_xattr_skipcap)
> +BTF_SET_END(bool_security_hooks)
> +
> +/* Similar to bpf_lsm_get_retval_range,
> + * ensure that the return values of fmod_ret are valid.
> + */
> +static int bpf_security_get_retval_range(const struct bpf_prog *prog,
> +					 struct bpf_retval_range *retval_range)
> +{
> +	if (strncmp(SECURITY_PREFIX, prog->aux->attach_func_name,
> +		    sizeof(SECURITY_PREFIX) - 1))
> +		return -EINVAL;
> +
> +	if (btf_id_set_contains(&bool_security_hooks, prog->aux->attach_btf_id)) {
> +		retval_range->minval = 0;
> +		retval_range->maxval = 1;
> +	} else {
> +		retval_range->minval = -MAX_ERRNO;
> +		retval_range->maxval = 0;
> +	}
> +	retval_range->return_32bit = true;
> +
> +	return 0;
> +}
>  
>  static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_range *range)
>  {
> @@ -18444,8 +18605,13 @@ static bool return_retval_range(struct bpf_verifier_env *env, struct bpf_retval_
>  			*range = retval_range(0, 0);
>  			break;
>  		case BPF_TRACE_RAW_TP:
> -		case BPF_MODIFY_RETURN:
>  			return false;
> +		case BPF_MODIFY_RETURN:
> +			if (!bpf_security_get_retval_range(env->prog, range))
> +				break;
> +			if (modify_return_get_retval_range(env->prog, range))
> +				return false;
> +			break;
>  		case BPF_TRACE_ITER:
>  		default:
>  			break;
> @@ -25487,99 +25653,6 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
>  	return bpf_prog_ctx_arg_info_init(prog, st_ops_desc->arg_info[member_idx].info,
>  					  st_ops_desc->arg_info[member_idx].cnt);
>  }
> -#define SECURITY_PREFIX "security_"
> -
> -#ifdef CONFIG_FUNCTION_ERROR_INJECTION
> -
> -/* list of non-sleepable functions that are otherwise on
> - * ALLOW_ERROR_INJECTION list
> - */
> -BTF_SET_START(btf_non_sleepable_error_inject)
> -/* Three functions below can be called from sleepable and non-sleepable context.
> - * Assume non-sleepable from bpf safety point of view.
> - */
> -BTF_ID(func, __filemap_add_folio)
> -#ifdef CONFIG_FAIL_PAGE_ALLOC
> -BTF_ID(func, should_fail_alloc_page)
> -#endif
> -#ifdef CONFIG_FAILSLAB
> -BTF_ID(func, should_failslab)
> -#endif
> -BTF_SET_END(btf_non_sleepable_error_inject)
> -
> -static int check_non_sleepable_error_inject(u32 btf_id)
> -{
> -	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
> -}
> -
> -static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
> -{
> -	/* fentry/fexit/fmod_ret progs can be sleepable if they are
> -	 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
> -	 */
> -	if (!check_non_sleepable_error_inject(btf_id) &&
> -	    within_error_injection_list(addr))
> -		return 0;
> -
> -	return -EINVAL;
> -}
> -
> -static int check_attach_modify_return(unsigned long addr, const char *func_name)
> -{
> -	if (within_error_injection_list(addr) ||
> -	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
> -		return 0;
> -
> -	return -EINVAL;
> -}
> -
> -#else
> -
> -/* Unfortunately, the arch-specific prefixes are hard-coded in arch syscall code
> - * so we need to hard-code them, too. Ftrace has arch_syscall_match_sym_name()
> - * but that just compares two concrete function names.
> - */
> -static bool has_arch_syscall_prefix(const char *func_name)
> -{
> -#if defined(__x86_64__)
> -	return !strncmp(func_name, "__x64_", 6);
> -#elif defined(__i386__)
> -	return !strncmp(func_name, "__ia32_", 7);
> -#elif defined(__s390x__)
> -	return !strncmp(func_name, "__s390x_", 8);
> -#elif defined(__aarch64__)
> -	return !strncmp(func_name, "__arm64_", 8);
> -#elif defined(__riscv)
> -	return !strncmp(func_name, "__riscv_", 8);
> -#elif defined(__powerpc__) || defined(__powerpc64__)
> -	return !strncmp(func_name, "sys_", 4);
> -#elif defined(__loongarch__)
> -	return !strncmp(func_name, "sys_", 4);
> -#else
> -	return false;
> -#endif
> -}
> -
> -/* Without error injection, allow sleepable and fmod_ret progs on syscalls. */
> -
> -static int check_attach_sleepable(u32 btf_id, unsigned long addr, const char *func_name)
> -{
> -	if (has_arch_syscall_prefix(func_name))
> -		return 0;
> -
> -	return -EINVAL;
> -}
> -
> -static int check_attach_modify_return(unsigned long addr, const char *func_name)
> -{
> -	if (has_arch_syscall_prefix(func_name) ||
> -	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
> -		return 0;
> -
> -	return -EINVAL;
> -}
> -
> -#endif /* CONFIG_FUNCTION_ERROR_INJECTION */
>  
>  int bpf_check_attach_target(struct bpf_verifier_log *log,
>  			    const struct bpf_prog *prog,
> -- 
> 2.43.0
> 
> 
>

next prev parent reply	other threads:[~2026-04-10  9:20 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-10  6:10 [PATCH v2 bpf-next 0/2] bpf: Fix Null-Pointer Dereference in kernel_clone() via BPF fmod_ret on security_task_alloc Feng Yang
2026-04-10  6:10 ` [PATCH v2 bpf-next 1/2] " Feng Yang
2026-04-10  7:00   ` bot+bpf-ci
2026-04-10  8:24     ` Feng Yang
2026-04-10  7:21   ` Leon Hwang
2026-04-10  7:36     ` Leon Hwang
2026-04-10  7:40     ` Feng Yang
2026-04-10  7:49       ` Leon Hwang
2026-04-10  8:07         ` Feng Yang
2026-04-10  8:03   ` Feng Yang
2026-04-10  8:27     ` Leon Hwang
2026-04-10  9:20   ` Menglong Dong [this message]
2026-04-10  6:10 ` [PATCH v2 bpf-next 2/2] selftests/bpf: Add selftests for verifying return values of fmod_ret Feng Yang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=13985062.uLZWGnKmhe@7940hx \
    --to=menglong.dong@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=eddyz87@gmail.com \
    --cc=jiayuan.chen@linux.dev \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kpsingh@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=martin.lau@linux.dev \
    --cc=mattbobrowski@google.com \
    --cc=memxor@gmail.com \
    --cc=song@kernel.org \
    --cc=yangfeng59949@163.com \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox