* [PATCH bpf-next v4 13/20] bpf, lsm: Add check for BPF LSM return value
@ 2024-07-11 11:38 Xu Kuohai
  2024-07-11 11:38 ` [PATCH bpf-next v4 14/20] bpf: Prevent tail call between progs attached to different hooks Xu Kuohai
                   ` (6 more replies)
  0 siblings, 7 replies; 18+ messages in thread
From: Xu Kuohai @ 2024-07-11 11:38 UTC (permalink / raw)
  To: bpf, netdev, linux-security-module, linux-kselftest,
	linux-integrity, apparmor, selinux
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Matt Bobrowski, Brendan Jackman, Paul Moore, James Morris,
	Serge E . Hallyn, Khadija Kamran, Casey Schaufler,
	Ondrej Mosnacek, Kees Cook, John Johansen, Lukas Bulwahn,
	Roberto Sassu, Shung-Hsi Yu, Edward Cree, Alexander Viro,
	Christian Brauner, Trond Myklebust, Anna Schumaker, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Stephen Smalley

From: Xu Kuohai <xukuohai@huawei.com>

A BPF prog attached to the file_alloc_security hook that returns a positive
number causes a kernel panic.

This happens because the file system cannot filter out the positive number
returned by the LSM prog using IS_ERR, and misinterprets this positive
number as a file pointer.

Given that the file_alloc_security hook never returned a positive number
before the introduction of BPF LSM, and that other BPF LSM hooks may
encounter similar issues, this patch adds an LSM return value check in
the verifier to ensure no unexpected value is returned.
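
For illustration, here is a small stand-alone sketch (not part of this
patch) mirroring the IS_ERR_VALUE() definition also used by the selftests'
err.h; it shows that only values in [-MAX_ERRNO, -1] are recognized as
errors, so a positive "error" slips through the check and is later treated
as a valid pointer:

#include <stdio.h>

#define MAX_ERRNO 4095
#define IS_ERR_VALUE(x) ((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)

int main(void)
{
    /* a negative errno encoded as a pointer is detected as an error ... */
    printf("%d\n", IS_ERR_VALUE((void *)(long)-1));    /* prints 1 */
    /* ... but a positive value is not, and would later be dereferenced
     * as if it were a valid struct file pointer
     */
    printf("%d\n", IS_ERR_VALUE((void *)(long)1));     /* prints 0 */
    return 0;
}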

Fixes: 520b7aa00d8c ("bpf: lsm: Initialize the BPF LSM hooks")
Reported-by: Xin Liu <liuxin350@huawei.com>
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Acked-by: Eduard Zingerman <eddyz87@gmail.com>
---
 include/linux/bpf.h     |  1 +
 include/linux/bpf_lsm.h |  9 +++++++
 kernel/bpf/bpf_lsm.c    | 30 ++++++++++++++++++++-
 kernel/bpf/btf.c        |  5 +++-
 kernel/bpf/verifier.c   | 60 ++++++++++++++++++++++++++++++++++-------
 5 files changed, 94 insertions(+), 11 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4f1d4a97b9d1..d255201035c4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -927,6 +927,7 @@ struct bpf_insn_access_aux {
 		};
 	};
 	struct bpf_verifier_log *log; /* for verbose logs */
+	bool is_retval; /* is accessing function return value ? */
 };
 
 static inline void
diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h
index 73e1f6dbec4a..ca5f2176c940 100644
--- a/include/linux/bpf_lsm.h
+++ b/include/linux/bpf_lsm.h
@@ -9,6 +9,7 @@
 
 #include <linux/sched.h>
 #include <linux/bpf.h>
+#include <linux/bpf_verifier.h>
 #include <linux/lsm_hooks.h>
 
 #ifdef CONFIG_BPF_LSM
@@ -47,6 +48,8 @@ void bpf_lsm_find_cgroup_shim(const struct bpf_prog *prog, bpf_func_t *bpf_func)
 
 bool bpf_lsm_has_retval_param(const struct bpf_prog *prog);
 
+int bpf_lsm_get_retval_range(const struct bpf_prog *prog,
+			     struct bpf_retval_range *range);
 #else /* !CONFIG_BPF_LSM */
 
 static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
@@ -84,6 +87,12 @@ static inline bool bpf_lsm_has_retval_param(const struct bpf_prog *prog)
 {
 	return false;
 }
+
+static inline int bpf_lsm_get_retval_range(const struct bpf_prog *prog,
+					   struct bpf_retval_range *range)
+{
+	return -EOPNOTSUPP;
+}
 #endif /* CONFIG_BPF_LSM */
 
 #endif /* _LINUX_BPF_LSM_H */
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index a8f8358c77e3..9dd338099d4a 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -11,7 +11,6 @@
 #include <linux/lsm_hooks.h>
 #include <linux/bpf_lsm.h>
 #include <linux/kallsyms.h>
-#include <linux/bpf_verifier.h>
 #include <net/bpf_sk_storage.h>
 #include <linux/bpf_local_storage.h>
 #include <linux/btf_ids.h>
@@ -420,3 +419,32 @@ bool bpf_lsm_has_retval_param(const struct bpf_prog *prog)
 	return btf_id_set_contains(&retval_param_lsm_hooks,
 				   prog->aux->attach_btf_id);
 }
+
+/* hooks return 0 or 1 */
+BTF_SET_START(bool_lsm_hooks)
+BTF_ID(func, bpf_lsm_xfrm_state_pol_flow_match)
+BTF_ID(func, bpf_lsm_audit_rule_known)
+BTF_ID(func, bpf_lsm_inode_xattr_skipcap)
+BTF_SET_END(bool_lsm_hooks)
+
+int bpf_lsm_get_retval_range(const struct bpf_prog *prog,
+			     struct bpf_retval_range *retval_range)
+{
+	/* no return value range for void hooks */
+	if (!prog->aux->attach_func_proto->type)
+		return -EINVAL;
+
+	if (btf_id_set_contains(&bool_lsm_hooks, prog->aux->attach_btf_id)) {
+		retval_range->minval = 0;
+		retval_range->maxval = 1;
+	} else {
+		/* All other LSM hooks, except task_prctl, return 0 on success
+		 * and negative error code on failure.
+		 * To keep things simple, we only allow bpf progs to return 0
+		 * or negative errno for task_prctl.
+		 */
+		retval_range->minval = -MAX_ERRNO;
+		retval_range->maxval = 0;
+	}
+	return 0;
+}
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index df299d600b10..ce892565367d 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6416,8 +6416,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 
 	if (arg == nr_args) {
 		switch (prog->expected_attach_type) {
-		case BPF_LSM_CGROUP:
 		case BPF_LSM_MAC:
+			/* mark we are accessing the return value */
+			info->is_retval = true;
+			fallthrough;
+		case BPF_LSM_CGROUP:
 		case BPF_TRACE_FEXIT:
 			/* When LSM programs are attached to void LSM hooks
 			 * they use FEXIT trampolines and when attached to
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a0bbef2d14e4..6f5d8ca995d6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2334,6 +2334,25 @@ static void mark_reg_unknown(struct bpf_verifier_env *env,
 	__mark_reg_unknown(env, regs + regno);
 }
 
+static int __mark_reg_s32_range(struct bpf_verifier_env *env,
+				struct bpf_reg_state *regs,
+				u32 regno,
+				s32 s32_min,
+				s32 s32_max)
+{
+	struct bpf_reg_state *reg = regs + regno;
+
+	reg->s32_min_value = max_t(s32, reg->s32_min_value, s32_min);
+	reg->s32_max_value = min_t(s32, reg->s32_max_value, s32_max);
+
+	reg->smin_value = max_t(s64, reg->smin_value, s32_min);
+	reg->smax_value = min_t(s64, reg->smax_value, s32_max);
+
+	reg_bounds_sync(reg);
+
+	return reg_bounds_sanity_check(env, reg, "s32_range");
+}
+
 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
 				struct bpf_reg_state *reg)
 {
@@ -5587,11 +5606,12 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
 			    enum bpf_access_type t, enum bpf_reg_type *reg_type,
-			    struct btf **btf, u32 *btf_id)
+			    struct btf **btf, u32 *btf_id, bool *is_retval)
 {
 	struct bpf_insn_access_aux info = {
 		.reg_type = *reg_type,
 		.log = &env->log,
+		.is_retval = false,
 	};
 
 	if (env->ops->is_valid_access &&
@@ -5604,6 +5624,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
 		 * type of narrower access.
 		 */
 		*reg_type = info.reg_type;
+		*is_retval = info.is_retval;
 
 		if (base_type(*reg_type) == PTR_TO_BTF_ID) {
 			*btf = info.btf;
@@ -6803,6 +6824,17 @@ static int check_stack_access_within_bounds(
 	return grow_stack_state(env, state, -min_off /* size */);
 }
 
+static bool get_func_retval_range(struct bpf_prog *prog,
+				  struct bpf_retval_range *range)
+{
+	if (prog->type == BPF_PROG_TYPE_LSM &&
+		prog->expected_attach_type == BPF_LSM_MAC &&
+		!bpf_lsm_get_retval_range(prog, range)) {
+		return true;
+	}
+	return false;
+}
+
 /* check whether memory at (regno + off) is accessible for t = (read | write)
  * if t==write, value_regno is a register which value is stored into memory
  * if t==read, value_regno is a register which will receive the value from memory
@@ -6907,6 +6939,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
 			mark_reg_unknown(env, regs, value_regno);
 	} else if (reg->type == PTR_TO_CTX) {
+		bool is_retval = false;
+		struct bpf_retval_range range;
 		enum bpf_reg_type reg_type = SCALAR_VALUE;
 		struct btf *btf = NULL;
 		u32 btf_id = 0;
@@ -6922,7 +6956,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 			return err;
 
 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
-				       &btf_id);
+				       &btf_id, &is_retval);
 		if (err)
 			verbose_linfo(env, insn_idx, "; ");
 		if (!err && t == BPF_READ && value_regno >= 0) {
@@ -6931,7 +6965,14 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 			 * case, we know the offset is zero.
 			 */
 			if (reg_type == SCALAR_VALUE) {
-				mark_reg_unknown(env, regs, value_regno);
+				if (is_retval && get_func_retval_range(env->prog, &range)) {
+					err = __mark_reg_s32_range(env, regs, value_regno,
+								   range.minval, range.maxval);
+					if (err)
+						return err;
+				} else {
+					mark_reg_unknown(env, regs, value_regno);
+				}
 			} else {
 				mark_reg_known_zero(env, regs,
 						    value_regno);
@@ -15782,12 +15823,13 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
 
 	case BPF_PROG_TYPE_LSM:
 		if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
-			/* Regular BPF_PROG_TYPE_LSM programs can return
-			 * any value.
-			 */
-			return 0;
-		}
-		if (!env->prog->aux->attach_func_proto->type) {
+			/* no range found, any return value is allowed */
+			if (!get_func_retval_range(env->prog, &range))
+				return 0;
+			/* no restricted range, any return value is allowed */
+			if (range.minval == S32_MIN && range.maxval == S32_MAX)
+				return 0;
+		} else if (!env->prog->aux->attach_func_proto->type) {
 			/* Make sure programs that attach to void
 			 * hooks don't try to modify return value.
 			 */
-- 
2.30.2



* [PATCH bpf-next v4 14/20] bpf: Prevent tail call between progs attached to different hooks
  2024-07-11 11:38 [PATCH bpf-next v4 13/20] bpf, lsm: Add check for BPF LSM return value Xu Kuohai
@ 2024-07-11 11:38 ` Xu Kuohai
  2024-07-11 11:38 ` [PATCH bpf-next v4 15/20] bpf: Fix compare error in function retval_range_within Xu Kuohai
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Xu Kuohai @ 2024-07-11 11:38 UTC (permalink / raw)
  To: bpf, netdev, linux-security-module, linux-kselftest,
	linux-integrity, apparmor, selinux
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Matt Bobrowski, Brendan Jackman, Paul Moore, James Morris,
	Serge E . Hallyn, Khadija Kamran, Casey Schaufler,
	Ondrej Mosnacek, Kees Cook, John Johansen, Lukas Bulwahn,
	Roberto Sassu, Shung-Hsi Yu, Edward Cree, Alexander Viro,
	Christian Brauner, Trond Myklebust, Anna Schumaker, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Stephen Smalley

From: Xu Kuohai <xukuohai@huawei.com>

BPF progs can be attached to kernel functions, and the attached functions
can take different parameters or return different values. If a prog
attached to one kernel function tail calls a prog attached to another
kernel function, the ctx access or return value verification could be
bypassed.

For example, suppose prog1 is attached to func1, which takes only one
parameter, and prog2 is attached to func2, which takes two parameters.
Since the verifier assumes the bpf ctx passed to prog2 is constructed
based on func2's prototype, it allows prog2 to access the second parameter
from the bpf ctx passed to it. The problem is that the verifier does not
prevent prog1 from passing its bpf ctx to prog2 via tail call. In this
case, the bpf ctx passed to prog2 is constructed from func1 instead of
func2, that is, the assumption for ctx access verification is bypassed.
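
A minimal sketch of this scenario (func1/func2 are hypothetical attach
targets, and user space is assumed to have stored prog2 in slot 0 of
jmp_table):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

long seen;

struct {
    __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
    __uint(max_entries, 1);
    __uint(key_size, sizeof(__u32));
    __uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");

/* verified against func2's two-argument prototype, so reading the second
 * argument from the ctx is accepted
 */
SEC("fentry/func2")
int BPF_PROG(prog2, long a, long b)
{
    seen = b;
    return 0;
}

/* at run time, the tail call hands prog2 the one-argument ctx built for
 * func1, so prog2's read of the second argument goes past that ctx
 */
SEC("fentry/func1")
int BPF_PROG(prog1, long a)
{
    bpf_tail_call_static(ctx, &jmp_table, 0);
    return 0;
}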

As another example, suppose BPF LSM prog1 is attached to hook
file_alloc_security, and BPF LSM prog2 is attached to hook
bpf_lsm_audit_rule_known. The verifier knows the return value rules for
these two hooks: it is legal for bpf_lsm_audit_rule_known to return the
positive number 1, while it is illegal for file_alloc_security to return
a positive number. So the verifier allows prog2 to return 1, but does not
allow prog1 to return a positive number. The problem is that the verifier
does not prevent prog1 from calling prog2 via tail call. In this case,
prog2's return value 1 will be used as the return value for prog1's hook
file_alloc_security. That is, the return value rule is bypassed.

This patch adds a restriction on tail calls to prevent such bypasses.
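
A sketch of this second scenario, assuming a one-entry
BPF_MAP_TYPE_PROG_ARRAY named jmp_table as in the previous sketch (see the
lsm_tailcall.c selftest later in this series for the corresponding test):

SEC("lsm/audit_rule_known")        /* may return 0 or 1 */
int BPF_PROG(lsm_prog2)
{
    return 1;
}

SEC("lsm/file_alloc_security")     /* must return 0 or a negative errno */
int BPF_PROG(lsm_prog1)
{
    /* slot 0 holds lsm_prog2; if the tail call succeeds, its return
     * value 1 becomes the return value of file_alloc_security
     */
    bpf_tail_call_static(ctx, &jmp_table, 0);
    return 0;
}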

Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
---
 include/linux/bpf.h |  1 +
 kernel/bpf/core.c   | 21 ++++++++++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d255201035c4..bf71edb260cd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -294,6 +294,7 @@ struct bpf_map {
 	 * same prog type, JITed flag and xdp_has_frags flag.
 	 */
 	struct {
+		const struct btf_type *attach_func_proto;
 		spinlock_t lock;
 		enum bpf_prog_type type;
 		bool jited;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7ee62e38faf0..4e07cc057d6f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2302,6 +2302,7 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
 {
 	enum bpf_prog_type prog_type = resolve_prog_type(fp);
 	bool ret;
+	struct bpf_prog_aux *aux = fp->aux;
 
 	if (fp->kprobe_override)
 		return false;
@@ -2311,7 +2312,7 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
 	 * in the case of devmap and cpumap). Until device checks
 	 * are implemented, prohibit adding dev-bound programs to program maps.
 	 */
-	if (bpf_prog_is_dev_bound(fp->aux))
+	if (bpf_prog_is_dev_bound(aux))
 		return false;
 
 	spin_lock(&map->owner.lock);
@@ -2321,12 +2322,26 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
 		 */
 		map->owner.type  = prog_type;
 		map->owner.jited = fp->jited;
-		map->owner.xdp_has_frags = fp->aux->xdp_has_frags;
+		map->owner.xdp_has_frags = aux->xdp_has_frags;
+		map->owner.attach_func_proto = aux->attach_func_proto;
 		ret = true;
 	} else {
 		ret = map->owner.type  == prog_type &&
 		      map->owner.jited == fp->jited &&
-		      map->owner.xdp_has_frags == fp->aux->xdp_has_frags;
+		      map->owner.xdp_has_frags == aux->xdp_has_frags;
+		if (ret &&
+		    map->owner.attach_func_proto != aux->attach_func_proto) {
+			switch (prog_type) {
+			case BPF_PROG_TYPE_TRACING:
+			case BPF_PROG_TYPE_LSM:
+			case BPF_PROG_TYPE_EXT:
+			case BPF_PROG_TYPE_STRUCT_OPS:
+				ret = false;
+				break;
+			default:
+				break;
+			}
+		}
 	}
 	spin_unlock(&map->owner.lock);
 
-- 
2.30.2



* [PATCH bpf-next v4 15/20] bpf: Fix compare error in function retval_range_within
  2024-07-11 11:38 [PATCH bpf-next v4 13/20] bpf, lsm: Add check for BPF LSM return value Xu Kuohai
  2024-07-11 11:38 ` [PATCH bpf-next v4 14/20] bpf: Prevent tail call between progs attached to different hooks Xu Kuohai
@ 2024-07-11 11:38 ` Xu Kuohai
  2024-07-11 11:38 ` [PATCH bpf-next v4 16/20] bpf: Add a special case for bitwise AND on range [-1, 0] Xu Kuohai
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Xu Kuohai @ 2024-07-11 11:38 UTC (permalink / raw)
  To: bpf, netdev, linux-security-module, linux-kselftest,
	linux-integrity, apparmor, selinux
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Matt Bobrowski, Brendan Jackman, Paul Moore, James Morris,
	Serge E . Hallyn, Khadija Kamran, Casey Schaufler,
	Ondrej Mosnacek, Kees Cook, John Johansen, Lukas Bulwahn,
	Roberto Sassu, Shung-Hsi Yu, Edward Cree, Alexander Viro,
	Christian Brauner, Trond Myklebust, Anna Schumaker, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Stephen Smalley

From: Xu Kuohai <xukuohai@huawei.com>

After the lsm hook return range check was added to the verifier, the test
case "test_progs -t test_lsm" failed, and the failure log says:

libbpf: prog 'test_int_hook': BPF program load failed: Invalid argument
libbpf: prog 'test_int_hook': -- BEGIN PROG LOAD LOG --
0: R1=ctx() R10=fp0
; int BPF_PROG(test_int_hook, struct vm_area_struct *vma, @ lsm.c:89
0: (79) r0 = *(u64 *)(r1 +24)         ; R0_w=scalar(smin=smin32=-4095,smax=smax32=0) R1=ctx()

[...]

24: (b4) w0 = -1                      ; R0_w=0xffffffff
; int BPF_PROG(test_int_hook, struct vm_area_struct *vma, @ lsm.c:89
25: (95) exit
At program exit the register R0 has smin=4294967295 smax=4294967295 should have been in [-4095, 0]

It can be seen that the instruction "w0 = -1" zero extended -1 into the
64-bit register r0, setting both the smin and smax values of r0 to
4294967295. This resulted in a false rejection when r0 was checked against
the range [-4095, 0].
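
The zero extension can be reproduced with plain C (a stand-alone
illustration, not part of the patch):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t w0 = -1;    /* 32-bit subregister write */
    uint64_t r0 = w0;    /* zero extension into the full 64-bit register */

    printf("%llu\n", (unsigned long long)r0);    /* 4294967295 */
    printf("%d\n", (int32_t)r0);                 /* -1, what the hook's caller sees */
    return 0;
}

The 64-bit signed bounds of r0 are [4294967295, 4294967295], while its
32-bit signed bounds are [-1, -1]; only the latter reflects the int value
the LSM hook actually returns.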

Given that bpf lsm hooks do not return 64-bit values, this patch fixes the
issue by changing the comparison between r0 and the return range from a
64-bit operation to a 32-bit operation for bpf lsm.

Fixes: 8fa4ecd49b81 ("bpf: enforce exact retval range on subprog/callback exit")
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Acked-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
---
 kernel/bpf/verifier.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6f5d8ca995d6..19ef3d27dbb7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9995,9 +9995,13 @@ static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
 	return is_rbtree_lock_required_kfunc(kfunc_btf_id);
 }
 
-static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg)
+static bool retval_range_within(struct bpf_retval_range range, const struct bpf_reg_state *reg,
+				bool return_32bit)
 {
-	return range.minval <= reg->smin_value && reg->smax_value <= range.maxval;
+	if (return_32bit)
+		return range.minval <= reg->s32_min_value && reg->s32_max_value <= range.maxval;
+	else
+		return range.minval <= reg->smin_value && reg->smax_value <= range.maxval;
 }
 
 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
@@ -10034,8 +10038,8 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 		if (err)
 			return err;
 
-		/* enforce R0 return value range */
-		if (!retval_range_within(callee->callback_ret_range, r0)) {
+		/* enforce R0 return value range, and bpf_callback_t returns 64bit */
+		if (!retval_range_within(callee->callback_ret_range, r0, false)) {
 			verbose_invalid_scalar(env, r0, callee->callback_ret_range,
 					       "At callback return", "R0");
 			return -EINVAL;
@@ -15718,6 +15722,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
 	int err;
 	struct bpf_func_state *frame = env->cur_state->frame[0];
 	const bool is_subprog = frame->subprogno;
+	bool return_32bit = false;
 
 	/* LSM and struct_ops func-ptr's return type could be "void" */
 	if (!is_subprog || frame->in_exception_callback_fn) {
@@ -15829,6 +15834,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
 			/* no restricted range, any return value is allowed */
 			if (range.minval == S32_MIN && range.maxval == S32_MAX)
 				return 0;
+			return_32bit = true;
 		} else if (!env->prog->aux->attach_func_proto->type) {
 			/* Make sure programs that attach to void
 			 * hooks don't try to modify return value.
@@ -15859,7 +15865,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
 	if (err)
 		return err;
 
-	if (!retval_range_within(range, reg)) {
+	if (!retval_range_within(range, reg, return_32bit)) {
 		verbose_invalid_scalar(env, reg, range, exit_ctx, reg_name);
 		if (!is_subprog &&
 		    prog->expected_attach_type == BPF_LSM_CGROUP &&
-- 
2.30.2



* [PATCH bpf-next v4 16/20] bpf: Add a special case for bitwise AND on range [-1, 0]
  2024-07-11 11:38 [PATCH bpf-next v4 13/20] bpf, lsm: Add check for BPF LSM return value Xu Kuohai
  2024-07-11 11:38 ` [PATCH bpf-next v4 14/20] bpf: Prevent tail call between progs attached to different hooks Xu Kuohai
  2024-07-11 11:38 ` [PATCH bpf-next v4 15/20] bpf: Fix compare error in function retval_range_within Xu Kuohai
@ 2024-07-11 11:38 ` Xu Kuohai
  2024-07-15 15:29   ` Shung-Hsi Yu
  2024-07-11 11:38 ` [PATCH bpf-next v4 17/20] selftests/bpf: Avoid load failure for token_lsm.c Xu Kuohai
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 18+ messages in thread
From: Xu Kuohai @ 2024-07-11 11:38 UTC (permalink / raw)
  To: bpf, netdev, linux-security-module, linux-kselftest,
	linux-integrity, apparmor, selinux
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Matt Bobrowski, Brendan Jackman, Paul Moore, James Morris,
	Serge E . Hallyn, Khadija Kamran, Casey Schaufler,
	Ondrej Mosnacek, Kees Cook, John Johansen, Lukas Bulwahn,
	Roberto Sassu, Shung-Hsi Yu, Edward Cree, Alexander Viro,
	Christian Brauner, Trond Myklebust, Anna Schumaker, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Stephen Smalley

From: Xu Kuohai <xukuohai@huawei.com>

With the lsm return value check in place, the no-alu32 version of
test_libbpf_get_fd_by_id_opts is rejected by the verifier, and the log says:

0: R1=ctx() R10=fp0
; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
0: (b7) r0 = 0                        ; R0_w=0
1: (79) r2 = *(u64 *)(r1 +0)
func 'bpf_lsm_bpf_map' arg0 has btf_id 916 type STRUCT 'bpf_map'
2: R1=ctx() R2_w=trusted_ptr_bpf_map()
; if (map != (struct bpf_map *)&data_input) @ test_libbpf_get_fd_by_id_opts.c:29
2: (18) r3 = 0xffff9742c0951a00       ; R3_w=map_ptr(map=data_input,ks=4,vs=4)
4: (5d) if r2 != r3 goto pc+4         ; R2_w=trusted_ptr_bpf_map() R3_w=map_ptr(map=data_input,ks=4,vs=4)
; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
5: (79) r0 = *(u64 *)(r1 +8)          ; R0_w=scalar() R1=ctx()
; if (fmode & FMODE_WRITE) @ test_libbpf_get_fd_by_id_opts.c:32
6: (67) r0 <<= 62                     ; R0_w=scalar(smax=0x4000000000000000,umax=0xc000000000000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xc000000000000000))
7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
;  @ test_libbpf_get_fd_by_id_opts.c:0
8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
9: (95) exit

And here is the C code of the prog.

SEC("lsm/bpf_map")
int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode)
{
    if (map != (struct bpf_map *)&data_input)
	    return 0;

    if (fmode & FMODE_WRITE)
	    return -EACCES;

    return 0;
}

It is clear that the prog can only return either 0 or -EACCES, and both
values are legal.

So why is it rejected by the verifier?

The verifier log shows that the second if statement and the return value
assignment in the prog are optimized into the bitwise operations
"r0 s>>= 63" and "r0 &= -13". The verifier correctly deduces that the
value of r0 is in the range [-1, 0] after verifying the instruction
"r0 s>>= 63". But when the verifier proceeds to verify the instruction
"r0 &= -13", it fails to deduce the correct value range of r0.

7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))

So why does the verifier fail to deduce the result of 'r0 &= -13'?

The verifier uses tnum to track values, and the two ranges "[-1, 0]" and
"[0, -1ULL]" are encoded to the same tnum. When verifying the instruction
"r0 &= -13", the verifier erroneously deduces the result from
"[0, -1ULL] AND -13", which is out of the expected return range
[-4095, 0].
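
As a worked example (a stand-alone user-space sketch; tnum_and() below
mirrors the algorithm in kernel/bpf/tnum.c):

#include <stdio.h>
#include <stdint.h>

struct tnum { uint64_t value; uint64_t mask; };

static struct tnum tnum_and(struct tnum a, struct tnum b)
{
    uint64_t alpha = a.value | a.mask;
    uint64_t beta = b.value | b.mask;
    uint64_t v = a.value & b.value;

    return (struct tnum){ v, alpha & beta & ~v };
}

int main(void)
{
    /* the only tnum covering both -1 (all ones) and 0 (all zeros) is the
     * fully unknown one, which is also the tnum for the range [0, -1ULL]
     */
    struct tnum r0 = { 0, ~0ULL };
    struct tnum cst = { (uint64_t)-13, 0 };
    struct tnum res = tnum_and(r0, cst);

    printf("var_off=(%#llx; %#llx)\n",
           (unsigned long long)res.value, (unsigned long long)res.mask);
    /* prints var_off=(0x0; 0xfffffffffffffff3), matching the log above */
    return 0;
}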

As explained by Eduard in [0], the clang transformation that generates
this pattern is located in the DAGCombiner::SimplifySelectCC() method
(see [1]).

The transformation happens as a part of DAG to DAG rewrites
(LLVM uses several internal representations:
 - generic optimizer uses LLVM IR, most of the work is done
   using this representation;
 - before instruction selection IR is converted to Selection DAG,
   some optimizations are applied at this stage,
   all such optimizations are a set of pattern replacements;
 - Selection DAG is converted to machine code, some optimizations
   are applied at the machine code level).

Full pattern is described as follows:

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A)
  // where y is has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.

For this particular test case the DAG is converted as follows:

                    .---------------- lhs         The meaning of this select_cc is:
                    |        .------- rhs         `lhs == rhs ? true value : false value`
                    |        | .----- true value
                    |        | |  .-- false value
                    v        v v  v
  (select_cc seteq (and X 2) 0 0 -13)
                          ^
->                        '---------------.
  (and (sra (sll X 62) 63)                |
       -13)                               |
                                          |
Before the pattern is applied, it checks that the second 'and' operand
has only one bit set (which is true for '2').

The pattern itself generates a logical shift left / arithmetic shift
right pair, which ensures that the result is either all ones (-1) or all
zeros (0). Hence, applying 'and' to the shift result and the false value
generates a correct result.

As suggested by Eduard and Andrii, this patch adds a special case for the
source or destination register of the '&=' operation being in the range
[-1, 0].

This means that one of the '&=' operands is either:
- all ones, in which case the counterpart is the result of the operation;
- all zeros, in which case zero is the result of the operation.

That is, the result set is the counterpart's possible values extended with
0, and the MIN and MAX values can be deduced from these observations.
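
Taking the log above as a concrete instance, with dst_reg in [-1, 0] and a
constant src of -13:

    dst == -1 (all ones)  : -1 & -13 == -13   (the counterpart itself)
    dst ==  0 (all zeros) :  0 & -13 ==   0

so the possible results are exactly {-13, 0}, and the new bounds follow as
smin = min(src_smin, 0) = -13, smax = max(src_smax, 0) = 0, umin = 0 and
umax = src_umax, which keeps r0 inside the hook's expected range
[-4095, 0].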

[0] https://lore.kernel.org/bpf/e62e2971301ca7f2e9eb74fc500c520285cad8f5.camel@gmail.com/
[1] https://github.com/llvm/llvm-project/blob/4523a267829c807f3fc8fab8e5e9613985a51565/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Suggested-by: Eduard Zingerman <eddyz87@gmail.com>
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
---
 include/linux/tnum.h  |  3 ++
 kernel/bpf/tnum.c     | 25 +++++++++++++++++
 kernel/bpf/verifier.c | 64 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+)

diff --git a/include/linux/tnum.h b/include/linux/tnum.h
index 3c13240077b8..5e795d728b9f 100644
--- a/include/linux/tnum.h
+++ b/include/linux/tnum.h
@@ -52,6 +52,9 @@ struct tnum tnum_mul(struct tnum a, struct tnum b);
 /* Return a tnum representing numbers satisfying both @a and @b */
 struct tnum tnum_intersect(struct tnum a, struct tnum b);
 
+/* Return a tnum representing numbers satisfying either @a or @b */
+struct tnum tnum_union(struct tnum a, struct tnum b);
+
 /* Return @a with all but the lowest @size bytes cleared */
 struct tnum tnum_cast(struct tnum a, u8 size);
 
diff --git a/kernel/bpf/tnum.c b/kernel/bpf/tnum.c
index 9dbc31b25e3d..8028ce06fc1e 100644
--- a/kernel/bpf/tnum.c
+++ b/kernel/bpf/tnum.c
@@ -150,6 +150,31 @@ struct tnum tnum_intersect(struct tnum a, struct tnum b)
 	return TNUM(v & ~mu, mu);
 }
 
+/* Each bit has 3 states: unknown, known 0, known 1. Using x to represent
+ * unknown state, the result of the union of two bits is as follows:
+ *
+ *         | x    0    1
+ *    -----+------------
+ *     x   | x    x    x
+ *     0   | x    0    x
+ *     1   | x    x    1
+ *
+ * For tnum a and b, only the bits that are both known 0 or known 1 in a
+ * and b are known in the result of union a and b.
+ */
+struct tnum tnum_union(struct tnum a, struct tnum b)
+{
+	u64 v0, v1, mu;
+
+	/* unknown bits either in a or b */
+	mu = a.mask | b.mask;
+	/* "known 1" bits in both a and b */
+	v1 = (a.value & b.value) & ~mu;
+	/* "known 0" bits in both a and b */
+	v0 = (~a.value & ~b.value) & ~mu;
+	return TNUM(v1, ~(v0 | v1));
+}
+
 struct tnum tnum_cast(struct tnum a, u8 size)
 {
 	a.value &= (1ULL << (size * 8)) - 1;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 19ef3d27dbb7..7f4ee3b95f4e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -13632,6 +13632,39 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
 		return;
 	}
 
+	/* special case: dst_reg is in range [-1, 0] */
+	if (dst_reg->s32_min_value == -1 && dst_reg->s32_max_value == 0) {
+		/* the result is equivalent to adding 0 to src_reg */
+		var32_off = tnum_union(src_reg->var_off, tnum_const(0));
+		dst_reg->var_off = tnum_with_subreg(dst_reg->var_off, var32_off);
+		/* update signed min/max to include 0 */
+		dst_reg->s32_min_value = min_t(s32, src_reg->s32_min_value, 0);
+		dst_reg->s32_max_value = max_t(s32, src_reg->s32_max_value, 0);
+		/* since we're adding 0 to src_reg and 0 is the smallest
+		 * unsigned integer, dst_reg->u32_min_value should be 0,
+		 * and dst->u32_max_value should be src_reg->u32_max_value.
+		 */
+		dst_reg->u32_min_value = 0;
+		dst_reg->u32_max_value = src_reg->u32_max_value;
+		return;
+	}
+
+	/* special case: src_reg is in range [-1, 0] */
+	if (src_reg->s32_min_value == -1 && src_reg->s32_max_value == 0) {
+		/* the result is equivalent to adding 0 to dst_reg */
+		var32_off = tnum_union(dst_reg->var_off, tnum_const(0));
+		dst_reg->var_off = tnum_with_subreg(dst_reg->var_off, var32_off);
+		/* update signed min/max to include 0 */
+		dst_reg->s32_min_value = min_t(s32, dst_reg->s32_min_value, 0);
+		dst_reg->s32_max_value = max_t(s32, dst_reg->s32_max_value, 0);
+		/* since we're adding 0 to dst_reg and 0 is the smallest
+		 * unsigned integer, dst_reg->u32_min_value should be 0,
+		 * and dst->u32_max_value should remain unchanged.
+		 */
+		dst_reg->u32_min_value = 0;
+		return;
+	}
+
 	/* We get our minimum from the var_off, since that's inherently
 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
 	 */
@@ -13662,6 +13695,37 @@ static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
 		return;
 	}
 
+	/* special case: dst_reg is in range [-1, 0] */
+	if (dst_reg->smin_value == -1 && dst_reg->smax_value == 0) {
+		/* the result is equivalent to adding 0 to src_reg */
+		dst_reg->var_off = tnum_union(src_reg->var_off, tnum_const(0));
+		/* update signed min/max to include 0 */
+		dst_reg->smin_value = min_t(s64, src_reg->smin_value, 0);
+		dst_reg->smax_value = max_t(s64, src_reg->smax_value, 0);
+		/* since we're adding 0 to src_reg and 0 is the smallest
+		 * unsigned integer, dst_reg->umin_value should be 0,
+		 * and dst->umax_value should be src_reg->umax_value.
+		 */
+		dst_reg->umin_value = 0;
+		dst_reg->umax_value = src_reg->umax_value;
+		return;
+	}
+
+	/* special case: src_reg is in range [-1, 0] */
+	if (src_reg->smin_value == -1 && src_reg->smax_value == 0) {
+		/* the result is equivalent to adding 0 to dst_reg */
+		dst_reg->var_off = tnum_union(dst_reg->var_off, tnum_const(0));
+		/* update signed min/max to include 0 */
+		dst_reg->smin_value = min_t(s64, dst_reg->smin_value, 0);
+		dst_reg->smax_value = max_t(s64, dst_reg->smax_value, 0);
+		/* since we're adding 0 to dst_reg and 0 is the smallest
+		 * unsigned integer, dst_reg->min_value should be 0,
+		 * and dst->umax_value should remain unchanged.
+		 */
+		dst_reg->umin_value = 0;
+		return;
+	}
+
 	/* We get our minimum from the var_off, since that's inherently
 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
 	 */
-- 
2.30.2



* [PATCH bpf-next v4 17/20] selftests/bpf: Avoid load failure for token_lsm.c
  2024-07-11 11:38 [PATCH bpf-next v4 13/20] bpf, lsm: Add check for BPF LSM return value Xu Kuohai
                   ` (2 preceding siblings ...)
  2024-07-11 11:38 ` [PATCH bpf-next v4 16/20] bpf: Add a special case for bitwise AND on range [-1, 0] Xu Kuohai
@ 2024-07-11 11:38 ` Xu Kuohai
  2024-07-11 11:38 ` [PATCH bpf-next v4 18/20] selftests/bpf: Add return value checks for failed tests Xu Kuohai
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 18+ messages in thread
From: Xu Kuohai @ 2024-07-11 11:38 UTC (permalink / raw)
  To: bpf, netdev, linux-security-module, linux-kselftest,
	linux-integrity, apparmor, selinux
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Matt Bobrowski, Brendan Jackman, Paul Moore, James Morris,
	Serge E . Hallyn, Khadija Kamran, Casey Schaufler,
	Ondrej Mosnacek, Kees Cook, John Johansen, Lukas Bulwahn,
	Roberto Sassu, Shung-Hsi Yu, Edward Cree, Alexander Viro,
	Christian Brauner, Trond Myklebust, Anna Schumaker, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Stephen Smalley

From: Xu Kuohai <xukuohai@huawei.com>

The compiler optimized the two bpf progs in token_lsm.c to derive the
return value from the bool variable in the "return -1" path, causing an
unexpected rejection:

0: R1=ctx() R10=fp0
; int BPF_PROG(bpf_token_capable, struct bpf_token *token, int cap) @ bpf_lsm.c:17
0: (b7) r6 = 0                        ; R6_w=0
; if (my_pid == 0 || my_pid != (bpf_get_current_pid_tgid() >> 32)) @ bpf_lsm.c:19
1: (18) r1 = 0xffffc9000102a000       ; R1_w=map_value(map=bpf_lsm.bss,ks=4,vs=5)
3: (61) r7 = *(u32 *)(r1 +0)          ; R1_w=map_value(map=bpf_lsm.bss,ks=4,vs=5) R7_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
4: (15) if r7 == 0x0 goto pc+11       ; R7_w=scalar(smin=umin=umin32=1,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
5: (67) r7 <<= 32                     ; R7_w=scalar(smax=0x7fffffff00000000,umax=0xffffffff00000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xffffffff00000000))
6: (c7) r7 s>>= 32                    ; R7_w=scalar(smin=0xffffffff80000000,smax=0x7fffffff)
7: (85) call bpf_get_current_pid_tgid#14      ; R0=scalar()
8: (77) r0 >>= 32                     ; R0_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))
9: (5d) if r0 != r7 goto pc+6         ; R0_w=scalar(smin=smin32=0,smax=umax=umax32=0x7fffffff,var_off=(0x0; 0x7fffffff)) R7=scalar(smin=smin32=0,smax=umax=umax32=0x7fffffff,var_off=(0x0; 0x7fffffff))
; if (reject_capable) @ bpf_lsm.c:21
10: (18) r1 = 0xffffc9000102a004      ; R1_w=map_value(map=bpf_lsm.bss,ks=4,vs=5,off=4)
12: (71) r6 = *(u8 *)(r1 +0)          ; R1_w=map_value(map=bpf_lsm.bss,ks=4,vs=5,off=4) R6_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=255,var_off=(0x0; 0xff))
;  @ bpf_lsm.c:0
13: (87) r6 = -r6                     ; R6_w=scalar()
14: (67) r6 <<= 56                    ; R6_w=scalar(smax=0x7f00000000000000,umax=0xff00000000000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xff00000000000000))
15: (c7) r6 s>>= 56                   ; R6_w=scalar(smin=smin32=-128,smax=smax32=127)
; int BPF_PROG(bpf_token_capable, struct bpf_token *token, int cap) @ bpf_lsm.c:17
16: (bf) r0 = r6                      ; R0_w=scalar(id=1,smin=smin32=-128,smax=smax32=127) R6_w=scalar(id=1,smin=smin32=-128,smax=smax32=127)
17: (95) exit
At program exit the register R0 has smin=-128 smax=127 should have been in [-4095, 0]

To avoid this failure, change the variable type from bool to int.
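
For reference, this is roughly the shape of the affected prog,
reconstructed from the log above (the exact source is in token_lsm.c):

SEC("lsm/bpf_token_capable")
int BPF_PROG(token_capable, struct bpf_token *token, int cap)
{
    if (my_pid == 0 || my_pid != (bpf_get_current_pid_tgid() >> 32))
        return 0;
    if (reject_capable)
        return -1;
    return 0;
}

With reject_capable declared as bool, the compiler knows its value is 0 or
1 and folds the branch into "negate the loaded byte and sign-extend it"
(insns 12-15 above), which the verifier can only bound to [-128, 127].
With an int variable the compiler can no longer assume the value is 0 or
1, so this particular transformation no longer applies and the return
paths keep constant values the verifier can check exactly.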

Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
---
 tools/testing/selftests/bpf/progs/token_lsm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/token_lsm.c b/tools/testing/selftests/bpf/progs/token_lsm.c
index e4d59b6ba743..a6002d073b1b 100644
--- a/tools/testing/selftests/bpf/progs/token_lsm.c
+++ b/tools/testing/selftests/bpf/progs/token_lsm.c
@@ -8,8 +8,8 @@
 char _license[] SEC("license") = "GPL";
 
 int my_pid;
-bool reject_capable;
-bool reject_cmd;
+int reject_capable;
+int reject_cmd;
 
 SEC("lsm/bpf_token_capable")
 int BPF_PROG(token_capable, struct bpf_token *token, int cap)
-- 
2.30.2



* [PATCH bpf-next v4 18/20] selftests/bpf: Add return value checks for failed tests
  2024-07-11 11:38 [PATCH bpf-next v4 13/20] bpf, lsm: Add check for BPF LSM return value Xu Kuohai
                   ` (3 preceding siblings ...)
  2024-07-11 11:38 ` [PATCH bpf-next v4 17/20] selftests/bpf: Avoid load failure for token_lsm.c Xu Kuohai
@ 2024-07-11 11:38 ` Xu Kuohai
  2024-07-11 11:38 ` [PATCH bpf-next v4 19/20] selftests/bpf: Add test for lsm tail call Xu Kuohai
  2024-07-11 11:38 ` [PATCH bpf-next v4 20/20] selftests/bpf: Add verifier tests for bpf lsm Xu Kuohai
  6 siblings, 0 replies; 18+ messages in thread
From: Xu Kuohai @ 2024-07-11 11:38 UTC (permalink / raw)
  To: bpf, netdev, linux-security-module, linux-kselftest,
	linux-integrity, apparmor, selinux
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Matt Bobrowski, Brendan Jackman, Paul Moore, James Morris,
	Serge E . Hallyn, Khadija Kamran, Casey Schaufler,
	Ondrej Mosnacek, Kees Cook, John Johansen, Lukas Bulwahn,
	Roberto Sassu, Shung-Hsi Yu, Edward Cree, Alexander Viro,
	Christian Brauner, Trond Myklebust, Anna Schumaker, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Stephen Smalley

From: Xu Kuohai <xukuohai@huawei.com>

The return ranges of some bpf lsm test progs cannot be deduced accurately
by the verifier. To avoid erroneous rejections, add explicit return value
checks to these progs.
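
The set_if_not_errno_or_zero() macro added to err.h (see the diff below)
leaves the return value alone when it is already in [-MAX_ERRNO, 0] and
overwrites it with the second argument otherwise. A C equivalent of the
inline asm, for illustration only:

    if (ret < -MAX_ERRNO || ret > 0)
        ret = y;    /* y is the second macro argument, e.g. -EFAULT */

It is written as inline asm, presumably so the compiler cannot reshape the
comparison into a form whose value range the verifier tracks less
precisely.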

Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
---
 tools/testing/selftests/bpf/progs/err.h                | 10 ++++++++++
 tools/testing/selftests/bpf/progs/test_sig_in_xattr.c  |  4 ++++
 .../selftests/bpf/progs/test_verify_pkcs7_sig.c        |  8 ++++++--
 .../selftests/bpf/progs/verifier_global_subprogs.c     |  7 ++++++-
 4 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/err.h b/tools/testing/selftests/bpf/progs/err.h
index d66d283d9e59..38529779a236 100644
--- a/tools/testing/selftests/bpf/progs/err.h
+++ b/tools/testing/selftests/bpf/progs/err.h
@@ -5,6 +5,16 @@
 #define MAX_ERRNO 4095
 #define IS_ERR_VALUE(x) (unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO
 
+#define __STR(x) #x
+
+#define set_if_not_errno_or_zero(x, y)			\
+({							\
+	asm volatile ("if %0 s< -4095 goto +1\n"	\
+		      "if %0 s<= 0 goto +1\n"		\
+		      "%0 = " __STR(y) "\n"		\
+		      : "+r"(x));			\
+})
+
 static inline int IS_ERR_OR_NULL(const void *ptr)
 {
 	return !ptr || IS_ERR_VALUE((unsigned long)ptr);
diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
index 2f0eb1334d65..8ef6b39335b6 100644
--- a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
+++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
@@ -6,6 +6,7 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 #include "bpf_kfuncs.h"
+#include "err.h"
 
 char _license[] SEC("license") = "GPL";
 
@@ -79,5 +80,8 @@ int BPF_PROG(test_file_open, struct file *f)
 	ret = bpf_verify_pkcs7_signature(&digest_ptr, &sig_ptr, trusted_keyring);
 
 	bpf_key_put(trusted_keyring);
+
+	set_if_not_errno_or_zero(ret, -EFAULT);
+
 	return ret;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
index f42e9f3831a1..12034a73ee2d 100644
--- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
+++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
@@ -11,6 +11,7 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 #include "bpf_kfuncs.h"
+#include "err.h"
 
 #define MAX_DATA_SIZE (1024 * 1024)
 #define MAX_SIG_SIZE 1024
@@ -55,12 +56,12 @@ int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size)
 
 	ret = bpf_probe_read_kernel(&value, sizeof(value), &attr->value);
 	if (ret)
-		return ret;
+		goto out;
 
 	ret = bpf_copy_from_user(data_val, sizeof(struct data),
 				 (void *)(unsigned long)value);
 	if (ret)
-		return ret;
+		goto out;
 
 	if (data_val->data_len > sizeof(data_val->data))
 		return -EINVAL;
@@ -84,5 +85,8 @@ int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size)
 
 	bpf_key_put(trusted_keyring);
 
+out:
+	set_if_not_errno_or_zero(ret, -EFAULT);
+
 	return ret;
 }
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
index a9fc30ed4d73..20904cd2baa2 100644
--- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
+++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
@@ -7,6 +7,7 @@
 #include "bpf_misc.h"
 #include "xdp_metadata.h"
 #include "bpf_kfuncs.h"
+#include "err.h"
 
 /* The compiler may be able to detect the access to uninitialized
    memory in the routines performing out of bound memory accesses and
@@ -331,7 +332,11 @@ SEC("?lsm/bpf")
 __success __log_level(2)
 int BPF_PROG(arg_tag_ctx_lsm)
 {
-	return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx);
+	int ret;
+
+	ret = tracing_subprog_void(ctx) + tracing_subprog_u64(ctx);
+	set_if_not_errno_or_zero(ret, -1);
+	return ret;
 }
 
 SEC("?struct_ops/test_1")
-- 
2.30.2



* [PATCH bpf-next v4 19/20] selftests/bpf: Add test for lsm tail call
  2024-07-11 11:38 [PATCH bpf-next v4 13/20] bpf, lsm: Add check for BPF LSM return value Xu Kuohai
                   ` (4 preceding siblings ...)
  2024-07-11 11:38 ` [PATCH bpf-next v4 18/20] selftests/bpf: Add return value checks for failed tests Xu Kuohai
@ 2024-07-11 11:38 ` Xu Kuohai
  2024-07-11 11:38 ` [PATCH bpf-next v4 20/20] selftests/bpf: Add verifier tests for bpf lsm Xu Kuohai
  6 siblings, 0 replies; 18+ messages in thread
From: Xu Kuohai @ 2024-07-11 11:38 UTC (permalink / raw)
  To: bpf, netdev, linux-security-module, linux-kselftest,
	linux-integrity, apparmor, selinux
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Matt Bobrowski, Brendan Jackman, Paul Moore, James Morris,
	Serge E . Hallyn, Khadija Kamran, Casey Schaufler,
	Ondrej Mosnacek, Kees Cook, John Johansen, Lukas Bulwahn,
	Roberto Sassu, Shung-Hsi Yu, Edward Cree, Alexander Viro,
	Christian Brauner, Trond Myklebust, Anna Schumaker, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Stephen Smalley

From: Xu Kuohai <xukuohai@huawei.com>

Add a test for lsm tail calls to ensure a tail call can only be used
between bpf lsm progs attached to the same hook.

Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
---
 .../selftests/bpf/prog_tests/test_lsm.c       | 46 ++++++++++++++++++-
 .../selftests/bpf/progs/lsm_tailcall.c        | 34 ++++++++++++++
 2 files changed, 79 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/lsm_tailcall.c

diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index 16175d579bc7..2a27f3714f5c 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -12,6 +12,7 @@
 #include <stdlib.h>
 
 #include "lsm.skel.h"
+#include "lsm_tailcall.skel.h"
 
 char *CMD_ARGS[] = {"true", NULL};
 
@@ -95,7 +96,7 @@ static int test_lsm(struct lsm *skel)
 	return 0;
 }
 
-void test_test_lsm(void)
+static void test_lsm_basic(void)
 {
 	struct lsm *skel = NULL;
 	int err;
@@ -114,3 +115,46 @@ void test_test_lsm(void)
 close_prog:
 	lsm__destroy(skel);
 }
+
+static void test_lsm_tailcall(void)
+{
+	struct lsm_tailcall *skel = NULL;
+	int map_fd, prog_fd;
+	int err, key;
+
+	skel = lsm_tailcall__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "lsm_tailcall__skel_load"))
+		goto close_prog;
+
+	map_fd = bpf_map__fd(skel->maps.jmp_table);
+	if (CHECK_FAIL(map_fd < 0))
+		goto close_prog;
+
+	prog_fd = bpf_program__fd(skel->progs.lsm_file_permission_prog);
+	if (CHECK_FAIL(prog_fd < 0))
+		goto close_prog;
+
+	key = 0;
+	err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY);
+	if (CHECK_FAIL(!err))
+		goto close_prog;
+
+	prog_fd = bpf_program__fd(skel->progs.lsm_file_alloc_security_prog);
+	if (CHECK_FAIL(prog_fd < 0))
+		goto close_prog;
+
+	err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY);
+	if (CHECK_FAIL(err))
+		goto close_prog;
+
+close_prog:
+	lsm_tailcall__destroy(skel);
+}
+
+void test_test_lsm(void)
+{
+	if (test__start_subtest("lsm_basic"))
+		test_lsm_basic();
+	if (test__start_subtest("lsm_tailcall"))
+		test_lsm_tailcall();
+}
diff --git a/tools/testing/selftests/bpf/progs/lsm_tailcall.c b/tools/testing/selftests/bpf/progs/lsm_tailcall.c
new file mode 100644
index 000000000000..49c075ce2d4c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm_tailcall.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Huawei Technologies Co., Ltd */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+SEC("lsm/file_permission")
+int lsm_file_permission_prog(void *ctx)
+{
+	return 0;
+}
+
+SEC("lsm/file_alloc_security")
+int lsm_file_alloc_security_prog(void *ctx)
+{
+	return 0;
+}
+
+SEC("lsm/file_alloc_security")
+int lsm_file_alloc_security_entry(void *ctx)
+{
+	bpf_tail_call_static(ctx, &jmp_table, 0);
+	return 0;
+}
-- 
2.30.2



* [PATCH bpf-next v4 20/20] selftests/bpf: Add verifier tests for bpf lsm
  2024-07-11 11:38 [PATCH bpf-next v4 13/20] bpf, lsm: Add check for BPF LSM return value Xu Kuohai
                   ` (5 preceding siblings ...)
  2024-07-11 11:38 ` [PATCH bpf-next v4 19/20] selftests/bpf: Add test for lsm tail call Xu Kuohai
@ 2024-07-11 11:38 ` Xu Kuohai
  6 siblings, 0 replies; 18+ messages in thread
From: Xu Kuohai @ 2024-07-11 11:38 UTC (permalink / raw)
  To: bpf, netdev, linux-security-module, linux-kselftest,
	linux-integrity, apparmor, selinux
  Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Martin KaFai Lau, Eduard Zingerman, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Matt Bobrowski, Brendan Jackman, Paul Moore, James Morris,
	Serge E . Hallyn, Khadija Kamran, Casey Schaufler,
	Ondrej Mosnacek, Kees Cook, John Johansen, Lukas Bulwahn,
	Roberto Sassu, Shung-Hsi Yu, Edward Cree, Alexander Viro,
	Christian Brauner, Trond Myklebust, Anna Schumaker, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Stephen Smalley

From: Xu Kuohai <xukuohai@huawei.com>

Add verifier tests to check bpf lsm return values, output parameter
access and disabled hooks.

Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
---
 .../selftests/bpf/prog_tests/verifier.c       |   2 +
 .../selftests/bpf/progs/verifier_lsm.c        | 274 ++++++++++++++++++
 2 files changed, 276 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_lsm.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index 9dc3687bc406..ff1c7da1d06e 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -88,6 +88,7 @@
 #include "verifier_xdp.skel.h"
 #include "verifier_xdp_direct_packet_access.skel.h"
 #include "verifier_bits_iter.skel.h"
+#include "verifier_lsm.skel.h"
 
 #define MAX_ENTRIES 11
 
@@ -206,6 +207,7 @@ void test_verifier_xadd(void)                 { RUN(verifier_xadd); }
 void test_verifier_xdp(void)                  { RUN(verifier_xdp); }
 void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); }
 void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); }
+void test_verifier_lsm(void)                  { RUN(verifier_lsm); }
 
 static int init_test_val_map(struct bpf_object *obj, char *map_name)
 {
diff --git a/tools/testing/selftests/bpf/progs/verifier_lsm.c b/tools/testing/selftests/bpf/progs/verifier_lsm.c
new file mode 100644
index 000000000000..fcc74efe46c6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_lsm.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("lsm/file_alloc_security")
+__description("lsm bpf prog with -4095~0 retval. test 1")
+__success
+__naked int errno_zero_retval_test1(void *ctx)
+{
+	asm volatile (
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/file_alloc_security")
+__description("lsm bpf prog with -4095~0 retval. test 2")
+__success
+__naked int errno_zero_retval_test2(void *ctx)
+{
+	asm volatile (
+	"r0 = -4095;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/file_alloc_security")
+__description("lsm bpf prog with -4095~0 retval. test 3")
+__success
+__naked int errno_zero_retval_test3(void *ctx)
+{
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"r0 <<= 63;"
+	"r0 s>>= 63;"
+	"r0 &= -13;"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+SEC("lsm/file_mprotect")
+__description("lsm bpf prog with -4095~0 retval. test 4")
+__failure __msg("R0 has smin=-4096 smax=-4096 should have been in [-4095, 0]")
+__naked int errno_zero_retval_test4(void *ctx)
+{
+	asm volatile (
+	"r0 = -4096;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/file_mprotect")
+__description("lsm bpf prog with -4095~0 retval. test 5")
+__failure __msg("R0 has smin=4096 smax=4096 should have been in [-4095, 0]")
+__naked int errno_zero_retval_test5(void *ctx)
+{
+	asm volatile (
+	"r0 = 4096;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/vm_enough_memory")
+__description("lsm bpf prog with -4095~0 retval. test 6")
+__failure __msg("R0 has smin=1 smax=1 should have been in [-4095, 0]")
+__naked int errno_zero_retval_test6(void *ctx)
+{
+	asm volatile (
+	"r0 = 1;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/audit_rule_known")
+__description("lsm bpf prog with bool retval. test 1")
+__success
+__naked int bool_retval_test1(void *ctx)
+{
+	asm volatile (
+	"r0 = 1;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/audit_rule_known")
+__description("lsm bpf prog with bool retval. test 2")
+__success
+__success
+__naked int bool_retval_test2(void *ctx)
+{
+	asm volatile (
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/audit_rule_known")
+__description("lsm bpf prog with bool retval. test 3")
+__failure __msg("R0 has smin=-1 smax=-1 should have been in [0, 1]")
+__naked int bool_retval_test3(void *ctx)
+{
+	asm volatile (
+	"r0 = -1;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/audit_rule_known")
+__description("lsm bpf prog with bool retval. test 4")
+__failure __msg("R0 has smin=2 smax=2 should have been in [0, 1]")
+__naked int bool_retval_test4(void *ctx)
+{
+	asm volatile (
+	"r0 = 2;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/file_free_security")
+__success
+__description("lsm bpf prog with void retval. test 1")
+__naked int void_retval_test1(void *ctx)
+{
+	asm volatile (
+	"r0 = -4096;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/file_free_security")
+__success
+__description("lsm bpf prog with void retval. test 2")
+__naked int void_retval_test2(void *ctx)
+{
+	asm volatile (
+	"r0 = 4096;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/audit_rule_match")
+__description("lsm bpf prog read write valid output parameter success")
+__success
+__naked int outparam_valid_test(void *ctx)
+{
+	asm volatile (
+	"r1 = *(u64 *)(r1 + 0x20);"
+	"r2 = *(u8 *)(r1 + 0x0);"
+	"r2 &= 0x1;"
+	"*(u8 *)(r1 + 0x0) = r2;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/audit_rule_match")
+__description("lsm bpf prog read write output parameter, invalid read offset")
+__failure __msg("invalid read offset: 1 (expected 0, type=_Bool)")
+__naked int outparam_invalid_read_offset(void *ctx)
+{
+	asm volatile (
+	"r1 = *(u64 *)(r1 + 0x20);"
+	"r2 = *(u8 *)(r1 + 0x1);"
+	"r2 &= 0x1;"
+	"*(u8 *)(r1 + 0x0) = r2;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/audit_rule_match")
+__description("lsm bpf prog read write invalid output parameter, invalid read size")
+__failure __msg("invalid read size: 2 (expected 1, type=_Bool)")
+__naked int outparam_invalid_read_size(void *ctx)
+{
+	asm volatile (
+	"r1 = *(u64 *)(r1 + 0x20);"
+	"r2 = *(u16 *)(r1 + 0x0);"
+	"r2 &= 0x1;"
+	"*(u8 *)(r1 + 0x0) = r2;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/audit_rule_match")
+__description("lsm bpf prog read write invalid output parameter, invalid write offset")
+__failure __msg("invalid write offset: 1 (expected 0, type=_Bool)")
+__naked int outparam_invalid_write_offset(void *ctx)
+{
+	asm volatile (
+	"r1 = *(u64 *)(r1 + 0x20);"
+	"r2 = *(u8 *)(r1 + 0x0);"
+	"r2 &= 0x1;"
+	"*(u8 *)(r1 + 0x1) = r2;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/audit_rule_match")
+__description("lsm bpf prog read write invalid output parameter, invalid write size")
+__failure __msg("invalid write size: 2 (expected 1, type=_Bool)")
+__naked int outparam_invalid_write_size(void *ctx)
+{
+	asm volatile (
+	"r1 = *(u64 *)(r1 + 0x20);"
+	"r2 = *(u8 *)(r1 + 0x0);"
+	"r2 &= 0x1;"
+	"*(u16 *)(r1 + 0x0) = r2;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* hook prototype:
+ * int bpf_lsm_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid)
+ *
+ * although the last param is a pointer to u32, it is not the output param for
+ * return value.
+ */
+SEC("lsm/secctx_to_secid")
+__description("lsm bpf prog read write invalid output parameter, not output param hook")
+__failure __msg("invalid mem access 'scalar'")
+__naked int outparam_invalid_hook(void *ctx)
+{
+	asm volatile (
+	"r1 = *(u64 *)(r1 + 0x10);"
+	"r2 = *(u32 *)(r1 + 0x0);"
+	"r2 &= 0x1;"
+	"*(u32 *)(r1 + 0x0) = r2;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/getprocattr")
+__description("lsm disabled hook: getprocattr")
+__failure __msg("points to disabled hook")
+__naked int disabled_hook_test1(void *ctx)
+{
+	asm volatile (
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/setprocattr")
+__description("lsm disabled hook: setprocattr")
+__failure __msg("points to disabled hook")
+__naked int disabled_hook_test2(void *ctx)
+{
+	asm volatile (
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("lsm/ismaclabel")
+__description("lsm disabled hook: ismaclabel")
+__failure __msg("points to disabled hook")
+__naked int disabled_hook_test3(void *ctx)
+{
+	asm volatile (
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
-- 
2.30.2



* Re: [PATCH bpf-next v4 16/20] bpf: Add a special case for bitwise AND on range [-1, 0]
  2024-07-11 11:38 ` [PATCH bpf-next v4 16/20] bpf: Add a special case for bitwise AND on range [-1, 0] Xu Kuohai
@ 2024-07-15 15:29   ` Shung-Hsi Yu
  2024-07-16  7:05     ` Xu Kuohai
  0 siblings, 1 reply; 18+ messages in thread
From: Shung-Hsi Yu @ 2024-07-15 15:29 UTC (permalink / raw)
  To: Xu Kuohai, Eduard Zingerman
  Cc: bpf, netdev, Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa, Roberto Sassu,
	Edward Cree, Eric Dumazet, Jakub Kicinski,
	Harishankar Vishwanathan, Santosh Nagarakatte, Srinivas Narayana,
	Matan Shachnai

Cc Harishankar Vishwanathan, Prof. Srinivas Narayana, Prof. Santosh
Nagarakatte, and Matan Shachnai, who have recently worked on
scalar*_min_max_and(); also dropping LSM/FS related mails from Cc since
the list is a bit long and I'm not sure whether the mailing list will
reject messages with too many addresses in Cc.

On Thu, Jul 11, 2024 at 07:38:24PM GMT, Xu Kuohai wrote:
> With lsm return value check, the no-alu32 version test_libbpf_get_fd_by_id_opts
> is rejected by the verifier, and the log says:
> 
> 0: R1=ctx() R10=fp0
> ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
> 0: (b7) r0 = 0                        ; R0_w=0
> 1: (79) r2 = *(u64 *)(r1 +0)
> func 'bpf_lsm_bpf_map' arg0 has btf_id 916 type STRUCT 'bpf_map'
> 2: R1=ctx() R2_w=trusted_ptr_bpf_map()
> ; if (map != (struct bpf_map *)&data_input) @ test_libbpf_get_fd_by_id_opts.c:29
> 2: (18) r3 = 0xffff9742c0951a00       ; R3_w=map_ptr(map=data_input,ks=4,vs=4)
> 4: (5d) if r2 != r3 goto pc+4         ; R2_w=trusted_ptr_bpf_map() R3_w=map_ptr(map=data_input,ks=4,vs=4)
> ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
> 5: (79) r0 = *(u64 *)(r1 +8)          ; R0_w=scalar() R1=ctx()
> ; if (fmode & FMODE_WRITE) @ test_libbpf_get_fd_by_id_opts.c:32
> 6: (67) r0 <<= 62                     ; R0_w=scalar(smax=0x4000000000000000,umax=0xc000000000000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xc000000000000000))
> 7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
> ;  @ test_libbpf_get_fd_by_id_opts.c:0
> 8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
> ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
> 9: (95) exit
> 
> And here is the C code of the prog.
> 
> SEC("lsm/bpf_map")
> int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode)
> {
>     if (map != (struct bpf_map *)&data_input)
> 	    return 0;
> 
>     if (fmode & FMODE_WRITE)
> 	    return -EACCES;
> 
>     return 0;
> }
> 
> It is clear that the prog can only return either 0 or -EACCESS, and both
> values are legal.
> 
> So why is it rejected by the verifier?
> 
> The verifier log shows that the second if and return value setting
> statements in the prog is optimized to bitwise operations "r0 s>>= 63"
> and "r0 &= -13". The verifier correctly deduces that the value of
> r0 is in the range [-1, 0] after verifing instruction "r0 s>>= 63".
> But when the verifier proceeds to verify instruction "r0 &= -13", it
> fails to deduce the correct value range of r0.
> 
> 7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
> 8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
> 
> So why the verifier fails to deduce the result of 'r0 &= -13'?
> 
> The verifier uses tnum to track values, and the two ranges "[-1, 0]" and
> "[0, -1ULL]" are encoded to the same tnum. When verifing instruction
> "r0 &= -13", the verifier erroneously deduces the result from
> "[0, -1ULL] AND -13", which is out of the expected return range
> [-4095, 0].
> 
> As explained by Eduard in [0], the clang transformation that generates this
> pattern is located in DAGCombiner::SimplifySelectCC() method (see [1]).
...
> As suggested by Eduard and Andrii, this patch makes a special case
> for source or destination register of '&=' operation being in
> range [-1, 0].
...

I've been wondering whether a more general approach is possible ever since
I saw the discussion back in April. I think I've finally got something.

The problem we face here is that the tightest bound for the [-1, 0] case
was tracked with signed ranges, yet the BPF verifier loses knowledge of
them all too quickly in scalar*_min_max_and(); knowledge of the previous
signed ranges was not used at all to derive the outcome of the signed
ranges after BPF_AND.

	static void scalar_min_max_and(...) {
		...
		if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
			dst_reg->smin_value = dst_reg->umin_value;
			dst_reg->smax_value = dst_reg->umax_value;
		} else {
			dst_reg->smin_value = S64_MIN;
			dst_reg->smax_value = S64_MAX;
		}
		...
	}

So it looks like it's time to be nobody[1] and try to teach the BPF
verifier how to track signed ranges when ANDing two (possibly) negative
numbers. Luckily bitwise AND is comparatively easier to handle than other
bitwise operations: non-negative range & non-negative range is always
non-negative, non-negative range & negative range is still always
non-negative, and negative range & negative range is always negative.

smax_value is straightforward, we can just do

	max(dst_reg->smax_value, src_reg->smax_value)

which works across all sign combinations. Technically for non-negative &
non-negative we can use min() instead of max(), but the non-negative &
non-negative case should be handled pretty well by the unsigned ranges
already; it seems simpler to let such knowledge flow from unsigned
ranges to signed ranges during reg_bounds_sync(). Plus we are not wrong
for non-negative & non-negative by using max(), just imprecise, so no
correctness/soundness issue here.
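
A quick 8-bit sanity check of the max() bound, using the extremes of two
negative ranges:

	[-8, -3] & [-6, -2]:  max(smax) = max(-3, -2) = -2
	-8 & -6 = -8,  -8 & -2 = -8,  -3 & -6 = -8,  -3 & -2 = -4

and indeed every result is <= -2.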

smin_value is the trickier one, but doable with

	masked_negative(min(dst_reg->smin_value, src_reg->smin_value))

where masked_negative(v) basically just clears all bits after the most
significant unset bit, effectively rounding a negative value down to a
negative power-of-2 value, and returning 0 for non-negative values. E.g.
for some 8-bit negative value

	masked_negative(0b11101001) == 0b11100000

This can be done with a tweaked version of "Round up to the next highest
power of 2"[2], 

	/* Invert the bits so the first unset bit can be propagated with |= */
	v = ~v;
	/* Now propagate the first (previously unset, now set) bit to the
	 * trailing positions */
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	...
	v |= v >> 32; /* Assuming 64-bit */
	/* Propagation done, now invert again */
	v = ~v;

Again, we technically can do better if we take the sign bit into account,
but deriving smin_value this way should still be correct/sound across
different sign combinations, and overall should help us derive [-16, 0]
from "[-1, 0] AND -13", thus preventing the BPF verifier from rejecting
the program.
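
For reference, a quick self-contained userspace sketch of the above
(illustration only, not the proposed kernel change; it works on the u64
bit pattern to sidestep the implementation-defined right shift of negative
signed values):

	#include <stdint.h>
	#include <stdio.h>

	static int64_t masked_negative(int64_t s)
	{
		uint64_t v = ~(uint64_t)s;	/* first unset bit becomes set */

		v |= v >> 1;			/* propagate it downwards */
		v |= v >> 2;
		v |= v >> 4;
		v |= v >> 8;
		v |= v >> 16;
		v |= v >> 32;

		return (int64_t)~v;		/* invert back */
	}

	int main(void)
	{
		/* smin estimate for "[-1, 0] AND -13" is
		 * masked_negative(min(-1, -13)) == masked_negative(-13)
		 */
		printf("%lld\n", (long long)masked_negative(-13));	/* -16 */
		printf("%lld\n", (long long)masked_negative(-1));	/*  -1 */
		printf("%lld\n", (long long)masked_negative(42));	/*   0 */
		return 0;
	}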

---

Alternatively we can employ a range-splitting trick (I think I saw this in
[3]) that allows us to take advantage of the existing tnum_and() by
splitting the signed ranges into two if the range crosses the sign boundary
(i.e. contains both non-negative and negative values): one range will be
[smin, U64_MAX], the other [0, smax]. This way we get around tnum's
weakness of representing [-1, 0] as [0, U64_MAX].

	if (src_reg->smin_value < 0 && src_reg->smax_value >= 0) {
		src_lower = tnum_range(src_reg->smin_value, U64_MAX);
		src_higher = tnum_range(0, src_reg->smax_value);
	} else {
		src_lower = tnum_range(src_reg->smin_value, src_reg->smax_value);
		src_higher = tnum_range(src_reg->smin_value, src_reg->smax_value);
	}

	if (dst_reg->smin_value < 0 && dst_reg->smax_value >= 0) {
		dst_lower = tnum_range(dst_reg->smin_value, U64_MAX);
		dst_higher = tnum_range(0, dst_reg->smax_value);
	} else {
		dst_lower = tnum_range(dst_reg->smin_value, dst_reg->smax_value);
		dst_higher = tnum_range(dst_reg->smin_value, dst_reg->smax_value);
	}

	lower = tnum_and(src_lower, dst_lower);
	higher = tnum_and(src_higher, dst_higher);
	dst->smin_value = lower.value;
	dst->smax_value = higher.value | higher.mask;
	
---

Personally I like the first method better as it is simpler yet still
does the job well enough. I'll work on that in the next few days and see
if it actually works.


1: https://github.com/torvalds/linux/blob/dac045fc9fa6/kernel/bpf/verifier.c#L13338
2: https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
3: https://dl.acm.org/doi/10.1145/2651360

...

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH bpf-next v4 16/20] bpf: Add a special case for bitwise AND on range [-1, 0]
  2024-07-15 15:29   ` Shung-Hsi Yu
@ 2024-07-16  7:05     ` Xu Kuohai
  2024-07-16 14:52       ` [RFC bpf-next] bpf, verifier: improve signed ranges inference for BPF_AND Shung-Hsi Yu
  2024-07-16 15:19       ` [PATCH bpf-next v4 16/20] bpf: Add a special case for bitwise AND on range [-1, 0] Shung-Hsi Yu
  0 siblings, 2 replies; 18+ messages in thread
From: Xu Kuohai @ 2024-07-16  7:05 UTC (permalink / raw)
  To: Shung-Hsi Yu, Eduard Zingerman
  Cc: bpf, netdev, Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa, Roberto Sassu,
	Edward Cree, Eric Dumazet, Jakub Kicinski,
	Harishankar Vishwanathan, Santosh Nagarakatte, Srinivas Narayana,
	Matan Shachnai

On 7/15/2024 11:29 PM, Shung-Hsi Yu wrote:
> Cc Harishankar Vishwanathan, Prof. Srinivas Narayana and Prof. Santosh
> Nagarakatte, and Matan Shachnai, whom have recently work on
> scalar*_min_max_and(); also dropping LSM/FS related mails from Cc since
> it's a bit long and I'm not sure whether the mailing list will reject
> due to too many email in Cc.
> 
> On Thu, Jul 11, 2024 at 07:38:24PM GMT, Xu Kuohai wrote:
>> With lsm return value check, the no-alu32 version test_libbpf_get_fd_by_id_opts
>> is rejected by the verifier, and the log says:
>>
>> 0: R1=ctx() R10=fp0
>> ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
>> 0: (b7) r0 = 0                        ; R0_w=0
>> 1: (79) r2 = *(u64 *)(r1 +0)
>> func 'bpf_lsm_bpf_map' arg0 has btf_id 916 type STRUCT 'bpf_map'
>> 2: R1=ctx() R2_w=trusted_ptr_bpf_map()
>> ; if (map != (struct bpf_map *)&data_input) @ test_libbpf_get_fd_by_id_opts.c:29
>> 2: (18) r3 = 0xffff9742c0951a00       ; R3_w=map_ptr(map=data_input,ks=4,vs=4)
>> 4: (5d) if r2 != r3 goto pc+4         ; R2_w=trusted_ptr_bpf_map() R3_w=map_ptr(map=data_input,ks=4,vs=4)
>> ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
>> 5: (79) r0 = *(u64 *)(r1 +8)          ; R0_w=scalar() R1=ctx()
>> ; if (fmode & FMODE_WRITE) @ test_libbpf_get_fd_by_id_opts.c:32
>> 6: (67) r0 <<= 62                     ; R0_w=scalar(smax=0x4000000000000000,umax=0xc000000000000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xc000000000000000))
>> 7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
>> ;  @ test_libbpf_get_fd_by_id_opts.c:0
>> 8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
>> ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
>> 9: (95) exit
>>
>> And here is the C code of the prog.
>>
>> SEC("lsm/bpf_map")
>> int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode)
>> {
>>      if (map != (struct bpf_map *)&data_input)
>> 	    return 0;
>>
>>      if (fmode & FMODE_WRITE)
>> 	    return -EACCES;
>>
>>      return 0;
>> }
>>
>> It is clear that the prog can only return either 0 or -EACCESS, and both
>> values are legal.
>>
>> So why is it rejected by the verifier?
>>
>> The verifier log shows that the second if and return value setting
>> statements in the prog is optimized to bitwise operations "r0 s>>= 63"
>> and "r0 &= -13". The verifier correctly deduces that the value of
>> r0 is in the range [-1, 0] after verifing instruction "r0 s>>= 63".
>> But when the verifier proceeds to verify instruction "r0 &= -13", it
>> fails to deduce the correct value range of r0.
>>
>> 7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
>> 8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
>>
>> So why the verifier fails to deduce the result of 'r0 &= -13'?
>>
>> The verifier uses tnum to track values, and the two ranges "[-1, 0]" and
>> "[0, -1ULL]" are encoded to the same tnum. When verifing instruction
>> "r0 &= -13", the verifier erroneously deduces the result from
>> "[0, -1ULL] AND -13", which is out of the expected return range
>> [-4095, 0].
>>
>> As explained by Eduard in [0], the clang transformation that generates this
>> pattern is located in DAGCombiner::SimplifySelectCC() method (see [1]).
> ...
>> As suggested by Eduard and Andrii, this patch makes a special case
>> for source or destination register of '&=' operation being in
>> range [-1, 0].
> ...
> 
> Been wonder whether it possible for a more general approach ever since I
> saw the discussion back in April. I think I've finally got something.
> 
> The problem we face here is that the tightest bound for the [-1, 0] case
> was tracked with signed ranges, yet the BPF verifier looses knowledge of
> them all too quickly in scalar*_min_max_and(); knowledge of previous
> signed ranges were not used at all to derive the outcome of signed
> ranges after BPF_AND.
> 
> 	static void scalar_min_max_and(...) {
> 		...
> 		if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
> 			dst_reg->smin_value = dst_reg->umin_value;
> 			dst_reg->smax_value = dst_reg->umax_value;
> 		} else {
> 			dst_reg->smin_value = S64_MIN;
> 			dst_reg->smax_value = S64_MAX;
> 		}
> 		...
> 	}
>

This is indeed the root cause.

> So looks like its time to be nobody[1] and try to teach BPF verifier how
> track signed ranges when ANDing two (possibly) negative numbers. Luckily
> bitwise AND is comparatively easier to do than other bitwise operations:
> non-negative range & non-negative range is always non-negative,
> non-negative range & negative range is still always non-negative, and
> negative range & negative range is always negative.
>

Right, only bitwise ANDing two negatives yields a negative result.

> smax_value is straight forwards, we can just do
> 
> 	max(dst_reg->smax_value, src_reg->smax_value)
> 
> which works across all sign combinations. Technically for non-negative &
> non-negative we can use min() instead of max(), but the non-negative &
> non-negative case should be handled pretty well by the unsigned ranges
> already; it seems simpler to let such knowledge flows from unsigned
> ranges to signed ranges during reg_bounds_sync(). Plus we are not wrong
> for non-negative & non-negative by using max(), just imprecise, so no
> correctness/soundness issue here.
>

I think this is correct, since in two's complement more '1' bits means a
larger value, regardless of sign, and bitwise AND never generates more '1'
bits.

> smin_value is the tricker one, but doable with
> 
> 	masked_negative(min(dst_reg->smin_value, src_reg->smin_value))
> 
> where masked_negative(v) basically just clear all bits after the most
> significant unset bit, effectively rounding a negative value down to a
> negative power-of-2 value, and returning 0 for non-negative values. E.g.
> for some 8-bit, negative value
> 
> 	masked_negative(0b11101001) == 0b11100000
>

Ah, it's really tricky. It seems to be the longest common run of high '1'
bits in both operands. These '1' bits remain unchanged by the bitwise AND
operation, so the run must also be present in the result, and with all
lower bits cleared it gives the minimum possible value.
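
For example, with 8-bit values:

	0b11101001 & 0b11010110 == 0b11000000

the common leading "11" run survives, while everything below it may be
cleared, so the result can never be smaller than 0b11000000.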

> This can be done with a tweaked version of "Round up to the next highest
> power of 2"[2],
> 
> 	/* Invert the bits so the first unset bit can be propagated with |= */
> 	v = ~v;
> 	/* Now propagate the first (previously unset, now set) bit to the
> 	 * trailing positions */
> 	v |= v >> 1;
> 	v |= v >> 2;
> 	v |= v >> 4;
> 	...
> 	v |= v >> 32; /* Assuming 64-bit */
> 	/* Propagation done, now invert again */
> 	v = ~v;
>
> Again, we technically can do better if we take sign bit into account,
> but deriving smin_value this way should still be correct/sound across
> different sign combinations, and overall should help us derived [-16, 0]
> from "[-1, 0] AND -13", thus preventing BPF verifier from rejecting the
> program.
>
> ---
> 
> Alternatively we can employ a range-splitting trick (think I saw this in
> [3]) that allow us to take advantage of existing tnum_and() by splitting
> the signed ranges into two if the range crosses the sign boundary (i.e.
> contains both non-negative and negative values), one range will be
> [smin, U64_MAX], the other will be [0, smax]. This way we get around
> tnum's weakness of representing [-1, 0] as [0, U64_MAX].
> 
> 	if (src_reg->smin_value < 0 && src_reg->smax_value >= 0) {
> 		src_lower = tnum_range(src_reg->smin_value, U64_MAX);
> 		src_higher = tnum_range(0, src_reg->smax_value);
> 	} else {
> 		src_lower = tnum_range(src_reg->smin_value, src_reg->smax_value);
> 		src_higher = tnum_range(src_reg->smin_value, src_reg->smax_value);
> 	}
> 
> 	if (dst_reg->smin_value < 0 && dst_reg->smax_value >= 0) {
> 		dst_lower = tnum_range(dst_reg->smin_value, U64_MAX);
> 		dst_higher = tnum_range(0, dst_reg->smax_value);
> 	} else {
> 		dst_lower = tnum_range(dst_reg->smin_value, dst_reg->smax_value);
> 		dst_higher = tnum_range(dst_reg->smin_value, dst_reg->smax_value);
> 	}
> 
> 	lower = tnum_and(src_lower, dst_lower);
> 	higher = tnum_and(src_higher, dst_higher);
> 	dst->smin_value = lower.value;
> 	dst->smax_value = higher.value | higher.mask;
>

This looks even more tricky...

> ---
> 
> Personally I like the first method better as it is simpler yet still
> does the job well enough. I'll work on that in the next few days and see
> if it actually works.
> 

This really sounds great. Thank you for the excellent work!

> 
> 1: https://github.com/torvalds/linux/blob/dac045fc9fa6/kernel/bpf/verifier.c#L13338
> 2: https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
> 3: https://dl.acm.org/doi/10.1145/2651360
> 
> ...


^ permalink raw reply	[flat|nested] 18+ messages in thread

* [RFC bpf-next] bpf, verifier: improve signed ranges inference for BPF_AND
  2024-07-16  7:05     ` Xu Kuohai
@ 2024-07-16 14:52       ` Shung-Hsi Yu
  2024-07-16 15:10         ` Shung-Hsi Yu
                           ` (2 more replies)
  2024-07-16 15:19       ` [PATCH bpf-next v4 16/20] bpf: Add a special case for bitwise AND on range [-1, 0] Shung-Hsi Yu
  1 sibling, 3 replies; 18+ messages in thread
From: Shung-Hsi Yu @ 2024-07-16 14:52 UTC (permalink / raw)
  To: Xu Kuohai, Eduard Zingerman
  Cc: bpf, netdev, Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa, Roberto Sassu,
	Edward Cree, Eric Dumazet, Jakub Kicinski,
	Harishankar Vishwanathan, Santosh Nagarakatte, Srinivas Narayana,
	Matan Shachnai

This commit teaches the BPF verifier how to infer signed ranges directly
from the signed ranges of the operands, to prevent verifier rejection,
which is needed for the following BPF program's no-alu32 version, as shown
by Xu Kuohai:

    SEC("lsm/bpf_map")
    int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode)
    {
         if (map != (struct bpf_map *)&data_input)
    	    return 0;

    	 if (fmode & FMODE_WRITE)
    	    return -EACCES;

         return 0;
    }

The relevant verifier log upon rejection is:

    ...
    5: (79) r0 = *(u64 *)(r1 +8)          ; R0_w=scalar() R1=ctx()
    ; if (fmode & FMODE_WRITE) @ test_libbpf_get_fd_by_id_opts.c:32
    6: (67) r0 <<= 62                     ; R0_w=scalar(smax=0x4000000000000000,umax=0xc000000000000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xc000000000000000))
    7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
    ;  @ test_libbpf_get_fd_by_id_opts.c:0
    8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
    9: (95) exit

This sequence of instructions comes from Clang's transformation in the
DAGCombiner::SimplifySelectCC() method, which combines the "fmode &
FMODE_WRITE" check with the return statement without needing BPF_JMP at
all. See Eduard's comment for more detail on this transformation[0].

While the verifier can correctly infer that the value of r0 is in a
tight [-1, 0] range after instruction "r0 s>>= 63", it was not able to
come up with a tight range for "r0 &= -13" (which would be [-13, 0]),
and instead inferred a very loose range:

    r0 s>>= 63; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
    r0 &= -13 ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))

The reason is that scalar*_min_max_and() mainly relies on tnum for
inferring the value in the register after BPF_AND; however, [-1, 0] cannot
be tracked precisely with tnum, and effectively turns into [0, -1] (i.e.
tnum_unknown). So upon BPF_AND the resulting tnum is equivalent to

    dst_reg->var_off = tnum_and(tnum_unknown, tnum_const(-13))

And from there the BPF verifier was only able to infer smin=S64_MIN,
smax=0x7ffffffffffffff3, which is outside of the expected [-4095, 0]
range for return values, and thus the program was rejected.

To allow verification of such an instruction pattern, update
scalar*_min_max_and() to infer signed ranges directly from the signed
ranges of the operands. With BPF_AND, the resulting value only gains more
unset '0' bits, thus it only moves towards 0x0000000000000000. The
difficulty lies in how to deal with signs. While a non-negative (positive
or zero) value simply grows smaller, a negative value can grow smaller,
but it can also have its sign bit cleared and become a larger,
non-negative value.

To better address this situation we split the signed ranges into negative
and non-negative cases, ignoring the mixed-sign cases for now, and first
consider only how to calculate smax_value.

Negative range & negative range preserves the sign bit, so we know the
result is still a negative value; it only moves towards S64_MIN but never
wraps around, so a safe bet is to use the value in the two ranges that is
closest to 0, i.e. "max(dst_reg->smax_value, src_reg->smax_value)". For
negative range & non-negative range the sign bit is always cleared, thus
we know the result is non-negative and only moves towards 0, so a safe
bet is to use the smax_value of the non-negative range. Last but not
least, non-negative range & non-negative range still yields a non-negative
value that only moves towards 0; however, same as in the unsigned range
case, the maximum is actually capped by the lesser of the two, thus
min(dst_reg->smax_value, src_reg->smax_value).

Listing out the above reasoning as a table (dst_reg abbreviated as dst,
src_reg abbreviated as src, smax_value abbreviated as smax) we get:

                        |                         src_reg
       smax = ?         +---------------------------+---------------------------
                        |        negative           |       non-negative
---------+--------------+---------------------------+---------------------------
         | negative     | max(dst->smax, src->smax) |         src->smax
dst_reg  +--------------+---------------------------+---------------------------
         | non-negative |         dst->smax         | min(dst->smax, src->smax)

However this is quite complicated; luckily it can be simplified given
the following observations:

    max(dst_reg->smax_value, src_reg->smax_value) >= src_reg->smax_value
    max(dst_reg->smax_value, src_reg->smax_value) >= dst_reg->smax_value
    max(dst_reg->smax_value, src_reg->smax_value) >= min(dst_reg->smax_value, src_reg->smax_value)

So we can substitute all the cells in the table above with max(...)
and arrive at:

                        |                         src_reg
      smax' = ?         +---------------------------+---------------------------
                        |        negative           |       non-negative
---------+--------------+---------------------------+---------------------------
         | negative     | max(dst->smax, src->smax) | max(dst->smax, src->smax)
dst_reg  +--------------+---------------------------+---------------------------
         | non-negative | max(dst->smax, src->smax) | max(dst->smax, src->smax)

Meaning that simply using

  max(dst_reg->smax_value, src_reg->smax_value)

to calculate the resulting smax_value would work across all sign combinations.


For smin_value, we know that non-negative range & non-negative range and
negative range & non-negative range both result in a non-negative value,
so an easy guess is to use the minimum non-negative value, i.e. 0.

                        |                         src_reg
       smin = ?         +----------------------------+---------------------------
                        |          negative          |       non-negative
---------+--------------+----------------------------+---------------------------
         | negative     |             ?              |             0
dst_reg  +--------------+----------------------------+---------------------------
         | non-negative |             0              |             0

This leaves the negative range & negative range case to be considered. We
know that negative range & negative range always yields a negative value,
so a preliminary guess would be S64_MIN. However, that guess is too
imprecise to help with the "r0 <<= 62; r0 s>>= 63; r0 &= -13" pattern
we're trying to deal with here.

This can be further improved with the observation that for negative range
& negative range, the smallest possible value must be one that keeps the
longest _common_ most-significant run of set '1' bits, thus we can use
min(dst_reg->smin_value, src_reg->smin_value) as the starting point, as the
smaller value is the one with the shorter most-significant run of set '1'
bits. But that alone is not enough, as we do not know whether the rest of
the bits will be set, so the safest guess is one that clears all bits
after that most-significant run of set '1' bits, something akin to
bit_floor(), but rounding towards a negative power-of-2 instead.

    negative_bit_floor(0xffff000000000003) == 0xffff000000000000
    negative_bit_floor(0xf0ff0000ffff0000) == 0xf000000000000000
    negative_bit_floor(0xfffffb0000000000) == 0xfffff80000000000

With negative range & negative range solved, we now have:

                        |                         src_reg
       smin = ?         +----------------------------+---------------------------
                        |        negative            |       non-negative
---------+--------------+----------------------------+---------------------------
         |   negative   |negative_bit_floor(         |             0
         |              |  min(dst->smin, src->smin))|
dst_reg  +--------------+----------------------------+---------------------------
         | non-negative |           0                |             0

This can be further simplified: when both dst_reg and src_reg have a
negative range, min(dst->smin, src->smin) is negative and the formula
below matches the table entry above; in every other case the table entry
is 0, and negative_bit_floor() never returns a positive value, so the
formula is still a sound (if conservative) lower bound. Which means using

    negative_bit_floor(min(dst_reg->smin_value, src_reg->smin_value))

to calculate the resulting smin_value would work across all sign
combinations.

Together these allow us to infer the signed range of the result of a
BPF_AND operation using the signed ranges of its operands.
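
Applied to the motivating "[-1, 0] & -13" case this gives

    smax' = max(0, -13)                      = 0
    smin' = negative_bit_floor(min(-1, -13))
          = negative_bit_floor(-13)          = -16

i.e. the result is now known to lie within [-16, 0], well inside the
[-4095, 0] range expected for LSM return values.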

[0] https://lore.kernel.org/bpf/e62e2971301ca7f2e9eb74fc500c520285cad8f5.camel@gmail.com/

Link: https://lore.kernel.org/bpf/phcqmyzeqrsfzy7sb4rwpluc37hxyz7rcajk2bqw6cjk2x7rt5@m2hl6enudv7d/
Cc: Eduard Zingerman <eddyz87@gmail.com>
Signed-off-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
---
 kernel/bpf/verifier.c | 62 +++++++++++++++++++++++++++++--------------
 1 file changed, 42 insertions(+), 20 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8da132a1ef28..6d4cdf30cd76 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -13466,6 +13466,39 @@ static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
 	}
 }
 
+/* Clears all trailing bits after the most significant unset bit.
+ *
+ * Used for estimating the minimum possible value after BPF_AND. This
+ * effectively rounds a negative value down to a negative power-of-2 value
+ * (except for -1, which just return -1) and returning 0 for non-negative
+ * values. E.g. masked32_negative(0xff0ff0ff) == 0xff000000.
+ */
+static inline s32 negative32_bit_floor(s32 v)
+{
+	/* XXX: per C standard section 6.5.7 right shift of signed negative
+	 * value is implementation-defined. Should unsigned type be used here
+	 * instead?
+	 */
+	v &= v >> 1;
+	v &= v >> 2;
+	v &= v >> 4;
+	v &= v >> 8;
+	v &= v >> 16;
+	return v;
+}
+
+/* Same as negative32_bit_floor() above, but for 64-bit signed value */
+static inline s64 negative_bit_floor(s64 v)
+{
+	v &= v >> 1;
+	v &= v >> 2;
+	v &= v >> 4;
+	v &= v >> 8;
+	v &= v >> 16;
+	v &= v >> 32;
+	return v;
+}
+
 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
 				 struct bpf_reg_state *src_reg)
 {
@@ -13485,16 +13518,10 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
 	dst_reg->u32_min_value = var32_off.value;
 	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
 
-	/* Safe to set s32 bounds by casting u32 result into s32 when u32
-	 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
-	 */
-	if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
-		dst_reg->s32_min_value = dst_reg->u32_min_value;
-		dst_reg->s32_max_value = dst_reg->u32_max_value;
-	} else {
-		dst_reg->s32_min_value = S32_MIN;
-		dst_reg->s32_max_value = S32_MAX;
-	}
+	/* Rough estimate tuned for [-1, 0] & -CONSTANT cases. */
+	dst_reg->s32_min_value = negative32_bit_floor(min(dst_reg->s32_min_value,
+							  src_reg->s32_min_value));
+	dst_reg->s32_max_value = max(dst_reg->s32_max_value, src_reg->s32_max_value);
 }
 
 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
@@ -13515,16 +13542,11 @@ static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
 	dst_reg->umin_value = dst_reg->var_off.value;
 	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
 
-	/* Safe to set s64 bounds by casting u64 result into s64 when u64
-	 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
-	 */
-	if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
-		dst_reg->smin_value = dst_reg->umin_value;
-		dst_reg->smax_value = dst_reg->umax_value;
-	} else {
-		dst_reg->smin_value = S64_MIN;
-		dst_reg->smax_value = S64_MAX;
-	}
+	/* Rough estimate tuned for [-1, 0] & -CONSTANT cases. */
+	dst_reg->smin_value = negative_bit_floor(min(dst_reg->smin_value,
+						     src_reg->smin_value));
+	dst_reg->smax_value = max(dst_reg->smax_value, src_reg->smax_value);
+
 	/* We may learn something more from the var_off */
 	__update_reg_bounds(dst_reg);
 }
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [RFC bpf-next] bpf, verifier: improve signed ranges inference for BPF_AND
  2024-07-16 14:52       ` [RFC bpf-next] bpf, verifier: improve signed ranges inference for BPF_AND Shung-Hsi Yu
@ 2024-07-16 15:10         ` Shung-Hsi Yu
  2024-07-17 21:10         ` Eduard Zingerman
  2024-07-28 22:38         ` Harishankar Vishwanathan
  2 siblings, 0 replies; 18+ messages in thread
From: Shung-Hsi Yu @ 2024-07-16 15:10 UTC (permalink / raw)
  To: Xu Kuohai
  Cc: bpf, netdev, Eduard Zingerman, Alexei Starovoitov,
	Andrii Nakryiko, Daniel Borkmann, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, Jiri Olsa, Roberto Sassu, Edward Cree, Eric Dumazet,
	Jakub Kicinski, Harishankar Vishwanathan, Santosh Nagarakatte,
	Srinivas Narayana, Matan Shachnai

[-- Attachment #1: Type: text/plain, Size: 4063 bytes --]

On Tue, Jul 16, 2024 at 10:52:26PM GMT, Shung-Hsi Yu wrote:
> This commit teach the BPF verifier how to infer signed ranges directly
> from signed ranges of the operands to prevent verifier rejection
...
> ---
>  kernel/bpf/verifier.c | 62 +++++++++++++++++++++++++++++--------------
>  1 file changed, 42 insertions(+), 20 deletions(-)
> 
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 8da132a1ef28..6d4cdf30cd76 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -13466,6 +13466,39 @@ static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
>  	}
>  }
>  
> +/* Clears all trailing bits after the most significant unset bit.
> + *
> + * Used for estimating the minimum possible value after BPF_AND. This
> + * effectively rounds a negative value down to a negative power-of-2 value
> + * (except for -1, which just return -1) and returning 0 for non-negative
> + * values. E.g. masked32_negative(0xff0ff0ff) == 0xff000000.

s/masked32_negative/negative32_bit_floor/

> + */
> +static inline s32 negative32_bit_floor(s32 v)
> +{
> +	/* XXX: per C standard section 6.5.7 right shift of signed negative
> +	 * value is implementation-defined. Should unsigned type be used here
> +	 * instead?
> +	 */
> +	v &= v >> 1;
> +	v &= v >> 2;
> +	v &= v >> 4;
> +	v &= v >> 8;
> +	v &= v >> 16;
> +	return v;
> +}
> +
> +/* Same as negative32_bit_floor() above, but for 64-bit signed value */
> +static inline s64 negative_bit_floor(s64 v)
> +{
> +	v &= v >> 1;
> +	v &= v >> 2;
> +	v &= v >> 4;
> +	v &= v >> 8;
> +	v &= v >> 16;
> +	v &= v >> 32;
> +	return v;
> +}
> +
>  static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
>  				 struct bpf_reg_state *src_reg)
>  {
> @@ -13485,16 +13518,10 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
>  	dst_reg->u32_min_value = var32_off.value;
>  	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
>  
> -	/* Safe to set s32 bounds by casting u32 result into s32 when u32
> -	 * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
> -	 */
> -	if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
> -		dst_reg->s32_min_value = dst_reg->u32_min_value;
> -		dst_reg->s32_max_value = dst_reg->u32_max_value;
> -	} else {
> -		dst_reg->s32_min_value = S32_MIN;
> -		dst_reg->s32_max_value = S32_MAX;
> -	}
> +	/* Rough estimate tuned for [-1, 0] & -CONSTANT cases. */
> +	dst_reg->s32_min_value = negative32_bit_floor(min(dst_reg->s32_min_value,
> +							  src_reg->s32_min_value));
> +	dst_reg->s32_max_value = max(dst_reg->s32_max_value, src_reg->s32_max_value);
>  }
>  
>  static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
> @@ -13515,16 +13542,11 @@ static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
>  	dst_reg->umin_value = dst_reg->var_off.value;
>  	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
>  
> -	/* Safe to set s64 bounds by casting u64 result into s64 when u64
> -	 * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
> -	 */
> -	if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
> -		dst_reg->smin_value = dst_reg->umin_value;
> -		dst_reg->smax_value = dst_reg->umax_value;
> -	} else {
> -		dst_reg->smin_value = S64_MIN;
> -		dst_reg->smax_value = S64_MAX;
> -	}
> +	/* Rough estimate tuned for [-1, 0] & -CONSTANT cases. */
> +	dst_reg->smin_value = negative_bit_floor(min(dst_reg->smin_value,
> +						     src_reg->smin_value));
> +	dst_reg->smax_value = max(dst_reg->smax_value, src_reg->smax_value);
> +
>  	/* We may learn something more from the var_off */
>  	__update_reg_bounds(dst_reg);
>  }

Checked that this passes BPF CI[0] (except s390x-gcc/test_verifier,
which seems stuck), and verified the logic with z3 (see attached
Python script, adapted from [1]); so it seems to work.

Will try running tools/testing/selftests/bpf/prog_tests/reg_bounds.c
against it next.

0: https://github.com/kernel-patches/bpf/actions/runs/9958322024
1: https://github.com/bpfverif/tnums-cgo22/blob/main/verification/tnum.py

[-- Attachment #2: Type: text/x-python, Size: 2283 bytes --]

#!/usr/bin/env python3
# Need python3-z3/Z3Py to run
from math import floor, log2
from z3 import *

SIZE = 32
SIZE_LOG_2 = floor(log2(SIZE))


class SignedRange:
    name: str
    min: BitVecRef
    max: BitVecRef

    def __init__(self, name, min=None, max=None):
        self.name = name
        if min is None:
            self.min = BitVec(f'SignedRange({self.name}).min', bv=SIZE)
        elif isinstance(min, int):
            self.min = BitVecVal(min, bv=SIZE)
        else:
            self.min = min
        if max is None:
            self.max = BitVec(f'SignedRange({self.name}).max', bv=SIZE)
        elif isinstance(max, int):
            self.max = BitVecVal(max, bv=SIZE)
        else:
            self.max = max

    def wellformed(self):
        return self.min <= self.max

    def contains(self, val):
        if isinstance(val, int):
            val = BitVecVal(val, bv=SIZE)
        return And(self.min <= val, val <= self.max)


def negative_bit_floor(x: BitVecRef):
    for i in range(0, SIZE_LOG_2):
        shift_count = 2**i
        # Use arithmetic right shift to preserve leading signed bit
        x &= x >> shift_count
    return x


s = Solver()
premises = []

# Given x that is within a well-formed srange1, and y that is within a
# well-formed srange2
x = BitVec('x', bv=SIZE)
srange1 = SignedRange('srange1')
premises += [
    srange1.wellformed(),
    srange1.contains(x),
]

y = BitVec('y', bv=SIZE)
srange2 = SignedRange('srange2')
premises += [
    srange2.wellformed(),
    srange2.contains(y),
]

# Calculate x & y
actual = x & y

# x & y will always be LESS than or equal to max(srange1.max, srange2.max)
guessed_max = BitVec('guessed_max', bv=SIZE)
premises += [
    guessed_max == If(srange1.max > srange2.max, srange1.max, srange2.max)
]

# x & y will always be GREATER than or equal to negative_bit_floor(min(srange1.min, srange2.max)
guessed_min = BitVec('guessed_min', bv=SIZE)
premises += [
    guessed_min == negative_bit_floor(If(srange1.min > srange2.min, srange2.min, srange1.min)),
]

# Check result
s.add(Not(
    Implies(
        And(premises),
        And(guessed_min <= actual, actual <= guessed_max))
))
result = s.check()

if result == unsat:
    print('Proved!')
elif result == sat:
    print('Found counter example')
    print(s.model())
else:
    print('Solver returned unknown')

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH bpf-next v4 16/20] bpf: Add a special case for bitwise AND on range [-1, 0]
  2024-07-16  7:05     ` Xu Kuohai
  2024-07-16 14:52       ` [RFC bpf-next] bpf, verifier: improve signed ranges inference for BPF_AND Shung-Hsi Yu
@ 2024-07-16 15:19       ` Shung-Hsi Yu
  1 sibling, 0 replies; 18+ messages in thread
From: Shung-Hsi Yu @ 2024-07-16 15:19 UTC (permalink / raw)
  To: Xu Kuohai
  Cc: Eduard Zingerman, bpf, netdev, Alexei Starovoitov,
	Andrii Nakryiko, Daniel Borkmann, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, Jiri Olsa, Roberto Sassu, Edward Cree, Eric Dumazet,
	Jakub Kicinski, Harishankar Vishwanathan, Santosh Nagarakatte,
	Srinivas Narayana, Matan Shachnai

On Tue, Jul 16, 2024 at 03:05:11PM GMT, Xu Kuohai wrote:
> On 7/15/2024 11:29 PM, Shung-Hsi Yu wrote:
> > Cc Harishankar Vishwanathan, Prof. Srinivas Narayana and Prof. Santosh
> > Nagarakatte, and Matan Shachnai, whom have recently work on
> > scalar*_min_max_and(); also dropping LSM/FS related mails from Cc since
> > it's a bit long and I'm not sure whether the mailing list will reject
> > due to too many email in Cc.
> > 
> > On Thu, Jul 11, 2024 at 07:38:24PM GMT, Xu Kuohai wrote:
> > > With lsm return value check, the no-alu32 version test_libbpf_get_fd_by_id_opts
> > > is rejected by the verifier, and the log says:
> > > 
> > > 0: R1=ctx() R10=fp0
> > > ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
> > > 0: (b7) r0 = 0                        ; R0_w=0
> > > 1: (79) r2 = *(u64 *)(r1 +0)
> > > func 'bpf_lsm_bpf_map' arg0 has btf_id 916 type STRUCT 'bpf_map'
> > > 2: R1=ctx() R2_w=trusted_ptr_bpf_map()
> > > ; if (map != (struct bpf_map *)&data_input) @ test_libbpf_get_fd_by_id_opts.c:29
> > > 2: (18) r3 = 0xffff9742c0951a00       ; R3_w=map_ptr(map=data_input,ks=4,vs=4)
> > > 4: (5d) if r2 != r3 goto pc+4         ; R2_w=trusted_ptr_bpf_map() R3_w=map_ptr(map=data_input,ks=4,vs=4)
> > > ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
> > > 5: (79) r0 = *(u64 *)(r1 +8)          ; R0_w=scalar() R1=ctx()
> > > ; if (fmode & FMODE_WRITE) @ test_libbpf_get_fd_by_id_opts.c:32
> > > 6: (67) r0 <<= 62                     ; R0_w=scalar(smax=0x4000000000000000,umax=0xc000000000000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xc000000000000000))
> > > 7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
> > > ;  @ test_libbpf_get_fd_by_id_opts.c:0
> > > 8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
> > > ; int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode) @ test_libbpf_get_fd_by_id_opts.c:27
> > > 9: (95) exit
> > > 
> > > And here is the C code of the prog.
> > > 
> > > SEC("lsm/bpf_map")
> > > int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode)
> > > {
> > >      if (map != (struct bpf_map *)&data_input)
> > > 	    return 0;
> > > 
> > >      if (fmode & FMODE_WRITE)
> > > 	    return -EACCES;
> > > 
> > >      return 0;
> > > }
> > > 
> > > It is clear that the prog can only return either 0 or -EACCESS, and both
> > > values are legal.
> > > 
> > > So why is it rejected by the verifier?
> > > 
> > > The verifier log shows that the second if and return value setting
> > > statements in the prog is optimized to bitwise operations "r0 s>>= 63"
> > > and "r0 &= -13". The verifier correctly deduces that the value of
> > > r0 is in the range [-1, 0] after verifing instruction "r0 s>>= 63".
> > > But when the verifier proceeds to verify instruction "r0 &= -13", it
> > > fails to deduce the correct value range of r0.
> > > 
> > > 7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
> > > 8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
> > > 
> > > So why the verifier fails to deduce the result of 'r0 &= -13'?
> > > 
> > > The verifier uses tnum to track values, and the two ranges "[-1, 0]" and
> > > "[0, -1ULL]" are encoded to the same tnum. When verifing instruction
> > > "r0 &= -13", the verifier erroneously deduces the result from
> > > "[0, -1ULL] AND -13", which is out of the expected return range
> > > [-4095, 0].
> > > 
> > > As explained by Eduard in [0], the clang transformation that generates this
> > > pattern is located in DAGCombiner::SimplifySelectCC() method (see [1]).
> > ...
> > > As suggested by Eduard and Andrii, this patch makes a special case
> > > for source or destination register of '&=' operation being in
> > > range [-1, 0].
> > ...
> > 
> > Been wonder whether it possible for a more general approach ever since I
> > saw the discussion back in April. I think I've finally got something.
> > 
> > The problem we face here is that the tightest bound for the [-1, 0] case
> > was tracked with signed ranges, yet the BPF verifier looses knowledge of
> > them all too quickly in scalar*_min_max_and(); knowledge of previous
> > signed ranges were not used at all to derive the outcome of signed
> > ranges after BPF_AND.
> > 
> > 	static void scalar_min_max_and(...) {
> > 		...
> > 		if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
> > 			dst_reg->smin_value = dst_reg->umin_value;
> > 			dst_reg->smax_value = dst_reg->umax_value;
> > 		} else {
> > 			dst_reg->smin_value = S64_MIN;
> > 			dst_reg->smax_value = S64_MAX;
> > 		}
> > 		...
> > 	}
> > 
> 
> This is indeed the root cause.
> 
> > So looks like its time to be nobody[1] and try to teach BPF verifier how
> > track signed ranges when ANDing two (possibly) negative numbers. Luckily
> > bitwise AND is comparatively easier to do than other bitwise operations:
> > non-negative range & non-negative range is always non-negative,
> > non-negative range & negative range is still always non-negative, and
> > negative range & negative range is always negative.
> > 
> 
> Right, only bitwise ANDing two negatives yields to a negative result.
> 
> > smax_value is straight forwards, we can just do
> > 
> > 	max(dst_reg->smax_value, src_reg->smax_value)
> > 
> > which works across all sign combinations. Technically for non-negative &
> > non-negative we can use min() instead of max(), but the non-negative &
> > non-negative case should be handled pretty well by the unsigned ranges
> > already; it seems simpler to let such knowledge flows from unsigned
> > ranges to signed ranges during reg_bounds_sync(). Plus we are not wrong
> > for non-negative & non-negative by using max(), just imprecise, so no
> > correctness/soundness issue here.
> > 
> 
> I think this is correct, since in two's complement, more '1' bits means
> more large, regardless of sign, and bitwise AND never generates more '1'
> bits.
> 
> > smin_value is the tricker one, but doable with
> > 
> > 	masked_negative(min(dst_reg->smin_value, src_reg->smin_value))
> > 
> > where masked_negative(v) basically just clear all bits after the most
> > significant unset bit, effectively rounding a negative value down to a
> > negative power-of-2 value, and returning 0 for non-negative values. E.g.
> > for some 8-bit, negative value
> > 
> > 	masked_negative(0b11101001) == 0b11100000
> > 
> 
> Ah, it's really tricky. Seems it's the longest high '1' bits sequence
> in both operands. This '1' bits should remain unchanged by the bitwise
> AND operation. So this sequence must be in the result, making it the
> minimum possible value.
> 
> > This can be done with a tweaked version of "Round up to the next highest
> > power of 2"[2],
> > 
> > 	/* Invert the bits so the first unset bit can be propagated with |= */
> > 	v = ~v;
> > 	/* Now propagate the first (previously unset, now set) bit to the
> > 	 * trailing positions */
> > 	v |= v >> 1;
> > 	v |= v >> 2;
> > 	v |= v >> 4;
> > 	...
> > 	v |= v >> 32; /* Assuming 64-bit */
> > 	/* Propagation done, now invert again */
> > 	v = ~v;
> > 
> > Again, we technically can do better if we take sign bit into account,
> > but deriving smin_value this way should still be correct/sound across
> > different sign combinations, and overall should help us derived [-16, 0]
> > from "[-1, 0] AND -13", thus preventing BPF verifier from rejecting the
> > program.
> > 
> > ---
> > 
> > Alternatively we can employ a range-splitting trick (think I saw this in
> > [3]) that allow us to take advantage of existing tnum_and() by splitting
> > the signed ranges into two if the range crosses the sign boundary (i.e.
> > contains both non-negative and negative values), one range will be
> > [smin, U64_MAX], the other will be [0, smax]. This way we get around
> > tnum's weakness of representing [-1, 0] as [0, U64_MAX].
> > 
> > 	if (src_reg->smin_value < 0 && src_reg->smax_value >= 0) {
> > 		src_lower = tnum_range(src_reg->smin_value, U64_MAX);
> > 		src_higher = tnum_range(0, src_reg->smax_value);
> > 	} else {
> > 		src_lower = tnum_range(src_reg->smin_value, src_reg->smax_value);
> > 		src_higher = tnum_range(src_reg->smin_value, src_reg->smax_value);
> > 	}
> > 
> > 	if (dst_reg->smin_value < 0 && dst_reg->smax_value >= 0) {
> > 		dst_lower = tnum_range(dst_reg->smin_value, U64_MAX);
> > 		dst_higher = tnum_range(0, dst_reg->smax_value);
> > 	} else {
> > 		dst_lower = tnum_range(dst_reg->smin_value, dst_reg->smax_value);
> > 		dst_higher = tnum_range(dst_reg->smin_value, dst_reg->smax_value);
> > 	}
> > 
> > 	lower = tnum_and(src_lower, dst_lower);
> > 	higher = tnum_and(src_higher, dst_higher);
> > 	dst->smin_value = lower.value;
> > 	dst->smax_value = higher.value | higher.mask;
> 
> This looks even more tricky...

Indeed, and I think the above is still wrong because it did not properly
set smin_value to S64_MIN when needed.

> > Personally I like the first method better as it is simpler yet still
> > does the job well enough. I'll work on that in the next few days and see
> > if it actually works.
> 
> This really sounds great. Thank you for the excellent work!

Sent an RFC in a sibling thread. I think it would be better if the patch
were included as part of your series. But let's see what the others think
of it first.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC bpf-next] bpf, verifier: improve signed ranges inference for BPF_AND
  2024-07-16 14:52       ` [RFC bpf-next] bpf, verifier: improve signed ranges inference for BPF_AND Shung-Hsi Yu
  2024-07-16 15:10         ` Shung-Hsi Yu
@ 2024-07-17 21:10         ` Eduard Zingerman
  2024-07-19  8:32           ` Shung-Hsi Yu
  2024-07-28 22:38         ` Harishankar Vishwanathan
  2 siblings, 1 reply; 18+ messages in thread
From: Eduard Zingerman @ 2024-07-17 21:10 UTC (permalink / raw)
  To: Shung-Hsi Yu, Xu Kuohai
  Cc: bpf, netdev, Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
	Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
	KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa, Roberto Sassu,
	Edward Cree, Eric Dumazet, Jakub Kicinski,
	Harishankar Vishwanathan, Santosh Nagarakatte, Srinivas Narayana,
	Matan Shachnai

On Tue, 2024-07-16 at 22:52 +0800, Shung-Hsi Yu wrote:

[...]

> To allow verification of such instruction pattern, update
> scalar*_min_max_and() to infer signed ranges directly from signed ranges
> of the operands. With BPF_AND, the resulting value always gains more
> unset '0' bit, thus it only move towards 0x0000000000000000. The
> difficulty lies with how to deal with signs. While non-negative
> (positive and zero) value simply grows smaller, a negative number can
> grows smaller, but may also underflow and become a larger value.
> 
> To better address this situation we split the signed ranges into
> negative range and non-negative range cases, ignoring the mixed sign
> cases for now; and only consider how to calculate smax_value.
> 
> Since negative range & negative range preserve the sign bit, so we know
> the result is still a negative value, thus it only move towards S64_MIN,
> but never underflow, thus a save bet is to use a value in ranges that is
> closet to 0, thus "max(dst_reg->smax_value, src->smax_value)". For
> negative range & positive range the sign bit is always cleared, thus we
> know the resulting is a non-negative, and only moves towards 0, so a
> safe bet is to use smax_value of the non-negative range. Last but not
> least, non-negative range & non-negative range is still a non-negative
> value, and only moves towards 0; however same as the unsigned range
> case, the maximum is actually capped by the lesser of the two, and thus
> min(dst_reg->smax_value, src_reg->smax_value);
> 
> Listing out the above reasoning as a table (dst_reg abbreviated as dst,
> src_reg abbreviated as src, smax_value abbrivated as smax) we get:
> 
>                         |                         src_reg
>        smax = ?         +---------------------------+---------------------------
>                         |        negative           |       non-negative
> ---------+--------------+---------------------------+---------------------------
>          | negative     | max(dst->smax, src->smax) |         src->smax
> dst_reg  +--------------+---------------------------+---------------------------
>          | non-negative |         dst->smax         | min(dst->smax, src->smax)
> 
> However this is quite complicated, luckily it can be simplified given
> the following observations
> 
>     max(dst_reg->smax_value, src_reg->smax_value) >= src_reg->smax_value
>     max(dst_reg->smax_value, src_reg->smax_value) >= dst_reg->smax_value
>     max(dst_reg->smax_value, src_reg->smax_value) >= min(dst_reg->smax_value, src_reg->smax_value)
> 
> So we could substitute the cells in the table above all with max(...),
> and arrive at:
> 
>                         |                         src_reg
>       smax' = ?         +---------------------------+---------------------------
>                         |        negative           |       non-negative
> ---------+--------------+---------------------------+---------------------------
>          | negative     | max(dst->smax, src->smax) | max(dst->smax, src->smax)
> dst_reg  +--------------+---------------------------+---------------------------
>          | non-negative | max(dst->smax, src->smax) | max(dst->smax, src->smax)
> 
> Meaning that simply using
> 
>   max(dst_reg->smax_value, src_reg->smax_value)
> 
> to calculate the resulting smax_value would work across all sign combinations.
> 
> 
> For smin_value, we know that both non-negative range & non-negative
> range and negative range & non-negative range both result in a
> non-negative value, so an easy guess is to use the minimum non-negative
> value, thus 0.
> 
>                         |                         src_reg
>        smin = ?         +----------------------------+---------------------------
>                         |          negative          |       non-negative
> ---------+--------------+----------------------------+---------------------------
>          | negative     |             ?              |             0
> dst_reg  +--------------+----------------------------+---------------------------
>          | non-negative |             0              |             0
> 
> This leave the negative range & negative range case to be considered. We
> know that negative range & negative range always yield a negative value,
> so a preliminary guess would be S64_MIN. However, that guess is too
> imprecise to help with the r0 <<= 62, r0 s>>= 63, r0 &= -13 pattern
> we're trying to deal with here.
> 
> This can be further improve with the observation that for negative range
> & negative range, the smallest possible value must be one that has
> longest _common_ most-significant set '1' bits sequence, thus we can use
> min(dst_reg->smin_value, src->smin_value) as the starting point, as the
> smaller value will be the one with the shorter most-significant set '1'
> bits sequence. But that alone is not enough, as we do not know whether
> rest of the bits would be set, so the safest guess would be one that
> clear alls bits after the most-significant set '1' bits sequence,
> something akin to bit_floor(), but for rounding to a negative power-of-2
> instead.
> 
>     negative_bit_floor(0xffff000000000003) == 0xffff000000000000
>     negative_bit_floor(0xf0ff0000ffff0000) == 0xf000000000000000
>     negative_bit_floor(0xfffffb0000000000) == 0xfffff80000000000
> 
> With negative range & negative range solve, we now have:
> 
>                         |                         src_reg
>        smin = ?         +----------------------------+---------------------------
>                         |        negative            |       non-negative
> ---------+--------------+----------------------------+---------------------------
>          |   negative   |negative_bit_floor(         |             0
>          |              |  min(dst->smin, src->smin))|
> dst_reg  +--------------+----------------------------+---------------------------
>          | non-negative |           0                |             0
> 
> This can be further simplied since min(dst->smin, src->smin) < 0 when both
> dst_reg and src_reg have a negative range. Which means using
> 
>     negative_bit_floor(min(dst_reg->smin_value, src_reg->smin_value)
> 
> to calculate the resulting smin_value would work across all sign combinations.
> 
> Together these allows us to infer the signed range of the result of BPF_AND
> operation using the signed range from its operands.


Hi Shung-Hsi,

This seems quite elegant.
As an additional check, I did a simple brute-force over all possible
ranges of 6-bit integers, and the bounds are computed safely.
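
Roughly along these lines (a reconstructed sketch, not the exact script I
ran; it relies on arithmetic right shift for negative values, same as the
RFC patch does):

	#include <stdio.h>

	static int negative_bit_floor(int v)
	{
		v &= v >> 1;
		v &= v >> 2;
		v &= v >> 4;	/* enough for 6-bit values sign-extended into int */
		return v;
	}

	static int smin(int a, int b) { return a < b ? a : b; }
	static int smax(int a, int b) { return a > b ? a : b; }

	int main(void)
	{
		/* enumerate every well-formed pair of 6-bit signed ranges */
		for (int lo1 = -32; lo1 <= 31; lo1++)
		for (int hi1 = lo1; hi1 <= 31; hi1++)
		for (int lo2 = -32; lo2 <= 31; lo2++)
		for (int hi2 = lo2; hi2 <= 31; hi2++) {
			int bound_lo = negative_bit_floor(smin(lo1, lo2));
			int bound_hi = smax(hi1, hi2);

			/* ... and every pair of values drawn from them */
			for (int x = lo1; x <= hi1; x++)
			for (int y = lo2; y <= hi2; y++) {
				int r = x & y;	/* AND commutes with sign extension */

				if (r < bound_lo || r > bound_hi) {
					printf("unsound: [%d,%d] & [%d,%d]: %d & %d = %d outside [%d,%d]\n",
					       lo1, hi1, lo2, hi2, x, y, r, bound_lo, bound_hi);
					return 1;
				}
			}
		}
		printf("all 6-bit range pairs check out\n");
		return 0;
	}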

[...]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC bpf-next] bpf, verifier: improve signed ranges inference for BPF_AND
  2024-07-17 21:10         ` Eduard Zingerman
@ 2024-07-19  8:32           ` Shung-Hsi Yu
  0 siblings, 0 replies; 18+ messages in thread
From: Shung-Hsi Yu @ 2024-07-19  8:32 UTC (permalink / raw)
  To: Eduard Zingerman
  Cc: Xu Kuohai, bpf, netdev, Alexei Starovoitov, Andrii Nakryiko,
	Daniel Borkmann, Martin KaFai Lau, Song Liu, Yonghong Song,
	John Fastabend, KP Singh, Stanislav Fomichev, Hao Luo, Jiri Olsa,
	Roberto Sassu, Edward Cree, Eric Dumazet, Jakub Kicinski,
	Harishankar Vishwanathan, Santosh Nagarakatte, Srinivas Narayana,
	Matan Shachnai

On Wed, Jul 17, 2024 at 02:10:35PM GMT, Eduard Zingerman wrote:
> On Tue, 2024-07-16 at 22:52 +0800, Shung-Hsi Yu wrote:
> 
> [...]
> 
> > To allow verification of such instruction pattern, update
> > scalar*_min_max_and() to infer signed ranges directly from signed ranges
> > of the operands. With BPF_AND, the resulting value always gains more
> > unset '0' bits, thus it only moves towards 0x0000000000000000. The
> > difficulty lies in how to deal with signs. While a non-negative
> > (positive or zero) value simply grows smaller, a negative number can
> > grow smaller, but may also underflow and become a larger value.
> > 
> > To better address this situation we split the signed ranges into
> > negative range and non-negative range cases, ignoring the mixed sign
> > cases for now; and only consider how to calculate smax_value.
> > 
> > Since negative range & negative range preserves the sign bit, we know
> > the result is still a negative value, thus it only moves towards S64_MIN
> > but never underflows, so a safe bet is to use the value in the ranges
> > that is closest to 0, thus "max(dst_reg->smax_value, src->smax_value)". For
> > negative range & positive range the sign bit is always cleared, thus we
> > know the result is non-negative, and only moves towards 0, so a
> > safe bet is to use the smax_value of the non-negative range. Last but not
> > least, non-negative range & non-negative range is still a non-negative
> > value, and only moves towards 0; however, same as in the unsigned range
> > case, the maximum is actually capped by the lesser of the two, and thus
> > min(dst_reg->smax_value, src_reg->smax_value).
> > 
> > Listing out the above reasoning as a table (dst_reg abbreviated as dst,
> > src_reg abbreviated as src, smax_value abbreviated as smax) we get:
> > 
> >                         |                         src_reg
> >        smax = ?         +---------------------------+---------------------------
> >                         |        negative           |       non-negative
> > ---------+--------------+---------------------------+---------------------------
> >          | negative     | max(dst->smax, src->smax) |         src->smax
> > dst_reg  +--------------+---------------------------+---------------------------
> >          | non-negative |         dst->smax         | min(dst->smax, src->smax)
> > 
> > However, this is quite complicated; luckily it can be simplified given
> > the following observations:
> > 
> >     max(dst_reg->smax_value, src_reg->smax_value) >= src_reg->smax_value
> >     max(dst_reg->smax_value, src_reg->smax_value) >= dst_reg->smax_value
> >     max(dst_reg->smax_value, src_reg->smax_value) >= min(dst_reg->smax_value, src_reg->smax_value)
> > 
> > So we could substitute the cells in the table above all with max(...),
> > and arrive at:
> > 
> >                         |                         src_reg
> >       smax' = ?         +---------------------------+---------------------------
> >                         |        negative           |       non-negative
> > ---------+--------------+---------------------------+---------------------------
> >          | negative     | max(dst->smax, src->smax) | max(dst->smax, src->smax)
> > dst_reg  +--------------+---------------------------+---------------------------
> >          | non-negative | max(dst->smax, src->smax) | max(dst->smax, src->smax)
> > 
> > Meaning that simply using
> > 
> >   max(dst_reg->smax_value, src_reg->smax_value)
> > 
> > to calculate the resulting smax_value would work across all sign combinations.
> > 
> > 
> > For smin_value, we know that non-negative range & non-negative
> > range and negative range & non-negative range both result in a
> > non-negative value, so an easy guess is to use the minimum non-negative
> > value, thus 0.
> > 
> >                         |                         src_reg
> >        smin = ?         +----------------------------+---------------------------
> >                         |          negative          |       non-negative
> > ---------+--------------+----------------------------+---------------------------
> >          | negative     |             ?              |             0
> > dst_reg  +--------------+----------------------------+---------------------------
> >          | non-negative |             0              |             0
> > 
> > This leaves the negative range & negative range case to be considered. We
> > know that negative range & negative range always yield a negative value,
> > so a preliminary guess would be S64_MIN. However, that guess is too
> > imprecise to help with the r0 <<= 62, r0 s>>= 63, r0 &= -13 pattern
> > we're trying to deal with here.
> > 
> > This can be further improved with the observation that for negative range
> > & negative range, the smallest possible value must be one that has the
> > longest _common_ most-significant set '1' bits sequence, thus we can use
> > min(dst_reg->smin_value, src->smin_value) as the starting point, as the
> > smaller value will be the one with the shorter most-significant set '1'
> > bits sequence. But that alone is not enough, as we do not know whether the
> > rest of the bits would be set, so the safest guess would be one that clears
> > all bits after the most-significant set '1' bits sequence, something akin
> > to bit_floor(), but rounding to a negative power-of-2 instead.
> > 
> >     negative_bit_floor(0xffff000000000003) == 0xffff000000000000
> >     negative_bit_floor(0xf0ff0000ffff0000) == 0xf000000000000000
> >     negative_bit_floor(0xfffffb0000000000) == 0xfffff80000000000
> > 
> > With negative range & negative range solved, we now have:
> > 
> >                         |                         src_reg
> >        smin = ?         +----------------------------+---------------------------
> >                         |        negative            |       non-negative
> > ---------+--------------+----------------------------+---------------------------
> >          |   negative   |negative_bit_floor(         |             0
> >          |              |  min(dst->smin, src->smin))|
> > dst_reg  +--------------+----------------------------+---------------------------
> >          | non-negative |           0                |             0
> > 
> > This can be further simplified since min(dst->smin, src->smin) < 0 when both
> > dst_reg and src_reg have a negative range, which means using
> > 
> >     negative_bit_floor(min(dst_reg->smin_value, src_reg->smin_value))
> > 
> > to calculate the resulting smin_value would work across all sign combinations.
> > 
> > Together these allow us to infer the signed range of the result of the
> > BPF_AND operation using the signed ranges of its operands.
> 
> Hi Shung-Hsi,
> 
> This seems quite elegant.
> As an additional check, I did a simple brute-force over all possible
> ranges of 6-bit integers and the bounds are computed safely.

Thanks for looking into this, as well as the complement.

It did take me quite a while to come up with a simple solution that
works just well enough without further complication; felt quite proud :)

> [...]

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC bpf-next] bpf, verifier: improve signed ranges inference for BPF_AND
  2024-07-16 14:52       ` [RFC bpf-next] bpf, verifier: improve signed ranges inference for BPF_AND Shung-Hsi Yu
  2024-07-16 15:10         ` Shung-Hsi Yu
  2024-07-17 21:10         ` Eduard Zingerman
@ 2024-07-28 22:38         ` Harishankar Vishwanathan
  2024-07-30  4:25           ` Shung-Hsi Yu
  2 siblings, 1 reply; 18+ messages in thread
From: Harishankar Vishwanathan @ 2024-07-28 22:38 UTC (permalink / raw)
  To: Shung-Hsi Yu
  Cc: Xu Kuohai, Eduard Zingerman, bpf, netdev, Alexei Starovoitov,
	Andrii Nakryiko, Daniel Borkmann, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, Jiri Olsa, Roberto Sassu, Edward Cree, Eric Dumazet,
	Jakub Kicinski, Santosh Nagarakatte, Srinivas Narayana,
	Matan Shachnai

On Tue, Jul 16, 2024 at 10:52 AM Shung-Hsi Yu <shung-hsi.yu@suse.com> wrote:
>
> This commit teaches the BPF verifier how to infer signed ranges directly
> from signed ranges of the operands to prevent verifier rejection, which
> is needed for the following BPF program's no-alu32 version, as shown by
> Xu Kuohai:
>
>     SEC("lsm/bpf_map")
>     int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode)
>     {
>          if (map != (struct bpf_map *)&data_input)
>             return 0;
>
>          if (fmode & FMODE_WRITE)
>             return -EACCES;
>
>          return 0;
>     }
>
> The relevant verifier log upon rejection is:
>
>     ...
>     5: (79) r0 = *(u64 *)(r1 +8)          ; R0_w=scalar() R1=ctx()
>     ; if (fmode & FMODE_WRITE) @ test_libbpf_get_fd_by_id_opts.c:32
>     6: (67) r0 <<= 62                     ; R0_w=scalar(smax=0x4000000000000000,umax=0xc000000000000000,smin32=0,smax32=umax32=0,var_off=(0x0; 0xc000000000000000))
>     7: (c7) r0 s>>= 63                    ; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
>     ;  @ test_libbpf_get_fd_by_id_opts.c:0
>     8: (57) r0 &= -13                     ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
>     9: (95) exit
>
> This sequence of instructions comes from Clang's transformation located
> in DAGCombiner::SimplifySelectCC() method, which combined the "fmode &
> FMODE_WRITE" check with the return statement without needing BPF_JMP at
> all. See Eduard's comment for more detail of this transformation[0].
>
> While the verifier can correctly infer that the value of r0 is in a
> tight [-1, 0] range after instruction "r0 s>>= 63", it was not able to
> come up with a tight range for "r0 &= -13" (which would be [-13, 0]),
> and instead inferred a very loose range:
>
>     r0 s>>= 63; R0_w=scalar(smin=smin32=-1,smax=smax32=0)
>     r0 &= -13 ; R0_w=scalar(smax=0x7ffffffffffffff3,umax=0xfffffffffffffff3,smax32=0x7ffffff3,umax32=0xfffffff3,var_off=(0x0; 0xfffffffffffffff3))
>
> The reason is that scalar*_min_max_and() mainly relies on tnum for
> inferring the value in the register after BPF_AND; however, [-1, 0] cannot
> be tracked precisely with tnum, and effectively turns into [0, -1] (i.e.
> tnum_unknown). So upon BPF_AND the resulting tnum is equivalent to
>
>     dst_reg->var_off = tnum_and(tnum_unknown, tnum_const(-13))
>
> And from there the BPF verifier was only able to infer smin=S64_MIN,
> smax=0x7ffffffffffffff3, which is outside of the expected [-4095, 0]
> range for return values, and thus the program was rejected.
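
For reference, the degradation described above can be reproduced in a few
lines of userspace C; the tnum_and() below is a paraphrase of the logic in
the kernel's kernel/bpf/tnum.c, reduced to a standalone sketch:

	#include <stdint.h>
	#include <stdio.h>

	struct tnum { uint64_t value; uint64_t mask; };

	static struct tnum tnum_const(uint64_t value)
	{
		return (struct tnum){ .value = value, .mask = 0 };
	}

	/* [-1, 0] spans both sign values, so every bit is unknown */
	static const struct tnum tnum_unknown = { .value = 0, .mask = ~0ULL };

	static struct tnum tnum_and(struct tnum a, struct tnum b)
	{
		uint64_t alpha = a.value | a.mask; /* bits that may be 1 in a */
		uint64_t beta  = b.value | b.mask; /* bits that may be 1 in b */
		uint64_t v = a.value & b.value;    /* bits known to be 1 */

		return (struct tnum){ .value = v, .mask = alpha & beta & ~v };
	}

	int main(void)
	{
		struct tnum res = tnum_and(tnum_unknown, tnum_const((uint64_t)-13));

		/* prints var_off=(0x0; 0xfffffffffffffff3): only the two bits
		 * cleared by -13 are known, the sign bit is not, hence the
		 * loose smin=S64_MIN before this patch */
		printf("var_off=(%#llx; %#llx)\n",
		       (unsigned long long)res.value,
		       (unsigned long long)res.mask);
		return 0;
	}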
>
> To allow verification of such instruction pattern, update
> scalar*_min_max_and() to infer signed ranges directly from signed ranges
> of the operands. With BPF_AND, the resulting value always gains more
> unset '0' bits, thus it only moves towards 0x0000000000000000. The
> difficulty lies in how to deal with signs. While a non-negative
> (positive or zero) value simply grows smaller, a negative number can
> grow smaller, but may also underflow and become a larger value.
>
> To better address this situation we split the signed ranges into
> negative range and non-negative range cases, ignoring the mixed sign
> cases for now; and only consider how to calculate smax_value.
>
> Since negative range & negative range preserves the sign bit, we know
> the result is still a negative value, thus it only moves towards S64_MIN
> but never underflows, so a safe bet is to use the value in the ranges
> that is closest to 0, thus "max(dst_reg->smax_value, src->smax_value)". For
> negative range & positive range the sign bit is always cleared, thus we
> know the result is non-negative, and only moves towards 0, so a
> safe bet is to use the smax_value of the non-negative range. Last but not
> least, non-negative range & non-negative range is still a non-negative
> value, and only moves towards 0; however, same as in the unsigned range
> case, the maximum is actually capped by the lesser of the two, and thus
> min(dst_reg->smax_value, src_reg->smax_value).
>
> Listing out the above reasoning as a table (dst_reg abbreviated as dst,
> src_reg abbreviated as src, smax_value abbreviated as smax) we get:
>
>                         |                         src_reg
>        smax = ?         +---------------------------+---------------------------
>                         |        negative           |       non-negative
> ---------+--------------+---------------------------+---------------------------
>          | negative     | max(dst->smax, src->smax) |         src->smax
> dst_reg  +--------------+---------------------------+---------------------------
>          | non-negative |         dst->smax         | min(dst->smax, src->smax)
>
> However, this is quite complicated; luckily it can be simplified given
> the following observations:
>
>     max(dst_reg->smax_value, src_reg->smax_value) >= src_reg->smax_value
>     max(dst_reg->smax_value, src_reg->smax_value) >= dst_reg->smax_value
>     max(dst_reg->smax_value, src_reg->smax_value) >= min(dst_reg->smax_value, src_reg->smax_value)
>
> So we could substitute the cells in the table above all with max(...),
> and arrive at:
>
>                         |                         src_reg
>       smax' = ?         +---------------------------+---------------------------
>                         |        negative           |       non-negative
> ---------+--------------+---------------------------+---------------------------
>          | negative     | max(dst->smax, src->smax) | max(dst->smax, src->smax)
> dst_reg  +--------------+---------------------------+---------------------------
>          | non-negative | max(dst->smax, src->smax) | max(dst->smax, src->smax)
>
> Meaning that simply using
>
>   max(dst_reg->smax_value, src_reg->smax_value)
>
> to calculate the resulting smax_value would work across all sign combinations.
>
>
> For smin_value, we know that non-negative range & non-negative
> range and negative range & non-negative range both result in a
> non-negative value, so an easy guess is to use the minimum non-negative
> value, thus 0.
>
>                         |                         src_reg
>        smin = ?         +----------------------------+---------------------------
>                         |          negative          |       non-negative
> ---------+--------------+----------------------------+---------------------------
>          | negative     |             ?              |             0
> dst_reg  +--------------+----------------------------+---------------------------
>          | non-negative |             0              |             0
>
> This leaves the negative range & negative range case to be considered. We
> know that negative range & negative range always yield a negative value,
> so a preliminary guess would be S64_MIN. However, that guess is too
> imprecise to help with the r0 <<= 62, r0 s>>= 63, r0 &= -13 pattern
> we're trying to deal with here.
>
> This can be further improved with the observation that for negative range
> & negative range, the smallest possible value must be one that has the
> longest _common_ most-significant set '1' bits sequence, thus we can use
> min(dst_reg->smin_value, src->smin_value) as the starting point, as the
> smaller value will be the one with the shorter most-significant set '1'
> bits sequence. But that alone is not enough, as we do not know whether the
> rest of the bits would be set, so the safest guess would be one that clears
> all bits after the most-significant set '1' bits sequence, something akin
> to bit_floor(), but rounding to a negative power-of-2 instead.
>
>     negative_bit_floor(0xffff000000000003) == 0xffff000000000000
>     negative_bit_floor(0xf0ff0000ffff0000) == 0xf000000000000000
>     negative_bit_floor(0xfffffb0000000000) == 0xfffff80000000000
>
> With negative range & negative range solved, we now have:
>
>                         |                         src_reg
>        smin = ?         +----------------------------+---------------------------
>                         |        negative            |       non-negative
> ---------+--------------+----------------------------+---------------------------
>          |   negative   |negative_bit_floor(         |             0
>          |              |  min(dst->smin, src->smin))|
> dst_reg  +--------------+----------------------------+---------------------------
>          | non-negative |           0                |             0
>
> This can be further simplified since min(dst->smin, src->smin) < 0 when both
> dst_reg and src_reg have a negative range, which means using
>
>     negative_bit_floor(min(dst_reg->smin_value, src_reg->smin_value))
>
> to calculate the resulting smin_value would work across all sign combinations.
>
> Together these allow us to infer the signed range of the result of the
> BPF_AND operation using the signed ranges of its operands.
>
> [0] https://lore.kernel.org/bpf/e62e2971301ca7f2e9eb74fc500c520285cad8f5.camel@gmail.com/
>
> Link: https://lore.kernel.org/bpf/phcqmyzeqrsfzy7sb4rwpluc37hxyz7rcajk2bqw6cjk2x7rt5@m2hl6enudv7d/
> Cc: Eduard Zingerman <eddyz87@gmail.com>
> Signed-off-by: Shung-Hsi Yu <shung-hsi.yu@suse.com>
> ---
>  kernel/bpf/verifier.c | 62 +++++++++++++++++++++++++++++--------------
>  1 file changed, 42 insertions(+), 20 deletions(-)
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 8da132a1ef28..6d4cdf30cd76 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -13466,6 +13466,39 @@ static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
>         }
>  }
>
> +/* Clears all trailing bits after the most significant unset bit.
> + *
> + * Used for estimating the minimum possible value after BPF_AND. This
> + * effectively rounds a negative value down to a negative power-of-2 value
> + * (except for -1, which just returns -1) and returns 0 for non-negative
> + * values. E.g. negative32_bit_floor(0xff0ff0ff) == 0xff000000.
> + */
> +static inline s32 negative32_bit_floor(s32 v)
> +{
> +       /* XXX: per C standard section 6.5.7 right shift of signed negative
> +        * value is implementation-defined. Should unsigned type be used here
> +        * instead?
> +        */
> +       v &= v >> 1;
> +       v &= v >> 2;
> +       v &= v >> 4;
> +       v &= v >> 8;
> +       v &= v >> 16;
> +       return v;
> +}
> +
> +/* Same as negative32_bit_floor() above, but for 64-bit signed value */
> +static inline s64 negative_bit_floor(s64 v)
> +{
> +       v &= v >> 1;
> +       v &= v >> 2;
> +       v &= v >> 4;
> +       v &= v >> 8;
> +       v &= v >> 16;
> +       v &= v >> 32;
> +       return v;
> +}
> +
>  static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
>                                  struct bpf_reg_state *src_reg)
>  {
> @@ -13485,16 +13518,10 @@ static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
>         dst_reg->u32_min_value = var32_off.value;
>         dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
>
> -       /* Safe to set s32 bounds by casting u32 result into s32 when u32
> -        * doesn't cross sign boundary. Otherwise set s32 bounds to unbounded.
> -        */
> -       if ((s32)dst_reg->u32_min_value <= (s32)dst_reg->u32_max_value) {
> -               dst_reg->s32_min_value = dst_reg->u32_min_value;
> -               dst_reg->s32_max_value = dst_reg->u32_max_value;
> -       } else {
> -               dst_reg->s32_min_value = S32_MIN;
> -               dst_reg->s32_max_value = S32_MAX;
> -       }
> +       /* Rough estimate tuned for [-1, 0] & -CONSTANT cases. */
> +       dst_reg->s32_min_value = negative32_bit_floor(min(dst_reg->s32_min_value,
> +                                                         src_reg->s32_min_value));
> +       dst_reg->s32_max_value = max(dst_reg->s32_max_value, src_reg->s32_max_value);
>  }
>
>  static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
> @@ -13515,16 +13542,11 @@ static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
>         dst_reg->umin_value = dst_reg->var_off.value;
>         dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
>
> -       /* Safe to set s64 bounds by casting u64 result into s64 when u64
> -        * doesn't cross sign boundary. Otherwise set s64 bounds to unbounded.
> -        */
> -       if ((s64)dst_reg->umin_value <= (s64)dst_reg->umax_value) {
> -               dst_reg->smin_value = dst_reg->umin_value;
> -               dst_reg->smax_value = dst_reg->umax_value;
> -       } else {
> -               dst_reg->smin_value = S64_MIN;
> -               dst_reg->smax_value = S64_MAX;
> -       }
> +       /* Rough estimate tuned for [-1, 0] & -CONSTANT cases. */
> +       dst_reg->smin_value = negative_bit_floor(min(dst_reg->smin_value,
> +                                                    src_reg->smin_value));
> +       dst_reg->smax_value = max(dst_reg->smax_value, src_reg->smax_value);
> +
>         /* We may learn something more from the var_off */
>         __update_reg_bounds(dst_reg);
>  }
> --
> 2.45.2
>

Apologies for the late response, and thank you for CCing us, Shung-Hsi.

The patch itself seems well thought out and looks correct. Great work!

We quickly checked your patch using Agni [1], and were not able to find any
violations. That is, given well-formed register state inputs to
adjust_scalar_min_max_vals, the new algorithm always produces sound outputs
for the BPF_AND (both 32/64) instruction.

It looks like you already performed tests with Z3, and Eduard performed
brute-force testing using 6-bit integers. Agni's result stands as an
additional, stronger guarantee: Agni generates SMT formulas directly
from the C source code of the verifier and checks their correctness in Z3
without any external library functions; it uses full 64-bit bitvectors
in the generated formulas and considers correctness for 64-bit integer
inputs; and finally, it considers the correctness of the *final* output
abstract values generated after running update_reg_bounds() and
reg_bounds_sync().

Using Agni's encodings we were also quickly able to check the precision of
the new algorithm. An algorithm is more precise if it produces tighter
range bounds, while being correct. We are happy to note that the new
algorithm produces outputs that are at least as precise as those of the old
algorithm, for all well-formed register state inputs.

Best,
Hari

[1] https://github.com/bpfverif/agni

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC bpf-next] bpf, verifier: improve signed ranges inference for BPF_AND
  2024-07-28 22:38         ` Harishankar Vishwanathan
@ 2024-07-30  4:25           ` Shung-Hsi Yu
  2024-08-02 21:30             ` Harishankar Vishwanathan
  0 siblings, 1 reply; 18+ messages in thread
From: Shung-Hsi Yu @ 2024-07-30  4:25 UTC (permalink / raw)
  To: Harishankar Vishwanathan
  Cc: Xu Kuohai, Eduard Zingerman, bpf, netdev, Alexei Starovoitov,
	Andrii Nakryiko, Daniel Borkmann, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, Jiri Olsa, Roberto Sassu, Edward Cree, Eric Dumazet,
	Jakub Kicinski, Santosh Nagarakatte, Srinivas Narayana,
	Matan Shachnai

Hi Harishankar,

On Sun, Jul 28, 2024 at 06:38:40PM GMT, Harishankar Vishwanathan wrote:
> On Tue, Jul 16, 2024 at 10:52 AM Shung-Hsi Yu <shung-hsi.yu@suse.com> wrote:
> > This commit teach the BPF verifier how to infer signed ranges directly
> > from signed ranges of the operands to prevent verifier rejection, which
> > is needed for the following BPF program's no-alu32 version, as shown by
> > Xu Kuohai:
[...]
> Apologies for the late response and thank you for CCing us Shung-Hsi.
> 
> The patch itself seems well thought out and looks correct. Great work!

Thanks! :)

> We quickly checked your patch using Agni [1], and were not able to find any
> violations. That is, given well-formed register state inputs to
> adjust_scalar_min_max_vals, the new algorithm always produces sound outputs
> for the BPF_AND (both 32/64) instruction.

That is great to hear and really boosts the level of confidence. Though I
did make an update[1] to the patch such that the implementation of
negative_bit_floor() is changed from

	v &= v >> 1;
	v &= v >> 2;
	v &= v >> 4;
	v &= v >> 8;
	v &= v >> 16;
	v &= v >> 32;
	return v;

to one that more closely resembles tnum_range()

	u8 bits = fls64(~v); /* find most-significant unset bit */
	u64 delta;

	/* special case, needed because 1ULL << 64 is undefined */
	if (bits > 63)
		return 0;

	delta = (1ULL << bits) - 1;
	return ~delta;

My understanding is that the two implementations should return the same
output for the same input, so overall the deduction remains the same.
And my simpler test with Z3 does not find any violation in the new
implementation. But it would be much better if we could have Agni check
the new implementation for violations as well.
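
For what it's worth, a quick userspace sketch (not kernel code; the fls64()
stand-in below is built on __builtin_clzll and only assumed to match the
kernel's semantics) comparing the two implementations on edge cases and a
batch of random values also shows them agreeing:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* relies on arithmetic right shift of negative values (gcc/clang) */
	static int64_t nbf_shift(int64_t v)
	{
		v &= v >> 1;
		v &= v >> 2;
		v &= v >> 4;
		v &= v >> 8;
		v &= v >> 16;
		v &= v >> 32;
		return v;
	}

	static unsigned int fls64(uint64_t x)
	{
		return x ? 64 - __builtin_clzll(x) : 0;
	}

	static int64_t nbf_fls(int64_t v)
	{
		unsigned int bits = fls64(~(uint64_t)v); /* most-significant unset bit */
		uint64_t delta;

		if (bits > 63)	/* v >= 0: no leading run of set bits to keep */
			return 0;
		delta = (1ULL << bits) - 1;
		return (int64_t)~delta;
	}

	int main(void)
	{
		int64_t samples[] = {
			0, -1, 1, -13, -16, INT64_MIN, INT64_MAX,
			(int64_t)0xffff000000000003ULL,
			(int64_t)0xf0ff0000ffff0000ULL,
			(int64_t)0xfffffb0000000000ULL,
		};

		for (size_t i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
			assert(nbf_shift(samples[i]) == nbf_fls(samples[i]));

		srand(1);
		for (int i = 0; i < 1000000; i++) {
			uint64_t r = ((uint64_t)rand() << 40) ^
				     ((uint64_t)rand() << 20) ^ (uint64_t)rand();

			assert(nbf_shift((int64_t)r) == nbf_fls((int64_t)r));
		}
		printf("both implementations agree\n");
		return 0;
	}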

Speaking of which, would you and others involved in checking this patch be
comfortable with adding a formal acknowledgment[2] for the patch so this
work can be credited in the git repo as well? (i.e. usually by replying
with an Acked-by; other alternatives are Reviewed-by and Tested-by)

IMHO the work done here is in the realm of Reviewed-by, but that itself
comes with other implications[3], which may or may not be wanted
depending on the individual's circumstances.

I'll probably post the updated patch out next week, changing only the
comments in [1].

> It looks like you already performed tests with Z3, and Eduard performed a
> brute force testing using 6-bit integers. Agni's result stands as an
> additional stronger guarantee because Agni generates SMT formulas directly
> from the C source code of the verifier and checks the correctness in Z3
> without any external library functions, it uses full 64-bit size bitvectors
> in the formulas generated and considers the correctness for 64-bit integer
> inputs, and finally it considers the correctness of the *final* output
> abstract values generated after running update_reg_bounds() and
> reg_bounds_sync().

I had some vague idea that Agni provides better guarantees, but did not
know exactly what they were. Thanks for the clear explanation of the
additional guarantees Agni provides; it's especially reassuring to know that
update_reg_bounds() and reg_bounds_sync() have been taken into account.

> Using Agni's encodings we were also quickly able to check the precision of
> the new algorithm. An algorithm is more precise if it produces tighter
> range bounds, while being correct. We are happy to note that the new
> algorithm produces outputs that are at least as precise or more precise
> than the old algorithm, for all well-formed register state inputs.

That is great to hear as well. I really should try Agni myself; I hope I
can find time in the near future.

Cheers,
Shung-Hsi

1: https://lore.kernel.org/bpf/20240719081702.137173-1-shung-hsi.yu@suse.com/
2: https://www.kernel.org/doc/html/v6.9/process/submitting-patches.html#when-to-use-acked-by-cc-and-co-developed-by
3: https://www.kernel.org/doc/html/v6.9/process/submitting-patches.html#reviewer-s-statement-of-oversight

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [RFC bpf-next] bpf, verifier: improve signed ranges inference for BPF_AND
  2024-07-30  4:25           ` Shung-Hsi Yu
@ 2024-08-02 21:30             ` Harishankar Vishwanathan
  0 siblings, 0 replies; 18+ messages in thread
From: Harishankar Vishwanathan @ 2024-08-02 21:30 UTC (permalink / raw)
  To: Shung-Hsi Yu
  Cc: Xu Kuohai, Eduard Zingerman, bpf, netdev, Alexei Starovoitov,
	Andrii Nakryiko, Daniel Borkmann, Martin KaFai Lau, Song Liu,
	Yonghong Song, John Fastabend, KP Singh, Stanislav Fomichev,
	Hao Luo, Jiri Olsa, Roberto Sassu, Edward Cree, Eric Dumazet,
	Jakub Kicinski, Santosh Nagarakatte, Srinivas Narayana,
	Matan Shachnai

On Tue, Jul 30, 2024 at 12:26 AM Shung-Hsi Yu <shung-hsi.yu@suse.com> wrote:
[...]
> That is great to hear and really boost the level of confidence. Though I
> did made an update[1] to the patch such that implementation of
> negative_bit_floor() is change from
>
>         v &= v >> 1;
>         v &= v >> 2;
>         v &= v >> 4;
>         v &= v >> 8;
>         v &= v >> 16;
>         v &= v >> 32;
>         return v;
>
> to one that closer resembles tnum_range()
>
>         u8 bits = fls64(~v); /* find most-significant unset bit */
>         u64 delta;
>
>         /* special case, needed because 1ULL << 64 is undefined */
>         if (bits > 63)
>                 return 0;
>
>         delta = (1ULL << bits) - 1;
>         return ~delta;
>

This [1] is indeed the version of the patch that we checked: the one that
uses fls64 and fls in negative_bit_floor and negative32_bit_floor.
I replied here because you had CCed us in this thread.

Note that for checking in Agni, the implementations of fls and fls64 were
borrowed from asm-generic [2,3,4].

Having said that, the patch [1] looks good to me.
Tested-by: Harishankar Vishwanathan <harishankar.vishwanathan@gmail.com>

[1]: https://lore.kernel.org/bpf/20240719081702.137173-1-shung-hsi.yu@suse.com/
[2]: https://elixir.bootlin.com/linux/v6.10/source/include/asm-generic/bitops/fls.h#L43
[3]: https://elixir.bootlin.com/linux/v6.10/source/include/asm-generic/bitops/fls64.h#L19
[4]: https://elixir.bootlin.com/linux/v6.10/source/include/asm-generic/bitops/__fls.h#L45

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2024-08-02 21:30 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-07-11 11:38 [PATCH bpf-next v4 13/20] bpf, lsm: Add check for BPF LSM return value Xu Kuohai
2024-07-11 11:38 ` [PATCH bpf-next v4 14/20] bpf: Prevent tail call between progs attached to different hooks Xu Kuohai
2024-07-11 11:38 ` [PATCH bpf-next v4 15/20] bpf: Fix compare error in function retval_range_within Xu Kuohai
2024-07-11 11:38 ` [PATCH bpf-next v4 16/20] bpf: Add a special case for bitwise AND on range [-1, 0] Xu Kuohai
2024-07-15 15:29   ` Shung-Hsi Yu
2024-07-16  7:05     ` Xu Kuohai
2024-07-16 14:52       ` [RFC bpf-next] bpf, verifier: improve signed ranges inference for BPF_AND Shung-Hsi Yu
2024-07-16 15:10         ` Shung-Hsi Yu
2024-07-17 21:10         ` Eduard Zingerman
2024-07-19  8:32           ` Shung-Hsi Yu
2024-07-28 22:38         ` Harishankar Vishwanathan
2024-07-30  4:25           ` Shung-Hsi Yu
2024-08-02 21:30             ` Harishankar Vishwanathan
2024-07-16 15:19       ` [PATCH bpf-next v4 16/20] bpf: Add a special case for bitwise AND on range [-1, 0] Shung-Hsi Yu
2024-07-11 11:38 ` [PATCH bpf-next v4 17/20] selftests/bpf: Avoid load failure for token_lsm.c Xu Kuohai
2024-07-11 11:38 ` [PATCH bpf-next v4 18/20] selftests/bpf: Add return value checks for failed tests Xu Kuohai
2024-07-11 11:38 ` [PATCH bpf-next v4 19/20] selftests/bpf: Add test for lsm tail call Xu Kuohai
2024-07-11 11:38 ` [PATCH bpf-next v4 20/20] selftests/bpf: Add verifier tests for bpf lsm Xu Kuohai
