* Re: [bpf/tools] cd17d77705: kernel_selftests.bpf.test_sock_addr.sh.fail
[not found] <20190627090446.GG7221@shao2-debian>
@ 2019-06-27 15:50 ` Stanislav Fomichev
2019-06-27 17:29 ` Stanislav Fomichev
2019-06-28 1:56 ` Andrii Nakryiko
0 siblings, 2 replies; 7+ messages in thread
From: Stanislav Fomichev @ 2019-06-27 15:50 UTC (permalink / raw)
To: kernel test robot
Cc: Stanislav Fomichev, Daniel Borkmann, Martin Lau, LKML,
Stephen Rothwell, bpf, lkp
On 06/27, kernel test robot wrote:
> FYI, we noticed the following commit (built with gcc-7):
>
> commit: cd17d77705780e2270937fb3cbd2b985adab3edc ("bpf/tools: sync bpf.h")
> https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master
>
> in testcase: kernel_selftests
> with following parameters:
>
> group: kselftests-00
>
> test-description: The kernel contains a set of "self tests" under the tools/testing/selftests/ directory. These are intended to be small unit tests to exercise individual code paths in the kernel.
> test-url: https://www.kernel.org/doc/Documentation/kselftest.txt
>
>
> on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 8G
>
> caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
>
> # ; int connect_v6_prog(struct bpf_sock_addr *ctx)
> # 0: (bf) r6 = r1
> # 1: (18) r1 = 0x100000000000000
> # ; tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0);
> # 3: (7b) *(u64 *)(r10 -16) = r1
> # 4: (b7) r1 = 169476096
> # ; memset(&tuple.ipv6.sport, 0, sizeof(tuple.ipv6.sport));
> # 5: (63) *(u32 *)(r10 -8) = r1
> # 6: (b7) r7 = 0
> # ; tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0);
> # 7: (7b) *(u64 *)(r10 -24) = r7
> # 8: (7b) *(u64 *)(r10 -32) = r7
> # 9: (7b) *(u64 *)(r10 -40) = r7
> # ; if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
> # 10: (61) r1 = *(u32 *)(r6 +32)
> # ; if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
> # 11: (bf) r2 = r1
> # 12: (07) r2 += -1
> # 13: (67) r2 <<= 32
> # 14: (77) r2 >>= 32
> # 15: (25) if r2 > 0x1 goto pc+33
> # R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=1,var_off=(0x0; 0x1)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=00000000 fp-32=00000000 fp-40=00000000
> # ; else if (ctx->type == SOCK_STREAM)
> # 16: (55) if r1 != 0x1 goto pc+8
> # R1=inv1 R2=inv(id=0,umax_value=1,var_off=(0x0; 0x1)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=00000000 fp-32=00000000 fp-40=00000000
> # 17: (bf) r2 = r10
> # ; sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv6),
> # 18: (07) r2 += -40
> # 19: (bf) r1 = r6
> # 20: (b7) r3 = 36
> # 21: (b7) r4 = -1
> # 22: (b7) r5 = 0
> # 23: (85) call bpf_sk_lookup_tcp#84
> # 24: (05) goto pc+7
> # ; if (!sk)
> # 32: (15) if r0 == 0x0 goto pc+16
> # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> # 33: (61) r1 = *(u32 *)(r0 +28)
> # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> # 34: (61) r2 = *(u32 *)(r10 -24)
> # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> # 35: (5d) if r1 != r2 goto pc+11
> # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> # ; sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
> # 36: (61) r1 = *(u32 *)(r0 +32)
> # ; sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
> # 37: (61) r2 = *(u32 *)(r10 -20)
> # ; sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
> # 38: (5d) if r1 != r2 goto pc+8
> # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> # ; sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
> # 39: (61) r1 = *(u32 *)(r0 +36)
> # ; sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
> # 40: (61) r2 = *(u32 *)(r10 -16)
> # ; sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
> # 41: (5d) if r1 != r2 goto pc+5
> # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> # ; sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
> # 42: (61) r1 = *(u32 *)(r0 +40)
> # ; sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
> # 43: (61) r2 = *(u32 *)(r10 -12)
> # ; sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
> # 44: (5d) if r1 != r2 goto pc+2
> # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> # ; sk->src_port != DST_REWRITE_PORT6) {
> # 45: (61) r1 = *(u32 *)(r0 +44)
> # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> # 46: (15) if r1 == 0x1a0a goto pc+4
> # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> # ; bpf_sk_release(sk);
> # 47: (bf) r1 = r0
> # 48: (85) call bpf_sk_release#86
> # ; }
> # 49: (bf) r0 = r7
> # 50: (95) exit
> #
> # from 46 to 51: R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv6666 R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> # ; bpf_sk_release(sk);
> # 51: (bf) r1 = r0
> # 52: (85) call bpf_sk_release#86
> # 53: (b7) r1 = 2586
> # ; ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
> # 54: (63) *(u32 *)(r6 +24) = r1
> # 55: (18) r1 = 0x100000000000000
> # ; ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
> # 57: (7b) *(u64 *)(r6 +16) = r1
> # invalid bpf_context access off=16 size=8
This looks like clang doing single u64 write for user_ip6[2] and
user_ip6[3] instead of two u32. I don't think we allow that.
I've seen this a couple of times myself while playing with some
progs, but not sure what's the right way to 'fix' it.
> # processed 49 insns (limit 1000000) max_states_per_insn 0 total_states 13 peak_states 13 mark_read 11
> #
> # libbpf: -- END LOG --
> # libbpf: failed to load program 'cgroup/connect6'
> # libbpf: failed to load object './connect6_prog.o'
> # (test_sock_addr.c:752: errno: Bad file descriptor) >>> Loading program (./connect6_prog.o) error.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [bpf/tools] cd17d77705: kernel_selftests.bpf.test_sock_addr.sh.fail
2019-06-27 15:50 ` [bpf/tools] cd17d77705: kernel_selftests.bpf.test_sock_addr.sh.fail Stanislav Fomichev
@ 2019-06-27 17:29 ` Stanislav Fomichev
2019-06-28 2:22 ` Andrii Nakryiko
2019-06-28 1:56 ` Andrii Nakryiko
1 sibling, 1 reply; 7+ messages in thread
From: Stanislav Fomichev @ 2019-06-27 17:29 UTC (permalink / raw)
To: kernel test robot
Cc: Stanislav Fomichev, Daniel Borkmann, Martin Lau, LKML,
Stephen Rothwell, bpf, lkp
On 06/27, Stanislav Fomichev wrote:
> On 06/27, kernel test robot wrote:
> > FYI, we noticed the following commit (built with gcc-7):
> >
> > commit: cd17d77705780e2270937fb3cbd2b985adab3edc ("bpf/tools: sync bpf.h")
> > https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master
> >
> > in testcase: kernel_selftests
> > with following parameters:
> >
> > group: kselftests-00
> >
> > test-description: The kernel contains a set of "self tests" under the tools/testing/selftests/ directory. These are intended to be small unit tests to exercise individual code paths in the kernel.
> > test-url: https://www.kernel.org/doc/Documentation/kselftest.txt
> >
> >
> > on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 8G
> >
> > caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
> >
> > # ; int connect_v6_prog(struct bpf_sock_addr *ctx)
> > # 0: (bf) r6 = r1
> > # 1: (18) r1 = 0x100000000000000
> > # ; tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0);
> > # 3: (7b) *(u64 *)(r10 -16) = r1
> > # 4: (b7) r1 = 169476096
> > # ; memset(&tuple.ipv6.sport, 0, sizeof(tuple.ipv6.sport));
> > # 5: (63) *(u32 *)(r10 -8) = r1
> > # 6: (b7) r7 = 0
> > # ; tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0);
> > # 7: (7b) *(u64 *)(r10 -24) = r7
> > # 8: (7b) *(u64 *)(r10 -32) = r7
> > # 9: (7b) *(u64 *)(r10 -40) = r7
> > # ; if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
> > # 10: (61) r1 = *(u32 *)(r6 +32)
> > # ; if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
> > # 11: (bf) r2 = r1
> > # 12: (07) r2 += -1
> > # 13: (67) r2 <<= 32
> > # 14: (77) r2 >>= 32
> > # 15: (25) if r2 > 0x1 goto pc+33
> > # R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=1,var_off=(0x0; 0x1)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=00000000 fp-32=00000000 fp-40=00000000
> > # ; else if (ctx->type == SOCK_STREAM)
> > # 16: (55) if r1 != 0x1 goto pc+8
> > # R1=inv1 R2=inv(id=0,umax_value=1,var_off=(0x0; 0x1)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=00000000 fp-32=00000000 fp-40=00000000
> > # 17: (bf) r2 = r10
> > # ; sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv6),
> > # 18: (07) r2 += -40
> > # 19: (bf) r1 = r6
> > # 20: (b7) r3 = 36
> > # 21: (b7) r4 = -1
> > # 22: (b7) r5 = 0
> > # 23: (85) call bpf_sk_lookup_tcp#84
> > # 24: (05) goto pc+7
> > # ; if (!sk)
> > # 32: (15) if r0 == 0x0 goto pc+16
> > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> > # 33: (61) r1 = *(u32 *)(r0 +28)
> > # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> > # 34: (61) r2 = *(u32 *)(r10 -24)
> > # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> > # 35: (5d) if r1 != r2 goto pc+11
> > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > # ; sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
> > # 36: (61) r1 = *(u32 *)(r0 +32)
> > # ; sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
> > # 37: (61) r2 = *(u32 *)(r10 -20)
> > # ; sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
> > # 38: (5d) if r1 != r2 goto pc+8
> > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > # ; sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
> > # 39: (61) r1 = *(u32 *)(r0 +36)
> > # ; sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
> > # 40: (61) r2 = *(u32 *)(r10 -16)
> > # ; sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
> > # 41: (5d) if r1 != r2 goto pc+5
> > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > # ; sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
> > # 42: (61) r1 = *(u32 *)(r0 +40)
> > # ; sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
> > # 43: (61) r2 = *(u32 *)(r10 -12)
> > # ; sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
> > # 44: (5d) if r1 != r2 goto pc+2
> > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > # ; sk->src_port != DST_REWRITE_PORT6) {
> > # 45: (61) r1 = *(u32 *)(r0 +44)
> > # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> > # 46: (15) if r1 == 0x1a0a goto pc+4
> > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > # ; bpf_sk_release(sk);
> > # 47: (bf) r1 = r0
> > # 48: (85) call bpf_sk_release#86
> > # ; }
> > # 49: (bf) r0 = r7
> > # 50: (95) exit
> > #
> > # from 46 to 51: R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv6666 R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > # ; bpf_sk_release(sk);
> > # 51: (bf) r1 = r0
> > # 52: (85) call bpf_sk_release#86
> > # 53: (b7) r1 = 2586
> > # ; ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
> > # 54: (63) *(u32 *)(r6 +24) = r1
> > # 55: (18) r1 = 0x100000000000000
> > # ; ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
> > # 57: (7b) *(u64 *)(r6 +16) = r1
> > # invalid bpf_context access off=16 size=8
> This looks like clang doing single u64 write for user_ip6[2] and
> user_ip6[3] instead of two u32. I don't think we allow that.
>
> I've seen this a couple of times myself while playing with some
> progs, but not sure what's the right way to 'fix' it.
>
Any thoughts about the patch below? Another way to "fix" it
would be to mark context accesses 'volatile' in bpf progs, but that sounds
a bit gross.
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 43b45d6db36d..34a14c950e60 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -746,6 +746,20 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
return size <= size_default && (size & (size - 1)) == 0;
}
+static inline bool __bpf_ctx_wide_store_ok(u32 off, u32 size)
+{
+ /* u64 access is aligned and fits into the field size */
+ return off % sizeof(__u64) == 0 && off + sizeof(__u64) <= size;
+}
+
+#define bpf_ctx_wide_store_ok(off, size, type, field) \
+ (size == sizeof(__u64) && \
+ off >= offsetof(type, field) && \
+ off < offsetofend(type, field) ? \
+ __bpf_ctx_wide_store_ok(off - offsetof(type, field), \
+ FIELD_SIZEOF(type, field)) : 0)
+
+
#define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
diff --git a/net/core/filter.c b/net/core/filter.c
index 2014d76e0d2a..2d3787a439ae 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6849,6 +6849,16 @@ static bool sock_addr_is_valid_access(int off, int size,
if (!bpf_ctx_narrow_access_ok(off, size, size_default))
return false;
} else {
+ if (bpf_ctx_wide_store_ok(off, size,
+ struct bpf_sock_addr,
+ user_ip6))
+ return true;
+
+ if (bpf_ctx_wide_store_ok(off, size,
+ struct bpf_sock_addr,
+ msg_src_ip6))
+ return true;
+
if (size != size_default)
return false;
}
@@ -7689,9 +7699,6 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to
* SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
*
- * It doesn't support SIZE argument though since narrow stores are not
- * supported for now.
- *
* In addition it uses Temporary Field TF (member of struct S) as the 3rd
* "register" since two registers available in convert_ctx_access are not
* enough: we can't override neither SRC, since it contains value to store, nor
@@ -7699,7 +7706,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
* instructions. But we need a temporary place to save pointer to nested
* structure whose field we want to store to.
*/
-#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \
+#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF) \
do { \
int tmp_reg = BPF_REG_9; \
if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
@@ -7710,8 +7717,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
offsetof(S, TF)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
si->dst_reg, offsetof(S, F)); \
- *insn++ = BPF_STX_MEM( \
- BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \
+ *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \
bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
target_size) \
+ OFF); \
@@ -7723,8 +7729,8 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
TF) \
do { \
if (type == BPF_WRITE) { \
- SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \
- TF); \
+ SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, \
+ OFF, TF); \
} else { \
SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
S, NS, F, NF, SIZE, OFF); \
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [bpf/tools] cd17d77705: kernel_selftests.bpf.test_sock_addr.sh.fail
2019-06-27 15:50 ` [bpf/tools] cd17d77705: kernel_selftests.bpf.test_sock_addr.sh.fail Stanislav Fomichev
2019-06-27 17:29 ` Stanislav Fomichev
@ 2019-06-28 1:56 ` Andrii Nakryiko
1 sibling, 0 replies; 7+ messages in thread
From: Andrii Nakryiko @ 2019-06-28 1:56 UTC (permalink / raw)
To: Stanislav Fomichev
Cc: kernel test robot, Stanislav Fomichev, Daniel Borkmann,
Martin Lau, LKML, Stephen Rothwell, bpf, lkp
On Thu, Jun 27, 2019 at 8:50 AM Stanislav Fomichev <sdf@fomichev.me> wrote:
>
> On 06/27, kernel test robot wrote:
> > FYI, we noticed the following commit (built with gcc-7):
> >
> > commit: cd17d77705780e2270937fb3cbd2b985adab3edc ("bpf/tools: sync bpf.h")
> > https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master
> >
> > in testcase: kernel_selftests
> > with following parameters:
> >
> > group: kselftests-00
> >
> > test-description: The kernel contains a set of "self tests" under the tools/testing/selftests/ directory. These are intended to be small unit tests to exercise individual code paths in the kernel.
> > test-url: https://www.kernel.org/doc/Documentation/kselftest.txt
> >
> >
> > on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 8G
> >
> > caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
> >
> > # ; int connect_v6_prog(struct bpf_sock_addr *ctx)
> > # 0: (bf) r6 = r1
> > # 1: (18) r1 = 0x100000000000000
> > # ; tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0);
> > # 3: (7b) *(u64 *)(r10 -16) = r1
> > # 4: (b7) r1 = 169476096
> > # ; memset(&tuple.ipv6.sport, 0, sizeof(tuple.ipv6.sport));
> > # 5: (63) *(u32 *)(r10 -8) = r1
> > # 6: (b7) r7 = 0
> > # ; tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0);
> > # 7: (7b) *(u64 *)(r10 -24) = r7
> > # 8: (7b) *(u64 *)(r10 -32) = r7
> > # 9: (7b) *(u64 *)(r10 -40) = r7
> > # ; if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
> > # 10: (61) r1 = *(u32 *)(r6 +32)
> > # ; if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
> > # 11: (bf) r2 = r1
> > # 12: (07) r2 += -1
> > # 13: (67) r2 <<= 32
> > # 14: (77) r2 >>= 32
> > # 15: (25) if r2 > 0x1 goto pc+33
> > # R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=1,var_off=(0x0; 0x1)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=00000000 fp-32=00000000 fp-40=00000000
> > # ; else if (ctx->type == SOCK_STREAM)
> > # 16: (55) if r1 != 0x1 goto pc+8
> > # R1=inv1 R2=inv(id=0,umax_value=1,var_off=(0x0; 0x1)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=00000000 fp-32=00000000 fp-40=00000000
> > # 17: (bf) r2 = r10
> > # ; sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv6),
> > # 18: (07) r2 += -40
> > # 19: (bf) r1 = r6
> > # 20: (b7) r3 = 36
> > # 21: (b7) r4 = -1
> > # 22: (b7) r5 = 0
> > # 23: (85) call bpf_sk_lookup_tcp#84
> > # 24: (05) goto pc+7
> > # ; if (!sk)
> > # 32: (15) if r0 == 0x0 goto pc+16
> > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> > # 33: (61) r1 = *(u32 *)(r0 +28)
> > # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> > # 34: (61) r2 = *(u32 *)(r10 -24)
> > # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> > # 35: (5d) if r1 != r2 goto pc+11
> > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > # ; sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
> > # 36: (61) r1 = *(u32 *)(r0 +32)
> > # ; sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
> > # 37: (61) r2 = *(u32 *)(r10 -20)
> > # ; sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
> > # 38: (5d) if r1 != r2 goto pc+8
> > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > # ; sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
> > # 39: (61) r1 = *(u32 *)(r0 +36)
> > # ; sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
> > # 40: (61) r2 = *(u32 *)(r10 -16)
> > # ; sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
> > # 41: (5d) if r1 != r2 goto pc+5
> > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > # ; sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
> > # 42: (61) r1 = *(u32 *)(r0 +40)
> > # ; sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
> > # 43: (61) r2 = *(u32 *)(r10 -12)
> > # ; sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
> > # 44: (5d) if r1 != r2 goto pc+2
> > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > # ; sk->src_port != DST_REWRITE_PORT6) {
> > # 45: (61) r1 = *(u32 *)(r0 +44)
> > # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> > # 46: (15) if r1 == 0x1a0a goto pc+4
> > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > # ; bpf_sk_release(sk);
> > # 47: (bf) r1 = r0
> > # 48: (85) call bpf_sk_release#86
> > # ; }
> > # 49: (bf) r0 = r7
> > # 50: (95) exit
> > #
> > # from 46 to 51: R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv6666 R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > # ; bpf_sk_release(sk);
> > # 51: (bf) r1 = r0
> > # 52: (85) call bpf_sk_release#86
> > # 53: (b7) r1 = 2586
> > # ; ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
> > # 54: (63) *(u32 *)(r6 +24) = r1
> > # 55: (18) r1 = 0x100000000000000
> > # ; ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
> > # 57: (7b) *(u64 *)(r6 +16) = r1
> > # invalid bpf_context access off=16 size=8
> This looks like clang doing single u64 write for user_ip6[2] and
> user_ip6[3] instead of two u32. I don't think we allow that.
>
> I've seen this a couple of times myself while playing with some
> progs, but not sure what's the right way to 'fix' it.
So I bisected this to the commit below. That change effectively makes
`struct bpf_sock_addr` 8-byte aligned, which apparently triggers some
optimization in clang to generate this 8-byte write for two
consecutive 4-byte array elements.
commit cd17d77705780e2270937fb3cbd2b985adab3edc (HEAD, refs/bisect/bad)
Author: Stanislav Fomichev <sdf@google.com>
Date: Wed Jun 12 10:30:39 2019 -0700
bpf/tools: sync bpf.h
Add sk to struct bpf_sock_addr and struct bpf_sock_ops.
Cc: Martin Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ae0907d8c03a..d0a23476f887 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3247,6 +3247,7 @@ struct bpf_sock_addr {
__u32 msg_src_ip6[4]; /* Allows 1,2,4-byte read an 4-byte write.
* Stored in network byte order.
*/
+ __bpf_md_ptr(struct bpf_sock *, sk);
};
/* User bpf_sock_ops struct to access socket values and specify request ops
@@ -3298,6 +3299,7 @@ struct bpf_sock_ops {
__u32 sk_txhash;
__u64 bytes_received;
__u64 bytes_acked;
+ __bpf_md_ptr(struct bpf_sock *, sk);
};
/* Definitions for bpf_sock_ops_cb_flags */
>
> > # processed 49 insns (limit 1000000) max_states_per_insn 0 total_states 13 peak_states 13 mark_read 11
> > #
> > # libbpf: -- END LOG --
> > # libbpf: failed to load program 'cgroup/connect6'
> > # libbpf: failed to load object './connect6_prog.o'
> > # (test_sock_addr.c:752: errno: Bad file descriptor) >>> Loading program (./connect6_prog.o) error.
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [bpf/tools] cd17d77705: kernel_selftests.bpf.test_sock_addr.sh.fail
2019-06-27 17:29 ` Stanislav Fomichev
@ 2019-06-28 2:22 ` Andrii Nakryiko
2019-06-28 2:38 ` Stanislav Fomichev
0 siblings, 1 reply; 7+ messages in thread
From: Andrii Nakryiko @ 2019-06-28 2:22 UTC (permalink / raw)
To: Stanislav Fomichev
Cc: kernel test robot, Stanislav Fomichev, Daniel Borkmann,
Martin Lau, LKML, Stephen Rothwell, bpf, lkp
On Thu, Jun 27, 2019 at 10:29 AM Stanislav Fomichev <sdf@fomichev.me> wrote:
>
> On 06/27, Stanislav Fomichev wrote:
> > On 06/27, kernel test robot wrote:
> > > FYI, we noticed the following commit (built with gcc-7):
> > >
> > > commit: cd17d77705780e2270937fb3cbd2b985adab3edc ("bpf/tools: sync bpf.h")
> > > https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master
> > >
> > > in testcase: kernel_selftests
> > > with following parameters:
> > >
> > > group: kselftests-00
> > >
> > > test-description: The kernel contains a set of "self tests" under the tools/testing/selftests/ directory. These are intended to be small unit tests to exercise individual code paths in the kernel.
> > > test-url: https://www.kernel.org/doc/Documentation/kselftest.txt
> > >
> > >
> > > on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 8G
> > >
> > > caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
> > >
> > > # ; int connect_v6_prog(struct bpf_sock_addr *ctx)
> > > # 0: (bf) r6 = r1
> > > # 1: (18) r1 = 0x100000000000000
> > > # ; tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0);
> > > # 3: (7b) *(u64 *)(r10 -16) = r1
> > > # 4: (b7) r1 = 169476096
> > > # ; memset(&tuple.ipv6.sport, 0, sizeof(tuple.ipv6.sport));
> > > # 5: (63) *(u32 *)(r10 -8) = r1
> > > # 6: (b7) r7 = 0
> > > # ; tuple.ipv6.daddr[0] = bpf_htonl(DST_REWRITE_IP6_0);
> > > # 7: (7b) *(u64 *)(r10 -24) = r7
> > > # 8: (7b) *(u64 *)(r10 -32) = r7
> > > # 9: (7b) *(u64 *)(r10 -40) = r7
> > > # ; if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
> > > # 10: (61) r1 = *(u32 *)(r6 +32)
> > > # ; if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
> > > # 11: (bf) r2 = r1
> > > # 12: (07) r2 += -1
> > > # 13: (67) r2 <<= 32
> > > # 14: (77) r2 >>= 32
> > > # 15: (25) if r2 > 0x1 goto pc+33
> > > # R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=1,var_off=(0x0; 0x1)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=00000000 fp-32=00000000 fp-40=00000000
> > > # ; else if (ctx->type == SOCK_STREAM)
> > > # 16: (55) if r1 != 0x1 goto pc+8
> > > # R1=inv1 R2=inv(id=0,umax_value=1,var_off=(0x0; 0x1)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=00000000 fp-32=00000000 fp-40=00000000
> > > # 17: (bf) r2 = r10
> > > # ; sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv6),
> > > # 18: (07) r2 += -40
> > > # 19: (bf) r1 = r6
> > > # 20: (b7) r3 = 36
> > > # 21: (b7) r4 = -1
> > > # 22: (b7) r5 = 0
> > > # 23: (85) call bpf_sk_lookup_tcp#84
> > > # 24: (05) goto pc+7
> > > # ; if (!sk)
> > > # 32: (15) if r0 == 0x0 goto pc+16
> > > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > > # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> > > # 33: (61) r1 = *(u32 *)(r0 +28)
> > > # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> > > # 34: (61) r2 = *(u32 *)(r10 -24)
> > > # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> > > # 35: (5d) if r1 != r2 goto pc+11
> > > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > > # ; sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
> > > # 36: (61) r1 = *(u32 *)(r0 +32)
> > > # ; sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
> > > # 37: (61) r2 = *(u32 *)(r10 -20)
> > > # ; sk->src_ip6[1] != tuple.ipv6.daddr[1] ||
> > > # 38: (5d) if r1 != r2 goto pc+8
> > > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > > # ; sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
> > > # 39: (61) r1 = *(u32 *)(r0 +36)
> > > # ; sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
> > > # 40: (61) r2 = *(u32 *)(r10 -16)
> > > # ; sk->src_ip6[2] != tuple.ipv6.daddr[2] ||
> > > # 41: (5d) if r1 != r2 goto pc+5
> > > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > > # ; sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
> > > # 42: (61) r1 = *(u32 *)(r0 +40)
> > > # ; sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
> > > # 43: (61) r2 = *(u32 *)(r10 -12)
> > > # ; sk->src_ip6[3] != tuple.ipv6.daddr[3] ||
> > > # 44: (5d) if r1 != r2 goto pc+2
> > > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > > # ; sk->src_port != DST_REWRITE_PORT6) {
> > > # 45: (61) r1 = *(u32 *)(r0 +44)
> > > # ; if (sk->src_ip6[0] != tuple.ipv6.daddr[0] ||
> > > # 46: (15) if r1 == 0x1a0a goto pc+4
> > > # R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > > # ; bpf_sk_release(sk);
> > > # 47: (bf) r1 = r0
> > > # 48: (85) call bpf_sk_release#86
> > > # ; }
> > > # 49: (bf) r0 = r7
> > > # 50: (95) exit
> > > #
> > > # from 46 to 51: R0=sock(id=0,ref_obj_id=2,off=0,imm=0) R1=inv6666 R2=inv(id=0,umax_value=4294967295,var_off=(0x0; 0xffffffff)) R6=ctx(id=0,off=0,imm=0) R7=inv0 R10=fp0,call_-1 fp-8=????mmmm fp-16=mmmmmmmm fp-24=mmmmmmmm fp-32=mmmmmmmm fp-40=mmmmmmmm refs=2
> > > # ; bpf_sk_release(sk);
> > > # 51: (bf) r1 = r0
> > > # 52: (85) call bpf_sk_release#86
> > > # 53: (b7) r1 = 2586
> > > # ; ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
> > > # 54: (63) *(u32 *)(r6 +24) = r1
> > > # 55: (18) r1 = 0x100000000000000
> > > # ; ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
> > > # 57: (7b) *(u64 *)(r6 +16) = r1
> > > # invalid bpf_context access off=16 size=8
> > This looks like clang doing single u64 write for user_ip6[2] and
> > user_ip6[3] instead of two u32. I don't think we allow that.
> >
> > I've seen this a couple of times myself while playing with some
> > progs, but not sure what's the right way to 'fix' it.
> >
> Any thoughts about the patch below? Another way to "fix" it
I'll give it a more thorough look a bit later, but see my comments below.
> would be to mark context accesses 'volatile' in bpf progs, but that sounds
> a bit gross.
>
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 43b45d6db36d..34a14c950e60 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -746,6 +746,20 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
> return size <= size_default && (size & (size - 1)) == 0;
> }
>
> +static inline bool __bpf_ctx_wide_store_ok(u32 off, u32 size)
It seems like bpf_ctx_wide_store_ok and __bpf_ctx_wide_store_ok are
used only inside net/core/filter.c, why declaring them in header file?
> +{
> + /* u64 access is aligned and fits into the field size */
> + return off % sizeof(__u64) == 0 && off + sizeof(__u64) <= size;
> +}
> +
> +#define bpf_ctx_wide_store_ok(off, size, type, field) \
> + (size == sizeof(__u64) && \
> + off >= offsetof(type, field) && \
> + off < offsetofend(type, field) ? \
> + __bpf_ctx_wide_store_ok(off - offsetof(type, field), \
> + FIELD_SIZEOF(type, field)) : 0)
Why do you need ternary operator instead of just a chain of &&s?
It also seems like you can avoid macro and use plain function if
instead of providing (type, field) you provide values of offsetof and
offsetofend (offsetofend - offsetof should equal FIELD_SIZEOF(type,
field), shouldn't it?).
> +
> +
> #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
>
> static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 2014d76e0d2a..2d3787a439ae 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -6849,6 +6849,16 @@ static bool sock_addr_is_valid_access(int off, int size,
> if (!bpf_ctx_narrow_access_ok(off, size, size_default))
> return false;
> } else {
> + if (bpf_ctx_wide_store_ok(off, size,
> + struct bpf_sock_addr,
> + user_ip6))
> + return true;
> +
> + if (bpf_ctx_wide_store_ok(off, size,
> + struct bpf_sock_addr,
> + msg_src_ip6))
> + return true;
> +
> if (size != size_default)
> return false;
> }
> @@ -7689,9 +7699,6 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> /* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to
> * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
> *
> - * It doesn't support SIZE argument though since narrow stores are not
> - * supported for now.
> - *
> * In addition it uses Temporary Field TF (member of struct S) as the 3rd
> * "register" since two registers available in convert_ctx_access are not
> * enough: we can't override neither SRC, since it contains value to store, nor
> @@ -7699,7 +7706,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> * instructions. But we need a temporary place to save pointer to nested
> * structure whose field we want to store to.
> */
> -#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \
> +#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF) \
> do { \
> int tmp_reg = BPF_REG_9; \
> if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
> @@ -7710,8 +7717,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> offsetof(S, TF)); \
> *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
> si->dst_reg, offsetof(S, F)); \
> - *insn++ = BPF_STX_MEM( \
> - BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \
> + *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \
> bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
> target_size) \
> + OFF); \
> @@ -7723,8 +7729,8 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> TF) \
> do { \
> if (type == BPF_WRITE) { \
> - SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \
> - TF); \
> + SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, \
> + OFF, TF); \
> } else { \
> SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
> S, NS, F, NF, SIZE, OFF); \
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [bpf/tools] cd17d77705: kernel_selftests.bpf.test_sock_addr.sh.fail
2019-06-28 2:22 ` Andrii Nakryiko
@ 2019-06-28 2:38 ` Stanislav Fomichev
2019-06-28 18:14 ` Andrii Nakryiko
0 siblings, 1 reply; 7+ messages in thread
From: Stanislav Fomichev @ 2019-06-28 2:38 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: kernel test robot, Stanislav Fomichev, Daniel Borkmann,
Martin Lau, LKML, Stephen Rothwell, bpf, lkp
On 06/27, Andrii Nakryiko wrote:
> On Thu, Jun 27, 2019 at 10:29 AM Stanislav Fomichev <sdf@fomichev.me> wrote:
> >
> > On 06/27, Stanislav Fomichev wrote:
> > > On 06/27, kernel test robot wrote:
> > > > FYI, we noticed the following commit (built with gcc-7):
> > > >
> > > > commit: cd17d77705780e2270937fb3cbd2b985adab3edc ("bpf/tools: sync bpf.h")
> > > > https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master
> > > >
> > > > in testcase: kernel_selftests
> > > > with following parameters:
> > > >
> > > > group: kselftests-00
> > > >
> > > > test-description: The kernel contains a set of "self tests" under the tools/testing/selftests/ directory. These are intended to be small unit tests to exercise individual code paths in the kernel.
> > > > test-url: https://www.kernel.org/doc/Documentation/kselftest.txt
> > > >
> > > >
> > > > on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 8G
> > > >
> > > > caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
> > > >
> > > > # 55: (18) r1 = 0x100000000000000
> > > > # ; ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
> > > > # 57: (7b) *(u64 *)(r6 +16) = r1
> > > > # invalid bpf_context access off=16 size=8
> > > This looks like clang doing single u64 write for user_ip6[2] and
> > > user_ip6[3] instead of two u32. I don't think we allow that.
> > >
> > > I've seen this a couple of times myself while playing with some
> > > progs, but not sure what's the right way to 'fix' it.
> > >
> > Any thoughts about the patch below? Another way to "fix" it
>
> I'll give it a more thorough look a bit later, but see my comments below.
>
> > would be to mark context accesses 'volatile' in bpf progs, but that sounds
> > a bit gross.
> >
> > diff --git a/include/linux/filter.h b/include/linux/filter.h
> > index 43b45d6db36d..34a14c950e60 100644
> > --- a/include/linux/filter.h
> > +++ b/include/linux/filter.h
> > @@ -746,6 +746,20 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
> > return size <= size_default && (size & (size - 1)) == 0;
> > }
> >
> > +static inline bool __bpf_ctx_wide_store_ok(u32 off, u32 size)
>
> It seems like bpf_ctx_wide_store_ok and __bpf_ctx_wide_store_ok are
> used only inside net/core/filter.c, why declaring them in header file?
I wanted it to be next to bpf_ctx_narrow_access_ok which does the
reverse operation for reads.
> > +{
> > + /* u64 access is aligned and fits into the field size */
> > + return off % sizeof(__u64) == 0 && off + sizeof(__u64) <= size;
> > +}
> > +
> > +#define bpf_ctx_wide_store_ok(off, size, type, field) \
> > + (size == sizeof(__u64) && \
> > + off >= offsetof(type, field) && \
> > + off < offsetofend(type, field) ? \
> > + __bpf_ctx_wide_store_ok(off - offsetof(type, field), \
> > + FIELD_SIZEOF(type, field)) : 0)
>
> Why do you need ternary operator instead of just a chain of &&s?
Good point. I didn't spend too much time on the patch tbh :-)
If it looks good in general, I can add proper tests and do a
proper submition, this patch is just to get the discussion started.
> It also seems like you can avoid macro and use plain function if
> instead of providing (type, field) you provide values of offsetof and
> offsetofend (offsetofend - offsetof should equal FIELD_SIZEOF(type,
> field), shouldn't it?).
But then I'd have to copy-paste the args of offsetof/offsetofend at
the caller, right? I wanted the caller to be clean and simple.
> > #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
> >
> > static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
> > diff --git a/net/core/filter.c b/net/core/filter.c
> > index 2014d76e0d2a..2d3787a439ae 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -6849,6 +6849,16 @@ static bool sock_addr_is_valid_access(int off, int size,
> > if (!bpf_ctx_narrow_access_ok(off, size, size_default))
> > return false;
> > } else {
> > + if (bpf_ctx_wide_store_ok(off, size,
> > + struct bpf_sock_addr,
> > + user_ip6))
> > + return true;
> > +
> > + if (bpf_ctx_wide_store_ok(off, size,
> > + struct bpf_sock_addr,
> > + msg_src_ip6))
> > + return true;
> > +
> > if (size != size_default)
> > return false;
> > }
> > @@ -7689,9 +7699,6 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> > /* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to
> > * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
> > *
> > - * It doesn't support SIZE argument though since narrow stores are not
> > - * supported for now.
> > - *
> > * In addition it uses Temporary Field TF (member of struct S) as the 3rd
> > * "register" since two registers available in convert_ctx_access are not
> > * enough: we can't override neither SRC, since it contains value to store, nor
> > @@ -7699,7 +7706,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> > * instructions. But we need a temporary place to save pointer to nested
> > * structure whose field we want to store to.
> > */
> > -#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \
> > +#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF) \
> > do { \
> > int tmp_reg = BPF_REG_9; \
> > if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
> > @@ -7710,8 +7717,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> > offsetof(S, TF)); \
> > *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
> > si->dst_reg, offsetof(S, F)); \
> > - *insn++ = BPF_STX_MEM( \
> > - BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \
> > + *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \
> > bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
> > target_size) \
> > + OFF); \
> > @@ -7723,8 +7729,8 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> > TF) \
> > do { \
> > if (type == BPF_WRITE) { \
> > - SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \
> > - TF); \
> > + SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, \
> > + OFF, TF); \
> > } else { \
> > SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
> > S, NS, F, NF, SIZE, OFF); \
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [bpf/tools] cd17d77705: kernel_selftests.bpf.test_sock_addr.sh.fail
2019-06-28 2:38 ` Stanislav Fomichev
@ 2019-06-28 18:14 ` Andrii Nakryiko
2019-06-28 20:13 ` Stanislav Fomichev
0 siblings, 1 reply; 7+ messages in thread
From: Andrii Nakryiko @ 2019-06-28 18:14 UTC (permalink / raw)
To: Stanislav Fomichev
Cc: kernel test robot, Stanislav Fomichev, Daniel Borkmann,
Martin Lau, LKML, Stephen Rothwell, bpf, lkp
On Thu, Jun 27, 2019 at 7:38 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
>
> On 06/27, Andrii Nakryiko wrote:
> > On Thu, Jun 27, 2019 at 10:29 AM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > >
> > > On 06/27, Stanislav Fomichev wrote:
> > > > On 06/27, kernel test robot wrote:
> > > > > FYI, we noticed the following commit (built with gcc-7):
> > > > >
> > > > > commit: cd17d77705780e2270937fb3cbd2b985adab3edc ("bpf/tools: sync bpf.h")
> > > > > https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master
> > > > >
> > > > > in testcase: kernel_selftests
> > > > > with following parameters:
> > > > >
> > > > > group: kselftests-00
> > > > >
> > > > > test-description: The kernel contains a set of "self tests" under the tools/testing/selftests/ directory. These are intended to be small unit tests to exercise individual code paths in the kernel.
> > > > > test-url: https://www.kernel.org/doc/Documentation/kselftest.txt
> > > > >
> > > > >
> > > > > on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 8G
> > > > >
> > > > > caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
> > > > >
> > > > > # 55: (18) r1 = 0x100000000000000
> > > > > # ; ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
> > > > > # 57: (7b) *(u64 *)(r6 +16) = r1
> > > > > # invalid bpf_context access off=16 size=8
> > > > This looks like clang doing single u64 write for user_ip6[2] and
> > > > user_ip6[3] instead of two u32. I don't think we allow that.
> > > >
> > > > I've seen this a couple of times myself while playing with some
> > > > progs, but not sure what's the right way to 'fix' it.
> > > >
> > > Any thoughts about the patch below? Another way to "fix" it
> >
> > I'll give it a more thorough look a bit later, but see my comments below.
> >
> > > would be to mark context accesses 'volatile' in bpf progs, but that sounds
> > > a bit gross.
> > >
> > > diff --git a/include/linux/filter.h b/include/linux/filter.h
> > > index 43b45d6db36d..34a14c950e60 100644
> > > --- a/include/linux/filter.h
> > > +++ b/include/linux/filter.h
> > > @@ -746,6 +746,20 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
> > > return size <= size_default && (size & (size - 1)) == 0;
> > > }
> > >
> > > +static inline bool __bpf_ctx_wide_store_ok(u32 off, u32 size)
> >
> > It seems like bpf_ctx_wide_store_ok and __bpf_ctx_wide_store_ok are
> > used only inside net/core/filter.c, why declaring them in header file?
> I wanted it to be next to bpf_ctx_narrow_access_ok which does the
> reverse operation for reads.
Ah, ok, I see that bpf_ctx_narrow_access_ok is used in
kernel/bpf/cgroup.c as well and bpf_ctx_wide_store_ok might be useful
in some other contexts as well, let's keep it here.
>
> > > +{
> > > + /* u64 access is aligned and fits into the field size */
> > > + return off % sizeof(__u64) == 0 && off + sizeof(__u64) <= size;
> > > +}
> > > +
> > > +#define bpf_ctx_wide_store_ok(off, size, type, field) \
> > > + (size == sizeof(__u64) && \
> > > + off >= offsetof(type, field) && \
> > > + off < offsetofend(type, field) ? \
> > > + __bpf_ctx_wide_store_ok(off - offsetof(type, field), \
> > > + FIELD_SIZEOF(type, field)) : 0)
This would be sufficient, right?
#define bpf_ctx_wide_store_ok(off, size, type, field) \
size == sizeof(__u64) && \
off >= offsetof(type, field) && \
off + size <= offsetofend(type, field) && \
off % sizeof(__u64) == 0
> >
> > Why do you need ternary operator instead of just a chain of &&s?
> Good point. I didn't spend too much time on the patch tbh :-)
> If it looks good in general, I can add proper tests and do a
> proper submition, this patch is just to get the discussion started.
Consider it started. :) Talking with Yonghong about preventing this
from happening in the first place in Clang, it seems like that would
be harder and more cumbersome than supporting in BPF verifier. So
please go ahead and submit a proper patch.
>
> > It also seems like you can avoid macro and use plain function if
> > instead of providing (type, field) you provide values of offsetof and
> > offsetofend (offsetofend - offsetof should equal FIELD_SIZEOF(type,
> > field), shouldn't it?).
> But then I'd have to copy-paste the args of offsetof/offsetofend at
> the caller, right? I wanted the caller to be clean and simple.
Yeah, that's a bit verbose, I agree. I don't mind macro, so no worries.
>
> > > #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
> > >
> > > static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
> > > diff --git a/net/core/filter.c b/net/core/filter.c
> > > index 2014d76e0d2a..2d3787a439ae 100644
> > > --- a/net/core/filter.c
> > > +++ b/net/core/filter.c
> > > @@ -6849,6 +6849,16 @@ static bool sock_addr_is_valid_access(int off, int size,
> > > if (!bpf_ctx_narrow_access_ok(off, size, size_default))
> > > return false;
> > > } else {
> > > + if (bpf_ctx_wide_store_ok(off, size,
> > > + struct bpf_sock_addr,
> > > + user_ip6))
> > > + return true;
> > > +
> > > + if (bpf_ctx_wide_store_ok(off, size,
> > > + struct bpf_sock_addr,
> > > + msg_src_ip6))
> > > + return true;
> > > +
> > > if (size != size_default)
> > > return false;
> > > }
> > > @@ -7689,9 +7699,6 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> > > /* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to
> > > * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
> > > *
> > > - * It doesn't support SIZE argument though since narrow stores are not
> > > - * supported for now.
> > > - *
> > > * In addition it uses Temporary Field TF (member of struct S) as the 3rd
> > > * "register" since two registers available in convert_ctx_access are not
> > > * enough: we can't override neither SRC, since it contains value to store, nor
> > > @@ -7699,7 +7706,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> > > * instructions. But we need a temporary place to save pointer to nested
> > > * structure whose field we want to store to.
> > > */
> > > -#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \
> > > +#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF) \
> > > do { \
> > > int tmp_reg = BPF_REG_9; \
> > > if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
> > > @@ -7710,8 +7717,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> > > offsetof(S, TF)); \
> > > *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
> > > si->dst_reg, offsetof(S, F)); \
> > > - *insn++ = BPF_STX_MEM( \
> > > - BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \
> > > + *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \
> > > bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
> > > target_size) \
> > > + OFF); \
> > > @@ -7723,8 +7729,8 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> > > TF) \
> > > do { \
> > > if (type == BPF_WRITE) { \
> > > - SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \
> > > - TF); \
> > > + SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, \
> > > + OFF, TF); \
> > > } else { \
> > > SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
> > > S, NS, F, NF, SIZE, OFF); \
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [bpf/tools] cd17d77705: kernel_selftests.bpf.test_sock_addr.sh.fail
2019-06-28 18:14 ` Andrii Nakryiko
@ 2019-06-28 20:13 ` Stanislav Fomichev
0 siblings, 0 replies; 7+ messages in thread
From: Stanislav Fomichev @ 2019-06-28 20:13 UTC (permalink / raw)
To: Andrii Nakryiko
Cc: kernel test robot, Stanislav Fomichev, Daniel Borkmann,
Martin Lau, LKML, Stephen Rothwell, bpf, lkp
On 06/28, Andrii Nakryiko wrote:
> On Thu, Jun 27, 2019 at 7:38 PM Stanislav Fomichev <sdf@fomichev.me> wrote:
> >
> > On 06/27, Andrii Nakryiko wrote:
> > > On Thu, Jun 27, 2019 at 10:29 AM Stanislav Fomichev <sdf@fomichev.me> wrote:
> > > >
> > > > On 06/27, Stanislav Fomichev wrote:
> > > > > On 06/27, kernel test robot wrote:
> > > > > > FYI, we noticed the following commit (built with gcc-7):
> > > > > >
> > > > > > commit: cd17d77705780e2270937fb3cbd2b985adab3edc ("bpf/tools: sync bpf.h")
> > > > > > https://git.kernel.org/cgit/linux/kernel/git/next/linux-next.git master
> > > > > >
> > > > > > in testcase: kernel_selftests
> > > > > > with following parameters:
> > > > > >
> > > > > > group: kselftests-00
> > > > > >
> > > > > > test-description: The kernel contains a set of "self tests" under the tools/testing/selftests/ directory. These are intended to be small unit tests to exercise individual code paths in the kernel.
> > > > > > test-url: https://www.kernel.org/doc/Documentation/kselftest.txt
> > > > > >
> > > > > >
> > > > > > on test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 8G
> > > > > >
> > > > > > caused below changes (please refer to attached dmesg/kmsg for entire log/backtrace):
> > > > > >
> > > > > > # 55: (18) r1 = 0x100000000000000
> > > > > > # ; ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
> > > > > > # 57: (7b) *(u64 *)(r6 +16) = r1
> > > > > > # invalid bpf_context access off=16 size=8
> > > > > This looks like clang doing single u64 write for user_ip6[2] and
> > > > > user_ip6[3] instead of two u32. I don't think we allow that.
> > > > >
> > > > > I've seen this a couple of times myself while playing with some
> > > > > progs, but not sure what's the right way to 'fix' it.
> > > > >
> > > > Any thoughts about the patch below? Another way to "fix" it
> > >
> > > I'll give it a more thorough look a bit later, but see my comments below.
> > >
> > > > would be to mark context accesses 'volatile' in bpf progs, but that sounds
> > > > a bit gross.
> > > >
> > > > diff --git a/include/linux/filter.h b/include/linux/filter.h
> > > > index 43b45d6db36d..34a14c950e60 100644
> > > > --- a/include/linux/filter.h
> > > > +++ b/include/linux/filter.h
> > > > @@ -746,6 +746,20 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
> > > > return size <= size_default && (size & (size - 1)) == 0;
> > > > }
> > > >
> > > > +static inline bool __bpf_ctx_wide_store_ok(u32 off, u32 size)
> > >
> > > It seems like bpf_ctx_wide_store_ok and __bpf_ctx_wide_store_ok are
> > > used only inside net/core/filter.c, why declaring them in header file?
> > I wanted it to be next to bpf_ctx_narrow_access_ok which does the
> > reverse operation for reads.
>
> Ah, ok, I see that bpf_ctx_narrow_access_ok is used in
> kernel/bpf/cgroup.c as well and bpf_ctx_wide_store_ok might be useful
> in some other contexts as well, let's keep it here.
>
> >
> > > > +{
> > > > + /* u64 access is aligned and fits into the field size */
> > > > + return off % sizeof(__u64) == 0 && off + sizeof(__u64) <= size;
> > > > +}
> > > > +
> > > > +#define bpf_ctx_wide_store_ok(off, size, type, field) \
> > > > + (size == sizeof(__u64) && \
> > > > + off >= offsetof(type, field) && \
> > > > + off < offsetofend(type, field) ? \
> > > > + __bpf_ctx_wide_store_ok(off - offsetof(type, field), \
> > > > + FIELD_SIZEOF(type, field)) : 0)
>
> This would be sufficient, right?
Thanks, that looks much better and is actually more correct than my
implementation. We should really look at the off alignment, not
the off-offsetof(type, field) as I did.
> #define bpf_ctx_wide_store_ok(off, size, type, field) \
> size == sizeof(__u64) && \
> off >= offsetof(type, field) && \
> off + size <= offsetofend(type, field) && \
> off % sizeof(__u64) == 0
>
> > >
> > > Why do you need ternary operator instead of just a chain of &&s?
> > Good point. I didn't spend too much time on the patch tbh :-)
> > If it looks good in general, I can add proper tests and do a
> > proper submition, this patch is just to get the discussion started.
>
> Consider it started. :) Talking with Yonghong about preventing this
> from happening in the first place in Clang, it seems like that would
> be harder and more cumbersome than supporting in BPF verifier. So
> please go ahead and submit a proper patch.
>
> >
> > > It also seems like you can avoid macro and use plain function if
> > > instead of providing (type, field) you provide values of offsetof and
> > > offsetofend (offsetofend - offsetof should equal FIELD_SIZEOF(type,
> > > field), shouldn't it?).
> > But then I'd have to copy-paste the args of offsetof/offsetofend at
> > the caller, right? I wanted the caller to be clean and simple.
>
> Yeah, that's a bit verbose, I agree. I don't mind macro, so no worries.
>
> >
> > > > #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
> > > >
> > > > static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
> > > > diff --git a/net/core/filter.c b/net/core/filter.c
> > > > index 2014d76e0d2a..2d3787a439ae 100644
> > > > --- a/net/core/filter.c
> > > > +++ b/net/core/filter.c
> > > > @@ -6849,6 +6849,16 @@ static bool sock_addr_is_valid_access(int off, int size,
> > > > if (!bpf_ctx_narrow_access_ok(off, size, size_default))
> > > > return false;
> > > > } else {
> > > > + if (bpf_ctx_wide_store_ok(off, size,
> > > > + struct bpf_sock_addr,
> > > > + user_ip6))
> > > > + return true;
> > > > +
> > > > + if (bpf_ctx_wide_store_ok(off, size,
> > > > + struct bpf_sock_addr,
> > > > + msg_src_ip6))
> > > > + return true;
> > > > +
> > > > if (size != size_default)
> > > > return false;
> > > > }
> > > > @@ -7689,9 +7699,6 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> > > > /* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to
> > > > * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
> > > > *
> > > > - * It doesn't support SIZE argument though since narrow stores are not
> > > > - * supported for now.
> > > > - *
> > > > * In addition it uses Temporary Field TF (member of struct S) as the 3rd
> > > > * "register" since two registers available in convert_ctx_access are not
> > > > * enough: we can't override neither SRC, since it contains value to store, nor
> > > > @@ -7699,7 +7706,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> > > > * instructions. But we need a temporary place to save pointer to nested
> > > > * structure whose field we want to store to.
> > > > */
> > > > -#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \
> > > > +#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF) \
> > > > do { \
> > > > int tmp_reg = BPF_REG_9; \
> > > > if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
> > > > @@ -7710,8 +7717,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> > > > offsetof(S, TF)); \
> > > > *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
> > > > si->dst_reg, offsetof(S, F)); \
> > > > - *insn++ = BPF_STX_MEM( \
> > > > - BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \
> > > > + *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \
> > > > bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
> > > > target_size) \
> > > > + OFF); \
> > > > @@ -7723,8 +7729,8 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
> > > > TF) \
> > > > do { \
> > > > if (type == BPF_WRITE) { \
> > > > - SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \
> > > > - TF); \
> > > > + SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, \
> > > > + OFF, TF); \
> > > > } else { \
> > > > SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
> > > > S, NS, F, NF, SIZE, OFF); \
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2019-06-28 20:13 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
[not found] <20190627090446.GG7221@shao2-debian>
2019-06-27 15:50 ` [bpf/tools] cd17d77705: kernel_selftests.bpf.test_sock_addr.sh.fail Stanislav Fomichev
2019-06-27 17:29 ` Stanislav Fomichev
2019-06-28 2:22 ` Andrii Nakryiko
2019-06-28 2:38 ` Stanislav Fomichev
2019-06-28 18:14 ` Andrii Nakryiko
2019-06-28 20:13 ` Stanislav Fomichev
2019-06-28 1:56 ` Andrii Nakryiko
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).