* [PATCH bpf-next] selftests/bpf: Try to address xdp_metadata crashes
@ 2023-01-30 21:51 Stanislav Fomichev
2023-01-31 5:41 ` Alexei Starovoitov
0 siblings, 1 reply; 4+ messages in thread
From: Stanislav Fomichev @ 2023-01-30 21:51 UTC (permalink / raw)
To: bpf
Cc: ast, daniel, andrii, martin.lau, song, yhs, john.fastabend,
kpsingh, sdf, haoluo, jolsa
Commit e04ce9f4040b ("selftests/bpf: Make crashes more debuggable in
test_progs") hasn't uncovered anything interesting besides
confirming that the test passes successfully, but crashes eventually [0].
I'm assuming the crashes are coming from something overwriting
the stack/heap, probably due to xsk misuse. So I'm trying
a bunch of things to address that:
- More debugging with real memory pointers for the queues/umem
  - To confirm that everything is sane
- Set proper tx/fill ring sizes
  - In particular, fill ring wasn't fully initialized, but I'm
    assuming no packets should be flowing there regardless
- Do the same for xdp_hw_metadata
- Don't refill on tx completion; instead, only ack it
0: https://github.com/kernel-patches/bpf/actions/runs/4032162075/jobs/6931951300
Signed-off-by: Stanislav Fomichev <sdf@google.com>
---
.../selftests/bpf/prog_tests/xdp_metadata.c | 36 +++++++++++++------
tools/testing/selftests/bpf/xdp_hw_metadata.c | 4 +--
2 files changed, 28 insertions(+), 12 deletions(-)
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index e033d48288c0..453b4045a9d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -54,11 +54,11 @@ static int open_xsk(int ifindex, struct xsk *xsk)
int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
const struct xsk_socket_config socket_config = {
.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
- .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .tx_size = UMEM_NUM / 2,
.bind_flags = XDP_COPY,
};
const struct xsk_umem_config umem_config = {
- .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .fill_size = UMEM_NUM / 2,
.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
@@ -88,13 +88,24 @@ static int open_xsk(int ifindex, struct xsk *xsk)
if (!ASSERT_OK(ret, "xsk_socket__create"))
return ret;
+ printf("%p: umem=<%p..%p>\n", xsk, xsk->umem_area, xsk->umem_area + UMEM_SIZE);
+ printf("%p: fill=<%p..%p>\n", xsk, xsk->fill.ring,
+ xsk->fill.ring + xsk->fill.size * sizeof(__u64));
+ printf("%p: comp=<%p..%p>\n", xsk, xsk->comp.ring,
+ xsk->comp.ring + xsk->comp.size * sizeof(__u64));
+ printf("%p: rx=<%p..%p>\n", xsk, xsk->rx.ring,
+ xsk->rx.ring + xsk->rx.size * sizeof(struct xdp_desc));
+ printf("%p: tx=<%p..%p>\n", xsk, xsk->tx.ring,
+ xsk->tx.ring + xsk->tx.size * sizeof(struct xdp_desc));
+
/* First half of umem is for TX. This way address matches 1-to-1
* to the completion queue index.
*/
for (i = 0; i < UMEM_NUM / 2; i++) {
addr = i * UMEM_FRAME_SIZE;
- printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
+ printf("%p: tx_desc[%d] -> %lx (%p)\n", xsk, i, addr,
+ xsk_umem__get_data(xsk->umem_area, addr));
}
/* Second half of umem is for RX. */
@@ -107,7 +118,10 @@ static int open_xsk(int ifindex, struct xsk *xsk)
for (i = 0; i < UMEM_NUM / 2; i++) {
addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
- printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
+ printf("%p: rx_desc[%d] -> %lx (%p)\n", xsk, i, addr,
+ xsk_umem__get_data(xsk->umem_area, addr));
+ printf("%p: fill %lx at %p\n", xsk, addr,
+ xsk_ring_prod__fill_addr(&xsk->fill, i));
*xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
}
xsk_ring_prod__submit(&xsk->fill, ret);
@@ -159,6 +173,7 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port)
tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE;
printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr);
data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
+ printf("%p: tx %llx (%p) at %p\n", xsk, tx_desc->addr, data, tx_desc);
eth = data;
iph = (void *)(eth + 1);
@@ -205,9 +220,8 @@ static void complete_tx(struct xsk *xsk)
if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) {
addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
- printf("%p: refill idx=%u addr=%llx\n", xsk, idx, addr);
- *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
- xsk_ring_prod__submit(&xsk->fill, 1);
+ printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
+ xsk_ring_cons__release(&xsk->comp, 1);
}
}
@@ -216,7 +230,9 @@ static void refill_rx(struct xsk *xsk, __u64 addr)
__u32 idx;
if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) {
- printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
+ printf("%p: complete rx idx=%u addr=%llx\n", xsk, idx, addr);
+ printf("%p: fill %llx at %p\n", xsk, addr,
+ xsk_ring_prod__fill_addr(&xsk->fill, idx));
*xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
xsk_ring_prod__submit(&xsk->fill, 1);
}
@@ -253,8 +269,8 @@ static int verify_xsk_metadata(struct xsk *xsk)
rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
comp_addr = xsk_umem__extract_addr(rx_desc->addr);
addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
- printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
- xsk, idx, rx_desc->addr, addr, comp_addr);
+ printf("%p: rx_desc[%u]->addr=%llx (%p) addr=%llx comp_addr=%llx\n",
+ xsk, idx, rx_desc->addr, rx_desc, addr, comp_addr);
data = xsk_umem__get_data(xsk->umem_area, addr);
/* Make sure we got the packet offset correctly. */
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 3823b1c499cc..6d715f85ea20 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -59,11 +59,11 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
const struct xsk_socket_config socket_config = {
.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
- .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .tx_size = UMEM_NUM / 2,
.bind_flags = XDP_COPY,
};
const struct xsk_umem_config umem_config = {
- .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .fill_size = UMEM_NUM / 2,
.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
--
2.39.1.456.gfc5497dd1b-goog
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH bpf-next] selftests/bpf: Try to address xdp_metadata crashes
2023-01-30 21:51 [PATCH bpf-next] selftests/bpf: Try to address xdp_metadata crashes Stanislav Fomichev
@ 2023-01-31 5:41 ` Alexei Starovoitov
2023-01-31 18:07 ` Stanislav Fomichev
0 siblings, 1 reply; 4+ messages in thread
From: Alexei Starovoitov @ 2023-01-31 5:41 UTC (permalink / raw)
To: Stanislav Fomichev
Cc: bpf, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
KP Singh, Hao Luo, Jiri Olsa
On Mon, Jan 30, 2023 at 1:51 PM Stanislav Fomichev <sdf@google.com> wrote:
>
> Commit e04ce9f4040b ("selftests/bpf: Make crashes more debuggable in
> test_progs") hasn't uncovered anything interesting besides
> confirming that the test passes successfully, but crashes eventually [0].
>
> I'm assuming the crashes are coming from something overriding
> the stack/heap. Probably from the xsk misuse. So I'm trying
> a bunch of things to address that:
>
> - More debugging with real memory pointers for the queues/umem
> - To confirm that everything is sane
> - Set proper tx/fill ring sizes
> - In particular, fill ring wasn't fully initialized, but I'm
> assuming no packets should be flowing there regardless
> - Do the same for xdp_hw_metadata
> - Don't refill on tx completion; instead, only ack it
>
> 0: https://github.com/kernel-patches/bpf/actions/runs/4032162075/jobs/6931951300
>
> Signed-off-by: Stanislav Fomichev <sdf@google.com>
> ---
> .../selftests/bpf/prog_tests/xdp_metadata.c | 36 +++++++++++++------
> tools/testing/selftests/bpf/xdp_hw_metadata.c | 4 +--
> 2 files changed, 28 insertions(+), 12 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
> index e033d48288c0..453b4045a9d1 100644
> --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
> +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
> @@ -54,11 +54,11 @@ static int open_xsk(int ifindex, struct xsk *xsk)
> int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
> const struct xsk_socket_config socket_config = {
> .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> - .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> + .tx_size = UMEM_NUM / 2,
I'm not following. Is this a fix or just random debug code?
> .bind_flags = XDP_COPY,
> };
> const struct xsk_umem_config umem_config = {
> - .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> + .fill_size = UMEM_NUM / 2,
> .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
> .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
> .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
> @@ -88,13 +88,24 @@ static int open_xsk(int ifindex, struct xsk *xsk)
> if (!ASSERT_OK(ret, "xsk_socket__create"))
> return ret;
>
> + printf("%p: umem=<%p..%p>\n", xsk, xsk->umem_area, xsk->umem_area + UMEM_SIZE);
> + printf("%p: fill=<%p..%p>\n", xsk, xsk->fill.ring,
> + xsk->fill.ring + xsk->fill.size * sizeof(__u64));
> + printf("%p: comp=<%p..%p>\n", xsk, xsk->comp.ring,
> + xsk->comp.ring + xsk->comp.size * sizeof(__u64));
> + printf("%p: rx=<%p..%p>\n", xsk, xsk->rx.ring,
> + xsk->rx.ring + xsk->rx.size * sizeof(struct xdp_desc));
> + printf("%p: tx=<%p..%p>\n", xsk, xsk->tx.ring,
> + xsk->tx.ring + xsk->tx.size * sizeof(struct xdp_desc));
> +
This is fine as debug.
> /* First half of umem is for TX. This way address matches 1-to-1
> * to the completion queue index.
> */
>
> for (i = 0; i < UMEM_NUM / 2; i++) {
> addr = i * UMEM_FRAME_SIZE;
> - printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
> + printf("%p: tx_desc[%d] -> %lx (%p)\n", xsk, i, addr,
> + xsk_umem__get_data(xsk->umem_area, addr));
> }
>
> /* Second half of umem is for RX. */
> @@ -107,7 +118,10 @@ static int open_xsk(int ifindex, struct xsk *xsk)
>
> for (i = 0; i < UMEM_NUM / 2; i++) {
> addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
> - printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
> + printf("%p: rx_desc[%d] -> %lx (%p)\n", xsk, i, addr,
> + xsk_umem__get_data(xsk->umem_area, addr));
> + printf("%p: fill %lx at %p\n", xsk, addr,
> + xsk_ring_prod__fill_addr(&xsk->fill, i));
> *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
> }
> xsk_ring_prod__submit(&xsk->fill, ret);
> @@ -159,6 +173,7 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port)
> tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE;
> printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr);
> data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
> + printf("%p: tx %llx (%p) at %p\n", xsk, tx_desc->addr, data, tx_desc);
>
> eth = data;
> iph = (void *)(eth + 1);
> @@ -205,9 +220,8 @@ static void complete_tx(struct xsk *xsk)
> if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) {
> addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
>
> - printf("%p: refill idx=%u addr=%llx\n", xsk, idx, addr);
> - *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
> - xsk_ring_prod__submit(&xsk->fill, 1);
> + printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
> + xsk_ring_cons__release(&xsk->comp, 1);
What does this do?
> }
> }
>
> @@ -216,7 +230,9 @@ static void refill_rx(struct xsk *xsk, __u64 addr)
> __u32 idx;
>
> if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) {
> - printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
> + printf("%p: complete rx idx=%u addr=%llx\n", xsk, idx, addr);
> + printf("%p: fill %llx at %p\n", xsk, addr,
> + xsk_ring_prod__fill_addr(&xsk->fill, idx));
> *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
> xsk_ring_prod__submit(&xsk->fill, 1);
> }
> @@ -253,8 +269,8 @@ static int verify_xsk_metadata(struct xsk *xsk)
> rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
> comp_addr = xsk_umem__extract_addr(rx_desc->addr);
> addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
> - printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
> - xsk, idx, rx_desc->addr, addr, comp_addr);
> + printf("%p: rx_desc[%u]->addr=%llx (%p) addr=%llx comp_addr=%llx\n",
> + xsk, idx, rx_desc->addr, rx_desc, addr, comp_addr);
> data = xsk_umem__get_data(xsk->umem_area, addr);
>
> /* Make sure we got the packet offset correctly. */
> diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
> index 3823b1c499cc..6d715f85ea20 100644
> --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
> +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
> @@ -59,11 +59,11 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
> int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
> const struct xsk_socket_config socket_config = {
> .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> - .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> + .tx_size = UMEM_NUM / 2,
> .bind_flags = XDP_COPY,
> };
> const struct xsk_umem_config umem_config = {
> - .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> + .fill_size = UMEM_NUM / 2,
> .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
> .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
> .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
> --
> 2.39.1.456.gfc5497dd1b-goog
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH bpf-next] selftests/bpf: Try to address xdp_metadata crashes
2023-01-31 5:41 ` Alexei Starovoitov
@ 2023-01-31 18:07 ` Stanislav Fomichev
2023-02-01 23:28 ` Stanislav Fomichev
0 siblings, 1 reply; 4+ messages in thread
From: Stanislav Fomichev @ 2023-01-31 18:07 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: bpf, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
KP Singh, Hao Luo, Jiri Olsa
On Mon, Jan 30, 2023 at 9:41 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Mon, Jan 30, 2023 at 1:51 PM Stanislav Fomichev <sdf@google.com> wrote:
> >
> > Commit e04ce9f4040b ("selftests/bpf: Make crashes more debuggable in
> > test_progs") hasn't uncovered anything interesting besides
> > confirming that the test passes successfully, but crashes eventually [0].
> >
> > I'm assuming the crashes are coming from something overriding
> > the stack/heap. Probably from the xsk misuse. So I'm trying
> > a bunch of things to address that:
> >
> > - More debugging with real memory pointers for the queues/umem
> > - To confirm that everything is sane
> > - Set proper tx/fill ring sizes
> > - In particular, fill ring wasn't fully initialized, but I'm
> > assuming no packets should be flowing there regardless
> > - Do the same for xdp_hw_metadata
> > - Don't refill on tx completion; instead, only ack it
> >
> > 0: https://github.com/kernel-patches/bpf/actions/runs/4032162075/jobs/6931951300
> >
> > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > ---
> > .../selftests/bpf/prog_tests/xdp_metadata.c | 36 +++++++++++++------
> > tools/testing/selftests/bpf/xdp_hw_metadata.c | 4 +--
> > 2 files changed, 28 insertions(+), 12 deletions(-)
> >
> > diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
> > index e033d48288c0..453b4045a9d1 100644
> > --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
> > +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
> > @@ -54,11 +54,11 @@ static int open_xsk(int ifindex, struct xsk *xsk)
> > int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
> > const struct xsk_socket_config socket_config = {
> > .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > - .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > + .tx_size = UMEM_NUM / 2,
>
> I'm not following. Is this a fix or just random debug code?
This chunk is a potential fix, but the patch overall is a mix of
potential fixes and debug code.
I can't reproduce the crash locally, so I'm trying a bunch of potential
fixes and adding more debugging in case the fixes don't help.
> > .bind_flags = XDP_COPY,
> > };
> > const struct xsk_umem_config umem_config = {
> > - .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > + .fill_size = UMEM_NUM / 2,
> > .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
> > .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
> > .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
> > @@ -88,13 +88,24 @@ static int open_xsk(int ifindex, struct xsk *xsk)
> > if (!ASSERT_OK(ret, "xsk_socket__create"))
> > return ret;
> >
> > + printf("%p: umem=<%p..%p>\n", xsk, xsk->umem_area, xsk->umem_area + UMEM_SIZE);
> > + printf("%p: fill=<%p..%p>\n", xsk, xsk->fill.ring,
> > + xsk->fill.ring + xsk->fill.size * sizeof(__u64));
> > + printf("%p: comp=<%p..%p>\n", xsk, xsk->comp.ring,
> > + xsk->comp.ring + xsk->comp.size * sizeof(__u64));
> > + printf("%p: rx=<%p..%p>\n", xsk, xsk->rx.ring,
> > + xsk->rx.ring + xsk->rx.size * sizeof(struct xdp_desc));
> > + printf("%p: tx=<%p..%p>\n", xsk, xsk->tx.ring,
> > + xsk->tx.ring + xsk->tx.size * sizeof(struct xdp_desc));
> > +
>
> This is fine as debug.
Right. It should also be irrelevant when the test passes, since we
are writing this output to /dev/null.
> > /* First half of umem is for TX. This way address matches 1-to-1
> > * to the completion queue index.
> > */
> >
> > for (i = 0; i < UMEM_NUM / 2; i++) {
> > addr = i * UMEM_FRAME_SIZE;
> > - printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
> > + printf("%p: tx_desc[%d] -> %lx (%p)\n", xsk, i, addr,
> > + xsk_umem__get_data(xsk->umem_area, addr));
> > }
> >
> > /* Second half of umem is for RX. */
> > @@ -107,7 +118,10 @@ static int open_xsk(int ifindex, struct xsk *xsk)
> >
> > for (i = 0; i < UMEM_NUM / 2; i++) {
> > addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
> > - printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
> > + printf("%p: rx_desc[%d] -> %lx (%p)\n", xsk, i, addr,
> > + xsk_umem__get_data(xsk->umem_area, addr));
> > + printf("%p: fill %lx at %p\n", xsk, addr,
> > + xsk_ring_prod__fill_addr(&xsk->fill, i));
> > *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
> > }
> > xsk_ring_prod__submit(&xsk->fill, ret);
> > @@ -159,6 +173,7 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port)
> > tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE;
> > printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr);
> > data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
> > + printf("%p: tx %llx (%p) at %p\n", xsk, tx_desc->addr, data, tx_desc);
> >
> > eth = data;
> > iph = (void *)(eth + 1);
> > @@ -205,9 +220,8 @@ static void complete_tx(struct xsk *xsk)
> > if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) {
> > addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
> >
> > - printf("%p: refill idx=%u addr=%llx\n", xsk, idx, addr);
> > - *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
> > - xsk_ring_prod__submit(&xsk->fill, 1);
> > + printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
> > + xsk_ring_cons__release(&xsk->comp, 1);
>
> What does this do?
I was incorrectly refilling the 'fill' ring on tx completion. This
changes it to only "consume" the completion
(xsk_ring_cons__peek + xsk_ring_cons__release).
> > }
> > }
> >
> > @@ -216,7 +230,9 @@ static void refill_rx(struct xsk *xsk, __u64 addr)
> > __u32 idx;
> >
> > if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) {
> > - printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
> > + printf("%p: complete rx idx=%u addr=%llx\n", xsk, idx, addr);
> > + printf("%p: fill %llx at %p\n", xsk, addr,
> > + xsk_ring_prod__fill_addr(&xsk->fill, idx));
> > *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
> > xsk_ring_prod__submit(&xsk->fill, 1);
> > }
> > @@ -253,8 +269,8 @@ static int verify_xsk_metadata(struct xsk *xsk)
> > rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
> > comp_addr = xsk_umem__extract_addr(rx_desc->addr);
> > addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
> > - printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
> > - xsk, idx, rx_desc->addr, addr, comp_addr);
> > + printf("%p: rx_desc[%u]->addr=%llx (%p) addr=%llx comp_addr=%llx\n",
> > + xsk, idx, rx_desc->addr, rx_desc, addr, comp_addr);
> > data = xsk_umem__get_data(xsk->umem_area, addr);
> >
> > /* Make sure we got the packet offset correctly. */
> > diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
> > index 3823b1c499cc..6d715f85ea20 100644
> > --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
> > +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
> > @@ -59,11 +59,11 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
> > int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
> > const struct xsk_socket_config socket_config = {
> > .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > - .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > + .tx_size = UMEM_NUM / 2,
> > .bind_flags = XDP_COPY,
> > };
> > const struct xsk_umem_config umem_config = {
> > - .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > + .fill_size = UMEM_NUM / 2,
> > .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
> > .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
> > .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
> > --
> > 2.39.1.456.gfc5497dd1b-goog
> >
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH bpf-next] selftests/bpf: Try to address xdp_metadata crashes
2023-01-31 18:07 ` Stanislav Fomichev
@ 2023-02-01 23:28 ` Stanislav Fomichev
0 siblings, 0 replies; 4+ messages in thread
From: Stanislav Fomichev @ 2023-02-01 23:28 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: bpf, Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko,
Martin KaFai Lau, Song Liu, Yonghong Song, John Fastabend,
KP Singh, Hao Luo, Jiri Olsa
On Tue, Jan 31, 2023 at 10:07 AM Stanislav Fomichev <sdf@google.com> wrote:
>
> On Mon, Jan 30, 2023 at 9:41 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Mon, Jan 30, 2023 at 1:51 PM Stanislav Fomichev <sdf@google.com> wrote:
> > >
> > > Commit e04ce9f4040b ("selftests/bpf: Make crashes more debuggable in
> > > test_progs") hasn't uncovered anything interesting besides
> > > confirming that the test passes successfully, but crashes eventually [0].
> > >
> > > I'm assuming the crashes are coming from something overriding
> > > the stack/heap. Probably from the xsk misuse. So I'm trying
> > > a bunch of things to address that:
> > >
> > > - More debugging with real memory pointers for the queues/umem
> > > - To confirm that everything is sane
> > > - Set proper tx/fill ring sizes
> > > - In particular, fill ring wasn't fully initialized, but I'm
> > > assuming no packets should be flowing there regardless
> > > - Do the same for xdp_hw_metadata
> > > - Don't refill on tx completion; instead, only ack it
> > >
> > > 0: https://github.com/kernel-patches/bpf/actions/runs/4032162075/jobs/6931951300
> > >
> > > Signed-off-by: Stanislav Fomichev <sdf@google.com>
> > > ---
> > > .../selftests/bpf/prog_tests/xdp_metadata.c | 36 +++++++++++++------
> > > tools/testing/selftests/bpf/xdp_hw_metadata.c | 4 +--
> > > 2 files changed, 28 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
> > > index e033d48288c0..453b4045a9d1 100644
> > > --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
> > > +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
> > > @@ -54,11 +54,11 @@ static int open_xsk(int ifindex, struct xsk *xsk)
> > > int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
> > > const struct xsk_socket_config socket_config = {
> > > .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > > - .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > > + .tx_size = UMEM_NUM / 2,
> >
> > I'm not following. Is this a fix or just random debug code?
>
> This chunk is a potential fix. But the patch overall is a mix of
> potential fixes + debug code.
> I can't reproduce locally, so I'm trying a bunch of potential fixes +
> adding more debugging in case it doesn't help.
>
> > > .bind_flags = XDP_COPY,
> > > };
> > > const struct xsk_umem_config umem_config = {
> > > - .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > > + .fill_size = UMEM_NUM / 2,
> > > .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
> > > .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
> > > .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
> > > @@ -88,13 +88,24 @@ static int open_xsk(int ifindex, struct xsk *xsk)
> > > if (!ASSERT_OK(ret, "xsk_socket__create"))
> > > return ret;
> > >
> > > + printf("%p: umem=<%p..%p>\n", xsk, xsk->umem_area, xsk->umem_area + UMEM_SIZE);
> > > + printf("%p: fill=<%p..%p>\n", xsk, xsk->fill.ring,
> > > + xsk->fill.ring + xsk->fill.size * sizeof(__u64));
> > > + printf("%p: comp=<%p..%p>\n", xsk, xsk->comp.ring,
> > > + xsk->comp.ring + xsk->comp.size * sizeof(__u64));
> > > + printf("%p: rx=<%p..%p>\n", xsk, xsk->rx.ring,
> > > + xsk->rx.ring + xsk->rx.size * sizeof(struct xdp_desc));
> > > + printf("%p: tx=<%p..%p>\n", xsk, xsk->tx.ring,
> > > + xsk->tx.ring + xsk->tx.size * sizeof(struct xdp_desc));
> > > +
> >
> > This is fine as debug.
>
> Right. It should also be irrelevant for when the test passes since we
> are writing this to /dev/null.
>
> > > /* First half of umem is for TX. This way address matches 1-to-1
> > > * to the completion queue index.
> > > */
> > >
> > > for (i = 0; i < UMEM_NUM / 2; i++) {
> > > addr = i * UMEM_FRAME_SIZE;
> > > - printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
> > > + printf("%p: tx_desc[%d] -> %lx (%p)\n", xsk, i, addr,
> > > + xsk_umem__get_data(xsk->umem_area, addr));
> > > }
> > >
> > > /* Second half of umem is for RX. */
> > > @@ -107,7 +118,10 @@ static int open_xsk(int ifindex, struct xsk *xsk)
> > >
> > > for (i = 0; i < UMEM_NUM / 2; i++) {
> > > addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
> > > - printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
> > > + printf("%p: rx_desc[%d] -> %lx (%p)\n", xsk, i, addr,
> > > + xsk_umem__get_data(xsk->umem_area, addr));
> > > + printf("%p: fill %lx at %p\n", xsk, addr,
> > > + xsk_ring_prod__fill_addr(&xsk->fill, i));
> > > *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
> > > }
> > > xsk_ring_prod__submit(&xsk->fill, ret);
> > > @@ -159,6 +173,7 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port)
> > > tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE;
> > > printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr);
> > > data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
> > > + printf("%p: tx %llx (%p) at %p\n", xsk, tx_desc->addr, data, tx_desc);
> > >
> > > eth = data;
> > > iph = (void *)(eth + 1);
> > > @@ -205,9 +220,8 @@ static void complete_tx(struct xsk *xsk)
> > > if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) {
> > > addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
> > >
> > > - printf("%p: refill idx=%u addr=%llx\n", xsk, idx, addr);
> > > - *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
> > > - xsk_ring_prod__submit(&xsk->fill, 1);
> > > + printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
> > > + xsk_ring_cons__release(&xsk->comp, 1);
> >
> > What does this do?
>
> I was incorrectly refilling 'fill' ring on tx completion. Changing it
> to "consume" the completion
> (xsk_ring_cons__peek+xsk_ring_cons__release).
FYI, given Jesper's finding about the wrong munmap, I'm going to respin
with only this part fixed.
> > > }
> > > }
> > >
> > > @@ -216,7 +230,9 @@ static void refill_rx(struct xsk *xsk, __u64 addr)
> > > __u32 idx;
> > >
> > > if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) {
> > > - printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
> > > + printf("%p: complete rx idx=%u addr=%llx\n", xsk, idx, addr);
> > > + printf("%p: fill %llx at %p\n", xsk, addr,
> > > + xsk_ring_prod__fill_addr(&xsk->fill, idx));
> > > *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
> > > xsk_ring_prod__submit(&xsk->fill, 1);
> > > }
> > > @@ -253,8 +269,8 @@ static int verify_xsk_metadata(struct xsk *xsk)
> > > rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
> > > comp_addr = xsk_umem__extract_addr(rx_desc->addr);
> > > addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
> > > - printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
> > > - xsk, idx, rx_desc->addr, addr, comp_addr);
> > > + printf("%p: rx_desc[%u]->addr=%llx (%p) addr=%llx comp_addr=%llx\n",
> > > + xsk, idx, rx_desc->addr, rx_desc, addr, comp_addr);
> > > data = xsk_umem__get_data(xsk->umem_area, addr);
> > >
> > > /* Make sure we got the packet offset correctly. */
> > > diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
> > > index 3823b1c499cc..6d715f85ea20 100644
> > > --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
> > > +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
> > > @@ -59,11 +59,11 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
> > > int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
> > > const struct xsk_socket_config socket_config = {
> > > .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > > - .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > > + .tx_size = UMEM_NUM / 2,
> > > .bind_flags = XDP_COPY,
> > > };
> > > const struct xsk_umem_config umem_config = {
> > > - .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
> > > + .fill_size = UMEM_NUM / 2,
> > > .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
> > > .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
> > > .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
> > > --
> > > 2.39.1.456.gfc5497dd1b-goog
> > >
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2023-02-01 23:29 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2023-01-30 21:51 [PATCH bpf-next] selftests/bpf: Try to address xdp_metadata crashes Stanislav Fomichev
2023-01-31 5:41 ` Alexei Starovoitov
2023-01-31 18:07 ` Stanislav Fomichev
2023-02-01 23:28 ` Stanislav Fomichev
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox