* Re: [PATCH] net: skmsg: pin the delayed-work psock in sk_psock_backlog
2026-05-15 8:54 ` Jiayuan Chen
@ 2026-05-15 9:10 ` Cen Zhang
0 siblings, 0 replies; 6+ messages in thread
From: Cen Zhang @ 2026-05-15 9:10 UTC (permalink / raw)
To: Jiayuan Chen
Cc: bpf@vger.kernel.org, John Fastabend, Jakub Sitnicki,
David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
Simon Horman, netdev, linux-kernel, zerocling0077, 2045gemini
Hi Jiayuan,
Thanks, understood. I will treat any follow-up submission as bpf, not
net, and I will not send a new patch revision just for this reproducer
information. I will also carry this Fixes tag in the next formal patch:
Fixes: 8259eb0e06d8 ("bpf, sockmap: Avoid using sk_socket after free when sending")
On the ordering question: moving the TX_ENABLED clear before clearing
sk_user_data would close the window for workers which enter
sk_psock_backlog() after teardown has started, but I do not think it is
sufficient for the interleaving reproduced below.
The backlog worker tests TX_ENABLED before it calls sk_psock_get():
if (!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
return;
if (!sk_psock_get(psock->sk))
return;
That test is not serialized with sk_psock_stop()'s ingress_lock. A worker
can pass the TX_ENABLED test and then be preempted before
sk_psock_get(psock->sk). If teardown clears TX_ENABLED after that point,
then clears sk_user_data, and a new attach publishes a replacement psock,
the old worker still calls sk_psock_get(psock->sk) and can pin the
replacement while continuing to process the old delayed-work psock. The
instrumentation below parks the worker exactly in that window: after the
TX_ENABLED test and before sk_psock_get(psock->sk). So a pure ordering
change in sk_psock_drop() would not affect the worker once it has already
passed the initial TX_ENABLED test.
Below are the two local patches I used for the reproducer, against
917719c41. The first patch only adds the debugfs gate/counters and
logging around the existing skmsg paths. It does not publish a replacement
psock, does not clear sk_user_data, and does not add an extra put. The
second patch adds a one-shot sockmap_redir selftest path which creates
AF_UNIX SOCK_STREAM socket pairs, redirects one byte to the destination
sockmap entry, waits until the old backlog worker is parked, deletes that
sockmap entry, reattaches the same destination fd with BPF_NOEXIST, and
then releases the parked worker. The PMB prefix in the diff is only the
local probe namespace.
I ran it as:
make -C tools/testing/selftests/bpf LLVM=1 \
OUTPUT=/tmp/bpf-out test_progs_sockmap_redir test_sockmap
PMB_SOCKMAP_ONE_SHOT=1 \
PMB_SOCKMAP_REDIR=sk_skb-to-ingress \
PMB_SOCKMAP_FAMILY=u_str \
PMB_SOCKMAP_MAP=sockmap \
PMB_PSOCK_PHASE=pointer \
/tmp/bpf-out/test_progs-sockmap_redir -vv
The warning came from the existing final put path in the old worker:
refcount_t: underflow; use-after-free.
WARNING: lib/refcount.c:28 at refcount_warn_saturate+0xbf/0xf0
Workqueue: events sk_psock_backlog
RIP: 0010:refcount_warn_saturate+0xbf/0xf0
Call Trace:
sk_psock_backlog+0xc8c/0x1ad0
process_one_work+0x8b7/0x1af0
worker_thread+0x574/0xf10
kthread+0x2fc/0x3f0
ret_from_fork+0x58b/0x830
ret_from_fork_asm+0x1a/0x30
Thanks,
Zhang
--- 8< --- kernel instrumentation patch --- 8< ---
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 6187a83bd..79d8769bb 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -4,12 +4,101 @@
#include <linux/skmsg.h>
#include <linux/skbuff.h>
#include <linux/scatterlist.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/sock_diag.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/tls.h>
#include <trace/events/sock.h>
+/* PMB_PSOCK_BACKLOG_GATE */
+static u32 pmb_psock_gate_enable;
+static u32 pmb_psock_gate_release;
+static u32 pmb_psock_gate_parked;
+static u32 pmb_psock_mismatch_seen;
+static u32 pmb_psock_get_seen;
+static u32 pmb_psock_last_old_ref;
+static u32 pmb_psock_last_old_ref_before_put;
+static u32 pmb_psock_last_old_tx;
+static u64 pmb_psock_last_cookie;
+static u64 pmb_psock_last_sk;
+static u64 pmb_psock_last_container;
+static u64 pmb_psock_last_current;
+static u64 pmb_psock_last_get;
+
+static void pmb_psock_probe_snapshot(struct sk_psock *container,
+ struct sk_psock *current_psock,
+ struct sk_psock *held)
+{
+ WRITE_ONCE(pmb_psock_last_cookie, sock_gen_cookie(container->sk));
+ WRITE_ONCE(pmb_psock_last_sk, (u64)(unsigned long)container->sk);
+ WRITE_ONCE(pmb_psock_last_container, (u64)(unsigned long)container);
+ WRITE_ONCE(pmb_psock_last_current, (u64)(unsigned long)current_psock);
+ WRITE_ONCE(pmb_psock_last_get, (u64)(unsigned long)held);
+ if (current_psock && current_psock != container)
+ WRITE_ONCE(pmb_psock_mismatch_seen, 1);
+ if (held && held != container)
+ WRITE_ONCE(pmb_psock_mismatch_seen, 1);
+}
+
+static void pmb_psock_maybe_gate(struct sk_psock *psock)
+{
+ unsigned long timeout = jiffies + msecs_to_jiffies(10000);
+
+ if (!READ_ONCE(pmb_psock_gate_enable) || psock->sk->sk_family != AF_UNIX)
+ return;
+
+ WRITE_ONCE(pmb_psock_gate_enable, 0);
+ WRITE_ONCE(pmb_psock_gate_parked, 1);
+ WRITE_ONCE(pmb_psock_last_cookie, sock_gen_cookie(psock->sk));
+ WRITE_ONCE(pmb_psock_last_sk, (u64)(unsigned long)psock->sk);
+ WRITE_ONCE(pmb_psock_last_container, (u64)(unsigned long)psock);
+ WRITE_ONCE(pmb_psock_last_old_ref, refcount_read(&psock->refcnt));
+ WRITE_ONCE(pmb_psock_last_old_tx,
+ sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED));
+ pr_info("PMB_PSOCK_BACKLOG_GATE park cookie=%llu sk=%px container=%px old_tx=%u old_ref=%u\n",
+ (unsigned long long)READ_ONCE(pmb_psock_last_cookie),
+ psock->sk, psock, READ_ONCE(pmb_psock_last_old_tx),
+ READ_ONCE(pmb_psock_last_old_ref));
+
+ while (!READ_ONCE(pmb_psock_gate_release) && time_before(jiffies, timeout))
+ msleep(1);
+ pr_info("PMB_PSOCK_BACKLOG_GATE resume cookie=%llu sk=%px container=%px released=%u timeout=%ld\n",
+ (unsigned long long)READ_ONCE(pmb_psock_last_cookie),
+ psock->sk, psock, READ_ONCE(pmb_psock_gate_release),
+ time_before(jiffies, timeout) ? 0L : 1L);
+}
+
+static int __init pmb_psock_probe_debugfs_init(void)
+{
+ struct dentry *dir;
+
+ dir = debugfs_create_dir("pmb_psock", NULL);
+ if (IS_ERR_OR_NULL(dir))
+ return 0;
+
+ debugfs_create_u32("gate_enable", 0600, dir, &pmb_psock_gate_enable);
+ debugfs_create_u32("gate_release", 0600, dir, &pmb_psock_gate_release);
+ debugfs_create_u32("gate_parked", 0600, dir, &pmb_psock_gate_parked);
+ debugfs_create_u32("mismatch_seen", 0600, dir, &pmb_psock_mismatch_seen);
+ debugfs_create_u32("get_seen", 0600, dir, &pmb_psock_get_seen);
+ debugfs_create_u32("last_old_ref", 0600, dir, &pmb_psock_last_old_ref);
+ debugfs_create_u32("last_old_ref_before_put", 0600, dir,
+ &pmb_psock_last_old_ref_before_put);
+ debugfs_create_u32("last_old_tx", 0600, dir, &pmb_psock_last_old_tx);
+ debugfs_create_x64("last_cookie", 0600, dir, &pmb_psock_last_cookie);
+ debugfs_create_x64("last_sk", 0600, dir, &pmb_psock_last_sk);
+ debugfs_create_x64("last_container", 0600, dir, &pmb_psock_last_container);
+ debugfs_create_x64("last_current", 0600, dir, &pmb_psock_last_current);
+ debugfs_create_x64("last_get", 0600, dir, &pmb_psock_last_get);
+ return 0;
+}
+late_initcall(pmb_psock_probe_debugfs_init);
+
static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
{
if (msg->sg.end > msg->sg.start &&
@@ -671,6 +760,8 @@ static void sk_psock_backlog(struct work_struct *work)
{
struct delayed_work *dwork = to_delayed_work(work);
struct sk_psock *psock = container_of(dwork, struct sk_psock, work);
+ struct sk_psock *current_psock;
+ struct sk_psock *held_psock;
struct sk_psock_work_state *state = &psock->work_state;
struct sk_buff *skb = NULL;
u32 len = 0, off = 0;
@@ -684,13 +775,35 @@ static void sk_psock_backlog(struct work_struct *work)
if (!sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
return;
+ pmb_psock_maybe_gate(psock);
+ rcu_read_lock();
+ current_psock = sk_psock(psock->sk);
+ rcu_read_unlock();
+ WRITE_ONCE(pmb_psock_last_old_ref, refcount_read(&psock->refcnt));
+ WRITE_ONCE(pmb_psock_last_old_tx,
+ sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED));
+ pmb_psock_probe_snapshot(psock, current_psock, NULL);
+ pr_info("PMB_PSOCK_BACKLOG_RESUME cookie=%llu sk=%px container=%px current=%px old_tx=%u old_ref=%u mismatch=%u\n",
+ (unsigned long long)READ_ONCE(pmb_psock_last_cookie),
+ psock->sk, psock, current_psock,
+ READ_ONCE(pmb_psock_last_old_tx),
+ READ_ONCE(pmb_psock_last_old_ref),
+ READ_ONCE(pmb_psock_mismatch_seen));
+
/* Increment the psock refcnt to synchronize with close(fd) path in
* sock_map_close(), ensuring we wait for backlog thread completion
* before sk_socket freed. If refcnt increment fails, it indicates
* sock_map_close() completed with sk_socket potentially already freed.
*/
- if (!sk_psock_get(psock->sk))
+ held_psock = sk_psock_get(psock->sk);
+ if (!held_psock)
return;
+ WRITE_ONCE(pmb_psock_get_seen, 1);
+ pmb_psock_probe_snapshot(psock, current_psock, held_psock);
+ pr_info("PMB_PSOCK_BACKLOG_GET cookie=%llu sk=%px container=%px held=%px mismatch=%u\n",
+ (unsigned long long)READ_ONCE(pmb_psock_last_cookie),
+ psock->sk, psock, held_psock, READ_ONCE(pmb_psock_mismatch_seen));
+
mutex_lock(&psock->work_mutex);
while ((skb = skb_peek(&psock->ingress_skb))) {
len = skb->len;
@@ -743,6 +856,11 @@ static void sk_psock_backlog(struct work_struct *work)
}
end:
mutex_unlock(&psock->work_mutex);
+ WRITE_ONCE(pmb_psock_last_old_ref_before_put, refcount_read(&psock->refcnt));
+ pr_info("PMB_PSOCK_BACKLOG_PUT cookie=%llu sk=%px container=%px held=%px ref_before_put=%u\n",
+ (unsigned long long)READ_ONCE(pmb_psock_last_cookie),
+ psock->sk, psock, held_psock,
+ READ_ONCE(pmb_psock_last_old_ref_before_put));
sk_psock_put(psock->sk, psock);
}
@@ -789,10 +907,15 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
refcount_set(&psock->refcnt, 1);
+ pr_info("PMB_PSOCK_INIT_TRACE alloc cookie=%llu sk=%px psock=%px ref=%d\n",
+ (unsigned long long)sock_gen_cookie(sk), sk, psock,
+ refcount_read(&psock->refcnt));
__rcu_assign_sk_user_data_with_flags(sk, psock,
SK_USER_DATA_NOCOPY |
SK_USER_DATA_PSOCK);
+ pr_info("PMB_PSOCK_INIT_TRACE publish cookie=%llu sk=%px psock=%px\n",
+ (unsigned long long)sock_gen_cookie(sk), sk, psock);
sock_hold(sk);
out:
@@ -890,12 +1013,20 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
write_lock_bh(&sk->sk_callback_lock);
sk_psock_restore_proto(sk, psock);
rcu_assign_sk_user_data(sk, NULL);
+ pr_info("PMB_PSOCK_DROP_TRACE clear cookie=%llu sk=%px psock=%px ref=%d tx=%d\n",
+ (unsigned long long)sock_gen_cookie(sk), sk, psock,
+ refcount_read(&psock->refcnt),
+ sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED));
if (psock->progs.stream_parser)
sk_psock_stop_strp(sk, psock);
else if (psock->progs.stream_verdict || psock->progs.skb_verdict)
sk_psock_stop_verdict(sk, psock);
write_unlock_bh(&sk->sk_callback_lock);
+ pr_info("PMB_PSOCK_DROP_TRACE stop cookie=%llu sk=%px psock=%px ref=%d tx=%d\n",
+ (unsigned long long)sock_gen_cookie(sk), sk, psock,
+ refcount_read(&psock->refcnt),
+ sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED));
sk_psock_stop(psock);
INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
--- 8< --- userspace selftest patch --- 8< ---
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
index 9c461d931..ebaf5cf21 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
@@ -22,14 +22,17 @@
#include <errno.h>
#include <error.h>
+#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
+#include <stdlib.h>
#include <unistd.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>
+#include <sys/wait.h>
#include <linux/string.h>
#include <linux/vm_sockets.h>
@@ -108,6 +111,9 @@ struct socket_spec {
int out[2];
};
+#define PMB_PSOCK_DEBUGFS "/sys/kernel/debug/pmb_psock"
+#define PMB_PSOCK_POLL_TIMEOUT_MS 5000
+
static int socket_spec_pairs(struct socket_spec *s)
{
return create_socket_pairs(s->family, s->sotype,
@@ -123,6 +129,373 @@ static void socket_spec_close(struct socket_spec *s)
xclose(s->out[1]);
}
+static bool pmb_sockmap_one_shot_enabled(void)
+{
+ const char *value = getenv("PMB_SOCKMAP_ONE_SHOT");
+
+ return value && strcmp(value, "0");
+}
+
+static int pmb_probe_path(char *path, size_t path_len, const char *name)
+{
+ int len;
+
+ len = snprintf(path, path_len, PMB_PSOCK_DEBUGFS "/%s", name);
+ if (len < 0 || len >= path_len)
+ return -ENAMETOOLONG;
+ return 0;
+}
+
+static int pmb_write_text(const char *name, const char *value)
+{
+ char path[256];
+ ssize_t len = strlen(value);
+ int fd, err;
+
+ err = pmb_probe_path(path, sizeof(path), name);
+ if (err)
+ return err;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ err = 0;
+ if (write(fd, value, len) != len)
+ err = errno ? -errno : -EIO;
+ close(fd);
+ return err;
+}
+
+static int pmb_write_u64(const char *name, unsigned long long value)
+{
+ char buf[32];
+
+ snprintf(buf, sizeof(buf), "%llu\n", value);
+ return pmb_write_text(name, buf);
+}
+
+static int pmb_read_u64(const char *name, unsigned long long *value)
+{
+ char path[256], buf[64], *end;
+ int fd, err;
+ ssize_t n;
+
+ err = pmb_probe_path(path, sizeof(path), name);
+ if (err)
+ return err;
+
+ fd = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ n = read(fd, buf, sizeof(buf) - 1);
+ err = n < 0 ? -errno : 0;
+ close(fd);
+ if (err)
+ return err;
+ if (n <= 0)
+ return -EIO;
+
+ buf[n] = '\0';
+ errno = 0;
+ *value = strtoull(buf, &end, 0);
+ if (errno)
+ return -errno;
+ return 0;
+}
+
+static int pmb_wait_u64(const char *name, unsigned long long expected,
+ unsigned int timeout_ms)
+{
+ unsigned int waited;
+ unsigned long long value;
+ int err;
+
+ for (waited = 0; waited < timeout_ms; waited++) {
+ err = pmb_read_u64(name, &value);
+ if (err)
+ return err;
+ if (value == expected)
+ return 0;
+ usleep(1000);
+ }
+
+ return -ETIMEDOUT;
+}
+
+static int pmb_wait_u64_nonzero(const char *name, unsigned int timeout_ms)
+{
+ unsigned int waited;
+ unsigned long long value;
+ int err;
+
+ for (waited = 0; waited < timeout_ms; waited++) {
+ err = pmb_read_u64(name, &value);
+ if (err)
+ return err;
+ if (value)
+ return 0;
+ usleep(1000);
+ }
+
+ return -ETIMEDOUT;
+}
+
+static int pmb_reset_probe_state(void)
+{
+ int err;
+
+ err = pmb_write_u64("gate_enable", 0);
+ if (err)
+ return err;
+ err = pmb_write_u64("gate_release", 0);
+ if (err)
+ return err;
+ err = pmb_write_u64("gate_parked", 0);
+ if (err)
+ return err;
+ err = pmb_write_u64("mismatch_seen", 0);
+ if (err)
+ return err;
+ err = pmb_write_u64("get_seen", 0);
+ if (err)
+ return err;
+ err = pmb_write_u64("last_old_ref", 0);
+ if (err)
+ return err;
+ err = pmb_write_u64("last_old_ref_before_put", 0);
+ if (err)
+ return err;
+ err = pmb_write_u64("last_old_tx", 0);
+ if (err)
+ return err;
+ err = pmb_write_u64("last_cookie", 0);
+ if (err)
+ return err;
+ err = pmb_write_u64("last_sk", 0);
+ if (err)
+ return err;
+ err = pmb_write_u64("last_container", 0);
+ if (err)
+ return err;
+ err = pmb_write_u64("last_current", 0);
+ if (err)
+ return err;
+ return pmb_write_u64("last_get", 0);
+}
+
+static int pmb_retry_reattach(int map_fd, __u32 key, int sock_fd)
+{
+ unsigned int attempt;
+ __u64 value = sock_fd;
+
+ for (attempt = 0; attempt < 200000; attempt++) {
+ if (!bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST))
+ return 0;
+ if (errno != EEXIST && errno != EBUSY)
+ return -errno;
+ sched_yield();
+ }
+
+ return -ETIMEDOUT;
+}
+
+static int pmb_wait_child_ok(pid_t pid)
+{
+ int status;
+
+ if (waitpid(pid, &status, 0) < 0)
+ return -errno;
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ return -EINVAL;
+ return 0;
+}
+
+/* PMB_SOCKMAP_ONE_SHOT */
+static void pmb_test_sockmap_one_shot(void)
+{
+ struct redir_spec redir = {
+ .name = "sk_skb-to-ingress",
+ .idx_send = SEND_OUTER,
+ .idx_recv = RECV_INNER,
+ .prog_type = SK_SKB_INGRESS,
+ };
+ struct socket_spec s = {
+ .family = AF_UNIX,
+ .sotype = SOCK_STREAM,
+ };
+ struct test_sockmap_redir *skel = NULL;
+ enum bpf_attach_type attach_type;
+ const char *phase = getenv("PMB_PSOCK_PHASE");
+ const char *map_name = getenv("PMB_SOCKMAP_MAP");
+ const char *family = getenv("PMB_SOCKMAP_FAMILY");
+ const char *redir_name = getenv("PMB_SOCKMAP_REDIR");
+ struct maps maps = {};
+ unsigned long long mismatch = 0, container = 0, current = 0, held = 0;
+ unsigned long long ref_before_put = 0;
+ bool sockets_ready = false, attached = false;
+ pid_t delete_pid = -1;
+ __u32 key = 0;
+ __u64 value;
+ char send_buf = 'A';
+ int fd_in, fd_out, fd_send;
+ int prog_fd, err;
+ ssize_t n;
+
+ if (!test__start_subtest("pmb_sockmap_one_shot"))
+ return;
+
+ if (access(PMB_PSOCK_DEBUGFS "/gate_enable", F_OK)) {
+ FAIL_ERRNO("missing %s", PMB_PSOCK_DEBUGFS);
+ return;
+ }
+
+ if (!map_name || strcmp(map_name, "sockmap") ||
+ !family || strcmp(family, "u_str") ||
+ !redir_name || strcmp(redir_name, "sk_skb-to-ingress")) {
+ FAIL("unexpected PMB one-shot contract");
+ return;
+ }
+
+ skel = test_sockmap_redir__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto done;
+
+ maps.in = bpf_map__fd(skel->maps.nop_map);
+ maps.out = bpf_map__fd(skel->maps.sock_map);
+ maps.verd = bpf_map__fd(skel->maps.verdict_map);
+ skel->bss->redirect_type = BPF_MAP_TYPE_SOCKMAP;
+
+ get_redir_params(&redir, skel, &prog_fd, &attach_type,
+ &skel->bss->redirect_flags);
+ err = xbpf_prog_attach(prog_fd, maps.in, attach_type, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto done;
+ attached = true;
+
+ err = socket_spec_pairs(&s);
+ if (!ASSERT_OK(err, "create_socket_pairs()"))
+ goto detach;
+ sockets_ready = true;
+
+ fd_in = s.in[0];
+ fd_send = s.in[1];
+ fd_out = s.out[0];
+
+ err = pmb_reset_probe_state();
+ if (!ASSERT_OK(err, "pmb_reset_probe_state"))
+ goto cleanup_sockets;
+ err = pmb_write_u64("gate_enable", 1);
+ if (!ASSERT_OK(err, "gate_enable"))
+ goto cleanup_sockets;
+
+ value = fd_in;
+ err = bpf_map_update_elem(maps.in, &key, &value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(in)"))
+ goto cleanup_gate;
+
+ value = fd_out;
+ err = bpf_map_update_elem(maps.out, &key, &value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(out)"))
+ goto cleanup_maps;
+
+ n = send(fd_send, &send_buf, 1, 0);
+ if (n != 1) {
+ FAIL_ERRNO("send");
+ goto cleanup_maps;
+ }
+
+ err = pmb_wait_u64("gate_parked", 1, PMB_PSOCK_POLL_TIMEOUT_MS);
+ if (!ASSERT_OK(err, "wait_gate_parked"))
+ goto cleanup_maps;
+
+ delete_pid = fork();
+ if (!ASSERT_GE(delete_pid, 0, "fork_delete"))
+ goto cleanup_maps;
+ if (!delete_pid) {
+ err = bpf_map_delete_elem(maps.out, &key);
+ _exit(err ? 1 : 0);
+ }
+
+ err = pmb_retry_reattach(maps.out, key, fd_out);
+ if (!ASSERT_OK(err, "reattach_out"))
+ goto wait_child;
+
+ err = pmb_wait_child_ok(delete_pid);
+ delete_pid = -1;
+ if (!ASSERT_OK(err, "delete_out"))
+ goto cleanup_maps;
+
+ err = pmb_write_u64("gate_release", 1);
+ if (!ASSERT_OK(err, "gate_release"))
+ goto cleanup_maps;
+
+ err = pmb_wait_u64_nonzero("last_get", PMB_PSOCK_POLL_TIMEOUT_MS);
+ if (!ASSERT_OK(err, "wait_last_get"))
+ goto cleanup_maps;
+
+ err = pmb_read_u64("mismatch_seen", &mismatch);
+ if (!ASSERT_OK(err, "read_mismatch_seen"))
+ goto cleanup_maps;
+ err = pmb_read_u64("last_container", &container);
+ if (!ASSERT_OK(err, "read_last_container"))
+ goto cleanup_maps;
+ err = pmb_read_u64("last_current", &current);
+ if (!ASSERT_OK(err, "read_last_current"))
+ goto cleanup_maps;
+ err = pmb_read_u64("last_get", &held);
+ if (!ASSERT_OK(err, "read_last_get"))
+ goto cleanup_maps;
+ err = pmb_read_u64("last_old_ref_before_put", &ref_before_put);
+ if (!ASSERT_OK(err, "read_last_old_ref_before_put"))
+ goto cleanup_maps;
+
+ fprintf(stderr,
+ "PMB_SOCKMAP_ONE_SHOT phase=%s container=0x%llx current=0x%llx held=0x%llx ref_before_put=%llu\n",
+ phase ? phase : "pointer", container, current, held,
+ ref_before_put);
+
+ if (!ASSERT_EQ(mismatch, 1, "mismatch_seen"))
+ goto cleanup_maps;
+ if (!ASSERT_NEQ(container, 0, "container_nonzero"))
+ goto cleanup_maps;
+ if (!ASSERT_NEQ(current, 0, "current_nonzero"))
+ goto cleanup_maps;
+ if (!ASSERT_NEQ(held, 0, "held_nonzero"))
+ goto cleanup_maps;
+ if (!ASSERT_NEQ(container, current, "container_vs_current"))
+ goto cleanup_maps;
+ if (!ASSERT_NEQ(container, held, "container_vs_held"))
+ goto cleanup_maps;
+ if (phase && !strcmp(phase, "refcount") &&
+ !ASSERT_EQ(ref_before_put, 0, "old_ref_before_put"))
+ goto cleanup_maps;
+
+wait_child:
+ if (delete_pid > 0) {
+ err = pmb_wait_child_ok(delete_pid);
+ delete_pid = -1;
+ ASSERT_OK(err, "delete_out_wait");
+ }
+
+cleanup_maps:
+ bpf_map_delete_elem(maps.out, &key);
+ bpf_map_delete_elem(maps.in, &key);
+cleanup_gate:
+ pmb_write_u64("gate_enable", 0);
+ pmb_write_u64("gate_release", 1);
+cleanup_sockets:
+ if (sockets_ready)
+ socket_spec_close(&s);
+detach:
+ if (attached)
+ xbpf_prog_detach2(prog_fd, maps.in, attach_type);
+done:
+ if (skel)
+ test_sockmap_redir__destroy(skel);
+}
+
static void get_redir_params(struct redir_spec *redir,
struct test_sockmap_redir *skel, int *prog_fd,
enum bpf_attach_type *attach_type,
@@ -460,6 +833,11 @@ static void test_map(enum bpf_map_type type)
void serial_test_sockmap_redir(void)
{
+ if (pmb_sockmap_one_shot_enabled()) {
+ pmb_test_sockmap_one_shot();
+ return;
+ }
+
test_map(BPF_MAP_TYPE_SOCKMAP);
test_map(BPF_MAP_TYPE_SOCKHASH);
}
^ permalink raw reply related [flat|nested] 6+ messages in thread