* [PATCH net 1/2] net: tls: preserve split open record on async encrypt
2026-05-15 15:15 [PATCH net 0/2] net: tls: fix async BPF split record loss Christopher Lusk
@ 2026-05-15 15:15 ` Christopher Lusk
2026-05-15 15:15 ` [PATCH net 2/2] selftests: net: add kTLS async split record regression Christopher Lusk
1 sibling, 0 replies; 3+ messages in thread
From: Christopher Lusk @ 2026-05-15 15:15 UTC (permalink / raw)
To: Jakub Kicinski
Cc: John Fastabend, Sabrina Dubroca, David S . Miller, Eric Dumazet,
Paolo Abeni, Simon Horman, Shuah Khan, netdev, bpf,
linux-kselftest, linux-kernel, stable
When the BPF sk_msg verdict sets apply_bytes smaller than the current
open record, tls_push_record() splits ctx->open_rec into the record
being encrypted and a remainder record. The synchronous path reattaches
the remainder to ctx->open_rec before continuing.
If the selected AEAD provider completes asynchronously,
tls_do_encryption() returns -EINPROGRESS after unhooking ctx->open_rec.
tls_push_record() currently returns immediately in that case, before
the split remainder is reattached. The remainder is then no longer
reachable through ctx->open_rec or ctx->tx_list, so the data it holds
can be silently dropped from the stream and the tls_rec itself is leaked.
Keep the split remainder rooted even when encryption of the first record
is pending asynchronously, and continue the BPF verdict drain loop after
an async record has been queued. Re-rooting alone is insufficient: the
final split remainder can otherwise remain as ctx->open_rec until close,
where it is freed instead of transmitted.
Fixes: d3b18ad31f93 ("tls: add bpf support to sk_msg handling")
Cc: stable@vger.kernel.org # 4.20+
Signed-off-by: Christopher Lusk <clusk@northecho.dev>
Assisted-by: Codex:gpt-5.5
Assisted-by: Claude:claude-opus-4-7
---
net/tls/tls_sw.c | 29 +++++++++++++++++++++--------
1 file changed, 21 insertions(+), 8 deletions(-)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 964ebc268..6d3df74dd 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -840,16 +840,19 @@ static int tls_push_record(struct sock *sk, int flags,
rc = tls_do_encryption(sk, tls_ctx, ctx, req,
msg_pl->sg.size + prot->tail_size, i);
if (rc < 0) {
- if (rc != -EINPROGRESS) {
- tls_err_abort(sk, -EBADMSG);
- if (split) {
- tls_ctx->pending_open_record_frags = true;
- tls_merge_open_record(sk, rec, tmp, orig_end);
- }
+ if (rc == -EINPROGRESS)
+ goto split_done;
+
+ tls_err_abort(sk, -EBADMSG);
+ if (split) {
+ tls_ctx->pending_open_record_frags = true;
+ tls_merge_open_record(sk, rec, tmp, orig_end);
}
ctx->async_capable = 1;
return rc;
- } else if (split) {
+ }
+split_done:
+ if (split) {
msg_pl = &tmp->msg_plaintext;
msg_en = &tmp->msg_encrypted;
sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size);
@@ -857,6 +860,11 @@ static int tls_push_record(struct sock *sk, int flags,
ctx->open_rec = tmp;
}
+ if (rc < 0) {
+ ctx->async_capable = 1;
+ return rc;
+ }
+
return tls_tx_records(sk, flags);
}
@@ -871,6 +879,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
struct sock *sk_redir;
struct tls_rec *rec;
bool enospc, policy, redir_ingress;
+ bool async = false;
int err = 0, send;
u32 delta = 0;
@@ -920,6 +929,10 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
switch (psock->eval) {
case __SK_PASS:
err = tls_push_record(sk, flags, record_type);
+ if (err == -EINPROGRESS) {
+ async = true;
+ err = 0;
+ }
if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) {
*copied -= sk_msg_free(sk, msg);
tls_free_open_rec(sk);
@@ -989,7 +1002,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
}
out_err:
sk_psock_put(sk, psock);
- return err;
+ return err ?: (async ? -EINPROGRESS : 0);
}
static int tls_sw_push_pending_record(struct sock *sk, int flags)
--
2.54.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH net 2/2] selftests: net: add kTLS async split record regression
2026-05-15 15:15 [PATCH net 0/2] net: tls: fix async BPF split record loss Christopher Lusk
2026-05-15 15:15 ` [PATCH net 1/2] net: tls: preserve split open record on async encrypt Christopher Lusk
@ 2026-05-15 15:15 ` Christopher Lusk
1 sibling, 0 replies; 3+ messages in thread
From: Christopher Lusk @ 2026-05-15 15:15 UTC (permalink / raw)
To: Jakub Kicinski
Cc: John Fastabend, Sabrina Dubroca, David S . Miller, Eric Dumazet,
Paolo Abeni, Simon Horman, Shuah Khan, netdev, bpf,
linux-kselftest, linux-kernel
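Add a kTLS TX regression test for the BPF sk_msg apply_bytes
split-record path. The test sends four 4096-byte writes through
a sockmap verdict program that applies 512-byte verdict windows,
then verifies that the peer receives the full TLS-protected stream.
The peer does not enable kTLS RX, so it counts raw record bytes: the
16384 plaintext bytes are expected to arrive as 32 records of 512
bytes, each carrying 29 bytes of TLS 1.2 AES-GCM-128 framing (5-byte
header, 8-byte explicit nonce, 16-byte tag), for 17312 bytes in total.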
Run the case once with the synchronous provider and once after
instantiating an async pcrypt gcm(aes) provider. Vulnerable kernels
silently truncate the async run because the split open-record remainder
is lost when encryption returns -EINPROGRESS.
Signed-off-by: Christopher Lusk <clusk@northecho.dev>
Assisted-by: Codex:gpt-5.5
Assisted-by: Claude:claude-opus-4-7
---
tools/testing/selftests/net/Makefile | 5 +
.../selftests/net/ktls_async_split.bpf.c | 24 ++
.../testing/selftests/net/ktls_async_split.c | 391 ++++++++++++++++++
3 files changed, 420 insertions(+)
create mode 100644 tools/testing/selftests/net/ktls_async_split.bpf.c
create mode 100644 tools/testing/selftests/net/ktls_async_split.c
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index f3da38c54..0435327e9 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -179,6 +179,7 @@ TEST_GEN_PROGS := \
epoll_busy_poll \
icmp_rfc4884 \
ipv6_fragmentation \
+ ktls_async_split \
proc_net_pktgen \
reuseaddr_conflict \
reuseport_bpf \
@@ -236,3 +237,7 @@ $(OUTPUT)/bind_bhash: LDLIBS += -lpthread
$(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/
include bpf.mk
+
+$(OUTPUT)/ktls_async_split: CFLAGS += -I../../../lib
+$(OUTPUT)/ktls_async_split: LDLIBS += $(BPFOBJ) -lelf -lz
+$(OUTPUT)/ktls_async_split: $(BPFOBJ)
diff --git a/tools/testing/selftests/net/ktls_async_split.bpf.c b/tools/testing/selftests/net/ktls_async_split.bpf.c
new file mode 100644
index 000000000..c7caafb73
--- /dev/null
+++ b/tools/testing/selftests/net/ktls_async_split.bpf.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#ifndef SK_PASS
+#define SK_PASS 1
+#endif
+
+char LICENSE[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 4);
+ __type(key, __u32);
+ __type(value, int);
+} sock_map SEC(".maps");
+
+SEC("sk_msg")
+int apply_bytes_verdict(struct sk_msg_md *msg)
+{
+ bpf_msg_apply_bytes(msg, 512);
+ return SK_PASS;
+}
diff --git a/tools/testing/selftests/net/ktls_async_split.c b/tools/testing/selftests/net/ktls_async_split.c
new file mode 100644
index 000000000..a9a84e0f5
--- /dev/null
+++ b/tools/testing/selftests/net/ktls_async_split.c
@@ -0,0 +1,391 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/if_alg.h>
+#include <linux/bpf.h>
+#include <linux/tls.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "kselftest.h"
+
+#ifndef SOL_TLS
+#define SOL_TLS 282
+#endif
+
+#define EXPECTED_BYTES 17312
+
+static void fill_seq(unsigned char *p, size_t n, unsigned char seed)
+{
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ p[i] = (unsigned char)(seed + i);
+}
+
+static void bump_memlock(void)
+{
+ struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };
+
+ setrlimit(RLIMIT_MEMLOCK, &r);
+}
+
+static int run_cmd(const char *cmd)
+{
+ int ret = system(cmd);
+
+ if (ret == -1)
+ return -1;
+ if (WIFEXITED(ret))
+ return WEXITSTATUS(ret);
+ return 1;
+}
+
+static int instantiate_aead(const char *name)
+{
+ struct sockaddr_alg sa = {
+ .salg_family = AF_ALG,
+ };
+ int fd, ret;
+
+ strncpy((char *)sa.salg_type, "aead", sizeof(sa.salg_type) - 1);
+ strncpy((char *)sa.salg_name, name, sizeof(sa.salg_name) - 1);
+
+ fd = socket(AF_ALG, SOCK_SEQPACKET, 0);
+ if (fd < 0)
+ return -errno;
+
+ ret = bind(fd, (struct sockaddr *)&sa, sizeof(sa));
+ if (ret < 0)
+ ret = -errno;
+
+ close(fd);
+ return ret;
+}
+
+static bool have_async_pcrypt(void)
+{
+ FILE *f = fopen("/proc/crypto", "r");
+ char line[256];
+ bool in_driver = false;
+ bool async = false;
+
+ if (!f)
+ return false;
+
+ while (fgets(line, sizeof(line), f)) {
+ if (!strncmp(line, "driver", 6)) {
+ in_driver = strstr(line, "pcrypt(") &&
+ strstr(line, "gcm");
+ async = false;
+ continue;
+ }
+ if (in_driver && !strncmp(line, "async", 5)) {
+ async = strstr(line, "yes");
+ if (async) {
+ fclose(f);
+ return true;
+ }
+ }
+ }
+
+ fclose(f);
+ return false;
+}
+
+static bool module_loaded(const char *name)
+{
+ FILE *f = fopen("/proc/modules", "r");
+ char line[256], mod[128];
+
+ if (!f)
+ return false;
+
+ while (fgets(line, sizeof(line), f)) {
+ if (sscanf(line, "%127s", mod) == 1 && !strcmp(mod, name)) {
+ fclose(f);
+ return true;
+ }
+ }
+
+ fclose(f);
+ return false;
+}
+
+static int make_listener(unsigned short *port)
+{
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ .sin_port = 0,
+ };
+ socklen_t len = sizeof(addr);
+ int fd, one = 1;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd < 0)
+ return -errno;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
+ goto err;
+ if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ goto err;
+ if (listen(fd, 1) < 0)
+ goto err;
+ if (getsockname(fd, (struct sockaddr *)&addr, &len) < 0)
+ goto err;
+
+ *port = ntohs(addr.sin_port);
+ return fd;
+
+err:
+ close(fd);
+ return -errno;
+}
+
+static int connect_client(unsigned short port)
+{
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ .sin_port = htons(port),
+ };
+ int fd;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd < 0)
+ return -errno;
+ if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ close(fd);
+ return -errno;
+ }
+
+ return fd;
+}
+
+static int make_tcp_pair(int *client_fd, int *peer_fd)
+{
+ unsigned short port = 0;
+ int listen_fd, c, p;
+
+ listen_fd = make_listener(&port);
+ if (listen_fd < 0)
+ return listen_fd;
+
+ c = connect_client(port);
+ if (c < 0) {
+ close(listen_fd);
+ return c;
+ }
+
+ p = accept(listen_fd, NULL, NULL);
+ close(listen_fd);
+ if (p < 0) {
+ close(c);
+ return -errno;
+ }
+
+ *client_fd = c;
+ *peer_fd = p;
+ return 0;
+}
+
+static int enable_ktls_tx(int fd)
+{
+ struct tls12_crypto_info_aes_gcm_128 info;
+ static const char ulp[] = "tls";
+
+ if (setsockopt(fd, IPPROTO_TCP, TCP_ULP, ulp, sizeof(ulp)) < 0)
+ return -errno;
+
+ memset(&info, 0, sizeof(info));
+ info.info.version = TLS_1_2_VERSION;
+ info.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+ fill_seq(info.iv, sizeof(info.iv), 0x11);
+ fill_seq(info.key, sizeof(info.key), 0x22);
+ fill_seq(info.salt, sizeof(info.salt), 0x33);
+ fill_seq(info.rec_seq, sizeof(info.rec_seq), 0x44);
+
+ if (setsockopt(fd, SOL_TLS, TLS_TX, &info, sizeof(info)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int attach_bpf(const char *obj_path, int sock_fd, struct bpf_object **obj_out)
+{
+ struct bpf_object *obj;
+ struct bpf_program *prog;
+ int map_fd, prog_fd;
+ __u32 key = 0;
+
+ obj = bpf_object__open_file(obj_path, NULL);
+ if (libbpf_get_error(obj))
+ return -EINVAL;
+ if (bpf_object__load(obj))
+ return -EINVAL;
+
+ prog = bpf_object__find_program_by_name(obj, "apply_bytes_verdict");
+ if (!prog)
+ return -ENOENT;
+ prog_fd = bpf_program__fd(prog);
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "sock_map");
+ if (map_fd < 0)
+ return -ENOENT;
+
+ if (bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0))
+ return -errno;
+ if (bpf_map_update_elem(map_fd, &key, &sock_fd, BPF_ANY))
+ return -errno;
+
+ *obj_out = obj;
+ return 0;
+}
+
+static void server_loop(int fd, int out_fd)
+{
+ unsigned char buf[16384];
+ int total = 0;
+
+ for (;;) {
+ ssize_t n = read(fd, buf, sizeof(buf));
+
+ if (n < 0) {
+ if (errno == EINTR)
+ continue;
+ _exit(1);
+ }
+ if (!n)
+ break;
+ total += (int)n;
+ }
+
+ (void)write(out_fd, &total, sizeof(total));
+ close(fd);
+ close(out_fd);
+ _exit(0);
+}
+
+static int run_case(const char *obj_path, int *server_read)
+{
+ struct bpf_object *obj = NULL;
+ unsigned char buf[4096];
+ int client_fd = -1, peer_fd = -1, pipefd[2] = { -1, -1 };
+ pid_t pid;
+ int status, ret, i;
+ ssize_t n, got;
+
+ ret = make_tcp_pair(&client_fd, &peer_fd);
+ if (ret)
+ return ret;
+ if (pipe(pipefd)) {
+ close(client_fd);
+ close(peer_fd);
+ return -errno;
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ close(client_fd);
+ close(peer_fd);
+ close(pipefd[0]);
+ close(pipefd[1]);
+ return -errno;
+ }
+ if (!pid) {
+ close(pipefd[0]);
+ server_loop(peer_fd, pipefd[1]);
+ }
+
+ close(peer_fd);
+ close(pipefd[1]);
+
+ ret = attach_bpf(obj_path, client_fd, &obj);
+ if (ret)
+ goto out;
+ ret = enable_ktls_tx(client_fd);
+ if (ret)
+ goto out;
+
+ fill_seq(buf, sizeof(buf), 0x80);
+ for (i = 0; i < 4; i++) {
+ n = send(client_fd, buf, sizeof(buf), 0);
+ if (n != sizeof(buf)) {
+ ret = n < 0 ? -errno : -EIO;
+ goto out;
+ }
+ }
+
+ shutdown(client_fd, SHUT_WR);
+ got = read(pipefd[0], server_read, sizeof(*server_read));
+ if (got != sizeof(*server_read))
+ ret = -EIO;
+
+out:
+ close(client_fd);
+ close(pipefd[0]);
+ if (obj)
+ bpf_object__close(obj);
+ if (waitpid(pid, &status, 0) < 0)
+ return -errno;
+ if (!ret && (!WIFEXITED(status) || WEXITSTATUS(status)))
+ ret = -EIO;
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ const char *obj_path = argc > 1 ? argv[1] : "./ktls_async_split.bpf.o";
+ int sync_read = 0, async_read = 0, ret;
+
+ ksft_print_header();
+ ksft_set_plan(2);
+ bump_memlock();
+
+ if (run_cmd("modprobe tls >/dev/null 2>&1") && !module_loaded("tls"))
+ ksft_exit_skip("missing tls module\n");
+
+ /* Keep the first run on the synchronous provider. */
+ run_cmd("modprobe -r pcrypt >/dev/null 2>&1");
+ ret = run_case(obj_path, &sync_read);
+ if (ret)
+ ksft_exit_fail_msg("sync case failed: %s\n", strerror(-ret));
+ if (sync_read != EXPECTED_BYTES)
+ ksft_exit_fail_msg("sync case read %d, expected %d\n",
+ sync_read, EXPECTED_BYTES);
+ ksft_test_result_pass("sync provider transmits split record\n");
+
+ run_cmd("modprobe af_alg algif_aead pcrypt >/dev/null 2>&1");
+ ret = instantiate_aead("pcrypt(generic-gcm-vaes-avx2)");
+ if (ret)
+ ret = instantiate_aead("pcrypt(gcm(aes))");
+ if (ret || !have_async_pcrypt())
+ ksft_exit_skip("missing async pcrypt gcm(aes) provider\n");
+
+ ret = run_case(obj_path, &async_read);
+ if (ret)
+ ksft_exit_fail_msg("async case failed: %s\n", strerror(-ret));
+ if (async_read != EXPECTED_BYTES)
+ ksft_exit_fail_msg("async case read %d, expected %d\n",
+ async_read, EXPECTED_BYTES);
+ ksft_test_result_pass("async provider transmits split record\n");
+
+ ksft_finished();
+}
--
2.54.0
^ permalink raw reply related [flat|nested] 3+ messages in thread