* [PATCH net v2 1/2] net: tls: preserve split open record on async encrypt
2026-05-21 2:58 [PATCH net v2 0/2] net: tls: fix async split record handling Christopher Lusk
@ 2026-05-21 2:58 ` Christopher Lusk
2026-05-21 2:58 ` [PATCH net v2 2/2] selftests: net: add kTLS async split record regression Christopher Lusk
1 sibling, 0 replies; 3+ messages in thread
From: Christopher Lusk @ 2026-05-21 2:58 UTC (permalink / raw)
To: Jakub Kicinski
Cc: John Fastabend, Sabrina Dubroca, David S. Miller, Eric Dumazet,
Paolo Abeni, Simon Horman, Shuah Khan, Alexei Starovoitov,
Daniel Borkmann, netdev, bpf, linux-kselftest, linux-kernel,
stable
When the BPF sk_msg verdict sets apply_bytes smaller than the current
open record, tls_push_record() splits ctx->open_rec into the record
being encrypted and a remainder record. The synchronous path reattaches
the remainder to ctx->open_rec before continuing.
If the selected AEAD provider completes asynchronously,
tls_do_encryption() returns -EINPROGRESS after unhooking ctx->open_rec.
tls_push_record() currently returns immediately in that case, before
the split remainder is reattached. The remainder is no longer reachable
through ctx->open_rec or ctx->tx_list, which can silently drop
transmitted data and leak the unreachable tls_rec.
Keep the split remainder rooted even when encryption of the first record
is pending asynchronously, and continue the BPF verdict drain loop after
an async record has been queued. If that loop then hits a later verdict
error, wait for the pending async encryption before returning the error
so zerocopy user pages cannot be released while cryptd still reads them.
Fixes: d3b18ad31f93 ("tls: add bpf support to sk_msg handling")
Cc: stable@vger.kernel.org # 4.20+
Signed-off-by: Christopher Lusk <clusk@northecho.dev>
Assisted-by: Codex:gpt-5.5
Assisted-by: Claude:claude-opus-4-7
---
net/tls/tls_sw.c | 40 ++++++++++++++++++++++++++++++++--------
1 file changed, 32 insertions(+), 8 deletions(-)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 964ebc268..5b20be5b4 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -840,16 +840,19 @@ static int tls_push_record(struct sock *sk, int flags,
rc = tls_do_encryption(sk, tls_ctx, ctx, req,
msg_pl->sg.size + prot->tail_size, i);
if (rc < 0) {
- if (rc != -EINPROGRESS) {
- tls_err_abort(sk, -EBADMSG);
- if (split) {
- tls_ctx->pending_open_record_frags = true;
- tls_merge_open_record(sk, rec, tmp, orig_end);
- }
+ if (rc == -EINPROGRESS)
+ goto split_done;
+
+ tls_err_abort(sk, -EBADMSG);
+ if (split) {
+ tls_ctx->pending_open_record_frags = true;
+ tls_merge_open_record(sk, rec, tmp, orig_end);
}
ctx->async_capable = 1;
return rc;
- } else if (split) {
+ }
+split_done:
+ if (split) {
msg_pl = &tmp->msg_plaintext;
msg_en = &tmp->msg_encrypted;
sk_msg_trim(sk, msg_en, msg_pl->sg.size + prot->overhead_size);
@@ -857,6 +860,11 @@ static int tls_push_record(struct sock *sk, int flags,
ctx->open_rec = tmp;
}
+ if (rc < 0) {
+ ctx->async_capable = 1;
+ return rc;
+ }
+
return tls_tx_records(sk, flags);
}
@@ -871,6 +879,8 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
struct sock *sk_redir;
struct tls_rec *rec;
bool enospc, policy, redir_ingress;
+ bool async = false;
+ int async_err = 0;
int err = 0, send;
u32 delta = 0;
@@ -920,6 +930,10 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
switch (psock->eval) {
case __SK_PASS:
err = tls_push_record(sk, flags, record_type);
+ if (err == -EINPROGRESS) {
+ async = true;
+ err = 0;
+ }
if (err && err != -EINPROGRESS && sk->sk_err == EBADMSG) {
*copied -= sk_msg_free(sk, msg);
tls_free_open_rec(sk);
@@ -988,8 +1002,18 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
goto more_data;
}
out_err:
+ if (async && err && err != -EINPROGRESS) {
+ async_err = tls_encrypt_async_wait(ctx);
+ if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
+ /* tx_lock is held; the worker will reschedule if needed. */
+ cancel_delayed_work(&ctx->tx_work.work);
+ tls_tx_records(sk, flags);
+ }
+ if (async_err)
+ err = async_err;
+ }
sk_psock_put(sk, psock);
- return err;
+ return err ?: (async ? -EINPROGRESS : 0);
}
static int tls_sw_push_pending_record(struct sock *sk, int flags)
--
2.54.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH net v2 2/2] selftests: net: add kTLS async split record regression
2026-05-21 2:58 [PATCH net v2 0/2] net: tls: fix async split record handling Christopher Lusk
2026-05-21 2:58 ` [PATCH net v2 1/2] net: tls: preserve split open record on async encrypt Christopher Lusk
@ 2026-05-21 2:58 ` Christopher Lusk
1 sibling, 0 replies; 3+ messages in thread
From: Christopher Lusk @ 2026-05-21 2:58 UTC (permalink / raw)
To: Jakub Kicinski
Cc: John Fastabend, Sabrina Dubroca, David S. Miller, Eric Dumazet,
Paolo Abeni, Simon Horman, Shuah Khan, Alexei Starovoitov,
Daniel Borkmann, netdev, bpf, linux-kselftest, linux-kernel
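Add a kTLS TX regression test for the BPF sk_msg apply_bytes
split-record path. The test sends four 4096-byte writes through
a sockmap verdict program that applies 512-byte verdict windows,
then verifies that the peer receives the full TLS-protected stream.
The peer is a plain TCP socket, so it counts ciphertext bytes: 32
records of 512 plaintext bytes, each carrying 29 bytes of TLS 1.2
AES-GCM-128 framing (5-byte header, 8-byte explicit nonce, 16-byte
tag), for 17312 bytes in total.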
Run the case once with the synchronous provider and once after
instantiating an async pcrypt gcm(aes) provider. Vulnerable kernels
silently truncate the async run because the split open-record remainder
is lost when encryption returns -EINPROGRESS.
Validate the loaded BPF program fd before attaching it, so that a setup
failure is reported directly instead of an invalid fd being passed to
bpf_prog_attach().
Signed-off-by: Christopher Lusk <clusk@northecho.dev>
Assisted-by: Codex:gpt-5.5
Assisted-by: Claude:claude-opus-4-7
---
tools/testing/selftests/net/Makefile | 5 +
.../selftests/net/ktls_async_split.bpf.c | 24 ++
.../testing/selftests/net/ktls_async_split.c | 393 ++++++++++++++++++
3 files changed, 422 insertions(+)
create mode 100644 tools/testing/selftests/net/ktls_async_split.bpf.c
create mode 100644 tools/testing/selftests/net/ktls_async_split.c
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index f3da38c54..0435327e9 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -179,6 +179,7 @@ TEST_GEN_PROGS := \
epoll_busy_poll \
icmp_rfc4884 \
ipv6_fragmentation \
+ ktls_async_split \
proc_net_pktgen \
reuseaddr_conflict \
reuseport_bpf \
@@ -236,3 +237,7 @@ $(OUTPUT)/bind_bhash: LDLIBS += -lpthread
$(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/
include bpf.mk
+
+$(OUTPUT)/ktls_async_split: CFLAGS += -I../../../lib
+$(OUTPUT)/ktls_async_split: LDLIBS += $(BPFOBJ) -lelf -lz
+$(OUTPUT)/ktls_async_split: $(BPFOBJ)
diff --git a/tools/testing/selftests/net/ktls_async_split.bpf.c b/tools/testing/selftests/net/ktls_async_split.bpf.c
new file mode 100644
index 000000000..c7caafb73
--- /dev/null
+++ b/tools/testing/selftests/net/ktls_async_split.bpf.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#ifndef SK_PASS
+#define SK_PASS 1
+#endif
+
+char LICENSE[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 4);
+ __type(key, __u32);
+ __type(value, int);
+} sock_map SEC(".maps");
+
+SEC("sk_msg")
+int apply_bytes_verdict(struct sk_msg_md *msg)
+{
+ bpf_msg_apply_bytes(msg, 512);
+ return SK_PASS;
+}
diff --git a/tools/testing/selftests/net/ktls_async_split.c b/tools/testing/selftests/net/ktls_async_split.c
new file mode 100644
index 000000000..5601ce4b5
--- /dev/null
+++ b/tools/testing/selftests/net/ktls_async_split.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/if_alg.h>
+#include <linux/bpf.h>
+#include <linux/tls.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "kselftest.h"
+
+#ifndef SOL_TLS
+#define SOL_TLS 282
+#endif
+
+#define EXPECTED_BYTES 17312
+
+static void fill_seq(unsigned char *p, size_t n, unsigned char seed)
+{
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ p[i] = (unsigned char)(seed + i);
+}
+
+static void bump_memlock(void)
+{
+ struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };
+
+ setrlimit(RLIMIT_MEMLOCK, &r);
+}
+
+static int run_cmd(const char *cmd)
+{
+ int ret = system(cmd);
+
+ if (ret == -1)
+ return -1;
+ if (WIFEXITED(ret))
+ return WEXITSTATUS(ret);
+ return 1;
+}
+
+static int instantiate_aead(const char *name)
+{
+ struct sockaddr_alg sa = {
+ .salg_family = AF_ALG,
+ };
+ int fd, ret;
+
+ strncpy((char *)sa.salg_type, "aead", sizeof(sa.salg_type) - 1);
+ strncpy((char *)sa.salg_name, name, sizeof(sa.salg_name) - 1);
+
+ fd = socket(AF_ALG, SOCK_SEQPACKET, 0);
+ if (fd < 0)
+ return -errno;
+
+ ret = bind(fd, (struct sockaddr *)&sa, sizeof(sa));
+ if (ret < 0)
+ ret = -errno;
+
+ close(fd);
+ return ret;
+}
+
+static bool have_async_pcrypt(void)
+{
+ FILE *f = fopen("/proc/crypto", "r");
+ char line[256];
+ bool in_driver = false;
+ bool async = false;
+
+ if (!f)
+ return false;
+
+ while (fgets(line, sizeof(line), f)) {
+ if (!strncmp(line, "driver", 6)) {
+ in_driver = strstr(line, "pcrypt(") &&
+ strstr(line, "gcm");
+ async = false;
+ continue;
+ }
+ if (in_driver && !strncmp(line, "async", 5)) {
+ async = strstr(line, "yes");
+ if (async) {
+ fclose(f);
+ return true;
+ }
+ }
+ }
+
+ fclose(f);
+ return false;
+}
+
+static bool module_loaded(const char *name)
+{
+ FILE *f = fopen("/proc/modules", "r");
+ char line[256], mod[128];
+
+ if (!f)
+ return false;
+
+ while (fgets(line, sizeof(line), f)) {
+ if (sscanf(line, "%127s", mod) == 1 && !strcmp(mod, name)) {
+ fclose(f);
+ return true;
+ }
+ }
+
+ fclose(f);
+ return false;
+}
+
+static int make_listener(unsigned short *port)
+{
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ .sin_port = 0,
+ };
+ socklen_t len = sizeof(addr);
+ int fd, one = 1;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd < 0)
+ return -errno;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) < 0)
+ goto err;
+ if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ goto err;
+ if (listen(fd, 1) < 0)
+ goto err;
+ if (getsockname(fd, (struct sockaddr *)&addr, &len) < 0)
+ goto err;
+
+ *port = ntohs(addr.sin_port);
+ return fd;
+
+err:
+ close(fd);
+ return -errno;
+}
+
+static int connect_client(unsigned short port)
+{
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ .sin_port = htons(port),
+ };
+ int fd;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (fd < 0)
+ return -errno;
+ if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ close(fd);
+ return -errno;
+ }
+
+ return fd;
+}
+
+static int make_tcp_pair(int *client_fd, int *peer_fd)
+{
+ unsigned short port = 0;
+ int listen_fd, c, p;
+
+ listen_fd = make_listener(&port);
+ if (listen_fd < 0)
+ return listen_fd;
+
+ c = connect_client(port);
+ if (c < 0) {
+ close(listen_fd);
+ return c;
+ }
+
+ p = accept(listen_fd, NULL, NULL);
+ close(listen_fd);
+ if (p < 0) {
+ close(c);
+ return -errno;
+ }
+
+ *client_fd = c;
+ *peer_fd = p;
+ return 0;
+}
+
+static int enable_ktls_tx(int fd)
+{
+ struct tls12_crypto_info_aes_gcm_128 info;
+ static const char ulp[] = "tls";
+
+ if (setsockopt(fd, IPPROTO_TCP, TCP_ULP, ulp, sizeof(ulp)) < 0)
+ return -errno;
+
+ memset(&info, 0, sizeof(info));
+ info.info.version = TLS_1_2_VERSION;
+ info.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+ fill_seq(info.iv, sizeof(info.iv), 0x11);
+ fill_seq(info.key, sizeof(info.key), 0x22);
+ fill_seq(info.salt, sizeof(info.salt), 0x33);
+ fill_seq(info.rec_seq, sizeof(info.rec_seq), 0x44);
+
+ if (setsockopt(fd, SOL_TLS, TLS_TX, &info, sizeof(info)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+static int attach_bpf(const char *obj_path, int sock_fd, struct bpf_object **obj_out)
+{
+ struct bpf_object *obj;
+ struct bpf_program *prog;
+ int map_fd, prog_fd;
+ __u32 key = 0;
+
+ obj = bpf_object__open_file(obj_path, NULL);
+ if (libbpf_get_error(obj))
+ return -EINVAL;
+ if (bpf_object__load(obj))
+ return -EINVAL;
+
+ prog = bpf_object__find_program_by_name(obj, "apply_bytes_verdict");
+ if (!prog)
+ return -ENOENT;
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0)
+ return prog_fd;
+
+ map_fd = bpf_object__find_map_fd_by_name(obj, "sock_map");
+ if (map_fd < 0)
+ return -ENOENT;
+
+ if (bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0))
+ return -errno;
+ if (bpf_map_update_elem(map_fd, &key, &sock_fd, BPF_ANY))
+ return -errno;
+
+ *obj_out = obj;
+ return 0;
+}
+
+static void server_loop(int fd, int out_fd)
+{
+ unsigned char buf[16384];
+ int total = 0;
+
+ for (;;) {
+ ssize_t n = read(fd, buf, sizeof(buf));
+
+ if (n < 0) {
+ if (errno == EINTR)
+ continue;
+ _exit(1);
+ }
+ if (!n)
+ break;
+ total += (int)n;
+ }
+
+ (void)write(out_fd, &total, sizeof(total));
+ close(fd);
+ close(out_fd);
+ _exit(0);
+}
+
+static int run_case(const char *obj_path, int *server_read)
+{
+ struct bpf_object *obj = NULL;
+ unsigned char buf[4096];
+ int client_fd = -1, peer_fd = -1, pipefd[2] = { -1, -1 };
+ pid_t pid;
+ int status, ret, i;
+ ssize_t n, got;
+
+ ret = make_tcp_pair(&client_fd, &peer_fd);
+ if (ret)
+ return ret;
+ if (pipe(pipefd)) {
+ close(client_fd);
+ close(peer_fd);
+ return -errno;
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ close(client_fd);
+ close(peer_fd);
+ close(pipefd[0]);
+ close(pipefd[1]);
+ return -errno;
+ }
+ if (!pid) {
+ close(pipefd[0]);
+ server_loop(peer_fd, pipefd[1]);
+ }
+
+ close(peer_fd);
+ close(pipefd[1]);
+
+ ret = attach_bpf(obj_path, client_fd, &obj);
+ if (ret)
+ goto out;
+ ret = enable_ktls_tx(client_fd);
+ if (ret)
+ goto out;
+
+ fill_seq(buf, sizeof(buf), 0x80);
+ for (i = 0; i < 4; i++) {
+ n = send(client_fd, buf, sizeof(buf), 0);
+ if (n != sizeof(buf)) {
+ ret = n < 0 ? -errno : -EIO;
+ goto out;
+ }
+ }
+
+ shutdown(client_fd, SHUT_WR);
+ got = read(pipefd[0], server_read, sizeof(*server_read));
+ if (got != sizeof(*server_read))
+ ret = -EIO;
+
+out:
+ close(client_fd);
+ close(pipefd[0]);
+ if (obj)
+ bpf_object__close(obj);
+ if (waitpid(pid, &status, 0) < 0)
+ return -errno;
+ if (!ret && (!WIFEXITED(status) || WEXITSTATUS(status)))
+ ret = -EIO;
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ const char *obj_path = argc > 1 ? argv[1] : "./ktls_async_split.bpf.o";
+ int sync_read = 0, async_read = 0, ret;
+
+ ksft_print_header();
+ ksft_set_plan(2);
+ bump_memlock();
+
+ if (run_cmd("modprobe tls >/dev/null 2>&1") && !module_loaded("tls"))
+ ksft_exit_skip("missing tls module\n");
+
+ /* Keep the first run on the synchronous provider. */
+ run_cmd("modprobe -r pcrypt >/dev/null 2>&1");
+ ret = run_case(obj_path, &sync_read);
+ if (ret)
+ ksft_exit_fail_msg("sync case failed: %s\n", strerror(-ret));
+ if (sync_read != EXPECTED_BYTES)
+ ksft_exit_fail_msg("sync case read %d, expected %d\n",
+ sync_read, EXPECTED_BYTES);
+ ksft_test_result_pass("sync provider transmits split record\n");
+
+ run_cmd("modprobe af_alg algif_aead pcrypt >/dev/null 2>&1");
+ ret = instantiate_aead("pcrypt(generic-gcm-vaes-avx2)");
+ if (ret)
+ ret = instantiate_aead("pcrypt(gcm(aes))");
+ if (ret || !have_async_pcrypt())
+ ksft_exit_skip("missing async pcrypt gcm(aes) provider\n");
+
+ ret = run_case(obj_path, &async_read);
+ if (ret)
+ ksft_exit_fail_msg("async case failed: %s\n", strerror(-ret));
+ if (async_read != EXPECTED_BYTES)
+ ksft_exit_fail_msg("async case read %d, expected %d\n",
+ async_read, EXPECTED_BYTES);
+ ksft_test_result_pass("async provider transmits split record\n");
+
+ ksft_finished();
+}
--
2.54.0
^ permalink raw reply related [flat|nested] 3+ messages in thread