linux-trace-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* KASAN: null-ptr-deref Write in tctx_task_work_run
@ 2024-03-18  0:59 Ubisectech Sirius
  2024-03-18  2:02 ` Jens Axboe
  0 siblings, 1 reply; 3+ messages in thread
From: Ubisectech Sirius @ 2024-03-18  0:59 UTC (permalink / raw)
  To: linux-kernel, linux-trace-kernel; +Cc: axboe

[-- Attachment #1: Type: text/plain, Size: 2666 bytes --]

Hello.
We are Ubisectech Sirius Team, the vulnerability lab of China ValiantSec. Recently, our team has discovered a issue in Linux kernel 6.8.0-ge5e038b7ae9d. Attached to the email were a POC file of the issue.

Stack dump:

==================================================================
BUG: KASAN: null-ptr-deref in instrument_atomic_read_write include/linux/instrumented.h:96 [inline]
BUG: KASAN: null-ptr-deref in llist_del_all include/linux/llist.h:266 [inline]
BUG: KASAN: null-ptr-deref in tctx_task_work_run+0x7d/0x330 io_uring/io_uring.c:1267
Write of size 8 at addr 00000000000001c0 by task iou-sqp-215603/215604

CPU: 0 PID: 215604 Comm: iou-sqp-215603 Not tainted 6.8.0-ge5e038b7ae9d #40
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0x116/0x1b0 lib/dump_stack.c:114
 kasan_report+0xbd/0xf0 mm/kasan/report.c:601
 check_region_inline mm/kasan/generic.c:183 [inline]
 kasan_check_range+0xf4/0x1a0 mm/kasan/generic.c:189
 instrument_atomic_read_write include/linux/instrumented.h:96 [inline]
 llist_del_all include/linux/llist.h:266 [inline]
 tctx_task_work_run+0x7d/0x330 io_uring/io_uring.c:1267
 io_sq_tw+0x12a/0x1d0 io_uring/sqpoll.c:245
 io_sq_thread+0x8d7/0x18a0 io_uring/sqpoll.c:308
 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:243
 </TASK>
==================================================================
Kernel panic - not syncing: KASAN: panic_on_warn set ...
CPU: 0 PID: 215604 Comm: iou-sqp-215603 Not tainted 6.8.0-ge5e038b7ae9d #40
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0x3d/0x1b0 lib/dump_stack.c:114
 panic+0x6d2/0x780 kernel/panic.c:344
 check_panic_on_warn+0xb1/0xc0 kernel/panic.c:237
 end_report+0x107/0x150 mm/kasan/report.c:226
 kasan_report+0xcd/0xf0 mm/kasan/report.c:603
 check_region_inline mm/kasan/generic.c:183 [inline]
 kasan_check_range+0xf4/0x1a0 mm/kasan/generic.c:189
 instrument_atomic_read_write include/linux/instrumented.h:96 [inline]
 llist_del_all include/linux/llist.h:266 [inline]
 tctx_task_work_run+0x7d/0x330 io_uring/io_uring.c:1267
 io_sq_tw+0x12a/0x1d0 io_uring/sqpoll.c:245
 io_sq_thread+0x8d7/0x18a0 io_uring/sqpoll.c:308
 ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:243
 </TASK>
Kernel Offset: disabled
Rebooting in 86400 seconds..

Thank you for taking the time to read this email and we look forward to working with you further.



[-- Attachment #2: poc.c --]
[-- Type: application/octet-stream, Size: 7061 bytes --]

// autogenerated by syzkaller (https://github.com/google/syzkaller)

#define _GNU_SOURCE

#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#ifndef __NR_io_uring_setup
#define __NR_io_uring_setup 425
#endif

static void sleep_ms(uint64_t ms)
{
  usleep(ms * 1000);
}

static uint64_t current_time_ms(void)
{
  struct timespec ts;
  if (clock_gettime(CLOCK_MONOTONIC, &ts))
    exit(1);
  return (uint64_t)ts.tv_sec * 1000 + (uint64_t)ts.tv_nsec / 1000000;
}

static bool write_file(const char* file, const char* what, ...)
{
  char buf[1024];
  va_list args;
  va_start(args, what);
  vsnprintf(buf, sizeof(buf), what, args);
  va_end(args);
  buf[sizeof(buf) - 1] = 0;
  int len = strlen(buf);
  int fd = open(file, O_WRONLY | O_CLOEXEC);
  if (fd == -1)
    return false;
  if (write(fd, buf, len) != len) {
    int err = errno;
    close(fd);
    errno = err;
    return false;
  }
  close(fd);
  return true;
}

#define SIZEOF_IO_URING_SQE 64
#define SIZEOF_IO_URING_CQE 16
#define SQ_HEAD_OFFSET 0
#define SQ_TAIL_OFFSET 64
#define SQ_RING_MASK_OFFSET 256
#define SQ_RING_ENTRIES_OFFSET 264
#define SQ_FLAGS_OFFSET 276
#define SQ_DROPPED_OFFSET 272
#define CQ_HEAD_OFFSET 128
#define CQ_TAIL_OFFSET 192
#define CQ_RING_MASK_OFFSET 260
#define CQ_RING_ENTRIES_OFFSET 268
#define CQ_RING_OVERFLOW_OFFSET 284
#define CQ_FLAGS_OFFSET 280
#define CQ_CQES_OFFSET 320

struct io_sqring_offsets {
  uint32_t head;
  uint32_t tail;
  uint32_t ring_mask;
  uint32_t ring_entries;
  uint32_t flags;
  uint32_t dropped;
  uint32_t array;
  uint32_t resv1;
  uint64_t resv2;
};

struct io_cqring_offsets {
  uint32_t head;
  uint32_t tail;
  uint32_t ring_mask;
  uint32_t ring_entries;
  uint32_t overflow;
  uint32_t cqes;
  uint64_t resv[2];
};

struct io_uring_params {
  uint32_t sq_entries;
  uint32_t cq_entries;
  uint32_t flags;
  uint32_t sq_thread_cpu;
  uint32_t sq_thread_idle;
  uint32_t features;
  uint32_t resv[4];
  struct io_sqring_offsets sq_off;
  struct io_cqring_offsets cq_off;
};

#define IORING_OFF_SQ_RING 0
#define IORING_OFF_SQES 0x10000000ULL

static long syz_io_uring_setup(volatile long a0, volatile long a1,
                               volatile long a2, volatile long a3)
{
  uint32_t entries = (uint32_t)a0;
  struct io_uring_params* setup_params = (struct io_uring_params*)a1;
  void** ring_ptr_out = (void**)a2;
  void** sqes_ptr_out = (void**)a3;
  uint32_t fd_io_uring = syscall(__NR_io_uring_setup, entries, setup_params);
  uint32_t sq_ring_sz =
      setup_params->sq_off.array + setup_params->sq_entries * sizeof(uint32_t);
  uint32_t cq_ring_sz = setup_params->cq_off.cqes +
                        setup_params->cq_entries * SIZEOF_IO_URING_CQE;
  uint32_t ring_sz = sq_ring_sz > cq_ring_sz ? sq_ring_sz : cq_ring_sz;
  *ring_ptr_out =
      mmap(0, ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
           fd_io_uring, IORING_OFF_SQ_RING);
  uint32_t sqes_sz = setup_params->sq_entries * SIZEOF_IO_URING_SQE;
  *sqes_ptr_out = mmap(0, sqes_sz, PROT_READ | PROT_WRITE,
                       MAP_SHARED | MAP_POPULATE, fd_io_uring, IORING_OFF_SQES);
  uint32_t* array =
      (uint32_t*)((uintptr_t)*ring_ptr_out + setup_params->sq_off.array);
  for (uint32_t index = 0; index < entries; index++)
    array[index] = index;
  return fd_io_uring;
}

static int inject_fault(int nth)
{
  int fd;
  fd = open("/proc/thread-self/fail-nth", O_RDWR);
  if (fd == -1)
    exit(1);
  char buf[16];
  sprintf(buf, "%d", nth);
  if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf))
    exit(1);
  return fd;
}

static void kill_and_wait(int pid, int* status)
{
  kill(-pid, SIGKILL);
  kill(pid, SIGKILL);
  for (int i = 0; i < 100; i++) {
    if (waitpid(-1, status, WNOHANG | __WALL) == pid)
      return;
    usleep(1000);
  }
  DIR* dir = opendir("/sys/fs/fuse/connections");
  if (dir) {
    for (;;) {
      struct dirent* ent = readdir(dir);
      if (!ent)
        break;
      if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0)
        continue;
      char abort[300];
      snprintf(abort, sizeof(abort), "/sys/fs/fuse/connections/%s/abort",
               ent->d_name);
      int fd = open(abort, O_WRONLY);
      if (fd == -1) {
        continue;
      }
      if (write(fd, abort, 1) < 0) {
      }
      close(fd);
    }
    closedir(dir);
  } else {
  }
  while (waitpid(-1, status, __WALL) != pid) {
  }
}

static void setup_test()
{
  prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
  setpgrp();
  write_file("/proc/self/oom_score_adj", "1000");
}

static void setup_fault()
{
  static struct {
    const char* file;
    const char* val;
    bool fatal;
  } files[] = {
      {"/sys/kernel/debug/failslab/ignore-gfp-wait", "N", true},
      {"/sys/kernel/debug/fail_futex/ignore-private", "N", false},
      {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-highmem", "N", false},
      {"/sys/kernel/debug/fail_page_alloc/ignore-gfp-wait", "N", false},
      {"/sys/kernel/debug/fail_page_alloc/min-order", "0", false},
  };
  unsigned i;
  for (i = 0; i < sizeof(files) / sizeof(files[0]); i++) {
    if (!write_file(files[i].file, files[i].val)) {
      if (files[i].fatal)
        exit(1);
    }
  }
}

static void execute_one(void);

#define WAIT_FLAGS __WALL

static void loop(void)
{
  int iter = 0;
  for (;; iter++) {
    int pid = fork();
    if (pid < 0)
      exit(1);
    if (pid == 0) {
      setup_test();
      execute_one();
      exit(0);
    }
    int status = 0;
    uint64_t start = current_time_ms();
    for (;;) {
      if (waitpid(-1, &status, WNOHANG | WAIT_FLAGS) == pid)
        break;
      sleep_ms(1);
      if (current_time_ms() - start < 5000)
        continue;
      kill_and_wait(pid, &status);
      break;
    }
  }
}

void execute_one(void)
{
  *(uint32_t*)0x20000004 = 0;
  *(uint32_t*)0x20000008 = 6;
  *(uint32_t*)0x2000000c = 0;
  *(uint32_t*)0x20000010 = 0;
  *(uint32_t*)0x20000018 = -1;
  memset((void*)0x2000001c, 0, 12);
  inject_fault(12);
  syz_io_uring_setup(/*entries=*/0x7fc2, /*params=*/0x20000000, /*ring_ptr=*/0,
                     /*sqes_ptr=*/0);
}
int main(void)
{
  syscall(__NR_mmap, /*addr=*/0x1ffff000ul, /*len=*/0x1000ul, /*prot=*/0ul,
          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
          /*offset=*/0ul);
  syscall(__NR_mmap, /*addr=*/0x20000000ul, /*len=*/0x1000000ul,
          /*prot=PROT_WRITE|PROT_READ|PROT_EXEC*/ 7ul,
          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
          /*offset=*/0ul);
  syscall(__NR_mmap, /*addr=*/0x21000000ul, /*len=*/0x1000ul, /*prot=*/0ul,
          /*flags=MAP_FIXED|MAP_ANONYMOUS|MAP_PRIVATE*/ 0x32ul, /*fd=*/-1,
          /*offset=*/0ul);
  setup_fault();
  loop();
  return 0;
}

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: KASAN: null-ptr-deref Write in tctx_task_work_run
  2024-03-18  0:59 KASAN: null-ptr-deref Write in tctx_task_work_run Ubisectech Sirius
@ 2024-03-18  2:02 ` Jens Axboe
  2024-03-18  6:35   ` 回复:KASAN: " Ubisectech Sirius
  0 siblings, 1 reply; 3+ messages in thread
From: Jens Axboe @ 2024-03-18  2:02 UTC (permalink / raw)
  To: Ubisectech Sirius, linux-kernel, linux-trace-kernel

On 3/17/24 6:59 PM, Ubisectech Sirius wrote:
> Hello.
> We are Ubisectech Sirius Team, the vulnerability lab of China ValiantSec. Recently, our team has discovered a issue in Linux kernel 6.8.0-ge5e038b7ae9d. Attached to the email were a POC file of the issue.
> 
> Stack dump:
> 
> ==================================================================
> BUG: KASAN: null-ptr-deref in instrument_atomic_read_write include/linux/instrumented.h:96 [inline]
> BUG: KASAN: null-ptr-deref in llist_del_all include/linux/llist.h:266 [inline]
> BUG: KASAN: null-ptr-deref in tctx_task_work_run+0x7d/0x330 io_uring/io_uring.c:1267
> Write of size 8 at addr 00000000000001c0 by task iou-sqp-215603/215604
> 
> CPU: 0 PID: 215604 Comm: iou-sqp-215603 Not tainted 6.8.0-ge5e038b7ae9d #40
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
> Call Trace:
>  <TASK>
>  __dump_stack lib/dump_stack.c:88 [inline]
>  dump_stack_lvl+0x116/0x1b0 lib/dump_stack.c:114
>  kasan_report+0xbd/0xf0 mm/kasan/report.c:601
>  check_region_inline mm/kasan/generic.c:183 [inline]
>  kasan_check_range+0xf4/0x1a0 mm/kasan/generic.c:189
>  instrument_atomic_read_write include/linux/instrumented.h:96 [inline]
>  llist_del_all include/linux/llist.h:266 [inline]
>  tctx_task_work_run+0x7d/0x330 io_uring/io_uring.c:1267
>  io_sq_tw+0x12a/0x1d0 io_uring/sqpoll.c:245
>  io_sq_thread+0x8d7/0x18a0 io_uring/sqpoll.c:308
>  ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
>  ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:243
>  </TASK>
> ==================================================================
> Kernel panic - not syncing: KASAN: panic_on_warn set ...
> CPU: 0 PID: 215604 Comm: iou-sqp-215603 Not tainted 6.8.0-ge5e038b7ae9d #40
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014

I think you snipped the fault injection that came before this. It looks
like an allocation failure, so we don't get tsk->io_uring setup for the
SQPOLL thread. Not a great way to handle this, but can you try the
below? Would be nicer if we could just prune the task rather than wake
it and have it error.

diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 363052b4ea76..db7b0fdfe1cb 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -274,6 +274,10 @@ static int io_sq_thread(void *data)
 	char buf[TASK_COMM_LEN];
 	DEFINE_WAIT(wait);
 
+	/* offload context creation failed, just exit */
+	if (!current->io_uring) {
+		goto err_out;
+
 	snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
 	set_task_comm(current, buf);
 
@@ -371,7 +375,7 @@ static int io_sq_thread(void *data)
 		atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags);
 	io_run_task_work();
 	mutex_unlock(&sqd->lock);
-
+err_out:
 	complete(&sqd->exited);
 	do_exit(0);
 }

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* 回复:KASAN: null-ptr-deref Write in tctx_task_work_run
  2024-03-18  2:02 ` Jens Axboe
@ 2024-03-18  6:35   ` Ubisectech Sirius
  0 siblings, 0 replies; 3+ messages in thread
From: Ubisectech Sirius @ 2024-03-18  6:35 UTC (permalink / raw)
  To: Jens Axboe, linux-kernel, linux-trace-kernel

> I think you snipped the fault injection that came before this. It looks
> like an allocation failure, so we don't get tsk->io_uring setup for the
> SQPOLL thread. Not a great way to handle this, but can you try the
> below? Would be nicer if we could just prune the task rather than wake
> it and have it error.

Hi.
  The issue does not appear again when I apply the patch to the Linux kernel.


On 3/17/24 6:59 PM, Ubisectech Sirius wrote:
> Hello.
> We are Ubisectech Sirius Team, the vulnerability lab of China ValiantSec. Recently, our team has discovered a issue in Linux kernel 6.8.0-ge5e038b7ae9d. Attached to the email were a POC file of the issue.
> 
> Stack dump:
> 
> ==================================================================
> BUG: KASAN: null-ptr-deref in instrument_atomic_read_write include/linux/instrumented.h:96 [inline]
> BUG: KASAN: null-ptr-deref in llist_del_all include/linux/llist.h:266 [inline]
> BUG: KASAN: null-ptr-deref in tctx_task_work_run+0x7d/0x330 io_uring/io_uring.c:1267
> Write of size 8 at addr 00000000000001c0 by task iou-sqp-215603/215604
> 
> CPU: 0 PID: 215604 Comm: iou-sqp-215603 Not tainted 6.8.0-ge5e038b7ae9d #40
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
> Call Trace:
>  <TASK>
>  __dump_stack lib/dump_stack.c:88 [inline]
>  dump_stack_lvl+0x116/0x1b0 lib/dump_stack.c:114
>  kasan_report+0xbd/0xf0 mm/kasan/report.c:601
>  check_region_inline mm/kasan/generic.c:183 [inline]
>  kasan_check_range+0xf4/0x1a0 mm/kasan/generic.c:189
>  instrument_atomic_read_write include/linux/instrumented.h:96 [inline]
>  llist_del_all include/linux/llist.h:266 [inline]
>  tctx_task_work_run+0x7d/0x330 io_uring/io_uring.c:1267
>  io_sq_tw+0x12a/0x1d0 io_uring/sqpoll.c:245
>  io_sq_thread+0x8d7/0x18a0 io_uring/sqpoll.c:308
>  ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147
>  ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:243
>  </TASK>
> ==================================================================
> Kernel panic - not syncing: KASAN: panic_on_warn set ...
> CPU: 0 PID: 215604 Comm: iou-sqp-215603 Not tainted 6.8.0-ge5e038b7ae9d #40
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014

I think you snipped the fault injection that came before this. It looks
like an allocation failure, so we don't get tsk->io_uring setup for the
SQPOLL thread. Not a great way to handle this, but can you try the
below? Would be nicer if we could just prune the task rather than wake
it and have it error.

diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 363052b4ea76..db7b0fdfe1cb 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -274,6 +274,10 @@ static int io_sq_thread(void *data)
  char buf[TASK_COMM_LEN];
  DEFINE_WAIT(wait);
 
+ /* offload context creation failed, just exit */
+ if (!current->io_uring) {
+  goto err_out;
+
  snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
  set_task_comm(current, buf);
 
@@ -371,7 +375,7 @@ static int io_sq_thread(void *data)
   atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags);
  io_run_task_work();
  mutex_unlock(&sqd->lock);
-
+err_out:
  complete(&sqd->exited);
  do_exit(0);
 }

-- 
Jens Axboe


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2024-03-18  6:40 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-03-18  0:59 KASAN: null-ptr-deref Write in tctx_task_work_run Ubisectech Sirius
2024-03-18  2:02 ` Jens Axboe
2024-03-18  6:35   ` 回复:KASAN: " Ubisectech Sirius

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).