public inbox for v9fs@lists.linux.dev
 help / color / mirror / Atom feed
* p9_client_write doesn't check for negative "written" from server
@ 2024-12-17 15:17 rtm
  2024-12-18  0:35 ` Dominique Martinet
  0 siblings, 1 reply; 2+ messages in thread
From: rtm @ 2024-12-17 15:17 UTC (permalink / raw)
  To: Eric Van Hensbergen, Latchesar Ionkov, Dominique Martinet, v9fs

[-- Attachment #1: Type: text/plain, Size: 2096 bytes --]

If the kernel's 9p client sends a write request, and the server returns
a successful Rwrite reply, but with a negative "written" value, then
this line towards the end of p9_client_write():

                iov_iter_revert(from, count - written - iov_iter_count(from));

can pass an unroll argument to iov_iter_revert() that's larger than
the number of bytes in the iov. This causes iov_iter_folioq_revert()
to try to back up beyond the beginning of the iov data, and this code

                if (slot == 0) { 
                        folioq = folioq->prev;
                        slot = folioq_nr_slots(folioq);

tries to dereference a NULL folioq, since folioq->prev is NULL.

I've attached a demo.

# cc 9p2a.c
# ./a.out
...
BUG: kernel NULL pointer dereference, address: 000000000000011e
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: Oops: 0000 [#1] SMP DEBUG_PAGEALLOC PTI
CPU: 1 UID: 0 PID: 1613 Comm: sh Not tainted 6.13.0-rc3-00017-gf44d154d6e3d #12
Hardware name: FreeBSD BHYVE/BHYVE, BIOS 14.0 10/17/2021
RIP: 0010:iov_iter_revert+0xc9/0x150
Code: cc cc cc 48 29 f1 48 89 4f 08 c3 cc cc cc cc 48 c7 47 08 00 00 00 00 0f b6
 57 20 bf 00 10 00 00 eb 1f 83 ea 01 89 d1 49 89 f9 <41> 0f b6 8c 08 00 01 00 00
 49 d3 e1 4c 89 c9 49 39 f1 73 15 4c 29
RSP: 0018:ffffc900020dfae8 EFLAGS: 00010246
Call Trace:
 <TASK>
 ? __die+0x1e/0x60
 ? page_fault_oops+0x157/0x450
 ? check_object+0xc2/0x430
 ? exc_page_fault+0x66/0x140
 ? asm_exc_page_fault+0x26/0x30
 ? entry_SYSCALL_64_after_hwframe+0x76/0x7e
 ? iov_iter_revert+0xc9/0x150
 p9_client_write+0x115/0x250
 v9fs_issue_write+0x38/0x70
 netfs_advance_write+0xc6/0x2a0
 netfs_write_folio+0x2e7/0x9a0
 netfs_writepages+0xd7/0x3e0
 ? filemap_dirty_folio+0x5b/0x70
 do_writepages+0xc6/0x240
 __filemap_fdatawrite_range+0x9a/0xb0
 v9fs_dir_release+0x153/0x180
 ? task_work_add+0x71/0xe0
 __fput+0xe8/0x290
 task_work_run+0x57/0xa0
 syscall_exit_to_user_mode+0x105/0x110
 do_syscall_64+0x4b/0xd0
 entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7fdfc55fe96b

Robert Morris
rtm@mit.edu


[-- Attachment #2: 9p2a.c --]
[-- Type: application/octet-stream, Size: 6061 bytes --]

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <sys/wait.h>
#include <sys/resource.h>

int readn(int fd, char *buf, int n) {
  int orig = n;
  while(n > 0){
    int cc = read(fd, buf, n);
    if(cc <= 0) { perror("read"); return -1; }
    n -= cc;
    buf += cc;
  }
  return orig;
}

char *
getstr(unsigned char *p)
{
  unsigned int n = *(unsigned short *)p;
  char *buf = malloc(n+1);
  memcpy(buf, p+2, n);
  buf[n] = '\0';
  return buf;
}

int
main(){
  struct rlimit r;
  r.rlim_cur = r.rlim_max = 0;
  setrlimit(RLIMIT_CORE, &r);

  int s = socket(AF_INET, SOCK_STREAM, 0);
  { int yes = 1;
    setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes));
  }
  struct sockaddr_in sin;
  memset(&sin, 0, sizeof(sin));
  sin.sin_family = AF_INET;
  sin.sin_port = htons(564);
  if(bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0){
    perror("bind"); exit(1);
  }
  listen(s, 10);
  sync(); sleep(1);

  if(fork() == 0){
    close(s);
    // -o ...,debug=0x10f
    if(system("echo -n mount: ; mount -t 9p -o trans=tcp,cache=loose,access=any,debug=0x0 127.0.0.1 /mnt") == 0){
      system("echo -n ls: ; ls -l /mnt/. /mnt/z");
      system("echo -n echo: ; echo hi > /mnt/a");
      system("echo -n mkdir: ; mkdir /mnt/d");

      // system("echo -n mv: ; mv /mnt/x /mnt/d/y");
      printf("mv: "); fflush(stdout); usleep(50000);
      if(rename("/mnt/x", "/mnt/d/y") < 0) perror("rename");

      printf("open: "); fflush(stdout); usleep(50000);
      int fd = open("/mnt/x", 0);
      if(fd < 0) perror("open");

      printf("read: "); fflush(stdout); usleep(50000);
      { char junk[2]; if(read(fd, junk, 2) < 0) perror("read"); }
      close(fd);

      system("echo -n rm: ; rm -f /mnt/d/y");
      system("echo -n rm: ; rm -f /mnt/d/x");
      system("echo -n umount: ; umount /mnt");
    }
    exit(0);
  }

  if(1){
    socklen_t sinlen = sizeof(sin);
    int s1 = accept(s, (struct sockaddr *) &sin, &sinlen);
    if(s1 < 0) { perror("accept"); exit(1); }
    close(s);
  
    int opno = 0;
    while(1){
      char ibuf[1024];
      if(readn(s1, ibuf, 4) < 0) break;
      int ilen = *(int*)(ibuf+0);
      if(readn(s1, ibuf+4, ilen - 4) < 0) break;

      printf("%d: ", opno);
      fflush(stdout);

      char obuf[sizeof(ibuf)];
      memset(obuf, 0xff, sizeof(obuf));
      *(int*)(obuf+0) = ilen; // length
      if(ibuf[4] == 100){ // Tversion
        printf("version %d %s\n", *(int*)(ibuf+7), getstr(ibuf+11));
        memcpy(obuf, ibuf, ilen);
      } else if(ibuf[4] == 24){ // Tgetattr (different from Tstat!)
        printf("getattr\n");
        // https://github.com/chaos/diod/blob/master/protocol.md
        int sz = 161;
        *(int*)(obuf+0) = sz + 7;
        *(int*)(obuf+32) = 0; // uid
        *(int*)(obuf+36) = 0; // gid
        if(opno == 25 || opno == 34){
          *(int*)(obuf+28) = 0100777; // S_IFREG, rwxrwxrwx
        } else {
          *(int*)(obuf+28) = 0040777; // S_IFDIR, rwxrwxrwx
        }
      } else if(ibuf[4] == 110){ // Twalk
        int nwqid = *(short*)(ibuf+15);
        printf("walk %d %s\n", nwqid, nwqid?getstr(ibuf+17):"-");
        if(0 && opno == 24){
          // error...
          ibuf[4] = 106; // Terror
          *(int*)(obuf+0) = 11;
          *(int*)(obuf+7) = ENOENT;
        } else {
          *(short*)(obuf+7) = nwqid;
          *(int*)(obuf+0) = 9 + nwqid*13;
          if(opno == 24){
            *(char*)(obuf+21) = 1;
          }
        }
      } else if(ibuf[4] == 104){ // Tattach
        printf("attach\n");
        *(int*)(obuf+0) = 20;
      } else if(ibuf[4] == 120){ // Tclunk
        printf("clunk\n");
        *(int*)(obuf+0) = 7;
      } else if(ibuf[4] == 30){ // Txattrwalk
        printf("xattrwalk\n");
        *(int*)(obuf+0) = 15;
        *(long*)(obuf+7) = 2; // size
      } else if(ibuf[4] == 116){ // Tread
        unsigned long offset = *(long*)(ibuf+11);
        unsigned int count = *(int*)(ibuf+19);
        printf("read %ld %d\n", offset, count); fflush(stdout);
        int n = 0;
        if(offset == 0 && count > 2){
          unsigned char *p = obuf+11;
          unsigned char *p0 = p;

          p += 2; // size;
          p += 2; // type
          p += 4; // dev
          p += 1; // qid.type
          p += 4; // qid.vers
          p += 8; // qid.path
          p += 4; // permissions
          p += 4; // atime
          p += 4; // mtime
          p += 8; // length
          *(short*)p = 1; // name length
          p++;
          *p++ = 'x';
          *(short*)p = 1; // owner name length
          *p++ = 'x';
          *(short*)p = 1; // group name length
          *p++ = 'x';
          *(short*)p = 1; // last modify user name length
          *p++ = 'x';
            
          n = p - p0;
          printf(" >>> n=%d <<< ", n); fflush(stdout);
          *(short*)(p0) = n;
        }
        *(int*)(obuf+0) = n + 11;
        *(int*)(obuf+7) = n;
      } else if(ibuf[4] == 12){ // Tlopen
        printf("lopen\n");
        *(int*)(obuf+0) = 24;
      } else if(ibuf[4] == 40){ // Treaddir
        printf("readdir\n");
        // each dirent is 25 bytes
        unsigned long offset = *(long*)(ibuf+11);
        unsigned int count = *(int*)(ibuf+19);
        int n = 0;
        if(offset == 0){
          n = 1;
          unsigned char *p0 = obuf + 11;
          unsigned char *p = p0;
          p += 13; // qid
          p += 8; // offset
          p += 1; // type
          *(short*)p = 1;
          p += 2;
          *p++ = 'x';
        }
        *(int*)(obuf+0) = 11 + n*25;
      } else if(ibuf[4] == 8){ // Tstatfs
        printf("statfs\n");
        *(int*)(obuf+0) = 67;
      } else {
        printf("%d ???\n", ibuf[4] & 0xff);
      }
      fflush(stdout);
      obuf[4] = ibuf[4] + 1; // convert Txxx to Rxxx
      *(short*)(obuf+5) = *(short*)(ibuf+5); // tag

      if(write(s1, obuf, *(int*)(obuf+0))<=0) perror("write");

      opno += 1;
    }
  }
  close(s);
}

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-12-18  0:35 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-12-17 15:17 p9_client_write doesn't check for negative "written" from server rtm
2024-12-18  0:35 ` Dominique Martinet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox