public inbox for v9fs@lists.linux.dev
 help / color / mirror / Atom feed
* p9_client_write doesn't check for negative "written" from server
@ 2024-12-17 15:17 rtm
  2024-12-18  0:35 ` Dominique Martinet
  0 siblings, 1 reply; 2+ messages in thread
From: rtm @ 2024-12-17 15:17 UTC (permalink / raw)
  To: Eric Van Hensbergen, Latchesar Ionkov, Dominique Martinet, v9fs

[-- Attachment #1: Type: text/plain, Size: 2096 bytes --]

If the kernel's 9p client sends a write request, and the server returns
a successful Rwrite reply, but with a negative "written" value, then
this line towards the end of p9_client_write():

                iov_iter_revert(from, count - written - iov_iter_count(from));

can pass an unroll argument to iov_iter_revert() that's larger than
the number of bytes in the iov. This causes iov_iter_folioq_revert()
to try to back up beyond the beginning of the iov data, and this code

                if (slot == 0) { 
                        folioq = folioq->prev;
                        slot = folioq_nr_slots(folioq);

tries to dereference a NULL folioq, since folioq->prev is NULL.

I've attached a demo.

# cc 9p2a.c
# ./a.out
...
BUG: kernel NULL pointer dereference, address: 000000000000011e
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: Oops: 0000 [#1] SMP DEBUG_PAGEALLOC PTI
CPU: 1 UID: 0 PID: 1613 Comm: sh Not tainted 6.13.0-rc3-00017-gf44d154d6e3d #12
Hardware name: FreeBSD BHYVE/BHYVE, BIOS 14.0 10/17/2021
RIP: 0010:iov_iter_revert+0xc9/0x150
Code: cc cc cc 48 29 f1 48 89 4f 08 c3 cc cc cc cc 48 c7 47 08 00 00 00 00 0f b6
 57 20 bf 00 10 00 00 eb 1f 83 ea 01 89 d1 49 89 f9 <41> 0f b6 8c 08 00 01 00 00
 49 d3 e1 4c 89 c9 49 39 f1 73 15 4c 29
RSP: 0018:ffffc900020dfae8 EFLAGS: 00010246
Call Trace:
 <TASK>
 ? __die+0x1e/0x60
 ? page_fault_oops+0x157/0x450
 ? check_object+0xc2/0x430
 ? exc_page_fault+0x66/0x140
 ? asm_exc_page_fault+0x26/0x30
 ? entry_SYSCALL_64_after_hwframe+0x76/0x7e
 ? iov_iter_revert+0xc9/0x150
 p9_client_write+0x115/0x250
 v9fs_issue_write+0x38/0x70
 netfs_advance_write+0xc6/0x2a0
 netfs_write_folio+0x2e7/0x9a0
 netfs_writepages+0xd7/0x3e0
 ? filemap_dirty_folio+0x5b/0x70
 do_writepages+0xc6/0x240
 __filemap_fdatawrite_range+0x9a/0xb0
 v9fs_dir_release+0x153/0x180
 ? task_work_add+0x71/0xe0
 __fput+0xe8/0x290
 task_work_run+0x57/0xa0
 syscall_exit_to_user_mode+0x105/0x110
 do_syscall_64+0x4b/0xd0
 entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7fdfc55fe96b

Robert Morris
rtm@mit.edu


[-- Attachment #2: 9p2a.c --]
[-- Type: application/octet-stream, Size: 6061 bytes --]

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <sys/wait.h>
#include <sys/resource.h>

int readn(int fd, char *buf, int n) {
  int orig = n;
  while(n > 0){
    int cc = read(fd, buf, n);
    if(cc <= 0) { perror("read"); return -1; }
    n -= cc;
    buf += cc;
  }
  return orig;
}

char *
getstr(unsigned char *p)
{
  unsigned int n = *(unsigned short *)p;
  char *buf = malloc(n+1);
  memcpy(buf, p+2, n);
  buf[n] = '\0';
  return buf;
}

int
main(){
  struct rlimit r;
  r.rlim_cur = r.rlim_max = 0;
  setrlimit(RLIMIT_CORE, &r);

  int s = socket(AF_INET, SOCK_STREAM, 0);
  { int yes = 1;
    setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes));
  }
  struct sockaddr_in sin;
  memset(&sin, 0, sizeof(sin));
  sin.sin_family = AF_INET;
  sin.sin_port = htons(564);
  if(bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0){
    perror("bind"); exit(1);
  }
  listen(s, 10);
  sync(); sleep(1);

  if(fork() == 0){
    close(s);
    // -o ...,debug=0x10f
    if(system("echo -n mount: ; mount -t 9p -o trans=tcp,cache=loose,access=any,debug=0x0 127.0.0.1 /mnt") == 0){
      system("echo -n ls: ; ls -l /mnt/. /mnt/z");
      system("echo -n echo: ; echo hi > /mnt/a");
      system("echo -n mkdir: ; mkdir /mnt/d");

      // system("echo -n mv: ; mv /mnt/x /mnt/d/y");
      printf("mv: "); fflush(stdout); usleep(50000);
      if(rename("/mnt/x", "/mnt/d/y") < 0) perror("rename");

      printf("open: "); fflush(stdout); usleep(50000);
      int fd = open("/mnt/x", 0);
      if(fd < 0) perror("open");

      printf("read: "); fflush(stdout); usleep(50000);
      { char junk[2]; if(read(fd, junk, 2) < 0) perror("read"); }
      close(fd);

      system("echo -n rm: ; rm -f /mnt/d/y");
      system("echo -n rm: ; rm -f /mnt/d/x");
      system("echo -n umount: ; umount /mnt");
    }
    exit(0);
  }

  if(1){
    socklen_t sinlen = sizeof(sin);
    int s1 = accept(s, (struct sockaddr *) &sin, &sinlen);
    if(s1 < 0) { perror("accept"); exit(1); }
    close(s);
  
    int opno = 0;
    while(1){
      char ibuf[1024];
      if(readn(s1, ibuf, 4) < 0) break;
      int ilen = *(int*)(ibuf+0);
      if(readn(s1, ibuf+4, ilen - 4) < 0) break;

      printf("%d: ", opno);
      fflush(stdout);

      char obuf[sizeof(ibuf)];
      memset(obuf, 0xff, sizeof(obuf));
      *(int*)(obuf+0) = ilen; // length
      if(ibuf[4] == 100){ // Tversion
        printf("version %d %s\n", *(int*)(ibuf+7), getstr(ibuf+11));
        memcpy(obuf, ibuf, ilen);
      } else if(ibuf[4] == 24){ // Tgetattr (different from Tstat!)
        printf("getattr\n");
        // https://github.com/chaos/diod/blob/master/protocol.md
        int sz = 161;
        *(int*)(obuf+0) = sz + 7;
        *(int*)(obuf+32) = 0; // uid
        *(int*)(obuf+36) = 0; // gid
        if(opno == 25 || opno == 34){
          *(int*)(obuf+28) = 0100777; // S_IFREG, rwxrwxrwx
        } else {
          *(int*)(obuf+28) = 0040777; // S_IFDIR, rwxrwxrwx
        }
      } else if(ibuf[4] == 110){ // Twalk
        int nwqid = *(short*)(ibuf+15);
        printf("walk %d %s\n", nwqid, nwqid?getstr(ibuf+17):"-");
        if(0 && opno == 24){
          // error...
          ibuf[4] = 106; // Terror
          *(int*)(obuf+0) = 11;
          *(int*)(obuf+7) = ENOENT;
        } else {
          *(short*)(obuf+7) = nwqid;
          *(int*)(obuf+0) = 9 + nwqid*13;
          if(opno == 24){
            *(char*)(obuf+21) = 1;
          }
        }
      } else if(ibuf[4] == 104){ // Tattach
        printf("attach\n");
        *(int*)(obuf+0) = 20;
      } else if(ibuf[4] == 120){ // Tclunk
        printf("clunk\n");
        *(int*)(obuf+0) = 7;
      } else if(ibuf[4] == 30){ // Txattrwalk
        printf("xattrwalk\n");
        *(int*)(obuf+0) = 15;
        *(long*)(obuf+7) = 2; // size
      } else if(ibuf[4] == 116){ // Tread
        unsigned long offset = *(long*)(ibuf+11);
        unsigned int count = *(int*)(ibuf+19);
        printf("read %ld %d\n", offset, count); fflush(stdout);
        int n = 0;
        if(offset == 0 && count > 2){
          unsigned char *p = obuf+11;
          unsigned char *p0 = p;

          p += 2; // size;
          p += 2; // type
          p += 4; // dev
          p += 1; // qid.type
          p += 4; // qid.vers
          p += 8; // qid.path
          p += 4; // permissions
          p += 4; // atime
          p += 4; // mtime
          p += 8; // length
          *(short*)p = 1; // name length
          p++;
          *p++ = 'x';
          *(short*)p = 1; // owner name length
          *p++ = 'x';
          *(short*)p = 1; // group name length
          *p++ = 'x';
          *(short*)p = 1; // last modify user name length
          *p++ = 'x';
            
          n = p - p0;
          printf(" >>> n=%d <<< ", n); fflush(stdout);
          *(short*)(p0) = n;
        }
        *(int*)(obuf+0) = n + 11;
        *(int*)(obuf+7) = n;
      } else if(ibuf[4] == 12){ // Tlopen
        printf("lopen\n");
        *(int*)(obuf+0) = 24;
      } else if(ibuf[4] == 40){ // Treaddir
        printf("readdir\n");
        // each dirent is 25 bytes
        unsigned long offset = *(long*)(ibuf+11);
        unsigned int count = *(int*)(ibuf+19);
        int n = 0;
        if(offset == 0){
          n = 1;
          unsigned char *p0 = obuf + 11;
          unsigned char *p = p0;
          p += 13; // qid
          p += 8; // offset
          p += 1; // type
          *(short*)p = 1;
          p += 2;
          *p++ = 'x';
        }
        *(int*)(obuf+0) = 11 + n*25;
      } else if(ibuf[4] == 8){ // Tstatfs
        printf("statfs\n");
        *(int*)(obuf+0) = 67;
      } else {
        printf("%d ???\n", ibuf[4] & 0xff);
      }
      fflush(stdout);
      obuf[4] = ibuf[4] + 1; // convert Txxx to Rxxx
      *(short*)(obuf+5) = *(short*)(ibuf+5); // tag

      if(write(s1, obuf, *(int*)(obuf+0))<=0) perror("write");

      opno += 1;
    }
  }
  close(s);
}

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: p9_client_write doesn't check for negative "written" from server
  2024-12-17 15:17 p9_client_write doesn't check for negative "written" from server rtm
@ 2024-12-18  0:35 ` Dominique Martinet
  0 siblings, 0 replies; 2+ messages in thread
From: Dominique Martinet @ 2024-12-18  0:35 UTC (permalink / raw)
  To: rtm; +Cc: Eric Van Hensbergen, Latchesar Ionkov, v9fs

Hi,

rtm@csail.mit.edu wrote on Tue, Dec 17, 2024 at 10:17:11AM -0500:
> If the kernel's 9p client sends a write request, and the server returns
> a successful Rwrite reply, but with a negative "written" value, then
> this line towards the end of p9_client_write():
> 
>                 iov_iter_revert(from, count - written - iov_iter_count(from));
> 
> can pass an unroll argument to iov_iter_revert() that's larger than
> the number of bytes in the iov

Thanks for the report, I'm surprised this didn't come up sooner through
syzbot...

That is correct, written (and simlarily received in p9_client_read_once)
should be made unsigned.
They're actually compared with 'rsize' which is an int but is set from
fid->iounit which is a u32, so just making these three variables u32
will sort the issue.

The patch is trivial so would you like to send it?
If you don't want to (or there is no reply in a while) then I'll send a
patch this weekend crediting you as reported-by.
-- 
Dominique

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2024-12-18  0:35 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-12-17 15:17 p9_client_write doesn't check for negative "written" from server rtm
2024-12-18  0:35 ` Dominique Martinet

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox