From: rtm@csail.mit.edu
To: Eric Van Hensbergen <ericvh@kernel.org>,
Latchesar Ionkov <lucho@ionkov.net>,
Dominique Martinet <asmadeus@codewreck.org>,
v9fs@lists.linux.dev
Subject: 9p server can confuse client about FIFO vs regular file -> crash
Date: Wed, 18 Dec 2024 15:04:24 -0500 [thread overview]
Message-ID: <67125.1734552264@localhost> (raw)
[-- Attachment #1: Type: text/plain, Size: 6253 bytes --]
If a 9p client asks to creat() a new file, and the server does so but
unexpectedly claims in answer to the ensuing Tgetattr that the new
file is a FIFO, then the client's file->f_op is &pipefifo_fops, but
file->private_data points to a p9_fid, not a pipe_inode_info. This
causes trouble when pipe_*() try to use file->private_data.
Ordinarily, do_dentry_open() both sets file->f_op and calls
file->f_op->open(), which sets file->private_data consistently with
file->f_op.
But v9fs_vfs_atomic_open_dotl() calls finish_open() with the open
argument set to generic_file_open, which causes do_dentry_open() to *not*
call file->f_op->open(). And v9fs_vfs_atomic_open_dotl() sets
file->private_data to a p9_fid.
A summary:
v9fs_vfs_atomic_open_dotl()
  v9fs_get_new_inode_from_fid()
    v9fs_inode_from_fid_dotl()
      v9fs_qid_iget_dotl()
        v9fs_init_inode()
          case S_IFIFO:
            init_special_inode()
              } else if (S_ISFIFO(mode))
                inode->i_fop = &pipefifo_fops;
  finish_open(..., open=generic_file_open)
    do_dentry_open(..., open=generic_file_open)
      f->f_op = fops_get(inode->i_fop)
      if (!open)
        open = f->f_op->open;
      if (open) {
        error = open(inode, f); // calls generic_file_open, not pipe_open
  file->private_data = ofid
I've attached a demo, which first gets a mutex error because
pipe_write() thinks file->private_data ought to start with a mutex,
and then a page fault.
# uname -a
Linux xxx 6.13.0-rc3-00017-gf44d154d6e3d #13 SMP Tue Dec 17 07:03:22 EST 2024 x86_64 x86_64 x86_64 GNU/Linux
# cc 9p6c.c
# ./a.out
...
------------[ cut here ]------------
DEBUG_LOCKS_WARN_ON(lock->magic != lock)
WARNING: CPU: 3 PID: 1551 at kernel/locking/mutex.c:564 __mutex_lock.constprop.0
+0x6b9/0x990
CPU: 3 UID: 0 PID: 1551 Comm: a.out Not tainted 6.13.0-rc3-00017-gf44d154d6e3d #
13
Hardware name: FreeBSD BHYVE/BHYVE, BIOS 14.0 10/17/2021
RIP: 0010:__mutex_lock.constprop.0+0x6b9/0x990
Code: ff 85 c0 0f 84 cc f9 ff ff 8b 15 c2 5a 5d 01 85 d2 0f 85 be f9 ff ff 48 c7
c6 4d 5e c7 82 48 c7 c7 8e e1 c6 82 e8 e7 7b d6 fe <0f> 0b e9 a4 f9 ff ff 0f 0b
e9 d1 fa ff ff 48 8b 03 a8 08 0f 85 fa
RSP: 0018:ffffc90001f03d50 EFLAGS: 00010286
RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000027
RDX: ffff88842dadc848 RSI: 0000000000000001 RDI: ffff88842dadc840
RBP: ffffc90001f03de0 R08: 00000000ffffefff R09: 0000000000000001
R10: 00000000ffffefff R11: ffffffff8365b2c0 R12: ffff8881021b5a80
R13: ffff888113d33280 R14: ffffc90001f03f10 R15: 0000000000000000
FS: 00007f294d1df740(0000) GS:ffff88842dac0000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000560867bb0008 CR3: 0000000109c66004 CR4: 00000000003706f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
? __warn+0x7f/0x130
? __mutex_lock.constprop.0+0x6b9/0x990
? report_bug+0x16e/0x1a0
? prb_read_valid+0x16/0x20
? handle_bug+0x53/0x90
? exc_invalid_op+0x17/0x70
? asm_exc_invalid_op+0x1a/0x20
? __mutex_lock.constprop.0+0x6b9/0x990
? do_sys_openat2+0x78/0xc0
? set_track_prepare+0x3b/0x60
? do_sys_openat2+0x78/0xc0
? check_bytes_and_report.isra.0+0x48/0x120
pipe_write+0x48/0x660
? free_to_partial_list+0x116/0x5e0
? do_sys_openat2+0x78/0xc0
vfs_write+0x23d/0x400
ksys_write+0x67/0xe0
do_syscall_64+0x3f/0xd0
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f294d2fe574
Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d d5 ea 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89
RSP: 002b:00007fffb311e078 EFLAGS: 00000202 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 00007fffb311ea58 RCX: 00007f294d2fe574
RDX: 0000000000000001 RSI: 0000560867bad0a6 RDI: 0000000000000003
RBP: 00007fffb311e930 R08: 00007f294d3e5b20 R09: 0000000000000410
R10: 0000000000000001 R11: 0000000000000202 R12: 0000000000000001
R13: 0000000000000000 R14: 0000560867baece8 R15: 00007f294d440000
</TASK>
---[ end trace 0000000000000000 ]---
BUG: unable to handle page fault for address: 000000000002fcc0
#PF: supervisor write access in kernel mode
#PF: error_code(0x0002) - not-present page
PGD 0 P4D 0
Oops: Oops: 0002 [#1] SMP DEBUG_PAGEALLOC PTI
CPU: 3 UID: 0 PID: 1551 Comm: a.out Tainted: G W 6.13.0-rc3-00017-gf44d154d6e3d #13
Tainted: [W]=WARN
Hardware name: FreeBSD BHYVE/BHYVE, BIOS 14.0 10/17/2021
RIP: 0010:osq_lock+0x57/0xf0
Code: 00 00 00 00 00 89 42 14 87 07 85 c0 0f 84 98 00 00 00 83 e8 01 48 c7 c1 c0 fc 02 00 48 98 48 03 0c c5 c0 49 cf 82 48 89 4a 08 <48> 89 11 8b 42 10 85 c0 75 76 65 48 8b 3d 17 e6 f2 7e eb 09 f3 90
RSP: 0018:ffffc90001f03d48 EFLAGS: 00010206
RAX: fffffffffffffffe RBX: ffff888107269940 RCX: 000000000002fcc0
RDX: ffff88842daefcc0 RSI: ffff888113d332a0 RDI: ffff888113d332a0
RBP: ffffc90001f03de0 R08: 00000000ffffefff R09: 0000000000000001
R10: 00000000ffffefff R11: ffffffff8365b2c0 R12: ffff88810cc6ba00
R13: ffff888113d33280 R14: ffff888113d332a0 R15: 0000000000000000
FS: 00007f294d1df740(0000) GS:ffff88842dac0000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000000002fcc0 CR3: 0000000109c66004 CR4: 00000000003706f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
? __die+0x1e/0x60
? page_fault_oops+0x157/0x450
? __warn+0xa5/0x130
? __mutex_lock.constprop.0+0x6b9/0x990
? nbcon_get_cpu_emergency_nesting+0x5/0x30
? exc_page_fault+0x66/0x140
? asm_exc_page_fault+0x26/0x30
? osq_lock+0x57/0xf0
__mutex_lock.constprop.0+0x2b2/0x990
? do_sys_openat2+0x78/0xc0
? set_track_prepare+0x3b/0x60
? do_sys_openat2+0x78/0xc0
? check_bytes_and_report.isra.0+0x48/0x120
pipe_write+0x48/0x660
? free_to_partial_list+0x116/0x5e0
? do_sys_openat2+0x78/0xc0
vfs_write+0x23d/0x400
ksys_write+0x67/0xe0
do_syscall_64+0x3f/0xd0
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f294d2fe574
...
Kernel panic - not syncing: Fatal exception
Kernel Offset: disabled
---[ end Kernel panic - not syncing: Fatal exception ]---
Robert Morris
rtm@mit.edu
[-- Attachment #2: 9p6c.c --]
[-- Type: application/octet-stream, Size: 6910 bytes --]
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <time.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <sys/wait.h>
#include <sys/resource.h>
/*
 * Read exactly n bytes from fd into buf, looping over short reads.
 *
 * Returns n once all bytes have arrived (when n <= 0 nothing is read and n
 * itself is returned). Returns -1 if read() fails or if end-of-file is hit
 * before n bytes were received; a diagnostic goes to stderr in both cases.
 * A read interrupted by a signal (EINTR) is retried rather than reported
 * as a failure.
 */
int readn(int fd, char *buf, int n) {
    int remaining = n;

    while (remaining > 0) {
        ssize_t got = read(fd, buf, (size_t)remaining);

        if (got < 0) {
            if (errno == EINTR) {
                continue;
            }
            perror("read");
            return -1;
        }
        if (got == 0) {
            /* errno is not set at EOF, so perror() would print a stale error. */
            fprintf(stderr, "read: unexpected end of file\n");
            return -1;
        }
        remaining -= (int)got;
        buf += got;
    }
    return n;
}
/*
 * Turn a length-prefixed string (a 2-byte little-endian count followed by
 * that many bytes, with no terminator) into a freshly allocated,
 * NUL-terminated C string.
 *
 * p points at the 2-byte count. The caller owns the returned buffer and
 * should free() it. If the allocation fails the process prints a diagnostic
 * and exits, so the result is never NULL.
 */
char *
getstr(unsigned char *p)
{
    /*
     * Assemble the count byte by byte: p may be unaligned inside a message
     * buffer, and this keeps the decoding independent of host byte order.
     */
    size_t len = (size_t)p[0] | ((size_t)p[1] << 8);
    char *str = malloc(len + 1);

    if (str == NULL) {
        perror("malloc");
        exit(1);
    }
    memcpy(str, p + 2, len);
    str[len] = '\0';
    return str;
}
/*
 * Reproducer driver. Uses three processes:
 *   - a "client" child that mounts a 9p filesystem from 127.0.0.1 on /mnt,
 *     then creat()s, writes, reads and closes /mnt/b and unmounts;
 *   - a "server" child that accepts one TCP connection on port 564 and
 *     answers every request with a canned reply; reply number 7 (counting
 *     from 0), if it is a getattr, reports the file mode as S_IFIFO;
 *   - the parent, which waits until any child exits or 10 seconds pass.
 * Most reply payload bytes are simply left as 0xff; only the fields the
 * individual branches set explicitly carry chosen values.
 */
int
main(){
// Set the core-file size limit to zero for this process (and the children
// forked from it).
struct rlimit r;
r.rlim_cur = r.rlim_max = 0;
setrlimit(RLIMIT_CORE, &r);
// Listening socket for the fake server: TCP port 564, address left zeroed.
int s = socket(AF_INET, SOCK_STREAM, 0);
{ int yes = 1;
setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes));
}
struct sockaddr_in sin;
memset(&sin, 0, sizeof(sin));
sin.sin_family = AF_INET;
sin.sin_port = htons(564);
if(bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0){
perror("bind"); exit(1);
}
listen(s, 10);
// NOTE(review): presumably flushes dirty data to disk before the kernel
// crash this program is meant to provoke -- confirm intent.
sync(); sleep(1);
// ---- client child: mount the fake server and poke at a new file ----
if(fork() == 0){
close(s);
// -o ...,debug=0x10f
if(system("echo -n mount: ; mount -t 9p -o nodevmap,trans=tcp,cache=none,access=any,debug=0x0 127.0.0.1 /mnt") == 0){
system("mount | grep /mnt");
printf("open /mnt/b:\n");
int fd = creat("/mnt/b", 0777);
if(fd < 0) perror("creat");
// fd is used below even when creat() failed; the calls then just fail.
write(fd, "x", 1);
char junk[1];
read(fd, junk, 1);
printf("close /mnt/b:\n");
close(fd);
system("echo -n umount: ; umount -f /mnt");
}
exit(0);
}
// ---- server child: answer each request on a single connection ----
int spid = fork();
if(spid == 0){
socklen_t sinlen = sizeof(sin);
int s1 = accept(s, (struct sockaddr *) &sin, &sinlen);
if(s1 < 0) { perror("accept"); exit(1); }
close(s);
// opno counts requests handled so far; some branches key their reply on it.
int opno = 0;
while(1){
// Each message starts with a 4-byte total length (which includes those
// 4 bytes), followed by a 1-byte type at offset 4 and a 2-byte tag at
// offset 5. ilen is not checked against sizeof(ibuf).
char ibuf[1024];
if(readn(s1, ibuf, 4) < 0) break;
int ilen = *(int*)(ibuf+0);
if(readn(s1, ibuf+4, ilen - 4) < 0) break;
printf("%d: ", opno);
fflush(stdout);
// Default reply: all 0xff bytes, same length as the request. Branches
// below override the length and selected fields.
char obuf[sizeof(ibuf)];
memset(obuf, 0xff, sizeof(obuf));
*(int*)(obuf+0) = ilen; // length
if(ibuf[4] == 100){ // Tversion
// Echo the request back unchanged (type and tag are rewritten below).
printf("version %d %s\n", *(int*)(ibuf+7), getstr(ibuf+11));
memcpy(obuf, ibuf, ilen);
} else if(ibuf[4] == 24){ // Tgetattr (different from Tstat!)
printf("getattr\n");
// https://github.com/chaos/diod/blob/master/protocol.md
// 7-byte header plus 161 payload bytes; only mode (offset 28), uid
// (offset 32) and gid (offset 36) are set, the rest stays 0xff.
int sz = 161;
*(int*)(obuf+0) = sz + 7;
*(int*)(obuf+32) = 0; // uid
*(int*)(obuf+36) = 0; // gid
// Request number 7 gets a FIFO mode; every other getattr reports a
// directory. NOTE(review): per the accompanying report this is the
// getattr that follows the create -- depends on the client's exact
// request sequence.
if(opno == 7){
//*(int*)(obuf+28) = 0100777; // S_IFREG, rwxrwxrwx
*(int*)(obuf+28) = 010777; // S_IFIFO, rwxrwxrwx
} else {
*(int*)(obuf+28) = 0040777; // S_IFDIR, rwxrwxrwx
}
} else if(ibuf[4] == 110){ // Twalk
int nwqid = *(short*)(ibuf+15);
printf("walk %d %s\n", nwqid, nwqid?getstr(ibuf+17):"-");
if(opno == 3){
// error...
// Overwrite the request type so that the generic "type + 1" step
// below produces reply type 107; the 11-byte reply carries ENOENT
// as a 4-byte value at offset 7.
ibuf[4] = 106; // Terror
*(int*)(obuf+0) = 11;
*(int*)(obuf+7) = ENOENT;
} else {
// Success: echo the element count and return that many 13-byte
// entries, all left as 0xff.
*(short*)(obuf+7) = nwqid;
*(int*)(obuf+0) = 9 + nwqid*13;
// NOTE(review): offset 21 is the last byte of the first 13-byte
// entry (bytes 9..21); why request 24 gets a different value
// there is not evident from this file.
if(opno == 24){
*(char*)(obuf+21) = 1;
}
}
} else if(ibuf[4] == 104){ // Tattach
printf("attach\n");
*(int*)(obuf+0) = 20;
} else if(ibuf[4] == 120){ // Tclunk
printf("clunk\n");
*(int*)(obuf+0) = 7;
} else if(ibuf[4] == 30){ // Txattrwalk
printf("xattrwalk\n");
*(int*)(obuf+0) = 15;
// Writes sizeof(long) bytes at offset 7; the 15-byte reply length
// leaves room for 8.
*(long*)(obuf+7) = 2; // size
} else if(ibuf[4] == 116){ // Tread
unsigned long offset = *(long*)(ibuf+11);
unsigned int count = *(int*)(ibuf+19);
printf("read %ld %d\n", offset, count); fflush(stdout);
// n is the number of data bytes returned; stays 0 unless this is a
// read at offset 0 asking for more than 2 bytes.
int n = 0;
if(offset == 0 && count > 2){
// Lay out one record after the 11-byte reply header. The skipped
// fields keep their 0xff fill.
unsigned char *p = obuf+11;
unsigned char *p0 = p;
p += 2; // size;
p += 2; // type
p += 4; // dev
p += 1; // qid.type
p += 4; // qid.vers
p += 8; // qid.path
p += 4; // permissions
p += 4; // atime
p += 4; // mtime
p += 8; // length
// NOTE(review): p advances by 1 (not 2) after the first 2-byte
// length store, and not at all after the next three, so each 'x'
// store lands on a byte of a length field just written. Left
// exactly as in the original reproducer.
*(short*)p = 1; // name length
p++;
*p++ = 'x';
*(short*)p = 1; // owner name length
*p++ = 'x';
*(short*)p = 1; // group name length
*p++ = 'x';
*(short*)p = 1; // last modify user name length
*p++ = 'x';
n = p - p0;
printf(" >>> n=%d <<< ", n); fflush(stdout);
// Store the byte count of the record in its leading 2-byte field.
*(short*)(p0) = n;
}
*(int*)(obuf+0) = n + 11;
*(int*)(obuf+7) = n;
} else if(ibuf[4] == 12){ // Tlopen
printf("lopen\n");
*(int*)(obuf+0) = 24;
} else if(ibuf[4] == 40){ // Treaddir
printf("readdir\n");
// each dirent is 25 bytes
// One entry named "x" is returned for offset 0, none otherwise.
// count is read but not used, and the 4 bytes at obuf+7 are not
// written in this branch, so they keep the 0xff fill.
unsigned long offset = *(long*)(ibuf+11);
unsigned int count = *(int*)(ibuf+19);
int n = 0;
if(offset == 0){
n = 1;
unsigned char *p0 = obuf + 11;
unsigned char *p = p0;
p += 13; // qid
p += 8; // offset
p += 1; // type
*(short*)p = 1;
p += 2;
*p++ = 'x';
}
*(int*)(obuf+0) = 11 + n*25;
} else if(ibuf[4] == 8){ // Tstatfs
printf("statfs\n");
*(int*)(obuf+0) = 67;
} else if(ibuf[4] == 72){ // Tmkdir
printf("mkdir %s\n", getstr(ibuf + 11));
*(int*)(obuf+0) = 20;
} else if(ibuf[4] == 74){ // Trenameat
printf("renameat %s\n", getstr(ibuf + 11));
*(int*)(obuf+0) = 7;
} else if(ibuf[4] == 14){ // Tlcreate
printf("lcreate %s\n", getstr(ibuf + 11));
*(int*)(obuf+0) = 24;
} else if(ibuf[4] == 26){ // Tsetattr
printf("setattr %s\n", getstr(ibuf + 11));
*(int*)(obuf+0) = 7;
} else if(ibuf[4] == 76){ // Tunlinkat
printf("unlinkat %s\n", getstr(ibuf + 11));
*(int*)(obuf+0) = 7;
} else {
// Unrecognised request: the default reply (request-sized, 0xff
// payload) is sent.
printf("%d ???\n", ibuf[4] & 0xff);
}
fflush(stdout);
// Common tail for every reply: type is request type + 1, tag is echoed.
obuf[4] = ibuf[4] + 1; // convert Txxx to Rxxx
*(short*)(obuf+5) = *(short*)(ibuf+5); // tag
// Log the mode being sent in each getattr reply.
if(obuf[4] == 25){
printf("Rgetattr #%d: ", opno);
// https://github.com/chaos/diod/blob/master/protocol.md
printf("op %d ", obuf[4]);
printf("mode 0%o ", *(unsigned int *)(obuf+28));
printf("\n");
}
if(write(s1, obuf, *(int*)(obuf+0))<=0) perror("write");
opno += 1;
}
exit(0);
}
// ---- parent: poll every 200 ms until any child exits, or give up after
// 10 seconds ----
close(s);
time_t t0 = time(0);
while(1){
int st;
int ret = waitpid(-1, &st, WNOHANG);
if(ret > 0)
break;
usleep(200000);
time_t t1 = time(0);
if(t1 - t0 >= 10){
printf("9pnew: timeout\n");
break;
}
}
}
reply other threads:[~2024-12-18 20:04 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=67125.1734552264@localhost \
--to=rtm@csail.mit.edu \
--cc=asmadeus@codewreck.org \
--cc=ericvh@kernel.org \
--cc=lucho@ionkov.net \
--cc=v9fs@lists.linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.