* _nfs4_open_and_get_state() should check d_splice_alias() return for error
@ 2025-09-07 17:56 rtm
2025-09-07 21:08 ` Trond Myklebust
0 siblings, 1 reply; 2+ messages in thread
From: rtm @ 2025-09-07 17:56 UTC (permalink / raw)
To: Trond Myklebust, Anna Schumaker; +Cc: linux-nfs
[-- Attachment #1: Type: text/plain, Size: 3584 bytes --]
In this code in fs/nfs/nfs4proc.c, in _nfs4_open_and_get_state():
dentry = opendata->dentry;
if (d_really_is_negative(dentry)) {
struct dentry *alias;
d_drop(dentry);
alias = d_splice_alias(igrab(state->inode), dentry);
/* d_splice_alias() can't fail here - it's a non-directory */
if (alias) {
dput(ctx->dentry);
ctx->dentry = dentry = alias;
}
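d_splice_alias() can fail, returning ERR_PTR(-ELOOP). Because the code
only checks alias for NULL, the error pointer is stored in dentry (note
dentry=0xffffffffffffffd8, i.e. -40 == -ELOOP, in the backtrace below),
and then this call later on causes a crash: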
nfs_set_verifier(dentry, dir_verifier);
I've attached a demo program:
# uname -a
Linux xxx 6.17.0-rc4-00231-gc8ed9b5c02a5 #27 SMP PREEMPT_DYNAMIC Fri Sep 5 15:07:50 EDT 2025 x86_64 x86_64 x86_64 GNU/Linux
# cc nfs166c.c
# ./a.out
...
BUG: kernel NULL pointer dereference, address: 0000000000000058
Here's a gdb backtrace:
#0 nfs_set_verifier (dentry=dentry@entry=0xffffffffffffffd8, verf=verf@entry=0)
at ./include/linux/spinlock.h:351
#1 0xffffffff8043a6c2 in _nfs4_open_and_get_state (ctx=0xffffffd602f93c00,
opendata=0xffffffd60558a000) at fs/nfs/nfs4proc.c:3197
#2 _nfs4_do_open (dir=0xffffffd60288c7e0, opened=0xffffffc6002f3b4c,
c=0xffffffc6002f3a30, flags=32768, ctx=0xffffffd602f93c00) at fs/nfs/nfs4proc.c:3274
#3 nfs4_do_open (dir=dir@entry=0xffffffd60288c7e0, ctx=ctx@entry=0xffffffd602f93c00,
flags=flags@entry=32768, sattr=sattr@entry=0xffffffc6002f3b78,
label=label@entry=0x0, opened=opened@entry=0xffffffc6002f3b4c)
at fs/nfs/nfs4proc.c:3345
#4 0xffffffff8043aa92 in nfs4_atomic_open (dir=0xffffffd60288c7e0,
ctx=0xffffffd602f93c00, open_flags=32768, attr=0xffffffc6002f3b78,
opened=0xffffffc6002f3b4c) at fs/nfs/nfs4proc.c:3919
#5 0xffffffff804056c4 in nfs_atomic_open (dir=0xffffffd60288c7e0,
dentry=0xffffffd6029af9c8, file=0xffffffd604734040, open_flags=32768,
mode=<optimized out>) at fs/nfs/dir.c:2138
#6 0xffffffff802c2a8a in atomic_open (mode=0, open_flag=<optimized out>,
file=0xffffffd604734040, dentry=0xffffffd6029af9c8, nd=0xffffffc6002f3cf8)
at fs/namei.c:3528
#7 lookup_open (op=0xffffffc6002f3e30, got_write=false, file=0xffffffd604734040,
nd=0xffffffc6002f3cf8) at fs/namei.c:3687
#8 open_last_lookups (op=0xffffffc6002f3e30, file=0xffffffd604734040,
nd=0xffffffc6002f3cf8) at fs/namei.c:3815
#9 path_openat (nd=nd@entry=0xffffffc6002f3cf8, op=op@entry=0xffffffc6002f3e30,
flags=flags@entry=257) at fs/namei.c:4051
#10 0xffffffff802c3098 in do_filp_open (dfd=dfd@entry=-100,
pathname=pathname@entry=0xffffffd6027f1100, op=op@entry=0xffffffc6002f3e30)
at fs/namei.c:4081
#11 0xffffffff802ae234 in do_sys_openat2 (dfd=-100, filename=<optimized out>,
how=how@entry=0xffffffc6002f3e90) at fs/open.c:1435
#12 0xffffffff802ae5a2 in do_sys_open (mode=<optimized out>, flags=<optimized out>,
filename=<optimized out>, dfd=<optimized out>) at fs/open.c:1450
#13 __do_sys_openat (mode=<optimized out>, flags=<optimized out>,
filename=<optimized out>, dfd=<optimized out>) at fs/open.c:1466
#14 __se_sys_openat (mode=<optimized out>, flags=<optimized out>,
filename=<optimized out>, dfd=<optimized out>) at fs/open.c:1461
#15 __riscv_sys_openat (regs=<optimized out>) at fs/open.c:1461
#16 0xffffffff812197ee in syscall_handler (syscall=<optimized out>,
regs=0xffffffc6002f3ee0) at ./arch/riscv/include/asm/syscall.h:112
#17 do_trap_ecall_u (regs=0xffffffc6002f3ee0) at arch/riscv/kernel/traps.c:343
Robert Morris
rtm@mit.edu
[-- Attachment #2: nfs166c.c --]
[-- Type: application/octet-stream, Size: 38434 bytes --]
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/socket.h>
#include <sys/file.h>
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <sys/wait.h>
#include <sys/resource.h>
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/vfs.h>
#include <dirent.h>
#include <assert.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/xattr.h>
// Fault injection: return an error rather than OK for the first
// GETATTR after the first open.
// doit is armed by parse_compound() and consumed (reset to 0) by
// parse_op_getattr(), which then replies with status 0x80.
int doit = 0;
// Opcode that arms doit; 18 is the opcode dispatched to parse_op_open().
int sym_op = 18;
// Decremented every time sym_op is seen; doit is armed only when this
// is 0 at that moment.
int sym_skip = 0;
// Number of times each opcode has been handled, indexed by (op & 0xff).
int opcounts[256];
// Next directory cookie handed out by parse_op_readdir().
long long next_cookie = 3;
// File handle (an index into fhnames[]) the current COMPOUND operates on.
int current_fh = 0;
// Status that parse_compound() writes into the COMPOUND reply header.
int compound_status;
// Set to 1 by parse_op_lock(); its consumer is not visible in this part
// of the file.
int send_back;
// Map file/dir names to file handles: a handle is simply the index of
// the name in this table. The table is NULL-terminated, so the last
// slot must always stay NULL.
char *fhnames[100] = {
  "",
  "tmp",
  "x",
  "y",
  "z",
  "zzz",
  0
};

// Look up name in fhnames[] and return its index (the file handle).
// If the name is unknown and create is non-zero, a copy of the name is
// appended and the new index is returned.
// Returns -1 when the name is unknown and create is zero, when the table
// is full, or when memory for the copy cannot be allocated.
int
name2fh(char *name, int create)
{
  const int nslots = (int)(sizeof(fhnames) / sizeof(fhnames[0]));
  int i;

  for(i = 0; i < nslots && fhnames[i]; i++){
    if(strcmp(name, fhnames[i]) == 0)
      return i;
  }
  if(!create)
    return -1;

  // i is now the first free slot. Refuse to use the final slot so the
  // NULL terminator the search loop relies on is never overwritten.
  if(i >= nslots - 1){
    printf("name2fh: table full, cannot add %s\n", name);
    return -1;
  }

  // Size the copy from the name itself; names can be longer than any
  // fixed small buffer.
  size_t len = strlen(name) + 1;
  char *copy = malloc(len);
  if(copy == 0){
    perror("malloc");
    return -1;
  }
  memcpy(copy, name, len);
  fhnames[i] = copy;
  return i;
}
// Request currently being parsed: the parse helpers treat the first
// ilen bytes of ibuf as valid and use ii as the read cursor.
char ibuf[16*1024];
int ilen = 0;
int ii = 0;
// Reply currently being built: oi is the write cursor into obuf, i.e.
// the number of reply bytes produced so far.
char obuf[16*1024];
int oi = 0;
// Declared by hand here rather than relying on a system header.
extern ssize_t copy_file_range(int, off_t*, int, off_t*, size_t, unsigned int);
// Read exactly n bytes from fd into xbuf, looping over short reads.
// Returns n on success. If read() fails or reports end-of-file before
// n bytes have arrived, prints a diagnostic via perror and returns -1.
int readn(int fd, void *xbuf, int n) {
  char *dst = xbuf;
  int remaining = n;

  for (;;) {
    if (remaining <= 0)
      return n;
    int got = read(fd, dst, remaining);
    if (got <= 0) {
      perror("read");
      return -1;
    }
    dst += got;
    remaining -= got;
  }
}
// Consume one big-endian 32-bit word from ibuf at the read cursor ii
// and return it in host byte order.
// If fewer than 4 bytes remain before ilen, prints a message, leaves ii
// unchanged and returns 0xffffffff.
unsigned int
parse32()
{
  // Require a whole word, not just one remaining byte, so a truncated
  // request can never make us read past the valid input.
  if(ilen - ii < 4){
    printf("parsed beyond the end of the input\n");
    return 0xffffffff;
  }
  // memcpy instead of a pointer cast: ibuf is a char array with no
  // alignment guarantee for an int access.
  unsigned int x;
  memcpy(&x, ibuf + ii, sizeof(x));
  ii += 4;
  return ntohl(x);
}
// Consume a 64-bit value encoded as two consecutive 32-bit words,
// most-significant word first.
unsigned long long
parse64()
{
  // The high word comes first on the wire, so it must be read first.
  unsigned long long value = parse32();
  value <<= 32;
  value |= parse32();
  return value;
}
// sessionid4 -- 16 bytes
void
parse_sid(char *sid)
{
for(int i = 0; i < 16; i++){
if(sid)
sid[i] = ibuf[ii];
ii++;
}
}
unsigned int
parse_opaque(char *buf)
{
if(buf)
buf[0] = 0;
int nominal_n = parse32();
if(nominal_n > 4096){
printf("crazy opaque length %d\n", nominal_n);
return 0;
}
int real_n = nominal_n;
while((real_n%4) != 0) real_n += 1;
for(int i = 0; i < real_n; i++){
if(buf && i < real_n)
buf[i] = ibuf[ii];
ii++;
}
return nominal_n;
}
// Append x to the reply buffer as a big-endian 32-bit word.
// The write cursor must be 4-byte aligned and the word must fit in
// obuf; both are enforced with assert.
void
put32(unsigned int x)
{
  assert((oi % 4) == 0);
  // refuse to run off the end of the reply buffer
  assert(oi >= 0 && (size_t)oi + 4 <= sizeof(obuf));
  // memcpy instead of a pointer cast: obuf is a char array
  unsigned int be = htonl(x);
  memcpy(obuf + oi, &be, 4);
  oi += 4;
}
// Append x to the reply as two 32-bit words, most-significant first.
void
put64(unsigned long long x)
{
  unsigned int hi = (unsigned int)(x >> 32);
  unsigned int lo = (unsigned int)x;

  put32(hi);
  put32(lo);
}
void
put_opaque(int n, char *buf)
{
int nn = n;
put32(nn);
for(int i = 0; i < n; i++)
obuf[oi++] = (buf ? buf[i] : 0);
while(n & 3){
obuf[oi++] = 0;
n++;
}
}
void
put_sid(char *sid)
{
for(int i = 0; i < 16; i++){
obuf[oi++] = (sid ? sid[i] : 0);
}
}
// Handler used by parse_rpc() for RPC procedure 0: there are no
// arguments to parse and nothing is added to the reply.
void
parse_nop()
{
}
// Handle EXCHANGE_ID (op 42): skip over the client's arguments and
// append a fixed EXCHANGE_ID4res to the reply.
void
parse_op_exchange_id()
{
  parse32(); // verifier4, first half
  parse32(); // verifier4, second half
  parse_opaque(0); // eia_clientowner
  int cflags = parse32(); // eia_flags
  printf("cflags 0x%x\n", cflags);
  parse32(); // state_protect4_a.spa_how, assume SP4_NONE
  int nimpl = parse32(); // length of client_impl_id
  for(int impli = 0; impli < nimpl; impli++){
    // The implementation-id strings are not used, so skip them rather
    // than copying them into a small stack buffer that parse_opaque
    // (which accepts up to 4096 bytes) could overrun.
    parse_opaque(0); // nii_domain
    parse_opaque(0); // nii_name
    parse64(); // 1/2 of nfstime4
    parse32(); // 1/2 of nfstime4
  }

  // finish EXCHANGE_ID4res
  put32(0); // eir_status = NFS4_OK
  put64(1); // clientid4
  put32(1); // sequenceid4
  //int sflags = 0x103 | 0x10000; // EXCHGID4_FLAG_USE_NON_PNFS
  int sflags = 0x103 | 0x20000 | 0x40000 | 0x80000000; // try to enable pNFS
  put32(sflags); // eir_flags
  put32(0); // state_protect4_r.spr_how = SP4_NONE
  put64(1); // server_owner4.so_minor_id
  put32(4); // length of so_major_id<>
  put32(0x11223344); // so_major_id<>
  put32(4); // length of eir_server_scope
  put32(0x11223344);
  put32(1); // length of eir_server_impl_id<1>
  put32(4); // nfs_impl_id4.nii_domain
  put32(0x11223344);
  put32(4); // nfs_impl_id4.nii_name
  put32(0x11223344);
  put64(0); // nii_date 1/2
  put32(0); // nii_date 2/2
}
// Handle CREATE_SESSION (op 43): read the client's sequence number and
// channel attributes, then reply OK echoing those attributes back.
void
parse_op_create_session()
{
  int chan[2][6]; // [0] = csa_fore_chan_attrs, [1] = csa_back_chan_attrs

  parse64(); // csa_clientid
  int sequence = parse32(); // csa_sequence
  parse32(); // csa_flags

  int c = 0;
  while(c < 2){
    int f = 0;
    while(f < 6){
      chan[c][f] = parse32();
      f++;
    }
    parse_opaque(0); // ca_rdma_ird<1>
    c++;
  }

  // XXX the rest of the arguments are not parsed; skip to the end
  ii = ilen;

  put32(0); // OK
  int w = 4;
  while(w-- > 0)
    put32(1); // csr_sessionid, one of four words
  put32(sequence); // csr_sequence
  put32(0x3); // csr_flags

  for(c = 0; c < 2; c++){
    for(int f = 0; f < 6; f++)
      put32(chan[c][f]);
    put32(0); // ca_rdma_ird
  }
}
// Handle SETCLIENTID (op 35): skip the arguments and reply OK with a
// fixed client id and verifier.
void
parse_op_setclientid()
{
  // verifier4: two words
  for(int w = 0; w < 2; w++)
    parse32();
  parse_opaque(0); // id<>
  parse32(); // cb_client4.cb_program
  // clientaddr4: r_netid, then r_addr
  for(int s = 0; s < 2; s++)
    parse_opaque(0);
  parse32(); // callback_ident

  put32(0); // OK
  put64(1); // clientid4
  put64(1); // verifier4
}
// Handle SETCLIENTID_CONFIRM (op 36): skip the clientid4 and verifier4
// (two words each) and reply OK.
void
parse_op_setclientid_confirm()
{
  for(int w = 0; w < 4; w++)
    parse32();
  put32(0); // OK
}
// Handle SEQUENCE (op 53): reply OK, echoing back the session id,
// sequence id and slot numbers the client sent.
void
parse_op_sequence()
{
  char session[16];

  parse_sid(session); // sa_sessionid
  int seqid = parse32(); // sa_sequenceid
  int slotid = parse32(); // sa_slotid
  int highest = parse32(); // sa_highest_slotid
  (void) parse32(); // sa_cachethis

  put32(0); // OK
  put_sid(session); // sr_sessionid
  put32(seqid); // sr_sequenceid
  put32(slotid); // sr_slotid
  // the client's highest slot id is used for both of these
  put32(highest); // sr_highest_slotid
  put32(highest); // sr_target_highest_slotid
  put32(0); // sr_status_flags
}
// Handle RECLAIM_COMPLETE (op 58): ignore the argument and reply OK.
void
parse_op_reclaim_complete()
{
  (void) parse32(); // rca_one_fs, ignored
  put32(0); // rcr_status = OK
}
// Handle PUTROOTFH (op 24): make handle 0 the current file handle and
// reply OK. The operation carries no arguments.
void
parse_op_putrootfh()
{
  current_fh = 0;
  put32(0); // OK
}
// Handle SECINFO_NO_NAME (op 52): reply OK with a single security
// flavor. The RPCSEC_GSS variant is compiled out.
void
parse_op_secinfo_no_name()
{
  (void) parse32(); // secinfo_style4, ignored
  put32(0); // OK
  put32(1); // one secinfo4 entry follows
#if 1
  put32(0); // flavor = AUTH_NULL
#else
  put32(6); // flavor = RPCSEC_GSS
  put32(4); // size of sec_oid4
  put32(0xffffffff);
  put32(0); // qop4
  put32(1); // rpc_gss_svc_t
#endif
}
// Handle DESTROY_SESSION (op 44): skip the session id and reply OK.
void
parse_op_destroy_session()
{
  parse_sid((char *) 0); // session id is not needed
  put32(0); // OK
}
// Handle DESTROY_CLIENTID (op 57): skip the client id and reply OK.
void
parse_op_destroy_clientid()
{
  (void) parse64(); // clientid, ignored
  put32(0); // OK
}
// Handle GETFH (op 10): reply OK followed by the current file handle,
// encoded as a 4-byte opaque holding the handle number in host byte
// order. The operation carries no arguments.
void
parse_op_getfh()
{
  int handle = current_fh;

  put32(0); // OK
  put_opaque(4, (char *) &handle); // fh
}
//
// called by getattr and readdir.
// generates a fattr4 (bitmap4 then attrlist4).
//
// xwords holds three 32-bit bitmap words selecting the attributes to
// emit; attribute number a is bit (a % 32) of word (a / 32). fh is the
// file handle the attributes describe: handles 0 and 1 are reported as
// directories, everything else as a regular file.
// Attribute numbers without a case below only produce a message and
// contribute no bytes to the attribute list.
//
void
put_fattr4(int xwords[], int fh)
{
  int words[3];
  for(int i = 0; i < 3; i++){
    words[i] = xwords[i];
  }

  int bitwords = 3;
  put32(bitwords);
  for(int i = 0; i < bitwords; i++)
    put32(words[i]);

  int leni = oi;
  put32(0); // placeholder for total length of attrs

  for(int a = 0; a < bitwords*32; a++){
    // 1u, not 1: shifting a signed 1 into bit 31 is undefined
    if((words[a/32] & (1u << (a % 32))) == 0)
      continue;

    switch(a){
    case 0:
      put32(3); // # bitmap words of supported attrs
      put32(0xffffffff);
      put32(0xffffffff);
      put32(0xffffffff);
      break;
    case 1: {
      int type = 1;
      if(fh == 0 || fh == 1)
        type = 2;
      put32(type); // NF4DIR=2 or NF4REG=1
      break;
    }
    case 2:
      put32(0); // fh_expire_type
      break;
    case 3:
      put64(0); // change
      break;
    case 4:
      put64(4096*10); // size
      break;
    case 5:
      put32(1); // link support
      break;
    case 6:
      put32(1); // symlink support
      break;
    case 8:
      put64(1); // fsid major
      put64(1); // fsid minor
      break;
    case 10:
      put32(10); // lease time
      break;
    case 11:
      put32(0); // rdattr_error
      break;
    case 12: {
      // ACL
      printf("replying with an ACL\n");
      int n = 2;
      put32(n);
      for(int i = 0; i < n; i++){
        put32(0); // type
        put32(0); // flag
        put32(0); // mask
        put_opaque(9, "bin@x.com"); // who
      }
      break;
    }
    case 13:
      put32(0xf); // aclsupport
      break;
    case 15:
      put32(1); // cansettime
      break;
    case 16:
      put32(0); // case insensitive
      break;
    case 17:
      put32(1); // case preserving
      break;
    case 18:
      put32(0); // chown_restricted
      break;
    case 19: {
      // filehandle
      int xfh = fh;
      put_opaque(4, (char*)&xfh); // fh
      break;
    }
    case 20:
      put64(fh); // fileid
      break;
    case 21:
      put64(9999); // files_avail
      break;
    case 22:
      put64(9999); // files_free
      break;
    case 23:
      put64(99999); // files_total
      break;
    case 24:
      // fs_locations
      put32(1); // # path components
      put_opaque(1, "r"); // pathname4
      put32(1); // locations<>
      put32(1); // servers<>
      put_opaque(1, "s"); // server
      put32(1); // # path components
      put_opaque(1, "x"); // rootpath
      break;
    case 26:
      put32(1); // homogeneous
      break;
    case 27:
      put64(0xffffffffffff); // max file size
      break;
    case 28:
      put32(0xffff); // max link
      break;
    case 29:
      put32(256); // max name
      break;
    case 30:
      put64(10*4096); // max read
      break;
    case 31:
      put64(10*4096); // max write
      break;
    case 33:
      put32(0777); // mode
      break;
    case 34:
      put32(1); // no_trunc
      break;
    case 35:
      put32(3); // numlinks
      break;
    case 36:
      printf("replying with an owner\n");
      put_opaque(9, "bin@x.com"); // owner
      break;
    case 37:
      put_opaque(9, "bin@x.com"); // owner_group
      break;
    case 41:
      put32(0); // rawdev major
      put32(0); // rawdev minor
      break;
    case 42:
      put64(10*1024*1024); // space_avail
      break;
    case 43:
      put64(10*1024*1024); // space_free
      break;
    case 44:
      put64(20*1024*1024); // space_total
      break;
    case 45:
      put64(4096*10); // space used
      break;
    case 47:
      put64(0); // time access seconds
      put32(0); // nseconds
      break;
    case 50:
      put64(0); // time create seconds
      put32(0); // nseconds
      break;
    case 51:
      put64(0); // time delta seconds
      put32(0); // nseconds
      break;
    case 52:
      put64(0); // time metadata seconds
      put32(0); // nseconds
      break;
    case 53:
      put64(0); // time modify seconds
      put32(0); // nseconds
      break;
    case 55:
      put64(0); // mounted_on_fileid ???
      break;
    case 62:
      // fs_layout_types
      put32(1);
      // put32(3); // LAYOUT4_BLOCK_VOLUME
      // put32(2); // LAYOUT4_OSD2_OBJECTS
      put32(1); // LAYOUT4_NFSV4_1_FILES
      break;
    case 65:
      // LAYOUT_BLKSIZE
      put32(1024);
      break;
    case 68:
      // FATTR4_MDSTHRESHOLD
      put32(1); // num
      put32(0); // l_type
      put32(3); // bitmap size
      put32(0xffffffff);
      put32(0xffffffff);
      put32(0xffffffff);
      put32(32); // attr_length
      put32(0); // threshold_hint
      put32(0);
      put32(0); // threshold_hint
      put32(0);
      put32(0); // threshold_hint
      put32(0);
      put32(0); // threshold_hint
      put32(0);
      break;
    case 75:
      // FATTR4_SUPPATTR_EXCLCREAT
      put32(3); // bitmap length
      put32(0xffffffff);
      put32(0xffffffff);
      put32(0xffffffff);
      break;
    case 77:
      // CLONE_BLKSIZE
      put32(1024);
      break;
    case 79:
      // CHANGE_ATTR_TYPE
      put32(4); // ???
      break;
    case 80:
      // FATTR4_SEC_LABEL, FATTR4_WORD2_SECURITY_LABEL
      put32(0); // lfs
      put32(0); // pi
      put32(4); // label->len
      put32(0); // ???
      break;
    case 82:
      // XATTR_SUPPORT
      put32(1);
      break;
    case 86:
      // FATTR4_OPEN_ARGUMENTS
      for(int i = 0; i < 5; i++){
        put32(1); // bitmap length
        put32(0xffffffff);
      }
      break;
    default:
      printf("unknown requested attr %d\n", a);
      break;
    }
  }

  // Go back and fill in the attribute-list length now that it is known.
  // memcpy instead of a pointer cast: obuf is a char array.
  unsigned int attrlen = htonl(oi - leni - 4);
  memcpy(obuf + leni, &attrlen, 4);
}
// Handle GETATTR (op 9): read the requested attribute bitmap and reply
// with those attributes for the current file handle.
// When the global doit flag is set, the reply status is 0x80 instead
// of OK (the attributes are still appended) and the flag is cleared.
void
parse_op_getattr()
{
  int mask[4] = { 0, 0, 0, 0 };
  int nwords = parse32();

  if(nwords < 1 || nwords > 3)
    printf("parse_op_getattr: crazy bitwords %d\n", nwords);

  // never store more than the array holds
  int limit = nwords;
  if(limit > 4)
    limit = 4;
  for(int k = 0; k < limit; k++)
    mask[k] = parse32();

  unsigned int status = 0; // OK
  if(doit){
    printf(">>> returning status=0x80 for GETATTR\n");
    status = 0x80;
    doit = 0;
  }
  put32(status);

  put_fattr4(mask, current_fh);
}
// Handle PUTFH (op 22): read a file handle, which must be exactly the
// 4-byte handle number this server hands out, make it the current file
// handle and reply OK. Any other handle size terminates the program.
void
parse_op_putfh()
{
  // parse_opaque accepts up to 4096 bytes (plus a terminator), so the
  // buffer must be at least that large to be safe against a bogus length.
  char buf[4096 + 4];
  int n = parse_opaque(buf); // fh
  if(n != 4){
    printf("op_putfh fh size %d, not 4\n", n);
    exit(1);
  }
  // memcpy instead of a pointer cast: buf is a char array
  int fh;
  memcpy(&fh, buf, sizeof(fh));
  current_fh = fh;
  put32(0); // OK
}
// Handle ACCESS (op 3): grant every right the client asks about.
void
parse_op_access()
{
  int requested = parse32(); // mask of rights to query

  put32(0); // OK
  // "supported" and "access" are both the full requested mask
  for(int k = 0; k < 2; k++)
    put32(requested);
}
// Handle LOOKUP (op 15): look the name up in the handle table.
// A known name becomes the current file handle and the reply is OK.
// An unknown name yields NFS4ERR_NOENT, and also sets the compound
// status to that error if no earlier error was recorded.
void
parse_op_lookup()
{
  // parse_opaque accepts up to 4096 bytes, so the buffer (and the
  // terminator written at name[n] below) must allow for that.
  char name[4096 + 4];
  int n = parse_opaque(name);
  name[n>=0?n:0] = '\0';
  int xfh = name2fh(name, 0);
  if(xfh < 0){
    printf("lookup %s -> ENOENT\n", name);
    put32(2); // NFS4ERR_NOENT
    if(compound_status == 0)
      compound_status = 2;
  } else {
    put32(0); // OK
    current_fh = xfh;
    printf("lookup %s -> fh %d\n", name, current_fh);
  }
}
// Handle LOOKUPP (op 16): the parent is always handle 1 (/tmp).
void
parse_op_lookupp()
{
  put32(0); // OK
  current_fh = 1; // /tmp
}
// Handle READDIR (op 26): reply with a fixed two-entry directory
// listing ("z" and "zzz"), each entry carrying the requested
// attributes, followed by end-of-directory.
void
parse_op_readdir()
{
  static char *entries[] = { "z", "zzz" };
  int mask[4];

  (void) parse64(); // cookie
  long long verifier = parse64(); // cookie verifier
  (void) parse32(); // dircount
  (void) parse32(); // maxcount

  // attr_request
  memset(mask, 0, sizeof(mask));
  int nwords = parse32();
  for(int k = 0; k < 4 && k < nwords; k++)
    mask[k] = parse32();

  put32(0); // OK
  put64(verifier); // cookieverf

  int e = 0;
  while(e < 2){
    put32(1); // *nextentry
    put64(next_cookie++); // cookie
    // NOTE(review): the name is always sent as 3 bytes, which is longer
    // than the string "z" -- confirm whether this is intentional.
    put_opaque(3, entries[e]); // name
    put_fattr4(mask, name2fh(entries[e], 1));
    e++;
  }

  put32(0); // *nextentry
  put32(1); // eof
}
// Handle OPEN (op 18).
// Parses the open arguments, resolves the file name to a handle
// (creating a table entry when the client asked for OPEN4_CREATE) and
// replies either NFS4ERR_NOENT or OK with a fixed stateid and a write
// delegation. On success with a non-empty name, the opened file becomes
// the current file handle.
// Unsupported create modes, open types or claim types terminate the
// program.
void
parse_op_open()
{
  // parse_opaque accepts up to 4096 bytes; zero the whole buffer so the
  // name is a valid C string whatever length arrives.
  char name[4096 + 4];
  memset(name, 0, sizeof(name));

  parse32(); // seqid
  parse32(); // share_access
  parse32(); // share_deny
  parse64(); // owner client id
  parse_opaque(0); // owner owner

  // openflag4
  int opentype = parse32();
  if(opentype == 1){
    // OPEN4_CREATE
    int mode = parse32(); // createhow4
    if(mode == 0){
      // UNCHECKED4
      // fattr4 createattrs: the bitmap is not used, so just skip its
      // words instead of storing them in a fixed-size array.
      int bitwords = parse32();
      for(int i = 0; i < bitwords; i++)
        parse32();
      parse_opaque(0); // attrlist4
    } else {
      printf("OPEN4_CREATE unknown mode %d\n", mode);
      exit(1);
    }
  } else if(opentype == 0){
    // OPEN4_NOCREATE
  } else {
    printf("unknown opentype %d\n", opentype);
    exit(1);
  }

  int open_claim_type = parse32();
  if(open_claim_type == 0){
    // CLAIM_NULL
    parse_opaque(name); // file name
  } else if(open_claim_type == 2){
    // CLAIM_DELEGATE_CUR
    // open_claim_delegate_cur4
    // stateid4
    parse32(); // seqid
    parse32();
    parse32();
    parse32();
    parse_opaque(name); // file name
  } else if(open_claim_type == 4){
    // CLAIM_FH
  } else {
    printf("oy, open_claim_type %d\n", open_claim_type);
    exit(1);
  }

  int xfh = name2fh(name, opentype);
  if(xfh < 0){
    printf("open(%s) ENOENT\n", name);
    put32(2); // NFS4ERR_NOENT
  } else {
    put32(0); // OK

    // stateid4
    put32(1); // seqid
    put32(1); // other
    put32(1);
    put32(1);

    // change_info4
    put32(1);
    put64(0); // before
    put64(0); // after

    put32(0); // rflags

    put32(3); // attrset bitmap length / bitmap4
    put32(0);
    put32(0);
    put32(0);

    // open_delegation4
    put32(2); // OPEN_DELEGATE_WRITE
    put32(0); // stateid4 seqid
    put32(0); // stateid4 other
    put32(0); // stateid4 other
    put32(0); // stateid4 other
    put32(0); // recall
    put32(1); // NFS_LIMIT_SIZE
    put64(10240);
    put32(0); // acetype4
    put32(0); // aceflag4
    put32(0); // acemask4
    put_opaque(4, "abcd");

    printf(" name=%s\n", name);
    if(name[0]){
      current_fh = xfh;
    } else {
      printf("op_open: no name with which to set fh\n");
    }
  }
}
// Handle SETATTR (op 34): skip the stateid and attribute values and
// reply OK, echoing the client's attribute bitmap back as the set of
// attributes that were set.
// At most 64 bitmap words are stored and echoed; any further words the
// client sends are consumed and dropped.
void
parse_op_setattr()
{
  // stateid4
  parse32(); // seqid
  parse32(); // other
  parse32(); // other
  parse32(); // other

  // fattr4
  int bitwords = parse32();
  int words[64];
  int nwords = 0;
  for(int i = 0; i < bitwords; i++){
    int w = parse32();
    // never store past the end of words[]
    if(nwords < 64)
      words[nwords++] = w;
  }
  if(bitwords > nwords)
    printf("parse_op_setattr: bitmap of %d words truncated to %d\n",
           bitwords, nwords);
  parse_opaque(0); // attrlist4

  put32(0); // OK
  put32(nwords);
  for(int i = 0; i < nwords; i++)
    put32(words[i]);
}
// Handle LAYOUTGET (op 50): skip the arguments and reply OK with a
// single LAYOUT4_NFSV4_1_FILES layout. Two alternative layout bodies
// (block and object) are kept below but compiled out.
void
parse_op_layoutget()
{
  (void) parse32(); // loga_signal_layout_avail
  (void) parse32(); // layouttype4
  (void) parse32(); // layoutiomode4
  (void) parse64(); // offset
  (void) parse64(); // length
  (void) parse64(); // minlength
  // stateid4: seqid plus three words of "other"
  for(int w = 0; w < 4; w++)
    (void) parse32();
  (void) parse32(); // count32

  put32(0); // OK
  put32(0); // return_on_close
  // stateid4: seqid plus three words of "other", all zero
  for(int w = 0; w < 4; w++)
    put32(0);
  put32(1); // # of layout4

#if 0
  // block thing, fs/nfs/blocklayout/blocklayout.c
  put64(0); // offset
  put64(1000000); // length
  put32(2); // layoutiomode4
  put32(3); // layouttype4
  // https://datatracker.ietf.org/doc/html/rfc5663
  put32(4+44+44); // size of the following "opaque"
  // for bl_alloc_lseg()
  put32(2); // count of following extents
  for(int i = 0; i < 2; i++){
    // 16 bytes of devicedid4 bex_vol_id
    for(int i = 0; i < 4; i++)
      put32(1);
    put64(0); // bex_file_offset
    put64(0); // bex_length
    put64(0); // bex_storage_offset
    put32(0); // bex_state
  }
#endif

#if 0
  put64(0); // offset
  put64(1000000); // length
  put32(2); // layoutiomode4
  put32(2); // layouttype4
  put_opaque(8, "xxxxxxxx"); // loc_body
#endif

  put64(0); // offset
  put64(1000000); // length
  put32(2); // layoutiomode4
  put32(1); // layouttype4 LAYOUT4_NFSV4_1_FILES
  put32(16 + 20); // total size of following, for filelayout_decode_layout
  // device id: 16 bytes of zero
  for(int half = 0; half < 2; half++)
    put64(0);
  put32(0x100); // nfl_util
  put32(0); // first_stripe_index
  put64(0); // pattern_offset
  put32(0); // num_fh
}
// Handle GETDEVICEINFO (op 47): skip the arguments and reply OK with a
// fixed file-layout device address describing two data servers.
void
parse_op_getdeviceinfo()
{
  // deviceid4: four words
  for(int w = 0; w < 4; w++)
    (void) parse32();
  (void) parse32(); // layouttype4
  (void) parse32(); // count4
  int nbitmap = parse32(); // # bitmapwords
  int skipped = 0;
  while(skipped < nbitmap){
    (void) parse32(); // bitmap4
    skipped++;
  }

  put32(0); // OK

  // device_addr4 gdir_device_addr
  put32(1); // layouttype4
  // da_addr_body, for nfs4_fl_alloc_deviceid_node
  put32(22*4);
  put32(2); // stripe count
  put32(0); // index
  put32(1); // index
  put32(2); // ds_num
  int ds = 0;
  while(ds < 2){
    put32(1); // mp_count
    put_opaque(3, "tcp"); // netid
    put_opaque(11, "0.0.0.0.2049.2049");
    ds++;
  }

  put32(1); // bitmap4<>
  put32(0xffffffff); // bitmap4
}
// Handle WRITE (op 38): discard the data and reply OK, reporting that
// every byte was written (unstable).
void
parse_op_write()
{
  // stateid4: four words
  for(int w = 0; w < 4; w++)
    (void) parse32();
  (void) parse64(); // offset
  (void) parse32(); // stable_how4
  int nbytes = parse_opaque(0); // data

  put32(0); // OK
  put32(nbytes); // count
  put32(0); // UNSTABLE4
  put64(1); // verifier
}
// Handle READ (op 25): whatever was asked for, reply OK with the four
// bytes "abcd" and eof set.
void
parse_op_read()
{
  // stateid4: four words
  for(int w = 0; w < 4; w++)
    (void) parse32();
  (void) parse64(); // offset
  (void) parse32(); // count

  put32(0); // OK
  put32(1); // eof
  put_opaque(4, "abcd");
}
// Handle READ_PLUS (op 68): reply OK with eof set and two content
// segments, a data segment holding "abcd" and a one-byte hole.
void
parse_op_read_plus()
{
  // stateid4: four words
  for(int w = 0; w < 4; w++)
    (void) parse32();
  (void) parse64(); // offset
  (void) parse32(); // count

  put32(0); // OK
  put32(1); // eof
  put32(2); // # of read_plus_content

  // first segment
  put32(0); // NFS4_CONTENT_DATA
  put64(0); // offset
  put_opaque(4, "abcd");

  // second segment
  put32(1); // NFS4_CONTENT_HOLE
  put64(0); // offset
  put64(1); // length
}
// Handle COMMIT (op 5): skip the range and reply OK with a fixed
// verifier.
void
parse_op_commit()
{
  (void) parse64(); // offset
  (void) parse32(); // count

  put32(0); // OK
  put64(1); // verifier4
}
// Handle CLOSE (op 4): skip the arguments and reply OK with a fixed
// stateid.
void
parse_op_close()
{
  (void) parse32(); // seqid
  // stateid4: seqid plus three words of "other"
  for(int w = 0; w < 4; w++)
    (void) parse32();

  put32(0); // OK
  // stateid4
  put32(2); // seqid
  for(int w = 0; w < 3; w++)
    put32(1); // other
}
// Handle CREATE (op 6): register the new name in the handle table,
// make it the current file handle, and reply OK echoing the client's
// attribute bitmap as the set of attributes that were set.
// At most 64 bitmap words are stored and echoed; any further words the
// client sends are consumed and dropped.
void
parse_op_create()
{
  int type = parse32(); // type

  // parse_opaque accepts up to 4096 bytes, so size the buffer for that
  char name[4096 + 4];
  memset(name, 0, sizeof(name));
  parse_opaque(name);

  // fattr4
  int bitwords = parse32();
  int words[64];
  int nwords = 0;
  for(int i = 0; i < bitwords; i++){
    int w = parse32();
    // never store past the end of words[]
    if(nwords < 64)
      words[nwords++] = w;
  }
  if(bitwords > nwords)
    printf("parse_op_create: bitmap of %d words truncated to %d\n",
           bitwords, nwords);
  parse_opaque(0); // attrlist4

  printf("create type=%d name=%s\n", type, name);
  current_fh = name2fh(name, 1);

  put32(0); // OK
  put32(1); // change_info4.atomic
  put64(1); // before
  put64(2); // after
  put32(nwords);
  for(int i = 0; i < nwords; i++)
    put32(words[i]);
}
// Handle REMOVE (op 28): skip the name and reply OK with a fixed
// change_info4. The handle table is not modified.
void
parse_op_remove()
{
  // The name is not used, so skip it rather than copying it into a
  // small stack buffer that parse_opaque (which accepts up to 4096
  // bytes) could overrun.
  parse_opaque(0); // name

  put32(0); // OK
  put32(1); // change_info4.atomic
  put64(1); // before
  put64(2); // after
}
// File handle stashed by SAVEFH and brought back by RESTOREFH.
int saved_fh;

// Handle SAVEFH (op 32): remember the current file handle and reply OK.
void
parse_op_savefh()
{
  put32(0); // OK
  saved_fh = current_fh;
}
// Handle RESTOREFH (op 31): make the saved file handle current again
// and reply OK.
void
parse_op_restorefh()
{
  put32(0); // OK
  current_fh = saved_fh;
}
// Handle RENAME (op 29): log the two names and reply OK with fixed
// change_info4 values for the source and target directories. The
// handle table is not modified.
void
parse_op_rename()
{
  // parse_opaque accepts up to 4096 bytes, so size the buffers for that
  char name1[4096 + 4], name2[4096 + 4];
  memset(name1, 0, sizeof(name1));
  memset(name2, 0, sizeof(name2));
  parse_opaque(name1);
  parse_opaque(name2);
  printf("rename %s %s\n", name1, name2);

  put32(0); // OK

  // change_info4
  put32(1);
  put64(1); // before
  put64(2); // after

  // change_info4
  put32(1);
  put64(3); // before
  put64(4); // after
}
// Handle SEEK (op 69). The reply is OK, an eof flag and an offset:
//  - at or beyond offset 32: eof set, offset unchanged;
//  - below 32, what == 0 (next data): offset + 1;
//  - below 32, otherwise (next hole): 32.
void
parse_op_seek()
{
  // stateid4: four words
  for(int w = 0; w < 4; w++)
    (void) parse32();

  long offset = parse64(); // offset
  int what = parse32();
  printf("seek offset=%ld what=%d\n", offset, what);

  put32(0); // OK

  if(offset >= 32){
    // same answer whichever kind of seek was requested
    put32(1);
    put64(offset);
    return;
  }

  put32(0);
  if(what == 0)
    put64(offset + 1); // next data?
  else
    put64(32); // next hole?
}
// Handle COPY (op 60): the first two calls reply OK, claiming that the
// requested number of bytes was copied; every later call replies with
// status 1.
void
parse_op_copy()
{
  static int ncalls = 0; // successful COPYs so far

  // two stateid4s of four words each
  for(int w = 0; w < 8; w++)
    (void) parse32();
  (void) parse64(); // offset
  (void) parse64(); // offset
  long long nbytes = parse64(); // count
  (void) parse32(); // consecutive
  (void) parse32(); // synchronous

  int nlocations = parse32();
  for(int loc = 0; loc < nlocations; loc++){
    int kind = parse32();
    // types 1 and 2 carry one string (name or url), anything else
    // carries two (network id, universal address)
    int nstrings = (kind == 1 || kind == 2) ? 1 : 2;
    while(nstrings-- > 0)
      parse_opaque(0);
  }

  if(ncalls >= 2){
    printf("too many op_copy\n");
    put32(1); // not OK
    return;
  }
  ncalls += 1;

  put32(0); // OK
  put32(0); // stateid4 callback_id<1>: zero entries
  put64(nbytes); // count
  put32(0); // stable_how UNSTABLE
  put64(1); // verifier
  put32(1); // consecutive
  put32(0); // synchronous
}
// Handle OFFLOAD_STATUS (op 67): skip the stateid and reply OK with a
// count of 1 and a single completion status of 0.
void
parse_op_offload_status()
{
  // stateid4: seqid plus three words of "other"
  for(int w = 0; w < 4; w++)
    (void) parse32();

  put32(0); // OK
  put64(1); // osr_count
  put32(1); // osr_complete
  put32(0); // nfsstat4
}
// Handle CLONE (op 71): skip the arguments and reply with status 0.
void
parse_op_clone()
{
  // two stateid4s (seqid plus three words of "other" each)
  for(int w = 0; w < 8; w++)
    (void) parse32();
  (void) parse64(); // src offset
  (void) parse64(); // dst offset
  (void) parse64(); // count

  put32(0); // nfsstat4
}
// Handle LOCK (op 12): skip the arguments (whose layout depends on
// whether a new lock owner is supplied), reply OK with a fixed stateid,
// and set the global send_back flag.
void
parse_op_lock()
{
  (void) parse32(); // lock type
  (void) parse32(); // reclaim
  (void) parse64(); // offset
  (void) parse64(); // length

  int new_owner = parse32(); // lock4 new_lock_owner
  if(!new_owner){
    // existing lock owner: stateid4 then lock seqid
    for(int w = 0; w < 4; w++)
      (void) parse32();
    (void) parse32(); // seqid
  } else {
    (void) parse32(); // open_seqid
    // stateid4
    for(int w = 0; w < 4; w++)
      (void) parse32();
    (void) parse32(); // lock_seqid
    (void) parse64(); // clientid
    parse_opaque(0); // owner
  }

  put32(0); // OK
  // stateid4: seqid plus three words of "other"
  for(int w = 0; w < 4; w++)
    put32(1);

  send_back = 1;
}
// Handle LOCKU (op 14): skip the arguments and reply OK with a fixed
// stateid.
void
parse_op_locku()
{
  (void) parse32(); // lock type
  (void) parse32(); // seqid
  // stateid4: seqid plus three words of "other"
  for(int w = 0; w < 4; w++)
    (void) parse32();
  (void) parse64(); // offset
  (void) parse64(); // length

  put32(0); // OK
  // stateid4: seqid plus three words of "other"
  for(int w = 0; w < 4; w++)
    put32(1);
}
// Handle FREE_STATEID (op 45): skip the stateid and reply OK.
void
parse_op_free_stateid()
{
  // stateid4: seqid plus three words of "other"
  for(int w = 0; w < 4; w++)
    (void) parse32();
  put32(0); // OK
}
// Handle an NFSv4 COMPOUND call: parse the header, then run each
// operation's handler in turn; every handler consumes its arguments
// from the request and appends its result to the reply.
//
// The compound status in the reply header is written as a placeholder
// first and patched at the end with whatever the handlers left in
// compound_status.
//
// Fault injection: each time an operation equal to sym_op is seen,
// sym_skip is decremented, and if it was 0 beforehand the doit flag is
// armed.
//
// Processing stops early at an unknown opcode, because without knowing
// the operation its argument length cannot be determined.
void
parse_compound()
{
  // parse_opaque accepts up to 4096 bytes, so size the buffer for that
  char tag[4096 + 4];
  int taglen = parse_opaque(tag); // tag
  int cmin = parse32(); // client minor version
  if(cmin != 2){
    printf("hmm, client minor %d\n", cmin);
  }
  int nops = parse32();

  // start a COMPOUND4res
  int status_oi = oi;
  compound_status = 0;
  put32(compound_status); // place-holder
  put_opaque(taglen, tag);
  put32(nops); // length of resarray<>

  int opindex = 0;
  for(opindex = 0; opindex < nops && ii < ilen; opindex++){
    int op = parse32();
    printf("op %d #%d\n", op, opcounts[op&0xff]);
    put32(op); // resop in nfs_resop4

    if(sym_op == op){
      if(sym_skip == 0) doit = 1;
      sym_skip -= 1;
    }

    int known = 1;
    switch(op){
    case 42: parse_op_exchange_id(); break;
    case 35: parse_op_setclientid(); break;
    case 36: parse_op_setclientid_confirm(); break;
    case 43: parse_op_create_session(); break;
    case 53: parse_op_sequence(); break;
    case 58: parse_op_reclaim_complete(); break;
    case 24: parse_op_putrootfh(); break;
    case 52: parse_op_secinfo_no_name(); break;
    case 44: parse_op_destroy_session(); break;
    case 57: parse_op_destroy_clientid(); break;
    case 10: parse_op_getfh(); break;
    case 9:  parse_op_getattr(); break;
    case 22: parse_op_putfh(); break;
    case 3:  parse_op_access(); break;
    case 15: parse_op_lookup(); break;
    case 16: parse_op_lookupp(); break;
    case 26: parse_op_readdir(); break;
    case 18: parse_op_open(); break;
    case 45: parse_op_free_stateid(); break;
    case 12: parse_op_lock(); break;
    case 14: parse_op_locku(); break;
    case 6:  parse_op_create(); break;
    case 28: parse_op_remove(); break;
    case 4:  parse_op_close(); break;
    case 34: parse_op_setattr(); break;
    case 50: parse_op_layoutget(); break;
    case 47: parse_op_getdeviceinfo(); break;
    case 38: parse_op_write(); break;
    case 25: parse_op_read(); break;
    case 68: parse_op_read_plus(); break;
    case 5:  parse_op_commit(); break;
    case 32: parse_op_savefh(); break;
    case 31: parse_op_restorefh(); break;
    case 29: parse_op_rename(); break;
    case 69: parse_op_seek(); break;
    case 60: parse_op_copy(); break;
    case 67: parse_op_offload_status(); break;
    case 71: parse_op_clone(); break;
    default:
      printf("unknown op %d\n", op);
      known = 0;
      break;
    }
    // cannot continue to the next op since
    // we don't know how long this one is.
    if(!known)
      break;

    opcounts[op&0xff] += 1;
  }

  // Patch the real status over the placeholder.
  // memcpy instead of a pointer cast: obuf is a char array.
  unsigned int status_be = htonl(compound_status);
  memcpy(obuf + status_oi, &status_be, 4);

  if(opindex != nops)
    printf("compound with %d nops but only enough bytes for %d\n", nops, opindex);

  if(ii != ilen)
    printf("compound consumed only %d of %d bytes\n", ii, ilen);
}
// Handle one SUN RPC message held in ibuf.
// A REPLY (an answer to one of our backchannel calls) is only logged.
// A CALL gets an accepted/success reply header appended to obuf, and is
// then dispatched: procedure 0 to parse_nop(), procedure 1 of program
// 100003 to parse_compound(); anything else is reported as unknown.
void
parse_rpc()
{
  // SUN RPC
  int xid = parse32();
  int mtype = parse32(); // mtype, 0=CALL, 1=REPLY

  if(mtype == 1){
    // rpc reply
    (void) parse32(); // MSG_ACCEPTED
    (void) parse32(); // verf flavor
    parse_opaque(0); // verf
    int status_a = parse32(); // status
    int status_b = parse32(); // status
    parse_opaque(0);
    int reply_nops = parse32();
    int first_op = parse32();
    printf("got a backchannel reply, stat %d %d, nops %d op1 %d\n", status_a, status_b, reply_nops, first_op);
    return;
  }

  (void) parse32(); // rpc version
  int progno = parse32(); // prog#
  int progvers = parse32(); // prog vers
  int proc = parse32();
  (void) parse32(); // cred type
  parse_opaque(0); // cred
  (void) parse32(); // verf type
  parse_opaque(0); // verf

  put32(xid);
  put32(1); // REPLY
  // MSG_ACCEPTED, opaque_auth flavor = AUTH_NULL, opaque_auth length,
  // SUCCESS -- all zero
  for(int w = 0; w < 4; w++)
    put32(0);

  if(proc == 0){
    parse_nop();
    return;
  }
  if(progno == 100003 && proc == 1){
    parse_compound();
    return;
  }
  printf("unknown rpc progno=%d progvers=%d proc=%d\n",
         progno, progvers, proc);
}
// Append the header of an outgoing RPC CALL for program prog, version
// 1, procedure proc. Procedure 0 is sent with empty credentials; any
// other procedure is sent with AUTH_SYS credentials for "localhost",
// uid/gid 65534. The verifier is always empty.
void
put_rpc_header(int prog, int proc)
{
  // NOTE(review): the xid is a plain local, so every call is sent with
  // xid 1 -- confirm whether a running counter was intended.
  int xid = 1;

  put32(xid);
  put32(0); // mtype=CALL
  put32(2); // rpc version
  put32(prog); // prog # -- nfs v4 callback
  put32(1); // prog vers
  put32(proc); // proc

  if(proc != 0){
    put32(1); // cred type AUTH_SYS / AUTH_UNIX
    put32(32); // cred length
    put32(0); // stamp
    put_opaque(9, "localhost");
    put32(65534); // uid
    put32(65534); // gid
    put32(0); // # gids
  } else {
    put32(0); // cred type
    put32(0); // cred len
  }

  put32(0); // verf type
  put32(0); // verf len
}
// Run a shell command via system(), deliberately ignoring its result.
void
sys(const char *cmd)
{
  // capture the result only to keep unused-result warnings quiet
  int status = system(cmd);
  (void) status;
}
int
main(){
int s = socket(AF_INET, SOCK_STREAM, 0);
struct sockaddr_in sin;
memset(&sin, 0, sizeof(sin));
sin.sin_family = AF_INET;
sin.sin_port = htons(2049);
int yes = 1;
setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes));
if(bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0){
perror("bind"); exit(1);
}
listen(s, 10);
sync();
int pid1 = fork();
if(pid1 == 0){
close(s);
sleep(1);
if(system("echo -n mount: ; mount -t nfs4 -o trunkdiscovery,nolock,nodev 127.0.0.1:/tmp /mnt") == 0){
if(0){
printf("statfs: "); fflush(stdout);
struct statfs sb;
int ret = statfs("/mnt/.", &sb);
if(ret < 0) perror("statfs");
sys("echo -n umount: ; umount /mnt");
}
if(0){
printf("open:\n");
int fd = open("/mnt", 0);
if(fd < 0) { perror("/mnt"); exit(1); }
off_t base = 0;
char buf[4096];
ssize_t ret;
printf("getdirentries #1:\n");
ret = getdirentries(fd, buf, sizeof(buf), &base);
if(ret < 0) perror("getdirentries");
printf("getdirentries #2:\n");
ret = getdirentries(fd, buf, sizeof(buf), &base);
if(ret < 0) perror("getdirentries");
printf("close:\n");
close(fd);
sys("echo -n umount: ; umount /mnt");
}
if(0){
int fd = open("/mnt/x", 0);
printf("open /mnt/x -> %d errno %d\n", fd, errno);
if(fd < 0) perror("/mnt/x");
close(fd);
}
if(0){
printf("create /mnt/new:\n");
int fd = open("/mnt/new", O_RDWR|O_CREAT, 0666);
if(fd < 0) perror("/mnt/new");
printf("write /mnt/new:\n");
if(write(fd, "x", 1) < 0) perror("write");
printf("fsync /mnt/new:\n");
fsync(fd);
lseek(fd, 0L, 0);
char buf[512];
printf("read /mnt/new:\n");
volatile int junk = read(fd, buf, sizeof(buf));
(void) junk;
//mkdir("/mnt/new/newnew", 0777);
//if(unlink("/mnt/new") < 0) perror("unlink");
//close(fd);
printf("flock /mnt/new:\n");
if(flock(fd, LOCK_EX) < 0) perror("flock");
printf("close /mnt/new:\n");
close(fd);
sys("echo -n umount: ; umount /mnt");
}
if(0){
printf("open:\n");
int fd = open("/mnt/x", O_CREAT|O_RDWR, 0666);
if(fd < 0) perror("open");
printf("flock:\n");
if(flock(fd, LOCK_EX) < 0) perror("flock");
printf("close:\n");
close(fd);
sys("echo -n umount: ; umount /mnt");
}
if(0){
printf("mkdir:\n");
int ret = mkdir("/mnt/x", 0777);
if(ret < 0) perror("mkdir");
printf("creat:\n");
int fd = creat("/mnt/x/e", 0666);
if(fd < 0) perror("creat");
close(fd);
printf("rmdir:\n");
if(rmdir("/mnt/x") < 0)
perror("rmdir");
sys("echo -n umount: ; umount /mnt");
}
if(0){
sys("nfs4_getfacl /mnt/x > /dev/null 2>&1");
}
if(0){
int fd = open("/mnt/x", O_RDWR|O_CREAT, 0666);
if(fd < 0) perror("open /mnt/x");
char buf[32];
memset(buf, 1, sizeof(buf));
if(write(fd, buf, sizeof(buf)) < 0) perror("write");
lseek(fd, (off_t)0, 0);
int fd2 = creat("/mnt/newnew", 0666);
if(fd2 < 0) perror("creat /mnt/newnew");
off_t o1 = 0;
off_t o2 = 0;
printf("copy:\n");
if(copy_file_range(fd, &o1, fd2, &o2, 16, 0) < 0) perror("copy_file_range");
printf("close:\n");
fsync(fd);
close(fd);
fsync(fd2);
close(fd2);
sys("echo -n umount: ; umount /mnt");
}
if(0){
struct stat sb;
int ret = stat("/mnt/x", &sb);
printf("stat /mnt/x ret=%d uid=%d\n", ret, sb.st_uid);
}
if(0){
sys("echo -n ls: ; ls -l /mnt");
sys("echo -n ls: ; ls -l /mnt/. /mnt/z");
sys("echo -n echo: ; echo hi > /mnt/x");
sys("echo -n dd: ; dd if=/mnt/y of=/dev/null bs=512 count=1");
sys("echo -n umount: ; umount /mnt");
}
if(0){
sys("echo -n mv z y: ; mv -f /mnt/z /mnt/y");
sys("echo -n mv y z: ; mv -f /mnt/z /mnt/y");
sys("echo -n umount: ; umount /mnt");
}
if(0){
sys("echo -n echo: ; echo hi > /mnt/x");
sys("echo -n umount: ; umount /mnt");
}
if(0){
sys("echo -n ls: ; ls -l /mnt");
sys("echo -n umount: ; umount /mnt");
}
if(0){
printf("open x:\n");
int fd = open("/mnt/x", O_RDWR);
if(fd < 0)
perror("/mnt/x");
printf("listxattr:\n");
char buf[128];
if(flistxattr(fd, buf, sizeof(buf)) < 0)
perror("listxattr");
// system.nfs4_acl
// system.nfs4_dacl
// system.nfs4_sacl
printf("getxattr:\n");
if(fgetxattr(fd, "system.nfs4_acl", buf, sizeof(buf)) < 0)
perror("getxattr");
printf("fsync/close:\n");
fsync(fd);
close(fd);
sys("echo -n umount: ; umount /mnt");
}
if(0){
printf("open:\n");
int fd = open("/mnt/x", 0);
if(fd < 0) perror("open");
char buf[2048];
printf("read:\n");
int n = read(fd, buf, sizeof(buf));
if(n < 0) perror("read");
close(fd);
sys("echo -n umount: ; umount /mnt");
}
if(0){
printf("open:\n");
int fd = open("/mnt/x", O_WRONLY|O_CREAT|O_TRUNC, 0666);
if(fd < 0) perror("open");
char buf[2048];
printf("write:\n");
int n = write(fd, buf, sizeof(buf));
if(n < 0) perror("write");
close(fd);
sys("echo -n umount: ; umount /mnt");
}
if(0){
sys("echo -n cp: ; cp --reflink=always /mnt/x /mnt/y");
sys("echo -n umount: ; umount /mnt");
}
if(1){
printf("open:\n");
int fd = open("/mnt/tmp", O_RDONLY);
if(fd < 0) perror("open");
printf("mkdirat:\n");
int ret = mkdirat(fd, "r", 0777);
if(ret < 0) perror("mkdirat");
printf("creat:\n");
int fd1 = creat("/mnt/tmp/r/e", 0666);
if(fd < 0) perror("creat");
close(fd1);
close(fd);
printf("rmdir:\n");
if(rmdir("/mnt/tmp/r") < 0)
perror("rmdir");
sys("echo -n umount: ; umount /mnt");
}
}
exit(0);
}
int pid2 = fork();
if(pid2 == 0){
while(1){
socklen_t sinlen = sizeof(sin);
printf("calling accept\n");
int s1 = accept(s, (struct sockaddr *) &sin, &sinlen);
printf("accept returned %d\n", s1);
if(s1 < 0) { perror("accept"); exit(1); }
while(1){
if(readn(s1, &ilen, 4) < 0) break;
ilen = ntohl(ilen);
ilen &= 0x7fffffff;
if(readn(s1, ibuf, ilen) < 0) break;
oi = ii = 0;
put32(0); // place-holder for length
parse_rpc();
*(int*)(obuf+0) = htonl((oi - 4) | 0x80000000);
if(oi > 0){
if(write(s1, obuf, oi)<=0) perror("write");
}
if(send_back){
send_back = 0;
oi = 0;
put32(0); // dummy length
put_rpc_header(0x40000000, 1);
// CB_COMPOUND
put_opaque(0, ""); // compound tag
put32(2); // minor version
put32(0); // callback_ident
put32(2); // operations in the compound
// CB_SEQUENCE
put32(11);
for(int i = 0; i < 4; i++)
put32(1); // sessionid
put32(1); // sequenceid ???
put32(0); // slotid
put32(0); // highest_slotid
put32(0); // cachethis
put32(0); // csa_referring_call_lists<>
int xoi = oi;
for(int i = 0; i < 32; i++)
put32(0xffffffff);
*(int*)(obuf+0) = htonl((oi - 4) | 0x80000000);
if(write(s1, obuf, oi)<=0) perror("write");
}
}
close(s1);
}
exit(1);
}
close(s);
sleep(10);
kill(pid1, 9);
kill(pid2, 9);
sleep(1);
}
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: _nfs4_open_and_get_state() should check d_splice_alias() return for error
2025-09-07 17:56 _nfs4_open_and_get_state() should check d_splice_alias() return for error rtm
@ 2025-09-07 21:08 ` Trond Myklebust
0 siblings, 0 replies; 2+ messages in thread
From: Trond Myklebust @ 2025-09-07 21:08 UTC (permalink / raw)
To: rtm, Anna Schumaker; +Cc: linux-nfs
On Sun, 2025-09-07 at 13:56 -0400, rtm@csail.mit.edu wrote:
> [You don't often get email from rtm@csail.mit.edu. Learn why this is
> important at https://aka.ms/LearnAboutSenderIdentification ]
>
> In this code in fs/nfs/nfs4proc.c _nfs4_open_and_get_state():
>
> dentry = opendata->dentry;
> if (d_really_is_negative(dentry)) {
> struct dentry *alias;
> d_drop(dentry);
> alias = d_splice_alias(igrab(state->inode), dentry);
> /* d_splice_alias() can't fail here - it's a non-directory */
> if (alias) {
> dput(ctx->dentry);
> ctx->dentry = dentry = alias;
> }
>
> d_splice_alias() can fail, returning ERR_PTR(-ELOOP). Then this call
> later on causes a crash:
No. It can't return ELOOP.
The reason why it can't is that OP_OPEN is not allowed to return
NFS4_OK for anything other than a regular file. Hence the server cannot
return a directory, or anything which could otherwise be an ancestor to
'dentry'. Hence the comment above...
IOW: Your implementation of OP_OPEN is broken because it only returns
either NFS4ERR_NOENT or NFS4_OK, whereas in this case it should clearly
have returned NFS4ERR_ISDIR.
--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trondmy@kernel.org, trond.myklebust@hammerspace.com
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2025-09-07 21:08 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-09-07 17:56 _nfs4_open_and_get_state() should check d_splice_alias() return for error rtm
2025-09-07 21:08 ` Trond Myklebust
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox