From mboxrd@z Thu Jan 1 00:00:00 1970 From: Oleg Drokin Subject: Re: nfsd-fh: found a name that I didn't expect Date: Sat, 29 Mar 2003 20:37:11 +0300 Message-ID: <20030329203711.A16762@namesys.com> References: <200303281828.55093.bernd-schubert@web.de> <200303291509.23681.bernd-schubert@web.de> <20030329171321.A15899@namesys.com> <200303291606.54910.bernd-schubert@web.de> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="2fHTh5uZTiUOsy+g" Return-path: list-help: list-unsubscribe: list-post: Errors-To: flx@namesys.com Content-Disposition: inline In-Reply-To: <200303291606.54910.bernd-schubert@web.de> List-Id: To: Bernd Schubert Cc: reiserfs-list@namesys.com --2fHTh5uZTiUOsy+g Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Hello! On Sat, Mar 29, 2003 at 04:06:54PM +0100, Bernd Schubert wrote: > > The problems seems to have something to do with strange dentries appearing > > in dentry cache, also you have verified that this is not because of fs > > error. > > What kernel are you running? > 2.4.20 with the ptrace patch. Ok. > > I think may be race with iget4 might cause this, but I am not sure. > > We have a patch for this iget4 race, and if you are willing to test it, I > > can send it to you. > Yes of course I am. I hope the other admins won't kill me, but I guess they > know that I willing to do experiments from time to time ;-) > Could something worse happen when I try the patch, that could not be fixed by > simple rebooting ? See the patch below. It should not break anything. > The problems is to detect if this is really caused by this race. Since I still Well, the race is of the type where several parallel processes try to access a file whose inode is not in memory. This resembles what you describe, with lots of clients accessing the same file over NFS at the same time.
> Update: While I was writing and thinking about how to trigger it, I just again > saw the problem for /usr/bin/uptime and another file and again a reboot could > fix it for those files. Bye, Oleg --2fHTh5uZTiUOsy+g Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="iget5_locked_2.4.20.diff" ===== Documentation/filesystems/Locking 1.5 vs edited ===== --- 1.5/Documentation/filesystems/Locking Thu Sep 5 00:14:54 2002 +++ edited/Documentation/filesystems/Locking Fri Mar 21 15:23:44 2003 @@ -112,7 +112,7 @@ remount_fs: yes yes maybe (see below) umount_begin: yes no maybe (see below) -->read_inode() is not a method - it's a callback used in iget()/iget4(). +->read_inode() is not a method - it's a callback used in iget(). rules for mount_sem are not too nice - it is going to die and be replaced by better scheme anyway. ===== fs/Makefile 1.16 vs edited ===== --- 1.16/fs/Makefile Thu Sep 12 04:00:00 2002 +++ edited/fs/Makefile Fri Mar 21 15:23:46 2003 @@ -7,7 +7,7 @@ O_TARGET := fs.o -export-objs := filesystems.o open.o dcache.o buffer.o +export-objs := filesystems.o open.o dcache.o buffer.o inode.o mod-subdirs := nls obj-y := open.o read_write.o devices.o file_table.o buffer.o \ ===== fs/inode.c 1.35 vs edited ===== --- 1.35/fs/inode.c Sat Mar 16 01:06:57 2002 +++ edited/fs/inode.c Fri Mar 21 16:20:29 2003 @@ -17,6 +17,7 @@ #include #include #include +#include /* * New inode.c implementation. @@ -643,7 +644,7 @@ invalidate_buffers(dev); return res; } - +EXPORT_SYMBOL(unlock_new_inode); /* * This is called with the inode lock held. It searches @@ -734,7 +735,32 @@ * by hand after calling find_inode now! This simplifies iunique and won't * add any additional branch in the common code. 
*/ -static struct inode * find_inode(struct super_block * sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque) +static struct inode * find_inode(struct super_block * sb, struct list_head *head, int (*test)(struct inode *, void *), void *data) +{ + struct list_head *tmp; + struct inode * inode; + + tmp = head; + for (;;) { + tmp = tmp->next; + inode = NULL; + if (tmp == head) + break; + inode = list_entry(tmp, struct inode, i_hash); + if (inode->i_sb != sb) + continue; + if (!test(inode, data)) + continue; + break; + } + return inode; +} + +/* + * find_inode_fast is the fast path version of find_inode, see the comment at + * iget_locked for details. + */ +static struct inode * find_inode_fast(struct super_block * sb, struct list_head *head, unsigned long ino) { struct list_head *tmp; struct inode * inode; @@ -750,8 +776,6 @@ continue; if (inode->i_sb != sb) continue; - if (find_actor && !find_actor(inode, ino, opaque)) - continue; break; } return inode; @@ -827,13 +851,28 @@ return inode; } +void unlock_new_inode(struct inode *inode) +{ + /* + * This is special! We do not need the spinlock + * when clearing I_LOCK, because we're guaranteed + * that nobody else tries to do anything about the + * state of the inode when it is locked, as we + * just created it (so there can be no old holders + * that haven't tested I_LOCK). + */ + inode->i_state &= ~(I_LOCK|I_NEW); + wake_up(&inode->i_wait); +} + + /* * This is called without the inode lock held.. Be careful. 
* * We no longer cache the sb_flags in i_flags - see fs.h * -- rmk@arm.uk.linux.org */ -static struct inode * get_new_inode(struct super_block *sb, unsigned long ino, struct list_head *head, find_inode_t find_actor, void *opaque) +static struct inode * get_new_inode(struct super_block *sb, struct list_head *head, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *data) { struct inode * inode; @@ -843,44 +882,27 @@ spin_lock(&inode_lock); /* We released the lock, so.. */ - old = find_inode(sb, ino, head, find_actor, opaque); + old = find_inode(sb, head, test, data); if (!old) { + if (set(inode, data)) + goto set_failed; + inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); list_add(&inode->i_hash, head); inode->i_sb = sb; inode->i_dev = sb->s_dev; inode->i_blkbits = sb->s_blocksize_bits; - inode->i_ino = ino; inode->i_flags = 0; atomic_set(&inode->i_count, 1); - inode->i_state = I_LOCK; + inode->i_state = I_LOCK|I_NEW; spin_unlock(&inode_lock); clean_inode(inode); - /* reiserfs specific hack right here. We don't - ** want this to last, and are looking for VFS changes - ** that will allow us to get rid of it. - ** -- mason@suse.com - */ - if (sb->s_op->read_inode2) { - sb->s_op->read_inode2(inode, opaque) ; - } else { - sb->s_op->read_inode(inode); - } - - /* - * This is special! We do not need the spinlock - * when clearing I_LOCK, because we're guaranteed - * that nobody else tries to do anything about the - * state of the inode when it is locked, as we - * just created it (so there can be no old holders - * that haven't tested I_LOCK). 
+ /* Return the locked inode with I_NEW set, the + * caller is responsible for filling in the contents */ - inode->i_state &= ~I_LOCK; - wake_up(&inode->i_wait); - return inode; } @@ -896,11 +918,53 @@ wait_on_inode(inode); } return inode; + +set_failed: + spin_unlock(&inode_lock); + destroy_inode(inode); + return NULL; +} + +/* + * get_new_inode_fast is the fast path version of get_new_inode, see the + * comment at iget_locked for details. + */ +static struct inode * get_new_inode_fast(struct super_block *sb, struct list_head *head, unsigned long ino) +{ + struct inode * inode; + + inode = alloc_inode(); + if (inode) { + struct inode * old; + + spin_lock(&inode_lock); + /* We released the lock, so.. */ + old = find_inode_fast(sb, head, ino); + if (!old) { + inode->i_ino = ino; + inodes_stat.nr_inodes++; + list_add(&inode->i_list, &inode_in_use); + list_add(&inode->i_hash, head); + inode->i_sb = sb; + inode->i_dev = sb->s_dev; + inode->i_blkbits = sb->s_blocksize_bits; + inode->i_flags = 0; + inode->i_state = I_LOCK|I_NEW; + spin_unlock(&inode_lock); + + clean_inode(inode); + /* Return the locked inode with I_NEW set, the + * caller is responsible for filling in the contents + */ + return inode; + } + } + return inode; } -static inline unsigned long hash(struct super_block *sb, unsigned long i_ino) +static inline unsigned long hash(struct super_block *sb, unsigned long hashval) { - unsigned long tmp = i_ino + ((unsigned long) sb / L1_CACHE_BYTES); + unsigned long tmp = hashval + ((unsigned long) sb / L1_CACHE_BYTES); tmp = tmp + (tmp >> I_HASHBITS); return tmp & I_HASHMASK; } @@ -932,7 +996,8 @@ retry: if (counter > max_reserved) { head = inode_hashtable + hash(sb,counter); - inode = find_inode(sb, res = counter++, head, NULL, NULL); + res = counter++; + inode = find_inode_fast(sb, head, res); if (!inode) { spin_unlock(&inode_lock); return res; @@ -960,14 +1025,63 @@ return inode; } +/** + * ifind - internal function, you want ilookup5() or iget5(). 
+ * @sb: super block of file system to search + * @hashval: hash value (usually inode number) to search for + * @test: callback used for comparisons between inodes + * @data: opaque data pointer to pass to @test + * + * ifind() searches for the inode specified by @hashval and @data in the inode + * cache. This is a generalized version of ifind_fast() for file systems where + * the inode number is not sufficient for unique identification of an inode. + * + * If the inode is in the cache, the inode is returned with an incremented + * reference count. + * + * Otherwise NULL is returned. + * + * Note, @test is called with the inode_lock held, so can't sleep. + */ +static inline struct inode *ifind(struct super_block *sb, + struct list_head *head, int (*test)(struct inode *, void *), + void *data) +{ + struct inode *inode; + + spin_lock(&inode_lock); + inode = find_inode(sb, head, test, data); + if (inode) { + __iget(inode); + spin_unlock(&inode_lock); + wait_on_inode(inode); + return inode; + } + spin_unlock(&inode_lock); + return NULL; +} -struct inode *iget4(struct super_block *sb, unsigned long ino, find_inode_t find_actor, void *opaque) +/** + * ifind_fast - internal function, you want ilookup() or iget(). + * @sb: super block of file system to search + * @ino: inode number to search for + * + * ifind_fast() searches for the inode @ino in the inode cache. This is for + * file systems where the inode number is sufficient for unique identification + * of an inode. + * + * If the inode is in the cache, the inode is returned with an incremented + * reference count. + * + * Otherwise NULL is returned. 
+ */ +static inline struct inode *ifind_fast(struct super_block *sb, + struct list_head *head, unsigned long ino) { - struct list_head * head = inode_hashtable + hash(sb,ino); - struct inode * inode; + struct inode *inode; spin_lock(&inode_lock); - inode = find_inode(sb, ino, head, find_actor, opaque); + inode = find_inode_fast(sb, head, ino); if (inode) { __iget(inode); spin_unlock(&inode_lock); @@ -975,27 +1089,147 @@ return inode; } spin_unlock(&inode_lock); + return NULL; +} +/** + * ilookup5 - search for an inode in the inode cache + * @sb: super block of file system to search + * @hashval: hash value (usually inode number) to search for + * @test: callback used for comparisons between inodes + * @data: opaque data pointer to pass to @test + * + * ilookup5() uses ifind() to search for the inode specified by @hashval and + * @data in the inode cache. This is a generalized version of ilookup() for + * file systems where the inode number is not sufficient for unique + * identification of an inode. + * + * If the inode is in the cache, the inode is returned with an incremented + * reference count. + * + * Otherwise NULL is returned. + * + * Note, @test is called with the inode_lock held, so can't sleep. + */ +struct inode *ilookup5(struct super_block *sb, unsigned long hashval, + int (*test)(struct inode *, void *), void *data) +{ + struct list_head *head = inode_hashtable + hash(sb, hashval); + + return ifind(sb, head, test, data); +} +EXPORT_SYMBOL(ilookup5); + +/** + * ilookup - search for an inode in the inode cache + * @sb: super block of file system to search + * @ino: inode number to search for + * + * ilookup() uses ifind_fast() to search for the inode @ino in the inode cache. + * This is for file systems where the inode number is sufficient for unique + * identification of an inode. + * + * If the inode is in the cache, the inode is returned with an incremented + * reference count. + * + * Otherwise NULL is returned. 
+ */ +struct inode *ilookup(struct super_block *sb, unsigned long ino) +{ + struct list_head *head = inode_hashtable + hash(sb, ino); + + return ifind_fast(sb, head, ino); +} +EXPORT_SYMBOL(ilookup); + +/** + * iget5_locked - obtain an inode from a mounted file system + * @sb: super block of file system + * @hashval: hash value (usually inode number) to get + * @test: callback used for comparisons between inodes + * @set: callback used to initialize a new struct inode + * @data: opaque data pointer to pass to @test and @set + * + * This is iget() without the read_inode() portion of get_new_inode(). + * + * iget5_locked() uses ifind() to search for the inode specified by @hashval + * and @data in the inode cache and if present it is returned with an increased + * reference count. This is a generalized version of iget_locked() for file + * systems where the inode number is not sufficient for unique identification + * of an inode. + * + * If the inode is not in cache, get_new_inode() is called to allocate a new + * inode and this is returned locked, hashed, and with the I_NEW flag set. The + * file system gets to fill it in before unlocking it via unlock_new_inode(). + * + * Note both @test and @set are called with the inode_lock held, so can't sleep. + */ +struct inode *iget5_locked(struct super_block *sb, unsigned long hashval, + int (*test)(struct inode *, void *), + int (*set)(struct inode *, void *), void *data) +{ + struct list_head *head = inode_hashtable + hash(sb, hashval); + struct inode *inode; + + inode = ifind(sb, head, test, data); + if (inode) + return inode; /* * get_new_inode() will do the right thing, re-trying the search * in case it had to block at any point. 
*/ - return get_new_inode(sb, ino, head, find_actor, opaque); + return get_new_inode(sb, head, test, set, data); +} +EXPORT_SYMBOL(iget5_locked); + +/** + * iget_locked - obtain an inode from a mounted file system + * @sb: super block of file system + * @ino: inode number to get + * + * This is iget() without the read_inode() portion of get_new_inode_fast(). + * + * iget_locked() uses ifind_fast() to search for the inode specified by @ino in + * the inode cache and if present it is returned with an increased reference + * count. This is for file systems where the inode number is sufficient for + * unique identification of an inode. + * + * If the inode is not in cache, get_new_inode_fast() is called to allocate a + * new inode and this is returned locked, hashed, and with the I_NEW flag set. + * The file system gets to fill it in before unlocking it via + * unlock_new_inode(). + */ +struct inode *iget_locked(struct super_block *sb, unsigned long ino) +{ + struct list_head *head = inode_hashtable + hash(sb, ino); + struct inode *inode; + + inode = ifind_fast(sb, head, ino); + if (inode) + return inode; + /* + * get_new_inode_fast() will do the right thing, re-trying the search + * in case it had to block at any point. + */ + return get_new_inode_fast(sb, head, ino); } +EXPORT_SYMBOL(iget_locked); /** - * insert_inode_hash - hash an inode + * __insert_inode_hash - hash an inode * @inode: unhashed inode + * @hashval: unsigned long value used to locate this object in the + * inode_hashtable. * * Add an inode to the inode hash for this superblock. If the inode * has no superblock it is added to a separate anonymous chain. 
*/ -void insert_inode_hash(struct inode *inode) +void __insert_inode_hash(struct inode *inode, unsigned long hashval) { struct list_head *head = &anon_hash_chain; if (inode->i_sb) - head = inode_hashtable + hash(inode->i_sb, inode->i_ino); + head = inode_hashtable + hash(inode->i_sb, hashval); spin_lock(&inode_lock); list_add(&inode->i_hash, head); spin_unlock(&inode_lock); ===== fs/coda/cnode.c 1.8 vs edited ===== --- 1.8/fs/coda/cnode.c Wed May 29 19:20:33 2002 +++ edited/fs/coda/cnode.c Fri Mar 21 15:23:49 2003 @@ -27,11 +27,6 @@ return 1; } -static int coda_inocmp(struct inode *inode, unsigned long ino, void *opaque) -{ - return (coda_fideq((ViceFid *)opaque, &(ITOC(inode)->c_fid))); -} - static struct inode_operations coda_symlink_inode_operations = { readlink: page_readlink, follow_link: page_follow_link, @@ -62,29 +57,46 @@ init_special_inode(inode, inode->i_mode, attr->va_rdev); } +static int coda_test_inode(struct inode *inode, void *data) +{ + ViceFid *fid = (ViceFid *)data; + return coda_fideq(&(ITOC(inode)->c_fid), fid); +} + +static int coda_set_inode(struct inode *inode, void *data) +{ + ViceFid *fid = (ViceFid *)data; + ITOC(inode)->c_fid = *fid; + return 0; +} + +static int coda_fail_inode(struct inode *inode, void *data) +{ + return -1; +} + struct inode * coda_iget(struct super_block * sb, ViceFid * fid, struct coda_vattr * attr) { struct inode *inode; struct coda_inode_info *cii; - ino_t ino = coda_f2i(fid); struct coda_sb_info *sbi = coda_sbp(sb); + unsigned long hash = coda_f2i(fid); - down(&sbi->sbi_iget4_mutex); - inode = iget4(sb, ino, coda_inocmp, fid); + inode = iget5_locked(sb, hash, coda_test_inode, coda_set_inode, fid); if ( !inode ) { - CDEBUG(D_CNODE, "coda_iget: no inode\n"); - up(&sbi->sbi_iget4_mutex); return ERR_PTR(-ENOMEM); } - /* check if the inode is already initialized */ - cii = ITOC(inode); - if (coda_isnullfid(&cii->c_fid)) - /* new, empty inode found... 
initializing */ - cii->c_fid = *fid; - up(&sbi->sbi_iget4_mutex); + if (inode->i_state & I_NEW) { + cii = ITOC(inode); + /* we still need to set i_ino for things like stat(2) */ + inode->i_ino = hash; + list_add(&cii->c_cilist, &sbi->sbi_cihead); + unlock_new_inode(inode); + } + /* always replace the attributes, type might have changed */ coda_fill_inode(inode, attr); @@ -129,6 +141,7 @@ struct ViceFid *newfid) { struct coda_inode_info *cii; + unsigned long hash = coda_f2i(newfid); cii = ITOC(inode); @@ -139,17 +152,16 @@ /* XXX we probably need to hold some lock here! */ remove_inode_hash(inode); cii->c_fid = *newfid; - inode->i_ino = coda_f2i(newfid); - insert_inode_hash(inode); + inode->i_ino = hash; + __insert_inode_hash(inode, hash); } /* convert a fid to an inode. */ struct inode *coda_fid_to_inode(ViceFid *fid, struct super_block *sb) { - ino_t nr; + struct inode *inode; - struct coda_inode_info *cii; - struct coda_sb_info *sbi; + unsigned long hash = coda_f2i(fid); if ( !sb ) { printk("coda_fid_to_inode: no sb!\n"); @@ -158,47 +170,29 @@ CDEBUG(D_INODE, "%s\n", coda_f2s(fid)); - sbi = coda_sbp(sb); - nr = coda_f2i(fid); - down(&sbi->sbi_iget4_mutex); - inode = iget4(sb, nr, coda_inocmp, fid); - if ( !inode ) { - printk("coda_fid_to_inode: null from iget, sb %p, nr %ld.\n", - sb, (long)nr); - goto out_unlock; - } - - cii = ITOC(inode); + inode = iget5_locked(sb, hash, coda_test_inode, coda_fail_inode, fid); + if ( !inode ) + return NULL; - /* The inode could already be purged due to memory pressure */ - if (coda_isnullfid(&cii->c_fid)) { - inode->i_nlink = 0; - iput(inode); - goto out_unlock; - } + /* we should never see newly created inodes because we intentionally + * fail in the initialization callback */ + BUG_ON(inode->i_state & I_NEW); CDEBUG(D_INODE, "found %ld\n", inode->i_ino); - up(&sbi->sbi_iget4_mutex); return inode; - -out_unlock: - up(&sbi->sbi_iget4_mutex); - return NULL; } /* the CONTROL inode is made without asking attributes from Venus */ 
int coda_cnode_makectl(struct inode **inode, struct super_block *sb) { - int error = 0; + int error = -ENOMEM; *inode = iget(sb, CTL_INO); - if ( *inode ) { + if (*inode) { (*inode)->i_op = &coda_ioctl_inode_operations; (*inode)->i_fop = &coda_ioctl_operations; (*inode)->i_mode = 0444; error = 0; - } else { - error = -ENOMEM; } return error; ===== fs/coda/inode.c 1.9 vs edited ===== --- 1.9/fs/coda/inode.c Wed May 29 19:17:41 2002 +++ edited/fs/coda/inode.c Fri Mar 21 15:23:51 2003 @@ -34,7 +34,6 @@ /* VFS super_block ops */ static struct super_block *coda_read_super(struct super_block *, void *, int); -static void coda_read_inode(struct inode *); static void coda_clear_inode(struct inode *); static void coda_put_super(struct super_block *); static int coda_statfs(struct super_block *sb, struct statfs *buf); @@ -42,7 +41,6 @@ /* exported operations */ struct super_operations coda_super_operations = { - read_inode: coda_read_inode, clear_inode: coda_clear_inode, put_super: coda_put_super, statfs: coda_statfs, @@ -179,24 +177,6 @@ printk("Coda: Bye bye.\n"); kfree(sbi); -} - -/* all filling in of inodes postponed until lookup */ -static void coda_read_inode(struct inode *inode) -{ - struct coda_sb_info *sbi = coda_sbp(inode->i_sb); - struct coda_inode_info *cii; - - if (!sbi) BUG(); - - cii = ITOC(inode); - if (!coda_isnullfid(&cii->c_fid)) { - printk("coda_read_inode: initialized inode"); - return; - } - - cii->c_mapcount = 0; - list_add(&cii->c_cilist, &sbi->sbi_cihead); } static void coda_clear_inode(struct inode *inode) ===== fs/nfs/inode.c 1.18 vs edited ===== --- 1.18/fs/nfs/inode.c Thu Aug 15 05:05:32 2002 +++ edited/fs/nfs/inode.c Fri Mar 21 15:23:51 2003 @@ -45,7 +45,6 @@ void nfs_zap_caches(struct inode *); static void nfs_invalidate_inode(struct inode *); -static void nfs_read_inode(struct inode *); static void nfs_write_inode(struct inode *,int); static void nfs_delete_inode(struct inode *); static void nfs_put_super(struct super_block *); @@ -55,7 +54,6 
@@ static int nfs_show_options(struct seq_file *, struct vfsmount *); static struct super_operations nfs_sops = { - read_inode: nfs_read_inode, write_inode: nfs_write_inode, delete_inode: nfs_delete_inode, put_super: nfs_put_super, @@ -92,30 +90,6 @@ return nfs_fileid_to_ino_t(fattr->fileid); } -/* - * The "read_inode" function doesn't actually do anything: - * the real data is filled in later in nfs_fhget. Here we - * just mark the cache times invalid, and zero out i_mode - * (the latter makes "nfs_refresh_inode" do the right thing - * wrt pipe inodes) - */ -static void -nfs_read_inode(struct inode * inode) -{ - inode->i_blksize = inode->i_sb->s_blocksize; - inode->i_mode = 0; - inode->i_rdev = 0; - /* We can't support UPDATE_ATIME(), since the server will reset it */ - inode->i_flags |= S_NOATIME; - INIT_LIST_HEAD(&inode->u.nfs_i.read); - INIT_LIST_HEAD(&inode->u.nfs_i.dirty); - INIT_LIST_HEAD(&inode->u.nfs_i.commit); - INIT_LIST_HEAD(&inode->u.nfs_i.writeback); - NFS_CACHEINV(inode); - NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); - NFS_ATTRTIMEO_UPDATE(inode) = jiffies; -} - static void nfs_write_inode(struct inode *inode, int sync) { @@ -634,7 +608,6 @@ * do this once. (We don't allow inodes to change types.) */ if (inode->i_mode == 0) { - NFS_FILEID(inode) = fattr->fileid; inode->i_mode = fattr->mode; /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. @@ -650,9 +623,7 @@ inode->i_op = &nfs_symlink_inode_operations; else init_special_inode(inode, inode->i_mode, fattr->rdev); - memcpy(&inode->u.nfs_i.fh, fh, sizeof(inode->u.nfs_i.fh)); } - nfs_refresh_inode(inode, fattr); } struct nfs_find_desc { @@ -667,7 +638,7 @@ * i_ino. 
*/ static int -nfs_find_actor(struct inode *inode, unsigned long ino, void *opaque) +nfs_find_actor(struct inode *inode, void *opaque) { struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; struct nfs_fh *fh = desc->fh; @@ -685,6 +656,18 @@ return 1; } +static int +nfs_init_locked(struct inode *inode, void *opaque) +{ + struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; + struct nfs_fh *fh = desc->fh; + struct nfs_fattr *fattr = desc->fattr; + + NFS_FILEID(inode) = fattr->fileid; + memcpy(NFS_FH(inode), fh, sizeof(struct nfs_fh)); + return 0; +} + /* * This is our own version of iget that looks up inodes by file handle * instead of inode number. We use this technique instead of using @@ -712,7 +695,7 @@ { struct nfs_find_desc desc = { fh, fattr }; struct inode *inode = NULL; - unsigned long ino; + unsigned long hash; if ((fattr->valid & NFS_ATTR_FATTR) == 0) goto out_no_inode; @@ -722,12 +705,29 @@ goto out_no_inode; } - ino = nfs_fattr_to_ino_t(fattr); + hash = nfs_fattr_to_ino_t(fattr); - if (!(inode = iget4(sb, ino, nfs_find_actor, &desc))) + if (!(inode = iget5_locked(sb, hash, nfs_find_actor, nfs_init_locked, &desc))) goto out_no_inode; - nfs_fill_inode(inode, fh, fattr); + if (inode->i_state & I_NEW) { + inode->i_ino = hash; + inode->i_blksize = inode->i_sb->s_blocksize; + inode->i_mode = 0; + inode->i_rdev = 0; + /* We can't support UPDATE_ATIME(), since the server will reset it */ + inode->i_flags |= S_NOATIME; + INIT_LIST_HEAD(&inode->u.nfs_i.read); + INIT_LIST_HEAD(&inode->u.nfs_i.dirty); + INIT_LIST_HEAD(&inode->u.nfs_i.commit); + INIT_LIST_HEAD(&inode->u.nfs_i.writeback); + NFS_CACHEINV(inode); + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); + NFS_ATTRTIMEO_UPDATE(inode) = jiffies; + nfs_fill_inode(inode, fh, fattr); + unlock_new_inode(inode); + } else + nfs_refresh_inode(inode, fattr); dprintk("NFS: __nfs_fhget(%x/%Ld ct=%d)\n", inode->i_dev, (long long)NFS_FILEID(inode), atomic_read(&inode->i_count)); ===== fs/reiserfs/inode.c 1.39 
vs edited ===== --- 1.39/fs/reiserfs/inode.c Thu Sep 12 12:39:21 2002 +++ edited/fs/reiserfs/inode.c Fri Mar 21 15:23:52 2003 @@ -30,7 +30,7 @@ lock_kernel() ; /* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */ - if (INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ + if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ down (&inode->i_sem); journal_begin(&th, inode->i_sb, jbegin_count) ; @@ -867,7 +867,7 @@ // item version directly // -// called by read_inode +// called by read_locked_inode static void init_inode (struct inode * inode, struct path * path) { struct buffer_head * bh; @@ -1107,27 +1107,24 @@ make_bad_inode(inode); } -void reiserfs_read_inode(struct inode *inode) { - reiserfs_make_bad_inode(inode) ; +int reiserfs_init_locked_inode (struct inode * inode, void *p) +{ + struct reiserfs_iget_args *args = (struct reiserfs_iget_args *)p ; + inode->i_ino = args->objectid; + INODE_PKEY(inode)->k_dir_id = cpu_to_le32(args->dirid); + return 0; } - /* looks for stat data in the tree, and fills up the fields of in-core inode stat data fields */ -void reiserfs_read_inode2 (struct inode * inode, void *p) +void reiserfs_read_locked_inode (struct inode * inode, struct reiserfs_iget_args *args) { INITIALIZE_PATH (path_to_sd); struct cpu_key key; - struct reiserfs_iget4_args *args = (struct reiserfs_iget4_args *)p ; unsigned long dirino; int retval; - if (!p) { - reiserfs_make_bad_inode(inode) ; - return; - } - - dirino = args->objectid ; + dirino = args->dirid ; /* set version 1, version 2 could be used too, because stat data key is the same in both versions */ @@ -1140,7 +1137,7 @@ /* look for the object's stat data */ retval = search_item (inode->i_sb, &key, &path_to_sd); if (retval == IO_ERROR) { - reiserfs_warning ("vs-13070: reiserfs_read_inode2: " + reiserfs_warning ("vs-13070: reiserfs_read_locked_inode: " "i/o failure occurred trying to find 
stat data of %K\n", &key); reiserfs_make_bad_inode(inode) ; @@ -1172,7 +1169,7 @@ during mount (fs/reiserfs/super.c:finish_unfinished()). */ if( ( inode -> i_nlink == 0 ) && ! inode -> i_sb -> u.reiserfs_sb.s_is_unlinked_ok ) { - reiserfs_warning( "vs-13075: reiserfs_read_inode2: " + reiserfs_warning( "vs-13075: reiserfs_read_locked_inode: " "dead inode read from disk %K. " "This is likely to be race with knfsd. Ignore\n", &key ); @@ -1184,38 +1181,43 @@ } /** - * reiserfs_find_actor() - "find actor" reiserfs supplies to iget4(). + * reiserfs_find_actor() - "find actor" reiserfs supplies to iget5_locked(). * * @inode: inode from hash table to check - * @inode_no: inode number we are looking for - * @opaque: "cookie" passed to iget4(). This is &reiserfs_iget4_args. + * @opaque: "cookie" passed to iget5_locked(). This is &reiserfs_iget_args. * - * This function is called by iget4() to distinguish reiserfs inodes + * This function is called by iget5_locked() to distinguish reiserfs inodes * having the same inode numbers. Such inodes can only exist due to some * error condition. One of them should be bad. Inodes with identical * inode numbers (objectids) are distinguished by parent directory ids. 
* */ -static int reiserfs_find_actor( struct inode *inode, - unsigned long inode_no, void *opaque ) +int reiserfs_find_actor( struct inode *inode, void *opaque ) { - struct reiserfs_iget4_args *args; + struct reiserfs_iget_args *args; args = opaque; /* args is already in CPU order */ - return le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args -> objectid; + return (inode->i_ino == args->objectid) && + (le32_to_cpu(INODE_PKEY(inode)->k_dir_id) == args->dirid); } struct inode * reiserfs_iget (struct super_block * s, const struct cpu_key * key) { struct inode * inode; - struct reiserfs_iget4_args args ; + struct reiserfs_iget_args args ; - args.objectid = key->on_disk_key.k_dir_id ; - inode = iget4 (s, key->on_disk_key.k_objectid, - reiserfs_find_actor, (void *)(&args)); + args.objectid = key->on_disk_key.k_objectid ; + args.dirid = key->on_disk_key.k_dir_id ; + inode = iget5_locked (s, key->on_disk_key.k_objectid, + reiserfs_find_actor, reiserfs_init_locked_inode, (void *)(&args)); if (!inode) return ERR_PTR(-ENOMEM) ; + + if (inode->i_state & I_NEW) { + reiserfs_read_locked_inode(inode, &args); + unlock_new_inode(inode); + } if (comp_short_keys (INODE_PKEY (inode), key) || is_bad_inode (inode)) { /* either due to i/o error or a stale NFS handle */ ===== fs/reiserfs/super.c 1.27 vs edited ===== --- 1.27/fs/reiserfs/super.c Wed Oct 30 19:42:36 2002 +++ edited/fs/reiserfs/super.c Fri Mar 21 15:23:53 2003 @@ -381,8 +381,6 @@ struct super_operations reiserfs_sops = { - read_inode: reiserfs_read_inode, - read_inode2: reiserfs_read_inode2, write_inode: reiserfs_write_inode, dirty_inode: reiserfs_dirty_inode, delete_inode: reiserfs_delete_inode, @@ -1117,7 +1115,7 @@ int old_format = 0; unsigned long blocks; int jinit_done = 0 ; - struct reiserfs_iget4_args args ; + struct reiserfs_iget_args args ; int old_magic; struct reiserfs_super_block * rs; @@ -1194,11 +1192,17 @@ printk("clm-7000: Detected readonly device, marking FS readonly\n") ; s->s_flags |= MS_RDONLY ; } - 
args.objectid = REISERFS_ROOT_PARENT_OBJECTID ; - root_inode = iget4 (s, REISERFS_ROOT_OBJECTID, 0, (void *)(&args)); + args.objectid = REISERFS_ROOT_OBJECTID ; + args.dirid = REISERFS_ROOT_PARENT_OBJECTID ; + root_inode = iget5_locked (s, REISERFS_ROOT_OBJECTID, reiserfs_find_actor, reiserfs_init_locked_inode, (void *)(&args)); if (!root_inode) { printk ("reiserfs_read_super: get root inode failed\n"); goto error; + } + + if (root_inode->i_state & I_NEW) { + reiserfs_read_locked_inode(root_inode, &args); + unlock_new_inode(root_inode); } s->s_root = d_alloc_root(root_inode); ===== include/linux/fs.h 1.69 vs edited ===== --- 1.69/include/linux/fs.h Thu Sep 5 00:14:54 2002 +++ edited/include/linux/fs.h Fri Mar 21 16:14:25 2003 @@ -881,13 +881,6 @@ struct super_operations { void (*read_inode) (struct inode *); - /* reiserfs kludge. reiserfs needs 64 bits of information to - ** find an inode. We are using the read_inode2 call to get - ** that information. We don't like this, and are waiting on some - ** VFS changes for the real solution. 
- ** iget4 calls read_inode2, iff it is defined - */ - void (*read_inode2) (struct inode *, void *) ; void (*dirty_inode) (struct inode *); void (*write_inode) (struct inode *, int); void (*put_inode) (struct inode *); @@ -935,6 +928,7 @@ #define I_LOCK 8 #define I_FREEING 16 #define I_CLEAR 32 +#define I_NEW 64 #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) @@ -1347,11 +1341,24 @@ extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); -typedef int (*find_inode_t)(struct inode *, unsigned long, void *); -extern struct inode * iget4(struct super_block *, unsigned long, find_inode_t, void *); +extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval, + int (*test)(struct inode *, void *), void *data); +extern struct inode *ilookup(struct super_block *sb, unsigned long ino); + +extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); +extern struct inode * iget_locked(struct super_block *, unsigned long); +extern void unlock_new_inode(struct inode *); + static inline struct inode *iget(struct super_block *sb, unsigned long ino) { - return iget4(sb, ino, NULL, NULL); + struct inode *inode = iget_locked(sb, ino); + + if (inode && (inode->i_state & I_NEW)) { + sb->s_op->read_inode(inode); + unlock_new_inode(inode); + } + + return inode; } extern void clear_inode(struct inode *); @@ -1369,8 +1376,12 @@ } extern void remove_suid(struct inode *inode); -extern void insert_inode_hash(struct inode *); +extern void __insert_inode_hash(struct inode *, unsigned long hashval); extern void remove_inode_hash(struct inode *); +static inline void insert_inode_hash(struct inode *inode) { + __insert_inode_hash(inode, inode->i_ino); +} + extern struct file * get_empty_filp(void); extern void file_move(struct file *f, struct list_head *list); extern struct buffer_head * get_hash_table(kdev_t, int, int); ===== 
include/linux/reiserfs_fs.h 1.25 vs edited ===== --- 1.25/include/linux/reiserfs_fs.h Thu Sep 12 12:39:21 2002 +++ edited/include/linux/reiserfs_fs.h Fri Mar 21 16:15:38 2003 @@ -1478,8 +1478,9 @@ #define B_I_POS_UNFM_POINTER(bh,ih,pos) le32_to_cpu(*(((unp_t *)B_I_PITEM(bh,ih)) + (pos))) #define PUT_B_I_POS_UNFM_POINTER(bh,ih,pos, val) do {*(((unp_t *)B_I_PITEM(bh,ih)) + (pos)) = cpu_to_le32(val); } while (0) -struct reiserfs_iget4_args { +struct reiserfs_iget_args { __u32 objectid ; + __u32 dirid ; } ; /***************************************************************************/ @@ -1730,8 +1731,9 @@ /* inode.c */ -void reiserfs_read_inode (struct inode * inode) ; -void reiserfs_read_inode2(struct inode * inode, void *p) ; +void reiserfs_read_locked_inode(struct inode * inode, struct reiserfs_iget_args *args) ; +int reiserfs_find_actor(struct inode * inode, void *p) ; +int reiserfs_init_locked_inode(struct inode * inode, void *p) ; void reiserfs_delete_inode (struct inode * inode); void reiserfs_write_inode (struct inode * inode, int) ; struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, __u32 *data, ===== kernel/ksyms.c 1.64 vs edited ===== --- 1.64/kernel/ksyms.c Thu Sep 19 04:55:42 2002 +++ edited/kernel/ksyms.c Fri Mar 21 16:01:41 2003 @@ -140,7 +140,6 @@ EXPORT_SYMBOL(fget); EXPORT_SYMBOL(igrab); EXPORT_SYMBOL(iunique); -EXPORT_SYMBOL(iget4); EXPORT_SYMBOL(iput); EXPORT_SYMBOL(force_delete); EXPORT_SYMBOL(follow_up); @@ -524,7 +523,7 @@ EXPORT_SYMBOL(read_ahead); EXPORT_SYMBOL(get_hash_table); EXPORT_SYMBOL(get_empty_inode); -EXPORT_SYMBOL(insert_inode_hash); +EXPORT_SYMBOL(__insert_inode_hash); EXPORT_SYMBOL(remove_inode_hash); EXPORT_SYMBOL(buffer_insert_inode_queue); EXPORT_SYMBOL(buffer_insert_inode_data_queue); --2fHTh5uZTiUOsy+g--