All of lore.kernel.org
 help / color / mirror / Atom feed
From: "J. Bruce Fields" <bfields@fieldses.org>
To: Neil Brown <neilb@suse.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>,
	linux-nfs@vger.kernel.org, Theodore Tso <tytso@mit.edu>
Subject: Re: Thoughts about cache consistency and directories in particular.
Date: Thu, 23 Apr 2009 17:34:36 -0400	[thread overview]
Message-ID: <20090423213436.GA1906@fieldses.org> (raw)
In-Reply-To: <20090421214349.GD27411@fieldses.org>

On Tue, Apr 21, 2009 at 05:43:49PM -0400, bfields wrote:
> There's also the pre-/post- op attributes to take care of.  I'm thinking
> of applying something like the following.

Having heard no objections I'll go ahead and apply this, though I hope
we'll come up with a better solution for turning on the ext4 i_version
support in the future.  Also:

> Actually, I did this and then realized: I'm using IS_I_VERSION(inode) to
> turn on use of the i_version as the change attribute on both files and
> directories.  But it actually only makes a difference for files.
> 
> So I guess I should just be using i_version as the change attribute
> unconditionally for directories?  (Will that work on any filesystem?)

I'm still curious about this.

--b.

> commit 6bc1eaa4eb1bb8af1878be354962e6cedcaece60
> Author: J. Bruce Fields <bfields@citi.umich.edu>
> Date:   Thu Apr 16 17:33:25 2009 -0400
> 
>     nfsd: support ext4 i_version
>     
>     ext4 supports a real NFSv4 change attribute, which is bumped whenever
>     the ctime would be updated, including times when two updates arrive
>     within a jiffy of each other.  (Note that although ext4 has space for
>     nanosecond-precision ctime, the real resolution is lower: it actually
>     uses jiffies as the time-source.)  This ensures clients will invalidate
>     their caches when they need to.
>     
>     There is some fear that keeping the i_version up-to-date could have
>     performance drawbacks, so for now it's turned on only by a mount option.
>     We hope to do something better eventually.
>     
>     Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
>     Cc: Theodore Tso <tytso@mit.edu>
> 
> diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
> index 17d0dd9..01d4ec1 100644
> --- a/fs/nfsd/nfs3xdr.c
> +++ b/fs/nfsd/nfs3xdr.c
> @@ -272,6 +272,7 @@ void fill_post_wcc(struct svc_fh *fhp)
>  
>  	err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry,
>  			&fhp->fh_post_attr);
> +	fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
>  	if (err)
>  		fhp->fh_post_saved = 0;
>  	else
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 4a71fcd..12d36a7 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1490,13 +1490,41 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
>  	memcpy(p, ptr, nbytes);					\
>  	p += XDR_QUADLEN(nbytes);				\
>  }} while (0)
> -#define WRITECINFO(c)		do {				\
> -	*p++ = htonl(c.atomic);					\
> -	*p++ = htonl(c.before_ctime_sec);				\
> -	*p++ = htonl(c.before_ctime_nsec);				\
> -	*p++ = htonl(c.after_ctime_sec);				\
> -	*p++ = htonl(c.after_ctime_nsec);				\
> -} while (0)
> +
> +static void write32(__be32 **p, u32 n)
> +{
> +	*(*p)++ = n;
> +}
> +
> +static void write64(__be32 **p, u64 n)
> +{
> +	write32(p, (u32)(n >> 32));
> +	write32(p, (u32)n);
> +}
> +
> +static void write_change(__be32 **p, struct kstat *stat, struct inode *inode)
> +{
> +	if (IS_I_VERSION(inode)) {
> +		write64(p, inode->i_version);
> +	} else {
> +		write32(p, stat->ctime.tv_sec);
> +		write32(p, stat->ctime.tv_nsec);
> +	}
> +}
> +
> +static void write_cinfo(__be32 **p, struct nfsd4_change_info *c)
> +{
> +	write32(p, c->atomic);
> +	if (c->change_supported) {
> +		write64(p, c->before_change);
> +		write64(p, c->after_change);
> +	} else {
> +		write32(p, c->before_ctime_sec);
> +		write32(p, c->before_ctime_nsec);
> +		write32(p, c->after_ctime_sec);
> +		write32(p, c->after_ctime_nsec);
> +	}
> +}
>  
>  #define RESERVE_SPACE(nbytes)	do {				\
>  	p = resp->p;						\
> @@ -1849,16 +1877,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
>  			WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME);
>  	}
>  	if (bmval0 & FATTR4_WORD0_CHANGE) {
> -		/*
> -		 * Note: This _must_ be consistent with the scheme for writing
> -		 * change_info, so any changes made here must be reflected there
> -		 * as well.  (See xdr4.h:set_change_info() and the WRITECINFO()
> -		 * macro above.)
> -		 */
>  		if ((buflen -= 8) < 0)
>  			goto out_resource;
> -		WRITE32(stat.ctime.tv_sec);
> -		WRITE32(stat.ctime.tv_nsec);
> +		write_change(&p, &stat, dentry->d_inode);
>  	}
>  	if (bmval0 & FATTR4_WORD0_SIZE) {
>  		if ((buflen -= 8) < 0)
> @@ -2364,7 +2385,7 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
>  
>  	if (!nfserr) {
>  		RESERVE_SPACE(32);
> -		WRITECINFO(create->cr_cinfo);
> +		write_cinfo(&p, &create->cr_cinfo);
>  		WRITE32(2);
>  		WRITE32(create->cr_bmval[0]);
>  		WRITE32(create->cr_bmval[1]);
> @@ -2475,7 +2496,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
>  
>  	if (!nfserr) {
>  		RESERVE_SPACE(20);
> -		WRITECINFO(link->li_cinfo);
> +		write_cinfo(&p, &link->li_cinfo);
>  		ADJUST_ARGS();
>  	}
>  	return nfserr;
> @@ -2493,7 +2514,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
>  
>  	nfsd4_encode_stateid(resp, &open->op_stateid);
>  	RESERVE_SPACE(40);
> -	WRITECINFO(open->op_cinfo);
> +	write_cinfo(&p, &open->op_cinfo);
>  	WRITE32(open->op_rflags);
>  	WRITE32(2);
>  	WRITE32(open->op_bmval[0]);
> @@ -2771,7 +2792,7 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
>  
>  	if (!nfserr) {
>  		RESERVE_SPACE(20);
> -		WRITECINFO(remove->rm_cinfo);
> +		write_cinfo(&p, &remove->rm_cinfo);
>  		ADJUST_ARGS();
>  	}
>  	return nfserr;
> @@ -2784,8 +2805,8 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
>  
>  	if (!nfserr) {
>  		RESERVE_SPACE(40);
> -		WRITECINFO(rename->rn_sinfo);
> -		WRITECINFO(rename->rn_tinfo);
> +		write_cinfo(&p, &rename->rn_sinfo);
> +		write_cinfo(&p, &rename->rn_tinfo);
>  		ADJUST_ARGS();
>  	}
>  	return nfserr;
> diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
> index afa1901..8f641c9 100644
> --- a/include/linux/nfsd/nfsfh.h
> +++ b/include/linux/nfsd/nfsfh.h
> @@ -151,9 +151,15 @@ typedef struct svc_fh {
>  	__u64			fh_pre_size;	/* size before operation */
>  	struct timespec		fh_pre_mtime;	/* mtime before oper */
>  	struct timespec		fh_pre_ctime;	/* ctime before oper */
> +	/*
> +	 * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
> +	 *  to find out if it is valid.
> +	 */
> +	u64			fh_pre_change;
>  
>  	/* Post-op attributes saved in fh_unlock */
>  	struct kstat		fh_post_attr;	/* full attrs after operation */
> +	u64			fh_post_change; /* nfsv4 change; see above */
>  #endif /* CONFIG_NFSD_V3 */
>  
>  } svc_fh;
> @@ -298,6 +304,7 @@ fill_pre_wcc(struct svc_fh *fhp)
>  		fhp->fh_pre_mtime = inode->i_mtime;
>  		fhp->fh_pre_ctime = inode->i_ctime;
>  		fhp->fh_pre_size  = inode->i_size;
> +		fhp->fh_pre_change = inode->i_version;
>  		fhp->fh_pre_saved = 1;
>  	}
>  }
> diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
> index f80d601..d0f050f 100644
> --- a/include/linux/nfsd/xdr4.h
> +++ b/include/linux/nfsd/xdr4.h
> @@ -64,10 +64,13 @@ static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
>  
>  struct nfsd4_change_info {
>  	u32		atomic;
> +	bool		change_supported;
>  	u32		before_ctime_sec;
>  	u32		before_ctime_nsec;
> +	u64		before_change;
>  	u32		after_ctime_sec;
>  	u32		after_ctime_nsec;
> +	u64		after_change;
>  };
>  
>  struct nfsd4_access {
> @@ -503,10 +506,16 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
>  {
>  	BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
>  	cinfo->atomic = 1;
> -	cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
> -	cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
> -	cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
> -	cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
> +	cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
> +	if (cinfo->change_supported) {
> +		cinfo->before_change = fhp->fh_pre_change;
> +		cinfo->after_change = fhp->fh_post_change;
> +	} else {
> +		cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
> +		cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
> +		cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
> +		cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
> +	}
>  }
>  
>  int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);

  reply	other threads:[~2009-04-23 21:34 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-02-20  2:18 Thoughts about cache consistency and directories in particular Neil Brown
2009-02-20 18:23 ` J. Bruce Fields
2009-02-20 19:47   ` Neil Brown
     [not found]     ` <18847.2286.101191.989726-wvvUuzkyo1EYVZTmpyfIwg@public.gmane.org>
2009-02-20 20:14       ` J. Bruce Fields
2009-02-20 21:04         ` Neil Brown
     [not found]           ` <18847.6886.50844.260910-wvvUuzkyo1EYVZTmpyfIwg@public.gmane.org>
2009-04-21 21:43             ` J. Bruce Fields
2009-04-23 21:34               ` J. Bruce Fields [this message]
2009-04-23 21:52                 ` Trond Myklebust
     [not found]                   ` <1240523577.8583.13.camel-rJ7iovZKK19ZJLDQqaL3InhyD016LWXt@public.gmane.org>
2009-04-23 22:07                     ` J. Bruce Fields
2009-04-23 22:24                       ` Trond Myklebust
     [not found] ` <18846.4842.625445.980681-wvvUuzkyo1EYVZTmpyfIwg@public.gmane.org>
2009-02-20 18:56   ` Trond Myklebust
2009-02-20 19:52     ` Neil Brown
     [not found]       ` <18847.2578.480148.216735-wvvUuzkyo1EYVZTmpyfIwg@public.gmane.org>
2009-02-20 20:32         ` Trond Myklebust
2009-02-20 21:06           ` Neil Brown
     [not found]             ` <18847.6988.418374.839185-wvvUuzkyo1EYVZTmpyfIwg@public.gmane.org>
2009-02-20 22:14               ` Trond Myklebust

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090423213436.GA1906@fieldses.org \
    --to=bfields@fieldses.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=neilb@suse.de \
    --cc=trond.myklebust@fys.uio.no \
    --cc=tytso@mit.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.