Linux NFS development
 help / color / mirror / Atom feed
From: "J. Bruce Fields" <bfields@fieldses.org>
To: Neil Brown <neilb@suse.com>
Cc: Olaf Kirch <okir@suse.de>, linux-nfs@vger.kernel.org
Subject: Re: [PATCH v2] sunrpc/cache: make cache flushing more reliable.
Date: Thu, 22 Oct 2015 15:14:35 -0400	[thread overview]
Message-ID: <20151022191435.GE5205@fieldses.org> (raw)
In-Reply-To: <871tcvx07n.fsf@notabene.neil.brown.name>

Thanks, applying.--b.

On Fri, Oct 16, 2015 at 08:59:08AM +1100, Neil Brown wrote:
> 
> The caches used to store sunrpc authentication information can be
> flushed by writing a timestamp to a file in /proc.
> 
> This timestamp has a one-second resolution and any entry in cache that
> was last_refreshed *before* that time is treated as expired.
> 
> This is problematic as it is not possible to reliably flush the cache
> without interrupting NFS service.
> If the current time is written to the "flush" file, any entry that was
> added since the current second started will still be treated as valid.
> If one second beyond than the current time is written to the file
> then no entries can be valid until the second ticks over.  This will
> mean that no NFS request will be handled for up to 1 second.
> 
> To resolve this issue we make two changes:
> 
> 1/ treat an entry as expired if the timestamp when it was last_refreshed
>   is before *or the same as* the expiry time.  This means that current
>   code which writes out the current time will now flush the cache
>   reliably.
> 
> 2/ when a new entry in added to the cache -  set the last_refresh timestamp
>   to 1 second *beyond* the current flush time, when that not in the
>   past.
>   This ensures that newly added entries will always be valid.
> 
> 
> Now that we have a very reliable way to flush the cache, and also
> since we are using "since-boot" timestamps which are monotonic,
> change cache_purge() to set the smallest future flush_time which
> will work, and leave it there: don't revert to '1'.
> 
> Also disable the setting of the 'flush_time' far into the future.
> That has never been useful and is now awkward as it would cause
> last_refresh times to be strange.
> Finally: if a request is made to set the 'flush_time' to the current
> second, assume the intent is to flush the cache and advance it, if
> necessary, to 1 second beyond the current 'flush_time' so that all
> active entries will be deemed to be expired.
> 
> As part of this we need to add a 'cache_detail' arg to cache_init()
> and cache_fresh_locked() so they can find the current ->flush_time.
> 
> Signed-off-by: NeilBrown <neilb@suse.com>
> Reported-by: Olaf Kirch <okir@suse.com>
> 
> diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
> index 03d3b4c92d9f..ed03c9f7f908 100644
> --- a/include/linux/sunrpc/cache.h
> +++ b/include/linux/sunrpc/cache.h
> @@ -48,8 +48,10 @@
>  struct cache_head {
>  	struct hlist_node	cache_list;
>  	time_t		expiry_time;	/* After time time, don't use the data */
> -	time_t		last_refresh;   /* If CACHE_PENDING, this is when upcall 
> -					 * was sent, else this is when update was received
> +	time_t		last_refresh;   /* If CACHE_PENDING, this is when upcall was
> +					 * sent, else this is when update was
> +					 * received, though it is alway set to
> +					 * be *after* ->flush_time.
>  					 */
>  	struct kref	ref;
>  	unsigned long	flags;
> @@ -105,8 +107,12 @@ struct cache_detail {
>  	/* fields below this comment are for internal use
>  	 * and should not be touched by cache owners
>  	 */
> -	time_t			flush_time;		/* flush all cache items with last_refresh
> -							 * earlier than this */
> +	time_t			flush_time;		/* flush all cache items with
> +							 * last_refresh at or earlier
> +							 * than this.  last_refresh
> +							 * is never set at or earlier
> +							 * than this.
> +							 */
>  	struct list_head	others;
>  	time_t			nextcheck;
>  	int			entries;
> @@ -203,7 +209,7 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd)
>  static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h)
>  {
>  	return  (h->expiry_time < seconds_since_boot()) ||
> -		(detail->flush_time > h->last_refresh);
> +		(detail->flush_time >= h->last_refresh);
>  }
>  
>  extern int cache_check(struct cache_detail *detail,
> diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
> index 4a2340a54401..5e4f815c2b34 100644
> --- a/net/sunrpc/cache.c
> +++ b/net/sunrpc/cache.c
> @@ -41,13 +41,16 @@
>  static bool cache_defer_req(struct cache_req *req, struct cache_head *item);
>  static void cache_revisit_request(struct cache_head *item);
>  
> -static void cache_init(struct cache_head *h)
> +static void cache_init(struct cache_head *h, struct cache_detail *detail)
>  {
>  	time_t now = seconds_since_boot();
>  	INIT_HLIST_NODE(&h->cache_list);
>  	h->flags = 0;
>  	kref_init(&h->ref);
>  	h->expiry_time = now + CACHE_NEW_EXPIRY;
> +	if (now <= detail->flush_time)
> +		/* ensure it isn't already expired */
> +		now = detail->flush_time + 1;
>  	h->last_refresh = now;
>  }
>  
> @@ -81,7 +84,7 @@ struct cache_head *sunrpc_cache_lookup(struct cache_detail *detail,
>  	 * we might get lose if we need to
>  	 * cache_put it soon.
>  	 */
> -	cache_init(new);
> +	cache_init(new, detail);
>  	detail->init(new, key);
>  
>  	write_lock(&detail->hash_lock);
> @@ -116,10 +119,15 @@ EXPORT_SYMBOL_GPL(sunrpc_cache_lookup);
>  
>  static void cache_dequeue(struct cache_detail *detail, struct cache_head *ch);
>  
> -static void cache_fresh_locked(struct cache_head *head, time_t expiry)
> +static void cache_fresh_locked(struct cache_head *head, time_t expiry,
> +			       struct cache_detail *detail)
>  {
> +	time_t now = seconds_since_boot();
> +	if (now <= detail->flush_time)
> +		/* ensure it isn't immediately treated as expired */
> +		now = detail->flush_time + 1;
>  	head->expiry_time = expiry;
> -	head->last_refresh = seconds_since_boot();
> +	head->last_refresh = now;
>  	smp_wmb(); /* paired with smp_rmb() in cache_is_valid() */
>  	set_bit(CACHE_VALID, &head->flags);
>  }
> @@ -149,7 +157,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
>  				set_bit(CACHE_NEGATIVE, &old->flags);
>  			else
>  				detail->update(old, new);
> -			cache_fresh_locked(old, new->expiry_time);
> +			cache_fresh_locked(old, new->expiry_time, detail);
>  			write_unlock(&detail->hash_lock);
>  			cache_fresh_unlocked(old, detail);
>  			return old;
> @@ -162,7 +170,7 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
>  		cache_put(old, detail);
>  		return NULL;
>  	}
> -	cache_init(tmp);
> +	cache_init(tmp, detail);
>  	detail->init(tmp, old);
>  
>  	write_lock(&detail->hash_lock);
> @@ -173,8 +181,8 @@ struct cache_head *sunrpc_cache_update(struct cache_detail *detail,
>  	hlist_add_head(&tmp->cache_list, &detail->hash_table[hash]);
>  	detail->entries++;
>  	cache_get(tmp);
> -	cache_fresh_locked(tmp, new->expiry_time);
> -	cache_fresh_locked(old, 0);
> +	cache_fresh_locked(tmp, new->expiry_time, detail);
> +	cache_fresh_locked(old, 0, detail);
>  	write_unlock(&detail->hash_lock);
>  	cache_fresh_unlocked(tmp, detail);
>  	cache_fresh_unlocked(old, detail);
> @@ -219,7 +227,8 @@ static int try_to_negate_entry(struct cache_detail *detail, struct cache_head *h
>  	rv = cache_is_valid(h);
>  	if (rv == -EAGAIN) {
>  		set_bit(CACHE_NEGATIVE, &h->flags);
> -		cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY);
> +		cache_fresh_locked(h, seconds_since_boot()+CACHE_NEW_EXPIRY,
> +				   detail);
>  		rv = -ENOENT;
>  	}
>  	write_unlock(&detail->hash_lock);
> @@ -487,10 +496,13 @@ EXPORT_SYMBOL_GPL(cache_flush);
>  
>  void cache_purge(struct cache_detail *detail)
>  {
> -	detail->flush_time = LONG_MAX;
> +	time_t now = seconds_since_boot();
> +	if (detail->flush_time >= now)
> +		now = detail->flush_time + 1;
> +	/* 'now' is the maximum value any 'last_refresh' can have */
> +	detail->flush_time = now;
>  	detail->nextcheck = seconds_since_boot();
>  	cache_flush();
> -	detail->flush_time = 1;
>  }
>  EXPORT_SYMBOL_GPL(cache_purge);
>  
> @@ -1436,6 +1448,7 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
>  {
>  	char tbuf[20];
>  	char *bp, *ep;
> +	time_t then, now;
>  
>  	if (*ppos || count > sizeof(tbuf)-1)
>  		return -EINVAL;
> @@ -1447,8 +1460,22 @@ static ssize_t write_flush(struct file *file, const char __user *buf,
>  		return -EINVAL;
>  
>  	bp = tbuf;
> -	cd->flush_time = get_expiry(&bp);
> -	cd->nextcheck = seconds_since_boot();
> +	then = get_expiry(&bp);
> +	now = seconds_since_boot();
> +	cd->nextcheck = now;
> +	/* Can only set flush_time to 1 second beyond "now", or
> +	 * possibly 1 second beyond flushtime.  This is because
> +	 * flush_time never goes backwards so it mustn't get too far
> +	 * ahead of time.
> +	 */
> +	if (then >= now) {
> +		/* Want to flush everything, so behave like cache_purge() */
> +		if (cd->flush_time >= now)
> +			now = cd->flush_time + 1;
> +		then = now;
> +	}
> +
> +	cd->flush_time = then;
>  	cache_flush();
>  
>  	*ppos += count;



      reply	other threads:[~2015-10-22 19:14 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-10-15  6:31 [PATCH/RFC] sunrpc/cache: make cache flushing more reliable Neil Brown
2015-10-15  6:39 ` Neil Brown
2015-10-15 13:50   ` J. Bruce Fields
2015-10-15 21:57     ` Neil Brown
2015-10-15 21:59     ` [PATCH v2] " Neil Brown
2015-10-22 19:14       ` J. Bruce Fields [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20151022191435.GE5205@fieldses.org \
    --to=bfields@fieldses.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=neilb@suse.com \
    --cc=okir@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox