All of lore.kernel.org
 help / color / mirror / Atom feed
From: "J. Bruce Fields" <bfields@fieldses.org>
To: NeilBrown <neilb@suse.de>
Cc: linux-nfs@vger.kernel.org, Anton Blanchard <anton@au1.ibm.com>
Subject: Re: [PATCH 1.5/6] Fix race in new request delay code.
Date: Thu, 26 Aug 2010 17:08:00 -0400	[thread overview]
Message-ID: <20100826210800.GD10636@fieldses.org> (raw)
In-Reply-To: <20100817051509.26151.91127.stgit@localhost.localdomain>

On Tue, Aug 17, 2010 at 03:15:09PM +1000, NeilBrown wrote:
> If the 'wait_for_completion_interruptible_timeout' completes due
> to interrupt or timeout, just before cache_revisit request gets around
> to calling ->revisit and thus the completion, we have a race with bad
> consequences.
> 
> cache_defer_req will see that sleeper.handle.hash is empty (because
> cache_revisit_request has already done the list_del_init), and that
> CACHE_PENDING is clear, and so will exit that function leaving any
> local variables such as 'sleeper' undefined.
> 
> Meanwhile cache_revisit_request could delete sleeper.handle.recent
> from the 'pending' list, and call sleeper.hande.revisit, any of which
> could have new garbage values and so could trigger a BUG.

Thanks, this looks correct to me!

I wish it were simpler, but don't see how right now.

Some superficial cleanup might help me at least--see below (untested),
which attempts to make obvious the first-try-sleeping-then-try-deferral
logic, by turning cache_defer_req into basically:

	if (req->thread_wait) {
		ret = cache_wait_req(req, item);
		if (ret != -ETIMEDOUT)
			return ret;
	}
	dreq = req->defer(req);
	if (dreq == NULL)
		return -ENOMEM;
...

I also wonder whether there would be a way to make most of this just
another ->defer() method, so that we wouldn't need to switch on
req->thread_wait here at all.  But maybe there's not a nice way to do
that.  (Certainly I prefer your approach to what the idmap code was
doing.)

--b.

diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2c5297f..da872f9 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -509,60 +509,39 @@ static LIST_HEAD(cache_defer_list);
 static struct list_head cache_defer_hash[DFR_HASHSIZE];
 static int cache_defer_cnt;
 
-struct thread_deferred_req {
-	struct cache_deferred_req handle;
-	struct completion completion;
-};
-static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
+static void __unhash_deferred_req(struct cache_deferred_req *dreq)
 {
-	struct thread_deferred_req *dr =
-		container_of(dreq, struct thread_deferred_req, handle);
-	complete(&dr->completion);
+	list_del_init(&dreq->recent);
+	list_del_init(&dreq->hash);
+	cache_defer_cnt--;
 }
 
-static int cache_defer_req(struct cache_req *req, struct cache_head *item)
+static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item)
 {
-	struct cache_deferred_req *dreq, *discard;
 	int hash = DFR_HASH(item);
-	struct thread_deferred_req sleeper;
 
-	if (cache_defer_cnt >= DFR_MAX) {
-		/* too much in the cache, randomly drop this one,
-		 * or continue and drop the oldest below
-		 */
-		if (net_random()&1)
-			return -ENOMEM;
-	}
-	if (req->thread_wait) {
-		dreq = &sleeper.handle;
-		sleeper.completion =
-			COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
-		dreq->revisit = cache_restart_thread;
-	} else
-		dreq = req->defer(req);
+	list_add(&dreq->recent, &cache_defer_list);
+	if (cache_defer_hash[hash].next == NULL)
+		INIT_LIST_HEAD(&cache_defer_hash[hash]);
+	list_add(&dreq->hash, &cache_defer_hash[hash]);
+}
 
- retry:
-	if (dreq == NULL)
-		return -ENOMEM;
+static int setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item)
+{
+	struct cache_deferred_req *discard;
 
 	dreq->item = item;
 
 	spin_lock(&cache_defer_lock);
 
-	list_add(&dreq->recent, &cache_defer_list);
-
-	if (cache_defer_hash[hash].next == NULL)
-		INIT_LIST_HEAD(&cache_defer_hash[hash]);
-	list_add(&dreq->hash, &cache_defer_hash[hash]);
+	__hash_deferred_req(dreq, item);
 
 	/* it is in, now maybe clean up */
 	discard = NULL;
 	if (++cache_defer_cnt > DFR_MAX) {
 		discard = list_entry(cache_defer_list.prev,
 				     struct cache_deferred_req, recent);
-		list_del_init(&discard->recent);
-		list_del_init(&discard->hash);
-		cache_defer_cnt--;
+		__unhash_deferred_req(discard);
 	}
 	spin_unlock(&cache_defer_lock);
 
@@ -575,44 +554,88 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
 		cache_revisit_request(item);
 		return -EAGAIN;
 	}
+	return 0;
+}
 
-	if (dreq == &sleeper.handle) {
-		if (wait_for_completion_interruptible_timeout(
-			    &sleeper.completion, req->thread_wait) <= 0) {
-			/* The completion wasn't completed, so we need
-			 * to clean up
-			 */
-			spin_lock(&cache_defer_lock);
-			if (!list_empty(&sleeper.handle.hash)) {
-				list_del_init(&sleeper.handle.recent);
-				list_del_init(&sleeper.handle.hash);
-				cache_defer_cnt--;
-				spin_unlock(&cache_defer_lock);
-			} else {
-				/* cache_revisit_request already removed
-				 * this from the hash table, but hasn't
-				 * called ->revisit yet.  It will very soon
-				 * and we need to wait for it.
-				 */
-				spin_unlock(&cache_defer_lock);
-				wait_for_completion(&sleeper.completion);
-			}
-		}
-		if (test_bit(CACHE_PENDING, &item->flags)) {
-			/* item is still pending, try request
-			 * deferral
+struct thread_deferred_req {
+	struct cache_deferred_req handle;
+	struct completion completion;
+};
+
+static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
+{
+	struct thread_deferred_req *dr =
+		container_of(dreq, struct thread_deferred_req, handle);
+	complete(&dr->completion);
+}
+
+static int cache_wait_req(struct cache_req *req, struct cache_head *item)
+{
+	struct thread_deferred_req sleeper;
+	struct cache_deferred_req *dreq = &sleeper.handle;
+	int ret;
+
+	sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
+	dreq->revisit = cache_restart_thread;
+
+	ret = setup_deferral(dreq, item);
+	if (ret)
+		return ret;
+
+	if (wait_for_completion_interruptible_timeout(
+		    &sleeper.completion, req->thread_wait) <= 0) {
+		/* The completion wasn't completed, so we need
+		 * to clean up
+		 */
+		spin_lock(&cache_defer_lock);
+		if (!list_empty(&sleeper.handle.hash)) {
+			__unhash_deferred_req(&sleeper.handle);
+			spin_unlock(&cache_defer_lock);
+		} else {
+			/* cache_revisit_request already removed
+			 * this from the hash table, but hasn't
+			 * called ->revisit yet.  It will very soon
+			 * and we need to wait for it.
 			 */
-			dreq = req->defer(req);
-			goto retry;
+			spin_unlock(&cache_defer_lock);
+			wait_for_completion(&sleeper.completion);
 		}
-		/* only return success if we actually deferred the
-		 * request.  In this case we waited until it was
-		 * answered so no deferral has happened - rather
-		 * an answer already exists.
+	}
+	if (test_bit(CACHE_PENDING, &item->flags)) {
+		/* item is still pending, try request
+		 * deferral
 		 */
-		return -EEXIST;
+		return -ETIMEDOUT;
 	}
-	return 0;
+	/* only return success if we actually deferred the
+	 * request.  In this case we waited until it was
+	 * answered so no deferral has happened - rather
+	 * an answer already exists.
+	 */
+	return -EEXIST;
+}
+
+static int cache_defer_req(struct cache_req *req, struct cache_head *item)
+{
+	struct cache_deferred_req *dreq;
+	int ret;
+
+	if (cache_defer_cnt >= DFR_MAX) {
+		/* too much in the cache, randomly drop this one,
+		 * or continue and drop the oldest
+		 */
+		if (net_random()&1)
+			return -ENOMEM;
+	}
+	if (req->thread_wait) {
+		ret = cache_wait_req(req, item);
+		if (ret != -ETIMEDOUT)
+			return ret;
+	}
+	dreq = req->defer(req);
+	if (dreq == NULL)
+		return -ENOMEM;
+	return setup_deferral(dreq, item);
 }
 
 static void cache_revisit_request(struct cache_head *item)
@@ -632,9 +655,8 @@ static void cache_revisit_request(struct cache_head *item)
 			dreq = list_entry(lp, struct cache_deferred_req, hash);
 			lp = lp->next;
 			if (dreq->item == item) {
-				list_del_init(&dreq->hash);
-				list_move(&dreq->recent, &pending);
-				cache_defer_cnt--;
+				__unhash_deferred_req(dreq);
+				list_add(&dreq->recent, &pending);
 			}
 		}
 	}
@@ -657,11 +679,8 @@ void cache_clean_deferred(void *owner)
 	spin_lock(&cache_defer_lock);
 
 	list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
-		if (dreq->owner == owner) {
-			list_del_init(&dreq->hash);
-			list_move(&dreq->recent, &pending);
-			cache_defer_cnt--;
-		}
+		if (dreq->owner == owner)
+			__unhash_deferred_req(dreq);
 	}
 	spin_unlock(&cache_defer_lock);
 
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 8ff6840..95fc3e8 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -665,7 +665,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 
 		/* As there is a shortage of threads and this request
-		 * had to be queue, don't allow the thread to wait so
+		 * had to be queued, don't allow the thread to wait so
 		 * long for cache updates.
 		 */
 		rqstp->rq_chandle.thread_wait = 1*HZ;

  reply	other threads:[~2010-08-26 21:08 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-08-12  7:04 [PATCH 0/6] Cache deferral improvements - try N+2 NeilBrown
2010-08-12  7:04 ` [PATCH 2/6] nfsd/idmap: drop special request deferal in favour of improved default NeilBrown
     [not found]   ` <20100812070406.11459.89468.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-09-21 21:08     ` J. Bruce Fields
2010-08-12  7:04 ` [PATCH 1/6] sunrpc/cache: allow threads to block while waiting for cache update NeilBrown
2010-08-12  7:04 ` [PATCH 5/6] svcauth_gss: replace a trivial 'switch' with an 'if' NeilBrown
2010-08-12  7:04 ` [PATCH 3/6] sunrpc: close connection when a request is irretrievably lost NeilBrown
2010-09-21 20:53   ` J. Bruce Fields
2010-09-21 23:37     ` Neil Brown
2010-09-22  2:13       ` J. Bruce Fields
2010-08-12  7:04 ` [PATCH 4/6] nfsd: disable deferral for NFSv4 NeilBrown
     [not found]   ` <20100812070407.11459.2929.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-09-21 21:01     ` J. Bruce Fields
2010-08-12  7:04 ` [PATCH 6/6] sunrpc/cache: change deferred-request hash table to use hlist NeilBrown
     [not found] ` <20100812065722.11459.18978.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-08-12 12:03   ` [PATCH 0/6] Cache deferral improvements - try N+2 J. Bruce Fields
2010-08-17  5:15 ` [PATCH 1.5/6] Fix race in new request delay code NeilBrown
2010-08-26 21:08   ` J. Bruce Fields [this message]
2010-08-29 23:36     ` Neil Brown
2010-09-01 11:31       ` J. Bruce Fields
2010-09-21  8:35       ` Neil Brown
2010-09-22  2:15         ` J. Bruce Fields

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100826210800.GD10636@fieldses.org \
    --to=bfields@fieldses.org \
    --cc=anton@au1.ibm.com \
    --cc=linux-nfs@vger.kernel.org \
    --cc=neilb@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.