From: "J. Bruce Fields" <bfields@fieldses.org>
To: NeilBrown <neilb@suse.de>
Cc: linux-nfs@vger.kernel.org, Anton Blanchard <anton@au1.ibm.com>
Subject: Re: [PATCH 1.5/6] Fix race in new request delay code.
Date: Thu, 26 Aug 2010 17:08:00 -0400 [thread overview]
Message-ID: <20100826210800.GD10636@fieldses.org> (raw)
In-Reply-To: <20100817051509.26151.91127.stgit@localhost.localdomain>
On Tue, Aug 17, 2010 at 03:15:09PM +1000, NeilBrown wrote:
> If the 'wait_for_completion_interruptible_timeout' completes due
> to interrupt or timeout, just before cache_revisit_request gets around
> to calling ->revisit and thus the completion, we have a race with bad
> consequences.
>
> cache_defer_req will see that sleeper.handle.hash is empty (because
> cache_revisit_request has already done the list_del_init), and that
> CACHE_PENDING is clear, and so will exit that function leaving any
> local variables such as 'sleeper' undefined.
>
> Meanwhile cache_revisit_request could delete sleeper.handle.recent
> from the 'pending' list, and call sleeper.handle.revisit, any of which
> could have new garbage values and so could trigger a BUG.
Thanks, this looks correct to me!
I wish it were simpler, but don't see how right now.
Some superficial cleanup might help me at least--see below (untested),
which attempts to make obvious the first-try-sleeping-then-try-deferral
logic, by turning cache_defer_req into basically:
if (req->thread_wait) {
ret = cache_wait_req(req, item);
if (ret != -ETIMEDOUT)
return ret;
}
dreq = req->defer(req);
if (dreq == NULL)
return -ENOMEM;
...
I also wonder whether there would be a way to make most of this just
another ->defer() method, so that we wouldn't need to switch on
req->thread_wait here at all. But maybe there's not a nice way to do
that. (Certainly I prefer your approach to what the idmap code was
doing.)
--b.
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 2c5297f..da872f9 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -509,60 +509,39 @@ static LIST_HEAD(cache_defer_list);
static struct list_head cache_defer_hash[DFR_HASHSIZE];
static int cache_defer_cnt;
-struct thread_deferred_req {
- struct cache_deferred_req handle;
- struct completion completion;
-};
-static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
+static void __unhash_deferred_req(struct cache_deferred_req *dreq)
{
- struct thread_deferred_req *dr =
- container_of(dreq, struct thread_deferred_req, handle);
- complete(&dr->completion);
+ list_del_init(&dreq->recent);
+ list_del_init(&dreq->hash);
+ cache_defer_cnt--;
}
-static int cache_defer_req(struct cache_req *req, struct cache_head *item)
+static void __hash_deferred_req(struct cache_deferred_req *dreq, struct cache_head *item)
{
- struct cache_deferred_req *dreq, *discard;
int hash = DFR_HASH(item);
- struct thread_deferred_req sleeper;
- if (cache_defer_cnt >= DFR_MAX) {
- /* too much in the cache, randomly drop this one,
- * or continue and drop the oldest below
- */
- if (net_random()&1)
- return -ENOMEM;
- }
- if (req->thread_wait) {
- dreq = &sleeper.handle;
- sleeper.completion =
- COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
- dreq->revisit = cache_restart_thread;
- } else
- dreq = req->defer(req);
+ list_add(&dreq->recent, &cache_defer_list);
+ if (cache_defer_hash[hash].next == NULL)
+ INIT_LIST_HEAD(&cache_defer_hash[hash]);
+ list_add(&dreq->hash, &cache_defer_hash[hash]);
+}
- retry:
- if (dreq == NULL)
- return -ENOMEM;
+static int setup_deferral(struct cache_deferred_req *dreq, struct cache_head *item)
+{
+ struct cache_deferred_req *discard;
dreq->item = item;
spin_lock(&cache_defer_lock);
- list_add(&dreq->recent, &cache_defer_list);
-
- if (cache_defer_hash[hash].next == NULL)
- INIT_LIST_HEAD(&cache_defer_hash[hash]);
- list_add(&dreq->hash, &cache_defer_hash[hash]);
+ __hash_deferred_req(dreq, item);
/* it is in, now maybe clean up */
discard = NULL;
if (++cache_defer_cnt > DFR_MAX) {
discard = list_entry(cache_defer_list.prev,
struct cache_deferred_req, recent);
- list_del_init(&discard->recent);
- list_del_init(&discard->hash);
- cache_defer_cnt--;
+ __unhash_deferred_req(discard);
}
spin_unlock(&cache_defer_lock);
@@ -575,44 +554,88 @@ static int cache_defer_req(struct cache_req *req, struct cache_head *item)
cache_revisit_request(item);
return -EAGAIN;
}
+ return 0;
+}
- if (dreq == &sleeper.handle) {
- if (wait_for_completion_interruptible_timeout(
- &sleeper.completion, req->thread_wait) <= 0) {
- /* The completion wasn't completed, so we need
- * to clean up
- */
- spin_lock(&cache_defer_lock);
- if (!list_empty(&sleeper.handle.hash)) {
- list_del_init(&sleeper.handle.recent);
- list_del_init(&sleeper.handle.hash);
- cache_defer_cnt--;
- spin_unlock(&cache_defer_lock);
- } else {
- /* cache_revisit_request already removed
- * this from the hash table, but hasn't
- * called ->revisit yet. It will very soon
- * and we need to wait for it.
- */
- spin_unlock(&cache_defer_lock);
- wait_for_completion(&sleeper.completion);
- }
- }
- if (test_bit(CACHE_PENDING, &item->flags)) {
- /* item is still pending, try request
- * deferral
+struct thread_deferred_req {
+ struct cache_deferred_req handle;
+ struct completion completion;
+};
+
+static void cache_restart_thread(struct cache_deferred_req *dreq, int too_many)
+{
+ struct thread_deferred_req *dr =
+ container_of(dreq, struct thread_deferred_req, handle);
+ complete(&dr->completion);
+}
+
+static int cache_wait_req(struct cache_req *req, struct cache_head *item)
+{
+ struct thread_deferred_req sleeper;
+ struct cache_deferred_req *dreq = &sleeper.handle;
+ int ret;
+
+ sleeper.completion = COMPLETION_INITIALIZER_ONSTACK(sleeper.completion);
+ dreq->revisit = cache_restart_thread;
+
+ ret = setup_deferral(dreq, item);
+ if (ret)
+ return ret;
+
+ if (wait_for_completion_interruptible_timeout(
+ &sleeper.completion, req->thread_wait) <= 0) {
+ /* The completion wasn't completed, so we need
+ * to clean up
+ */
+ spin_lock(&cache_defer_lock);
+ if (!list_empty(&sleeper.handle.hash)) {
+ __unhash_deferred_req(&sleeper.handle);
+ spin_unlock(&cache_defer_lock);
+ } else {
+ /* cache_revisit_request already removed
+ * this from the hash table, but hasn't
+ * called ->revisit yet. It will very soon
+ * and we need to wait for it.
*/
- dreq = req->defer(req);
- goto retry;
+ spin_unlock(&cache_defer_lock);
+ wait_for_completion(&sleeper.completion);
}
- /* only return success if we actually deferred the
- * request. In this case we waited until it was
- * answered so no deferral has happened - rather
- * an answer already exists.
+ }
+ if (test_bit(CACHE_PENDING, &item->flags)) {
+ /* item is still pending, try request
+ * deferral
*/
- return -EEXIST;
+ return -ETIMEDOUT;
}
- return 0;
+ /* only return success if we actually deferred the
+ * request. In this case we waited until it was
+ * answered so no deferral has happened - rather
+ * an answer already exists.
+ */
+ return -EEXIST;
+}
+
+static int cache_defer_req(struct cache_req *req, struct cache_head *item)
+{
+ struct cache_deferred_req *dreq;
+ int ret;
+
+ if (cache_defer_cnt >= DFR_MAX) {
+ /* too much in the cache, randomly drop this one,
+ * or continue and drop the oldest
+ */
+ if (net_random()&1)
+ return -ENOMEM;
+ }
+ if (req->thread_wait) {
+ ret = cache_wait_req(req, item);
+ if (ret != -ETIMEDOUT)
+ return ret;
+ }
+ dreq = req->defer(req);
+ if (dreq == NULL)
+ return -ENOMEM;
+ return setup_deferral(dreq, item);
}
static void cache_revisit_request(struct cache_head *item)
@@ -632,9 +655,8 @@ static void cache_revisit_request(struct cache_head *item)
dreq = list_entry(lp, struct cache_deferred_req, hash);
lp = lp->next;
if (dreq->item == item) {
- list_del_init(&dreq->hash);
- list_move(&dreq->recent, &pending);
- cache_defer_cnt--;
+ __unhash_deferred_req(dreq);
+ list_add(&dreq->recent, &pending);
}
}
}
@@ -657,11 +679,8 @@ void cache_clean_deferred(void *owner)
spin_lock(&cache_defer_lock);
list_for_each_entry_safe(dreq, tmp, &cache_defer_list, recent) {
- if (dreq->owner == owner) {
- list_del_init(&dreq->hash);
- list_move(&dreq->recent, &pending);
- cache_defer_cnt--;
- }
+ if (dreq->owner == owner)
+ __unhash_deferred_req(dreq);
}
spin_unlock(&cache_defer_lock);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 8ff6840..95fc3e8 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -665,7 +665,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
/* As there is a shortage of threads and this request
- * had to be queue, don't allow the thread to wait so
+ * had to be queued, don't allow the thread to wait so
* long for cache updates.
*/
rqstp->rq_chandle.thread_wait = 1*HZ;
next prev parent reply other threads:[~2010-08-26 21:08 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-08-12 7:04 [PATCH 0/6] Cache deferral improvements - try N+2 NeilBrown
2010-08-12 7:04 ` [PATCH 2/6] nfsd/idmap: drop special request deferal in favour of improved default NeilBrown
[not found] ` <20100812070406.11459.89468.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-09-21 21:08 ` J. Bruce Fields
2010-08-12 7:04 ` [PATCH 1/6] sunrpc/cache: allow threads to block while waiting for cache update NeilBrown
2010-08-12 7:04 ` [PATCH 3/6] sunrpc: close connection when a request is irretrievably lost NeilBrown
2010-09-21 20:53 ` J. Bruce Fields
2010-09-21 23:37 ` Neil Brown
2010-09-22 2:13 ` J. Bruce Fields
2010-08-12 7:04 ` [PATCH 4/6] nfsd: disable deferral for NFSv4 NeilBrown
[not found] ` <20100812070407.11459.2929.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-09-21 21:01 ` J. Bruce Fields
2010-08-12 7:04 ` [PATCH 5/6] svcauth_gss: replace a trivial 'switch' with an 'if' NeilBrown
2010-08-12 7:04 ` [PATCH 6/6] sunrpc/cache: change deferred-request hash table to use hlist NeilBrown
[not found] ` <20100812065722.11459.18978.stgit-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-08-12 12:03 ` [PATCH 0/6] Cache deferral improvements - try N+2 J. Bruce Fields
2010-08-17 5:15 ` [PATCH 1.5/6] Fix race in new request delay code NeilBrown
2010-08-26 21:08 ` J. Bruce Fields [this message]
2010-08-29 23:36 ` Neil Brown
2010-09-01 11:31 ` J. Bruce Fields
2010-09-21 8:35 ` Neil Brown
2010-09-22 2:15 ` J. Bruce Fields
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100826210800.GD10636@fieldses.org \
--to=bfields@fieldses.org \
--cc=anton@au1.ibm.com \
--cc=linux-nfs@vger.kernel.org \
--cc=neilb@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).