From: NeilBrown <neilb@suse.de>
To: Trond Myklebust <trond.myklebust@primarydata.com>
Cc: NFS <linux-nfs@vger.kernel.org>, Tejun Heo <tj@kernel.org>,
Christoph Hellwig <hch@infradead.org>
Subject: [PATCH] NFS: state manager thread must stay running.
Date: Wed, 13 Aug 2014 14:08:31 +1000 [thread overview]
Message-ID: <20140813140831.22f3e9c7@notabene.brown> (raw)
[-- Attachment #1: Type: text/plain, Size: 7129 bytes --]
If the server restarts at an awkward time it is possible for write
requests to block waiting for the state manager to run.
If the state manager isn't already running a new thread will
need to be started which requires a GFP_KERNEL allocation
(for do_fork).
If memory is short, that GFP_KERNEL allocation could block on the
writes going out via NFS, resulting in a deadlock.
The easiest solution is to keep the manager thread running
always.
There are two interesting requirements for the manager thread:
1/ It must allow SIGKILL, which can abort NFS transactions to
a dead server.
2/ It may continue running after the filesystem is unmounted,
until the server recovers or the thread is SIGKILLed
These, particularly the last, make it unsuitable for kthread_worker,
which can only be killed synchronously (via kthread_stop()).
In this patch the manager thread no longer holds a counted reference
on the client. Instead a new flag NFS_CS_MANAGER indicates that the
thread is running and so holds an implicit reference.
nfs_put_client will only free the client if this flag is clear.
If it is set, the manager must free the client.
nn->nfs_client_lock is used to ensure nfs_put_client() and the
thread don't trip over each other at shutdown.
Signed-off-by: NeilBrown <neilb@suse.de>
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 180d1ec9c32e..9973675737dd 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -272,13 +272,16 @@ void nfs_put_client(struct nfs_client *clp)
nn = net_generic(clp->cl_net, nfs_net_id);
if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
+ int manager_will_free =
+ test_bit(NFS_CS_MANAGER, &clp->cl_res_state);
list_del(&clp->cl_share_link);
nfs_cb_idr_remove_locked(clp);
- spin_unlock(&nn->nfs_client_lock);
-
+ if (manager_will_free)
+ nfs4_schedule_state_manager(clp);
WARN_ON_ONCE(!list_empty(&clp->cl_superblocks));
-
- clp->rpc_ops->free_client(clp);
+ spin_unlock(&nn->nfs_client_lock);
+ if (!manager_will_free)
+ clp->rpc_ops->free_client(clp);
}
}
EXPORT_SYMBOL_GPL(nfs_put_client);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ba2affa51941..014e730ee7a2 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -432,6 +432,7 @@ extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern int nfs4_wait_clnt_recover(struct nfs_client *clp);
extern int nfs4_client_recover_expired_lease(struct nfs_client *clp);
extern void nfs4_schedule_state_manager(struct nfs_client *);
+extern int nfs4_start_state_manager(struct nfs_client *);
extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp);
extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
extern int nfs4_schedule_migration_recovery(const struct nfs_server *);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index aa9ef4876046..907111174886 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -401,6 +401,11 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
}
__set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
+ error = nfs4_start_state_manager(clp);
+ if (error < 0)
+ goto error;
+ __set_bit(NFS_CS_MANAGER, &clp->cl_res_state);
+
error = nfs4_init_client_minor_version(clp);
if (error < 0)
goto error;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 42f121182167..69d12decd04a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1150,15 +1150,12 @@ static void nfs4_clear_state_manager_bit(struct nfs_client *clp)
/*
* Schedule the nfs_client asynchronous state management routine
*/
-void nfs4_schedule_state_manager(struct nfs_client *clp)
+int nfs4_start_state_manager(struct nfs_client *clp)
{
struct task_struct *task;
char buf[INET6_ADDRSTRLEN + sizeof("-manager") + 1];
- if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
- return;
__module_get(THIS_MODULE);
- atomic_inc(&clp->cl_count);
/* The rcu_read_lock() is not strictly necessary, as the state
* manager is the only thread that ever changes the rpc_xprt
@@ -1171,10 +1168,18 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
if (IS_ERR(task)) {
printk(KERN_ERR "%s: kthread_run: %ld\n",
__func__, PTR_ERR(task));
- nfs4_clear_state_manager_bit(clp);
- nfs_put_client(clp);
module_put(THIS_MODULE);
+ return PTR_ERR(task);
}
+ return 0;
+}
+
+void nfs4_schedule_state_manager(struct nfs_client *clp)
+{
+ if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
+ return;
+ smp_mb__after_atomic();
+ wake_up_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING);
}
/*
@@ -2328,8 +2333,12 @@ static void nfs4_state_manager(struct nfs_client *clp)
int status = 0;
const char *section = "", *section_sep = "";
- /* Ensure exclusive access to NFSv4 state */
- do {
+ while (atomic_read(&clp->cl_count) && clp->cl_state) {
+ /* If we found more work to do, we must ensure the
+ * bit is set so other code can wait for it.
+ */
+ set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state);
+
if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
section = "purge state";
status = nfs4_purge_lease(clp);
@@ -2423,12 +2432,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
}
nfs4_clear_state_manager_bit(clp);
- /* Did we race with an attempt to give us more work? */
- if (clp->cl_state == 0)
- break;
- if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
- break;
- } while (atomic_read(&clp->cl_count) > 1);
+ }
return;
out_error:
if (strlen(section))
@@ -2444,10 +2448,23 @@ out_error:
static int nfs4_run_state_manager(void *ptr)
{
struct nfs_client *clp = ptr;
+ struct nfs_net *nn;
allow_signal(SIGKILL);
- nfs4_state_manager(clp);
- nfs_put_client(clp);
+ while (atomic_read(&clp->cl_count)) {
+ if (signal_pending(current))
+ flush_signals(current);
+ wait_event_interruptible(
+ *bit_waitqueue(&clp->cl_state,
+ NFS4CLNT_MANAGER_RUNNING),
+ test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state));
+ nfs4_state_manager(clp);
+ }
+ nn = net_generic(clp->cl_net, nfs_net_id);
+ spin_lock(&nn->nfs_client_lock);
+ /* nfs_put_client has definitely stopped using its reference */
+ spin_unlock(&nn->nfs_client_lock);
+ clp->rpc_ops->free_client(clp);
module_put_and_exit(0);
return 0;
}
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 1150ea41b626..40c7d1a53388 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -36,6 +36,7 @@ struct nfs_client {
#define NFS_CS_RENEWD 3 /* - renewd started */
#define NFS_CS_STOP_RENEW 4 /* no more state to renew */
#define NFS_CS_CHECK_LEASE_TIME 5 /* need to check lease time */
+#define NFS_CS_MANAGER 6 /* NFSv4 manager thread running */
unsigned long cl_flags; /* behavior switches */
#define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */
#define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 828 bytes --]
next reply other threads:[~2014-08-13 4:08 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-08-13 4:08 NeilBrown [this message]
2014-08-17 13:11 ` [PATCH] NFS: state manager thread must stay running Tejun Heo
2014-08-17 21:14 ` NeilBrown
2014-08-18 14:29 ` Tejun Heo
2014-09-17 1:43 ` Trond Myklebust
2014-09-17 3:05 ` NeilBrown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140813140831.22f3e9c7@notabene.brown \
--to=neilb@suse.de \
--cc=hch@infradead.org \
--cc=linux-nfs@vger.kernel.org \
--cc=tj@kernel.org \
--cc=trond.myklebust@primarydata.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).