From: Fred Isaman <iisaman@netapp.com>
To: linux-nfs@vger.kernel.org
Subject: [PATCH 13/18] pnfs-submit: rewrite of layout state handling and cb_layoutrecall
Date: Thu, 4 Nov 2010 11:22:26 -0400 [thread overview]
Message-ID: <1288884151-11128-14-git-send-email-iisaman@netapp.com> (raw)
In-Reply-To: <1288884151-11128-1-git-send-email-iisaman@netapp.com>
Remove NFS_LAYOUT_STATEID_SET in favor of just checking list_empty(lo->segs).
LAYOUTGETs with openstateid are serialized. Waiting on the condition
(list_empty(lo->segs) && plh_outstanding>0) both drains outstanding RPCs once
the stateid is invalidated and allows only a single LAYOUTGET(openstateid)
through at a time.
Before sending a LAYOUTRETURN, plh_block_lgets is incremented. It is
decremented in the rpc_release function. While set, LAYOUTGETs are
paused in their rpc_prepare function, and any responses are
forgotten.
Callbacks are handled by blocking any matching LAYOUTGETs while processing and
initiating drain of IO. A notification system is set up so that when
all relevant IO is finished, the state manager thread is invoked, which
synchronously sends the final matching LAYOUTRETURN before unblocking
LAYOUTGETs.
Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
fs/nfs/callback.h | 4 +-
fs/nfs/callback_proc.c | 471 +++++++++++++++++++++++----------------------
fs/nfs/client.c | 3 +
fs/nfs/inode.c | 3 +-
fs/nfs/nfs4proc.c | 105 +++++++---
fs/nfs/nfs4state.c | 4 +
fs/nfs/nfs4xdr.c | 16 ++-
fs/nfs/pnfs.c | 181 +++++++++++++----
fs/nfs/pnfs.h | 41 +++-
include/linux/nfs_fs.h | 1 +
include/linux/nfs_fs_sb.h | 4 +
11 files changed, 518 insertions(+), 315 deletions(-)
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 817b0f4..c1c7f3e 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -161,7 +161,8 @@ struct cb_layoutrecallargs {
extern unsigned nfs4_callback_layoutrecall(
struct cb_layoutrecallargs *args,
void *dummy, struct cb_process_state *cps);
-
+extern bool matches_outstanding_recall(struct inode *ino,
+ struct pnfs_layout_range *range);
#endif /* CONFIG_NFS_V4_1 */
extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
@@ -171,6 +172,7 @@ extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
struct cb_process_state *cps);
#ifdef CONFIG_NFS_V4
+extern void nfs_client_return_layouts(struct nfs_client *clp);
extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
extern void nfs_callback_down(int minorversion);
extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 1509c34..583446b 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -132,270 +132,291 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
#if defined(CONFIG_NFS_V4_1)
static bool
-pnfs_is_next_layout_stateid(const struct pnfs_layout_hdr *lo,
- const nfs4_stateid stateid)
+_recall_matches_lget(struct pnfs_cb_lrecall_info *cb_info,
+ struct inode *ino, struct pnfs_layout_range *range)
{
- bool res;
- u32 oldseqid, newseqid;
-
- spin_lock(&lo->inode->i_lock);
- {
- oldseqid = be32_to_cpu(lo->stateid.stateid.seqid);
- newseqid = be32_to_cpu(stateid.stateid.seqid);
- res = !memcmp(lo->stateid.stateid.other,
- stateid.stateid.other,
- NFS4_STATEID_OTHER_SIZE);
- if (res) { /* comparing layout stateids */
- if (oldseqid == ~0)
- res = (newseqid == 1);
- else
- res = (newseqid == oldseqid + 1);
- } else { /* open stateid */
- res = !memcmp(lo->stateid.data,
- &zero_stateid,
- NFS4_STATEID_SIZE);
- if (res)
- res = (newseqid == 1);
- }
- }
- spin_unlock(&lo->inode->i_lock);
+ struct cb_layoutrecallargs *cb_args = &cb_info->pcl_args;
- return res;
+ switch (cb_args->cbl_recall_type) {
+ case RETURN_ALL:
+ return true;
+ case RETURN_FSID:
+ return !memcmp(&NFS_SERVER(ino)->fsid, &cb_args->cbl_fsid,
+ sizeof(struct nfs_fsid));
+ case RETURN_FILE:
+ if (ino != cb_info->pcl_ino)
+ return false;
+ return should_free_lseg(range, &cb_args->cbl_range);
+ default:
+ BUG();
+ }
}
-/*
- * Retrieve an inode based on layout recall parameters
- *
- * Note: caller must iput(inode) to dereference the inode.
- */
-static struct inode *
-nfs_layoutrecall_find_inode(struct nfs_client *clp,
- const struct cb_layoutrecallargs *args)
+bool
+matches_outstanding_recall(struct inode *ino, struct pnfs_layout_range *range)
{
- struct nfs_inode *nfsi;
- struct pnfs_layout_hdr *lo;
- struct nfs_server *server;
- struct inode *ino = NULL;
-
- dprintk("%s: Begin recall_type=%d clp %p\n",
- __func__, args->cbl_recall_type, clp);
-
- spin_lock(&clp->cl_lock);
- list_for_each_entry(lo, &clp->cl_layouts, layouts) {
- nfsi = NFS_I(lo->inode);
- if (!nfsi)
- continue;
-
- dprintk("%s: Searching inode=%lu\n",
- __func__, nfsi->vfs_inode.i_ino);
-
- if (args->cbl_recall_type == RETURN_FILE) {
- if (nfs_compare_fh(&args->cbl_fh, &nfsi->fh))
- continue;
- } else if (args->cbl_recall_type == RETURN_FSID) {
- server = NFS_SERVER(&nfsi->vfs_inode);
- if (server->fsid.major != args->cbl_fsid.major ||
- server->fsid.minor != args->cbl_fsid.minor)
- continue;
+ struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
+ struct pnfs_cb_lrecall_info *cb_info;
+ bool rv = false;
+
+ assert_spin_locked(&clp->cl_lock);
+ list_for_each_entry(cb_info, &clp->cl_layoutrecalls, pcl_list) {
+ if (_recall_matches_lget(cb_info, ino, range)) {
+ rv = true;
+ break;
}
-
- /* Make sure client didn't clean up layout without
- * telling the server */
- if (!has_layout(nfsi))
- continue;
-
- ino = igrab(&nfsi->vfs_inode);
- dprintk("%s: Found inode=%p\n", __func__, ino);
- break;
}
- spin_unlock(&clp->cl_lock);
- return ino;
+ return rv;
}
-struct recall_layout_threadargs {
- struct inode *inode;
- struct nfs_client *clp;
- struct completion started;
- struct cb_layoutrecallargs *rl;
- int result;
-};
-
-static int pnfs_recall_layout(void *data)
+/* Send a synchronous LAYOUTRETURN. By the time this is called, we know
+ * all IO has been drained, any matching lsegs deleted, and that no
+ * overlapping LAYOUTGETs will be sent or processed for the duration
+ * of this call.
+ * Note that it is possible that when this is called, the stateid has
+ * been invalidated. But it will not be cleared, so we can still use it.
+ */
+static int
+pnfs_send_layoutreturn(struct nfs_client *clp,
+ struct pnfs_cb_lrecall_info *cb_info)
{
- struct inode *inode, *ino;
- struct nfs_client *clp;
- struct cb_layoutrecallargs rl;
+ struct cb_layoutrecallargs *args = &cb_info->pcl_args;
struct nfs4_layoutreturn *lrp;
- struct recall_layout_threadargs *args =
- (struct recall_layout_threadargs *)data;
- int status = 0;
-
- daemonize("nfsv4-layoutreturn");
-
- dprintk("%s: recall_type=%d fsid 0x%llx-0x%llx start\n",
- __func__, args->rl->cbl_recall_type,
- args->rl->cbl_fsid.major, args->rl->cbl_fsid.minor);
-
- clp = args->clp;
- inode = args->inode;
- rl = *args->rl;
-
- /* support whole file layouts only */
- rl.cbl_range.offset = 0;
- rl.cbl_range.length = NFS4_MAX_UINT64;
-
- if (rl.cbl_recall_type == RETURN_FILE) {
- if (pnfs_is_next_layout_stateid(NFS_I(inode)->layout,
- rl.cbl_stateid))
- status = pnfs_return_layout(inode, &rl.cbl_range,
- &rl.cbl_stateid, RETURN_FILE,
- false);
- else
- status = cpu_to_be32(NFS4ERR_DELAY);
- if (status)
- dprintk("%s RETURN_FILE error: %d\n", __func__, status);
- else
- status = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
- args->result = status;
- complete(&args->started);
- goto out;
- }
-
- status = cpu_to_be32(NFS4_OK);
- args->result = status;
- complete(&args->started);
- args = NULL;
-
- /* IMPROVEME: This loop is inefficient, running in O(|s_inodes|^2) */
- while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
- /* FIXME: need to check status on pnfs_return_layout */
- pnfs_return_layout(ino, &rl.cbl_range, NULL, RETURN_FILE, false);
- iput(ino);
- }
lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
- if (!lrp) {
- dprintk("%s: allocation failed. Cannot send last LAYOUTRETURN\n",
- __func__);
- goto out;
- }
-
- /* send final layoutreturn */
+ if (!lrp)
+ return -ENOMEM;
lrp->args.reclaim = 0;
- lrp->args.layout_type = rl.cbl_layout_type;
- lrp->args.return_type = rl.cbl_recall_type;
+ lrp->args.layout_type = args->cbl_layout_type;
+ lrp->args.return_type = args->cbl_recall_type;
lrp->clp = clp;
- lrp->args.range = rl.cbl_range;
- lrp->args.inode = inode;
- nfs4_proc_layoutreturn(lrp, true);
-
-out:
- clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
- nfs_put_client(clp);
- module_put_and_exit(0);
- dprintk("%s: exit status %d\n", __func__, 0);
- return 0;
+ if (args->cbl_recall_type == RETURN_FILE) {
+ lrp->args.range = args->cbl_range;
+ lrp->args.inode = cb_info->pcl_ino;
+ } else {
+ lrp->args.range.iomode = IOMODE_ANY;
+ lrp->args.inode = NULL;
+ }
+ return nfs4_proc_layoutreturn(lrp, true);
}
-/*
- * Asynchronous layout recall!
+/* Called by state manager to finish CB_LAYOUTRECALLS initiated by
+ * nfs4_callback_layoutrecall().
*/
-static int pnfs_async_return_layout(struct nfs_client *clp, struct inode *inode,
- struct cb_layoutrecallargs *rl)
+void nfs_client_return_layouts(struct nfs_client *clp)
{
- struct recall_layout_threadargs data = {
- .clp = clp,
- .inode = inode,
- .rl = rl,
- };
- struct task_struct *t;
- int status = -EAGAIN;
+ struct pnfs_cb_lrecall_info *cb_info;
- dprintk("%s: -->\n", __func__);
+ spin_lock(&clp->cl_lock);
+ while (true) {
+ if (list_empty(&clp->cl_layoutrecalls)) {
+ spin_unlock(&clp->cl_lock);
+ break;
+ }
+ cb_info = list_first_entry(&clp->cl_layoutrecalls,
+ struct pnfs_cb_lrecall_info,
+ pcl_list);
+ spin_unlock(&clp->cl_lock);
+ if (atomic_read(&cb_info->pcl_count) != 0)
+ break;
+		/* What to do on error return? These layoutreturns are
+ * required by the protocol. So if do not get
+ * successful reply, probably have to do something
+ * more drastic.
+ */
+ pnfs_send_layoutreturn(clp, cb_info);
+ spin_lock(&clp->cl_lock);
+ /* Removing from the list unblocks LAYOUTGETs */
+ list_del(&cb_info->pcl_list);
+ clp->cl_cb_lrecall_count--;
+ rpc_wake_up(&clp->cl_rpcwaitq_recall);
+ kfree(cb_info);
+ }
+}
- /* FIXME: do not allow two concurrent layout recalls */
- if (test_and_set_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state))
- return status;
-
- init_completion(&data.started);
- __module_get(THIS_MODULE);
- atomic_inc(&clp->cl_count);
-
- t = kthread_run(pnfs_recall_layout, &data, "%s", "pnfs_recall_layout");
- if (IS_ERR(t)) {
- printk(KERN_INFO "NFS: Layout recall callback thread failed "
- "for client (clientid %08x/%08x)\n",
- (unsigned)(clp->cl_clientid >> 32),
- (unsigned)(clp->cl_clientid));
- status = PTR_ERR(t);
- goto out_module_put;
+void notify_drained(struct pnfs_cb_lrecall_info *d)
+{
+ if (d && atomic_dec_and_test(&d->pcl_count)) {
+ set_bit(NFS4CLNT_LAYOUT_RECALL, &d->pcl_clp->cl_state);
+ nfs4_schedule_state_manager(d->pcl_clp);
}
- wait_for_completion(&data.started);
- return data.result;
-out_module_put:
- nfs_put_client(clp);
- clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
- module_put(THIS_MODULE);
- return status;
}
-static int pnfs_recall_all_layouts(struct nfs_client *clp)
+static int initiate_layout_draining(struct pnfs_cb_lrecall_info *cb_info)
{
- struct cb_layoutrecallargs rl;
- struct inode *inode;
- int status = 0;
-
- rl.cbl_recall_type = RETURN_ALL;
- rl.cbl_range.iomode = IOMODE_ANY;
- rl.cbl_range.offset = 0;
- rl.cbl_range.length = NFS4_MAX_UINT64;
-
- /* we need the inode to get the nfs_server struct */
- inode = nfs_layoutrecall_find_inode(clp, &rl);
- if (!inode)
- return status;
- status = pnfs_async_return_layout(clp, inode, &rl);
- iput(inode);
+ struct nfs_client *clp = cb_info->pcl_clp;
+ struct pnfs_layout_hdr *lo;
+ int rv = NFS4ERR_NOMATCHING_LAYOUT;
+ struct cb_layoutrecallargs *args = &cb_info->pcl_args;
+
+ if (args->cbl_recall_type == RETURN_FILE) {
+ LIST_HEAD(free_me_list);
+
+ spin_lock(&clp->cl_lock);
+ list_for_each_entry(lo, &clp->cl_layouts, layouts) {
+ if (nfs_compare_fh(&args->cbl_fh,
+ &NFS_I(lo->inode)->fh))
+ continue;
+ if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags))
+ rv = NFS4ERR_DELAY;
+ else {
+ /* FIXME I need to better understand igrab and
+ * does having a layout ref keep ino around?
+ * It should.
+ */
+ /* We need to hold the reference until any
+ * potential LAYOUTRETURN is finished.
+ */
+ get_layout_hdr(lo);
+ cb_info->pcl_ino = lo->inode;
+ rv = NFS4_OK;
+ }
+ break;
+ }
+ spin_unlock(&clp->cl_lock);
+
+ spin_lock(&lo->inode->i_lock);
+ if (rv == NFS4_OK) {
+ lo->plh_block_lgets++;
+ nfs4_asynch_forget_layouts(lo, &args->cbl_range,
+ cb_info, &free_me_list);
+ }
+ pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
+ spin_unlock(&lo->inode->i_lock);
+ pnfs_free_lseg_list(&free_me_list);
+ } else {
+ struct pnfs_layout_hdr *tmp;
+ LIST_HEAD(recall_list);
+ LIST_HEAD(free_me_list);
+ struct pnfs_layout_range range = {
+ .iomode = IOMODE_ANY,
+ .offset = 0,
+ .length = NFS4_MAX_UINT64,
+ };
+
+ spin_lock(&clp->cl_lock);
+ /* Per RFC 5661, 12.5.5.2.1.5, bulk recall must be serialized */
+ if (!list_is_singular(&clp->cl_layoutrecalls)) {
+ spin_unlock(&clp->cl_lock);
+ return NFS4ERR_DELAY;
+ }
+ list_for_each_entry(lo, &clp->cl_layouts, layouts) {
+ if ((args->cbl_recall_type == RETURN_FSID) &&
+ memcmp(&NFS_SERVER(lo->inode)->fsid,
+ &args->cbl_fsid, sizeof(struct nfs_fsid)))
+ continue;
+ get_layout_hdr(lo);
+ /* We could list_del(&lo->layouts) here */
+ BUG_ON(!list_empty(&lo->plh_bulk_recall));
+ list_add(&lo->plh_bulk_recall, &recall_list);
+ }
+ spin_unlock(&clp->cl_lock);
+ list_for_each_entry_safe(lo, tmp,
+ &recall_list, plh_bulk_recall) {
+ spin_lock(&lo->inode->i_lock);
+ set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+ nfs4_asynch_forget_layouts(lo, &range, cb_info,
+ &free_me_list);
+ list_del_init(&lo->plh_bulk_recall);
+ spin_unlock(&lo->inode->i_lock);
+ put_layout_hdr(lo->inode);
+ rv = NFS4_OK;
+ }
+ pnfs_free_lseg_list(&free_me_list);
+ }
+ return rv;
+}
+
+static u32 do_callback_layoutrecall(struct nfs_client *clp,
+ struct cb_layoutrecallargs *args)
+{
+ struct pnfs_cb_lrecall_info *new;
+ u32 res;
+
+ dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
+ new = kmalloc(sizeof(*new), GFP_KERNEL);
+ if (!new) {
+ res = NFS4ERR_RESOURCE;
+ goto out;
+ }
+ memcpy(&new->pcl_args, args, sizeof(*args));
+ atomic_set(&new->pcl_count, 1);
+ new->pcl_clp = clp;
+ new->pcl_ino = NULL;
+ spin_lock(&clp->cl_lock);
+ if (clp->cl_cb_lrecall_count >= PNFS_MAX_CB_LRECALLS) {
+ kfree(new);
+ res = NFS4ERR_DELAY;
+ spin_unlock(&clp->cl_lock);
+ goto out;
+ }
+ clp->cl_cb_lrecall_count++;
+ /* Adding to the list will block conflicting LGET activity */
+ list_add_tail(&new->pcl_list, &clp->cl_layoutrecalls);
+ spin_unlock(&clp->cl_lock);
+ res = initiate_layout_draining(new);
+ if (res || atomic_dec_and_test(&new->pcl_count)) {
+ spin_lock(&clp->cl_lock);
+ list_del(&new->pcl_list);
+ clp->cl_cb_lrecall_count--;
+ rpc_wake_up(&clp->cl_rpcwaitq_recall);
+ spin_unlock(&clp->cl_lock);
+ if (res == NFS4_OK) {
+ if (args->cbl_recall_type == RETURN_FILE) {
+ struct pnfs_layout_hdr *lo;
+
+ lo = NFS_I(new->pcl_ino)->layout;
+ spin_lock(&lo->inode->i_lock);
+ lo->plh_block_lgets--;
+ if (!pnfs_layoutgets_blocked(lo, NULL))
+ rpc_wake_up(&NFS_I(lo->inode)->lo_rpcwaitq_stateid);
+ spin_unlock(&lo->inode->i_lock);
+ put_layout_hdr(new->pcl_ino);
+ }
+ res = NFS4ERR_NOMATCHING_LAYOUT;
+ }
+ kfree(new);
+ }
+out:
+ dprintk("%s returning %i\n", __func__, res);
+ return res;
- return status;
}
__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
void *dummy, struct cb_process_state *cps)
{
struct nfs_client *clp;
- struct inode *inode = NULL;
- __be32 res;
- int status;
+ u32 res;
dprintk("%s: -->\n", __func__);
- res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
if (cps->session) /* set in cb_sequence */
clp = cps->session->clp;
- else
+ else {
+ res = NFS4ERR_OP_NOT_IN_SESSION;
goto out;
-
+ }
/* the callback must come from the MDS personality */
- res = cpu_to_be32(NFS4ERR_NOTSUPP);
- if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
+ if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)) {
+ res = NFS4ERR_INVAL;
goto out;
-
- res = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
- /*
- * In the _ALL or _FSID case, we need the inode to get
- * the nfs_server struct.
- */
- inode = nfs_layoutrecall_find_inode(clp, args);
- if (!inode)
- goto out;
- status = pnfs_async_return_layout(clp, inode, args);
- if (status)
- res = cpu_to_be32(NFS4ERR_DELAY);
- iput(inode);
+ }
+ res = do_callback_layoutrecall(clp, args);
out:
- dprintk("%s: exit with status = %d\n", __func__, ntohl(res));
- return res;
+ dprintk("%s: exit with status = %d\n", __func__, res);
+ return cpu_to_be32(res);
+}
+
+static void pnfs_recall_all_layouts(struct nfs_client *clp)
+{
+ struct cb_layoutrecallargs args;
+
+ /* Pretend we got a CB_LAYOUTRECALL(ALL) */
+ memset(&args, 0, sizeof(args));
+ args.cbl_recall_type = RETURN_ALL;
+ /* FIXME we ignore errors, what should we do? */
+ do_callback_layoutrecall(clp, &args);
}
int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
@@ -677,9 +698,7 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy,
flags |= FMODE_WRITE;
if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
&args->craa_type_mask))
- if (pnfs_recall_all_layouts(clp) == -EAGAIN)
- status = cpu_to_be32(NFS4ERR_DELAY);
-
+ pnfs_recall_all_layouts(clp);
if (flags)
nfs_expire_all_delegation_types(clp, flags);
out:
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 3c8c841..dbf43e7 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -158,6 +158,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
clp->cl_machine_cred = cred;
#if defined(CONFIG_NFS_V4_1)
INIT_LIST_HEAD(&clp->cl_layouts);
+ INIT_LIST_HEAD(&clp->cl_layoutrecalls);
+ rpc_init_wait_queue(&clp->cl_rpcwaitq_recall,
+ "NFS client CB_LAYOUTRECALLS");
#endif
nfs_fscache_get_client_cookie(clp);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 72f27cc..8727ade 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1459,7 +1459,8 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
nfsi->delegation = NULL;
nfsi->delegation_state = 0;
init_rwsem(&nfsi->rwsem);
- rpc_init_wait_queue(&nfsi->lo_rpcwaitq, "pNFS Layout");
+ rpc_init_wait_queue(&nfsi->lo_rpcwaitq, "pNFS Layoutreturn");
+ rpc_init_wait_queue(&nfsi->lo_rpcwaitq_stateid, "pNFS Layoutstateid");
nfsi->layout = NULL;
#endif
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index be19e225..87b2b63 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5346,42 +5346,58 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
struct inode *ino = lgp->args.inode;
struct nfs_inode *nfsi = NFS_I(ino);
struct nfs_server *server = NFS_SERVER(ino);
- struct pnfs_layout_segment *lseg;
+ struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
dprintk("--> %s\n", __func__);
+ spin_lock(&clp->cl_lock);
+ if (matches_outstanding_recall(ino, &lgp->args.range)) {
+ rpc_sleep_on(&clp->cl_rpcwaitq_recall, task, NULL);
+ spin_unlock(&clp->cl_lock);
+ return;
+ }
+ spin_unlock(&clp->cl_lock);
+ /* Note the is a race here, where a CB_LAYOUTRECALL can come in
+ * right now covering the LAYOUTGET we are about to send.
+ * However, that is not so catastrophic, and there seems
+ * to be no way to prevent it completely.
+ */
spin_lock(&ino->i_lock);
- lseg = pnfs_has_layout(nfsi->layout, &lgp->args.range);
- if (likely(!lseg)) {
+ if (pnfs_layoutgets_blocked(nfsi->layout, NULL)) {
+ rpc_sleep_on(&nfsi->lo_rpcwaitq_stateid, task, NULL);
spin_unlock(&ino->i_lock);
- dprintk("%s: no lseg found, proceeding\n", __func__);
- if (!nfs4_setup_sequence(server, NULL, &lgp->args.seq_args,
- &lgp->res.seq_res, 0, task))
- rpc_call_start(task);
return;
}
- if (!lseg->valid) {
+	/* This needs to happen after, but be atomic with, the above check
+	 * in order to properly serialize openstateid LAYOUTGETs.
+ */
+ nfsi->layout->plh_outstanding++;
+ spin_unlock(&ino->i_lock);
+
+ if (nfs4_setup_sequence(server, NULL, &lgp->args.seq_args,
+ &lgp->res.seq_res, 0, task)) {
+ spin_lock(&ino->i_lock);
+ nfsi->layout->plh_outstanding--;
spin_unlock(&ino->i_lock);
- dprintk("%s: invalid lseg found, waiting\n", __func__);
- rpc_sleep_on(&nfsi->lo_rpcwaitq, task, NULL);
return;
}
- get_lseg(lseg);
- *lgp->lsegpp = lseg;
- spin_unlock(&ino->i_lock);
- dprintk("%s: valid lseg found, no rpc required\n", __func__);
- rpc_exit(task, NFS4_OK);
+ rpc_call_start(task);
}
static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
{
struct nfs4_layoutget *lgp = calldata;
- struct nfs_server *server = NFS_SERVER(lgp->args.inode);
+ struct inode *ino = lgp->args.inode;
dprintk("--> %s\n", __func__);
- if (!nfs4_sequence_done(task, &lgp->res.seq_res))
+ if (!nfs4_sequence_done(task, &lgp->res.seq_res)) {
+ /* layout code relies on fact that in this case
+ * code falls back to tk_action=call_start, but not
+ * back to rpc_prepare_task, to keep plh_outstanding
+ * correct.
+ */
return;
-
+ }
switch (task->tk_status) {
case 0:
break;
@@ -5390,7 +5406,11 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
task->tk_status = -NFS4ERR_DELAY;
/* Fall through */
default:
- if (nfs4_async_handle_error(task, server, NULL, NULL) == -EAGAIN) {
+ if (nfs4_async_handle_error(task, NFS_SERVER(ino),
+ NULL, NULL) == -EAGAIN) {
+ spin_lock(&ino->i_lock);
+ NFS_I(ino)->layout->plh_outstanding--;
+ spin_unlock(&ino->i_lock);
rpc_restart_call_prepare(task);
return;
}
@@ -5448,13 +5468,20 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
if (IS_ERR(task))
return PTR_ERR(task);
status = nfs4_wait_for_completion_rpc_task(task);
- if (status != 0)
- goto out;
- status = task->tk_status;
- if (status != 0)
- goto out;
- status = pnfs_layout_process(lgp);
-out:
+ if (status == 0)
+ status = task->tk_status;
+ if (status == 0)
+ status = pnfs_layout_process(lgp);
+ else {
+ struct inode *ino = lgp->args.inode;
+ struct pnfs_layout_hdr *lo = NFS_I(ino)->layout;
+
+ spin_lock(&ino->i_lock);
+ lo->plh_outstanding--;
+ if (!pnfs_layoutgets_blocked(lo, NULL))
+ rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq_stateid);
+ spin_unlock(&ino->i_lock);
+ }
rpc_put_task(task);
dprintk("<-- %s status=%d\n", __func__, status);
return status;
@@ -5598,9 +5625,9 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
spin_lock(&lo->inode->i_lock);
if (lrp->res.lrs_present)
- pnfs_set_layout_stateid(lo, &lrp->res.stateid);
+ pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
else
- pnfs_invalidate_layout_stateid(lo);
+ BUG_ON(!list_empty(&lo->segs));
spin_unlock(&lo->inode->i_lock);
}
dprintk("<-- %s\n", __func__);
@@ -5611,8 +5638,18 @@ static void nfs4_layoutreturn_release(void *calldata)
struct nfs4_layoutreturn *lrp = calldata;
dprintk("--> %s return_type %d\n", __func__, lrp->args.return_type);
- if (lrp->args.return_type == RETURN_FILE)
- put_layout_hdr(lrp->args.inode);
+ if (lrp->args.return_type == RETURN_FILE) {
+ struct inode *ino = lrp->args.inode;
+ struct pnfs_layout_hdr *lo = NFS_I(ino)->layout;
+
+ spin_lock(&ino->i_lock);
+ lo->plh_block_lgets--;
+ lo->plh_outstanding--;
+ if (!pnfs_layoutgets_blocked(lo, NULL))
+ rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq_stateid);
+ spin_unlock(&ino->i_lock);
+ put_layout_hdr(ino);
+ }
kfree(calldata);
dprintk("<-- %s\n", __func__);
}
@@ -5641,6 +5678,14 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool issync)
int status = 0;
dprintk("--> %s\n", __func__);
+ if (lrp->args.return_type == RETURN_FILE) {
+ struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
+ /* FIXME we should test for BULK here */
+ spin_lock(&lo->inode->i_lock);
+ BUG_ON(lo->plh_block_lgets == 0);
+ lo->plh_outstanding++;
+ spin_unlock(&lo->inode->i_lock);
+ }
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return PTR_ERR(task);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 00632f6..ceb0d66 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1560,6 +1560,10 @@ static void nfs4_state_manager(struct nfs_client *clp)
nfs_client_return_marked_delegations(clp);
continue;
}
+ if (test_and_clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state)) {
+ nfs_client_return_layouts(clp);
+ continue;
+ }
/* Recall session slots */
if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state)
&& nfs4_has_session(clp)) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 10a6f4a..5208ef7 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1827,13 +1827,14 @@ encode_getdeviceinfo(struct xdr_stream *xdr,
hdr->replen += decode_getdeviceinfo_maxsz;
}
-static void
+static int
encode_layoutget(struct xdr_stream *xdr,
const struct nfs4_layoutget_args *args,
struct compound_hdr *hdr)
{
nfs4_stateid stateid;
__be32 *p;
+ int status;
p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(OP_LAYOUTGET);
@@ -1843,8 +1844,11 @@ encode_layoutget(struct xdr_stream *xdr,
p = xdr_encode_hyper(p, args->range.offset);
p = xdr_encode_hyper(p, args->range.length);
p = xdr_encode_hyper(p, args->minlength);
- pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
- args->ctx->state);
+ status = pnfs_choose_layoutget_stateid(&stateid,
+ NFS_I(args->inode)->layout,
+ args->ctx->state);
+ if (status)
+ return status;
p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE);
*p = cpu_to_be32(args->maxcount);
@@ -1857,6 +1861,7 @@ encode_layoutget(struct xdr_stream *xdr,
args->maxcount);
hdr->nops++;
hdr->replen += decode_layoutget_maxsz;
+ return 0;
}
static int
@@ -2781,12 +2786,15 @@ static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p,
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
};
+ int status;
xdr_init_encode(&xdr, &req->rq_snd_buf, p);
encode_compound_hdr(&xdr, req, &hdr);
encode_sequence(&xdr, &args->seq_args, &hdr);
encode_putfh(&xdr, NFS_FH(args->inode), &hdr);
- encode_layoutget(&xdr, args, &hdr);
+ status = encode_layoutget(&xdr, args, &hdr);
+ if (status)
+ return status;
encode_nops(&hdr);
return 0;
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index ca8be8d..8d04cf2 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -233,7 +233,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
*/
/* Need to hold i_lock if caller does not already hold reference */
-static void
+void
get_layout_hdr(struct pnfs_layout_hdr *lo)
{
atomic_inc(&lo->plh_refcount);
@@ -278,24 +278,29 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
smp_mb();
lseg->valid = true;
lseg->layout = lo;
+ lseg->drain_notification = NULL;
}
static void
_put_lseg_common(struct pnfs_layout_segment *lseg)
{
+ struct inode *ino = lseg->layout->inode;
+
BUG_ON(lseg->valid == true);
list_del(&lseg->fi_list);
if (list_empty(&lseg->layout->segs)) {
struct nfs_client *clp;
- clp = NFS_SERVER(lseg->layout->inode)->nfs_client;
+ clp = NFS_SERVER(ino)->nfs_client;
spin_lock(&clp->cl_lock);
/* List does not take a reference, so no need for put here */
list_del_init(&lseg->layout->layouts);
spin_unlock(&clp->cl_lock);
- pnfs_invalidate_layout_stateid(lseg->layout);
+ clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->layout->plh_flags);
+ if (!pnfs_layoutgets_blocked(lseg->layout, NULL))
+ rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq_stateid);
}
- rpc_wake_up(&NFS_I(lseg->layout->inode)->lo_rpcwaitq);
+ rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq);
}
/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
@@ -325,9 +330,12 @@ put_lseg(struct pnfs_layout_segment *lseg)
atomic_read(&lseg->pls_refcount), lseg->valid);
ino = lseg->layout->inode;
if (atomic_dec_and_lock(&lseg->pls_refcount, &ino->i_lock)) {
+ struct pnfs_cb_lrecall_info *drain_info = lseg->drain_notification;
+
_put_lseg_common(lseg);
spin_unlock(&ino->i_lock);
NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+ notify_drained(drain_info);
/* Matched by get_layout_hdr_locked in pnfs_insert_layout */
put_layout_hdr(ino);
}
@@ -345,7 +353,7 @@ EXPORT_SYMBOL_GPL(put_lseg);
* READ READ true
* READ RW false
*/
-static int
+bool
should_free_lseg(struct pnfs_layout_range *lseg_range,
struct pnfs_layout_range *recall_range)
{
@@ -388,16 +396,19 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list,
dprintk("%s:Return\n", __func__);
}
-static void
+void
pnfs_free_lseg_list(struct list_head *free_me)
{
struct pnfs_layout_segment *lseg, *tmp;
struct inode *ino;
+ struct pnfs_cb_lrecall_info *drain_info;
list_for_each_entry_safe(lseg, tmp, free_me, fi_list) {
BUG_ON(atomic_read(&lseg->pls_refcount) != 0);
ino = lseg->layout->inode;
+ drain_info = lseg->drain_notification;
NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+ notify_drained(drain_info);
/* Matched by get_layout_hdr_locked in pnfs_insert_layout */
put_layout_hdr(ino);
}
@@ -453,31 +464,32 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
}
}
-/* update lo->stateid with new if is more recent
- *
- * lo->stateid could be the open stateid, in which case we just use what given.
- */
+/* update lo->stateid with new if is more recent */
void
-pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
- const nfs4_stateid *new)
+pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
+ bool update_barrier)
{
- nfs4_stateid *old = &lo->stateid;
- bool overwrite = false;
+ u32 oldseq, newseq;
assert_spin_locked(&lo->inode->i_lock);
- if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags) ||
- memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other)))
- overwrite = true;
- else {
- u32 oldseq, newseq;
-
- oldseq = be32_to_cpu(old->stateid.seqid);
- newseq = be32_to_cpu(new->stateid.seqid);
- if ((int)(newseq - oldseq) > 0)
- overwrite = true;
+ oldseq = be32_to_cpu(lo->stateid.stateid.seqid);
+ newseq = be32_to_cpu(new->stateid.seqid);
+ if ((int)(newseq - oldseq) > 0) {
+ memcpy(&lo->stateid, &new->stateid, sizeof(new->stateid));
+ if (update_barrier)
+ lo->plh_barrier = be32_to_cpu(new->stateid.seqid);
+ else {
+ /* Because of wraparound, we want to keep the barrier
+ * "close" to the current seqids. It needs to be
+ * within 2**31 to count as "behind", so if it
+			 * gets too near that limit, give us a little leeway
+ * and bring it to within 2**30.
+ * NOTE - and yes, this is all unsigned arithmetic.
+ */
+ if (unlikely((newseq - lo->plh_barrier) > (3 << 29)))
+ lo->plh_barrier = newseq - (1 << 30);
+ }
}
- if (overwrite)
- memcpy(&old->stateid, &new->stateid, sizeof(new->stateid));
}
/* Layoutreturn may use an invalid stateid, just copy what is there */
@@ -487,13 +499,21 @@ void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
}
-void
-pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
- struct nfs4_state *open_state)
+int
+pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
+ struct nfs4_state *open_state)
{
+ int status = 0;
+
dprintk("--> %s\n", __func__);
spin_lock(&lo->inode->i_lock);
- if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags)) {
+ if (lo->plh_block_lgets ||
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags)) {
+ /* We avoid -EAGAIN, as that has special meaning to
+ * some callers.
+ */
+ status = -NFS4ERR_LAYOUTTRYLATER;
+ } else if (list_empty(&lo->segs)) {
int seq;
do {
@@ -501,12 +521,11 @@ pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
memcpy(dst->data, open_state->stateid.data,
sizeof(open_state->stateid.data));
} while (read_seqretry(&open_state->seqlock, seq));
- set_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags);
} else
- memcpy(dst->data, lo->stateid.data,
- sizeof(lo->stateid.data));
+ memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
spin_unlock(&lo->inode->i_lock);
dprintk("<-- %s\n", __func__);
+ return status;
}
/*
@@ -573,6 +592,28 @@ has_layout_to_return(struct pnfs_layout_hdr *lo,
return out;
}
+void nfs4_asynch_forget_layouts(struct pnfs_layout_hdr *lo,
+ struct pnfs_layout_range *range,
+ struct pnfs_cb_lrecall_info *drain_info,
+ struct list_head *tmp_list)
+{
+ struct pnfs_layout_segment *lseg, *tmp;
+
+ assert_spin_locked(&lo->inode->i_lock);
+ list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
+ if (should_free_lseg(&lseg->range, range)) {
+ /* FIXME - need to change to something like a
+ * notification bitmap to remove the restriction
+ * of only being able to process a single
+ * CB_LAYOUTRECALL at a time.
+ */
+ BUG_ON(lseg->drain_notification);
+ lseg->drain_notification = drain_info;
+ atomic_inc(&drain_info->pcl_count);
+ mark_lseg_invalid(lseg, tmp_list);
+ }
+}
+
/* Return true if there is layout based io in progress in the given range.
* Assumes range has already been marked invalid, and layout marked to
* prevent any new lseg from being inserted.
@@ -661,6 +702,7 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
goto out;
}
+ lo->plh_block_lgets++;
list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
if (should_free_lseg(&lseg->range, &arg))
mark_lseg_invalid(lseg, &tmp_list);
@@ -717,14 +759,6 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
dprintk("%s:Begin\n", __func__);
assert_spin_locked(&lo->inode->i_lock);
- if (list_empty(&lo->segs)) {
- struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client;
-
- spin_lock(&clp->cl_lock);
- BUG_ON(!list_empty(&lo->layouts));
- list_add_tail(&lo->layouts, &clp->cl_layouts);
- spin_unlock(&clp->cl_lock);
- }
list_for_each_entry(lp, &lo->segs, fi_list) {
if (cmp_layout(&lp->range, &lseg->range) > 0)
continue;
@@ -741,6 +775,9 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
}
if (!found) {
list_add_tail(&lseg->fi_list, &lo->segs);
+ if (list_is_singular(&lo->segs) &&
+ !pnfs_layoutgets_blocked(lo, NULL))
+ rpc_wake_up(&NFS_I(lo->inode)->lo_rpcwaitq_stateid);
dprintk("%s: inserted lseg %p "
"iomode %d offset %llu length %llu at tail\n",
__func__, lseg, lseg->range.iomode,
@@ -762,6 +799,7 @@ alloc_init_layout_hdr(struct inode *ino)
atomic_set(&lo->plh_refcount, 1);
INIT_LIST_HEAD(&lo->layouts);
INIT_LIST_HEAD(&lo->segs);
+ INIT_LIST_HEAD(&lo->plh_bulk_recall);
lo->inode = ino;
return lo;
}
@@ -849,6 +887,7 @@ pnfs_update_layout(struct inode *ino,
.length = NFS4_MAX_UINT64,
};
struct nfs_inode *nfsi = NFS_I(ino);
+ struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
struct pnfs_layout_hdr *lo;
struct pnfs_layout_segment *lseg = NULL;
@@ -884,9 +923,28 @@ pnfs_update_layout(struct inode *ino,
goto out_unlock;
get_layout_hdr(lo); /* Matched in pnfs_layoutget_release */
+ if (list_empty(&lo->segs)) {
+ /* The lo must be on the clp list if there is any
+ * chance of a CB_LAYOUTRECALL(FILE) coming in.
+ */
+ spin_lock(&clp->cl_lock);
+ BUG_ON(!list_empty(&lo->layouts));
+ list_add_tail(&lo->layouts, &clp->cl_layouts);
+ spin_unlock(&clp->cl_lock);
+ }
spin_unlock(&ino->i_lock);
lseg = send_layoutget(lo, ctx, &arg);
+ if (!lseg) {
+ spin_lock(&ino->i_lock);
+ if (list_empty(&lo->segs)) {
+ spin_lock(&clp->cl_lock);
+ list_del_init(&lo->layouts);
+ spin_unlock(&clp->cl_lock);
+ clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+ }
+ spin_unlock(&ino->i_lock);
+ }
out:
dprintk("%s end, state 0x%lx lseg %p\n", __func__,
nfsi->layout->plh_flags, lseg);
@@ -896,6 +954,18 @@ out_unlock:
goto out;
}
+bool
+pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid)
+{
+ assert_spin_locked(&lo->inode->i_lock);
+ if ((stateid) &&
+ (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
+ return true;
+ return lo->plh_block_lgets ||
+ test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
+ (list_empty(&lo->segs) && lo->plh_outstanding);
+}
+
int
pnfs_layout_process(struct nfs4_layoutget *lgp)
{
@@ -903,6 +973,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
struct nfs4_layoutget_res *res = &lgp->res;
struct pnfs_layout_segment *lseg;
struct inode *ino = lo->inode;
+ struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
int status = 0;
/* Inject layout blob into I/O device driver */
@@ -914,10 +985,25 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
status = PTR_ERR(lseg);
dprintk("%s: Could not allocate layout: error %d\n",
__func__, status);
+ spin_lock(&ino->i_lock);
goto out;
}
spin_lock(&ino->i_lock);
+ /* decrement needs to be done before call to pnfs_layoutget_blocked */
+ lo->plh_outstanding--;
+ spin_lock(&clp->cl_lock);
+ if (matches_outstanding_recall(ino, &res->range)) {
+ spin_unlock(&clp->cl_lock);
+ dprintk("%s forget reply due to recall\n", __func__);
+ goto out_forget_reply;
+ }
+ spin_unlock(&clp->cl_lock);
+
+ if (pnfs_layoutgets_blocked(lo, &res->stateid)) {
+ dprintk("%s forget reply due to state\n", __func__);
+ goto out_forget_reply;
+ }
init_lseg(lo, lseg);
lseg->range = res->range;
get_lseg(lseg);
@@ -933,10 +1019,19 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
}
/* Done processing layoutget. Set the layout stateid */
- pnfs_set_layout_stateid(lo, &res->stateid);
- spin_unlock(&ino->i_lock);
+ pnfs_set_layout_stateid(lo, &res->stateid, false);
out:
+ if (!pnfs_layoutgets_blocked(lo, NULL))
+ rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq_stateid);
+ spin_unlock(&ino->i_lock);
return status;
+
+out_forget_reply:
+ spin_unlock(&ino->i_lock);
+ lseg->layout = lo;
+ NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
+ spin_lock(&ino->i_lock);
+ goto out;
}
void
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e631487..810714a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -31,6 +31,7 @@
#define FS_NFS_PNFS_H
#include <linux/nfs_page.h>
+#include "callback.h" /* for cb_layoutrecallargs */
struct pnfs_layout_segment {
struct list_head fi_list;
@@ -38,6 +39,7 @@ struct pnfs_layout_segment {
atomic_t pls_refcount;
bool valid;
struct pnfs_layout_hdr *layout;
+ struct pnfs_cb_lrecall_info *drain_notification;
};
enum pnfs_try_status {
@@ -52,7 +54,7 @@ enum pnfs_try_status {
enum {
NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */
NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
- NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */
+ NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
NFS_LAYOUT_NEED_LCOMMIT, /* LAYOUTCOMMIT needed */
};
@@ -94,9 +96,13 @@ struct pnfs_layoutdriver_type {
struct pnfs_layout_hdr {
atomic_t plh_refcount;
struct list_head layouts; /* other client layouts */
+ struct list_head plh_bulk_recall; /* clnt list of bulk recalls */
struct list_head segs; /* layout segments list */
int roc_iomode;/* return on close iomode, 0=none */
nfs4_stateid stateid;
+ unsigned long plh_outstanding; /* number of RPCs out */
+ unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */
+ u32 plh_barrier; /* ignore lower seqids */
unsigned long plh_flags;
struct rpc_cred *cred; /* layoutcommit credential */
/* DH: These vars keep track of the maximum write range
@@ -117,6 +123,14 @@ struct pnfs_device {
unsigned int pglen;
};
+struct pnfs_cb_lrecall_info {
+ struct list_head pcl_list; /* hook into cl_layoutrecalls list */
+ atomic_t pcl_count;
+ struct nfs_client *pcl_clp;
+ struct inode *pcl_ino;
+ struct cb_layoutrecallargs pcl_args;
+};
+
/*
* Device ID RCU cache. A device ID is unique per client ID and layout type.
*/
@@ -175,7 +189,10 @@ extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool wait);
/* pnfs.c */
+void get_layout_hdr(struct pnfs_layout_hdr *lo);
void put_lseg(struct pnfs_layout_segment *lseg);
+bool should_free_lseg(struct pnfs_layout_range *lseg_range,
+ struct pnfs_layout_range *recall_range);
struct pnfs_layout_segment *
pnfs_has_layout(struct pnfs_layout_hdr *lo, struct pnfs_layout_range *range);
struct pnfs_layout_segment *
@@ -200,15 +217,25 @@ enum pnfs_try_status pnfs_try_to_commit(struct nfs_write_data *,
void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
struct nfs_open_context *, struct list_head *);
void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
+bool pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid);
int pnfs_layout_process(struct nfs4_layoutget *lgp);
+void pnfs_free_lseg_list(struct list_head *tmp_list);
void pnfs_destroy_layout(struct nfs_inode *);
void pnfs_destroy_all_layouts(struct nfs_client *);
void put_layout_hdr(struct inode *inode);
void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
- const nfs4_stateid *new);
+ const nfs4_stateid *new,
+ bool update_barrier);
void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo);
-void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
- struct nfs4_state *open_state);
+int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
+ struct pnfs_layout_hdr *lo,
+ struct nfs4_state *open_state);
+void nfs4_asynch_forget_layouts(struct pnfs_layout_hdr *lo,
+ struct pnfs_layout_range *range,
+ struct pnfs_cb_lrecall_info *drain_info,
+ struct list_head *tmp_list);
+/* FIXME - this should be in callback.h, but pnfs_cb_lrecall_info needs to be there too */
+extern void notify_drained(struct pnfs_cb_lrecall_info *d);
static inline bool
has_layout(struct nfs_inode *nfsi)
@@ -222,12 +249,6 @@ static inline int lo_fail_bit(u32 iomode)
NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
}
-static inline void pnfs_invalidate_layout_stateid(struct pnfs_layout_hdr *lo)
-{
- assert_spin_locked(&lo->inode->i_lock);
- clear_bit(NFS_LAYOUT_STATEID_SET, &lo->plh_flags);
-}
-
static inline void get_lseg(struct pnfs_layout_segment *lseg)
{
atomic_inc(&lseg->pls_refcount);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d8bfa42..061d81a 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -191,6 +191,7 @@ struct nfs_inode {
/* pNFS layout information */
struct rpc_wait_queue lo_rpcwaitq;
+ struct rpc_wait_queue lo_rpcwaitq_stateid;
struct pnfs_layout_hdr *layout;
#endif /* CONFIG_NFS_V4*/
#ifdef CONFIG_NFS_FSCACHE
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 3cae408..80dcc00 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -83,6 +83,10 @@ struct nfs_client {
u32 cl_exchange_flags;
struct nfs4_session *cl_session; /* sharred session */
struct list_head cl_layouts;
+ struct list_head cl_layoutrecalls;
+ unsigned long cl_cb_lrecall_count;
+#define PNFS_MAX_CB_LRECALLS (1)
+ struct rpc_wait_queue cl_rpcwaitq_recall;
struct pnfs_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
#endif /* CONFIG_NFS_V4_1 */
--
1.7.2.1
next prev parent reply other threads:[~2010-11-04 15:22 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-11-04 15:22 [PATCH 00/18] rewrite of CB_LAYOUTRECALL and layoutstate code Fred Isaman
2010-11-04 15:22 ` [PATCH 01/18] NFSv4.1: Callback share session between ops Fred Isaman
2010-11-10 13:37 ` Benny Halevy
2010-11-10 13:41 ` [PATCH] SQUASHME: pnfs-submit: fixups for nfsv4.1 callbacks Benny Halevy
2010-11-10 14:53 ` Fred Isaman
2010-11-04 15:22 ` [PATCH 02/18] pnfs-submit: change pnfs_layout_segment refcounting from kref to atomic_t Fred Isaman
2010-11-04 15:22 ` [PATCH 03/18] pnfs-submit: remove _pnfs_can_return_lseg call from pnfs_clear_lseg_list Fred Isaman
2010-11-10 14:35 ` Benny Halevy
2010-11-10 14:46 ` Fred Isaman
2010-11-11 7:00 ` Benny Halevy
2010-11-11 13:52 ` Fred Isaman
2010-11-11 14:39 ` Benny Halevy
2010-11-04 15:22 ` [PATCH 04/18] pnfs-submit: change layout state seqlock to a spinlock Fred Isaman
2010-11-11 15:00 ` Benny Halevy
2010-11-11 15:09 ` Fred Isaman
2010-11-04 15:22 ` [PATCH 05/18] pnfs-submit: layoutreturn' rpc_call_op functions need to handle bulk returns Fred Isaman
2010-11-11 15:01 ` Benny Halevy
2010-11-04 15:22 ` [PATCH 06/18] pnfs_submit: nfs4_layoutreturn_release should not reference results Fred Isaman
2010-11-11 15:16 ` Benny Halevy
2010-11-04 15:22 ` [PATCH 07/18] pnfs-submit: reorganize struct cb_layoutrecallargs Fred Isaman
2010-11-04 15:22 ` [PATCH 08/18] pnfs-submit: rename lo->state to lo->plh_flags Fred Isaman
2010-11-04 15:22 ` [PATCH 09/18] pnfs-submit: change pnfs_layout_hdr refcount to atomic_t Fred Isaman
2010-11-04 15:22 ` [PATCH 10/18] pnfs-submit: argument to should_free_lseg changed from lseg to range Fred Isaman
2010-11-04 15:22 ` [PATCH 11/18] pnfs-submit: remove unnecessary field lgp->status Fred Isaman
2010-11-04 15:22 ` [PATCH 12/18] pnfs-submit: remove RPC_ASSASSINATED(task) checks Fred Isaman
2010-11-04 15:22 ` Fred Isaman [this message]
2010-11-04 15:22 ` [PATCH 14/18] pnfs-submit: increase number of outstanding CB_LAYOUTRECALLS we can handle Fred Isaman
2010-11-04 15:22 ` [PATCH 15/18] pnfs-submit: roc add layoutreturn op to close compound Fred Isaman
2010-11-04 15:22 ` [PATCH 16/18] pnfs-submit refactor layoutcommit xdr structures Fred Isaman
2010-11-04 15:22 ` [PATCH 17/18] pnfs-submit refactor pnfs_layoutcommit_setup Fred Isaman
2010-11-04 15:22 ` [PATCH 18/18] pnfs_submit: roc add layoutcommit op to close compound Fred Isaman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1288884151-11128-14-git-send-email-iisaman@netapp.com \
--to=iisaman@netapp.com \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).