* two pNFS server fixes for 4.1
@ 2015-04-26 15:48 Christoph Hellwig
2015-04-26 15:48 ` [PATCH 1/2] nfsd/blocklayout: pretend we can send deviceid notifications Christoph Hellwig
2015-04-26 15:48 ` [PATCH 2/2] nfsd: fix pNFS return on close semantics Christoph Hellwig
0 siblings, 2 replies; 5+ messages in thread
From: Christoph Hellwig @ 2015-04-26 15:48 UTC (permalink / raw)
To: J. Bruce Fields; +Cc: linux-nfs
The first one is a workaround for a regression introduced in the Linux
client in the current pull requesdt. The second is standards compliance
fix to comply with the latest rfc5661 errata on return on close semantics.
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/2] nfsd/blocklayout: pretend we can send deviceid notifications
2015-04-26 15:48 two pNFS server fixes for 4.1 Christoph Hellwig
@ 2015-04-26 15:48 ` Christoph Hellwig
2015-04-26 15:48 ` [PATCH 2/2] nfsd: fix pNFS return on close semantics Christoph Hellwig
1 sibling, 0 replies; 5+ messages in thread
From: Christoph Hellwig @ 2015-04-26 15:48 UTC (permalink / raw)
To: J. Bruce Fields; +Cc: linux-nfs, stable
Commit df52699e4fcef ("NFSv4.1: Don't cache deviceids that have no
notifications") causes the Linux NFS client stop caching deviceid
unless a server pretends to support deviceid notifications. While
this behavior is stupid and the language around this area in rfc5661
is a mess carified by an errata that I submittted, Trond insists
on this behavior. Not caching deviceids degrades block layout
performance massively as a GETDEVICEINFO is fairly expensive.
So add this hack to make the Linux client happy again.
Cc: stable@vger.kernel.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/nfsd/blocklayout.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 03d647b..58298fa6 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -181,6 +181,16 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
}
const struct nfsd4_layout_ops bl_layout_ops = {
+ /*
+ * Pretend that we send notification to the client. This is a blatant
+ * lie to force recent Linux clients to cache our device IDs.
+ * We rarely ever change the device ID, so the harm of leaking deviceids
+ * for a while isn't too bad. Unfortunately RFC5661 is a complete mess
+ * in this regard, but I filed errata 4119 for this a while ago, and
+ * hopefully the Linux client will eventually start caching deviceids
+ * without this again.
+ */
+ .notify_types = NOTIFY_DEVICEID4_DELETE,
.proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo,
.encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
.proc_layoutget = nfsd4_block_proc_layoutget,
--
1.9.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/2] nfsd: fix pNFS return on close semantics
2015-04-26 15:48 two pNFS server fixes for 4.1 Christoph Hellwig
2015-04-26 15:48 ` [PATCH 1/2] nfsd/blocklayout: pretend we can send deviceid notifications Christoph Hellwig
@ 2015-04-26 15:48 ` Christoph Hellwig
1 sibling, 0 replies; 5+ messages in thread
From: Christoph Hellwig @ 2015-04-26 15:48 UTC (permalink / raw)
To: J. Bruce Fields; +Cc: linux-nfs, Sachin Bhamare, Jeff Layton
From: Sachin Bhamare <sachin.bhamare@primarydata.com>
In case of forgetful clients, server should return the layouts to the
file system on 'last close' of a file (assuming that there are no
delegations outstanding to that particular client) or on delegreturn
(assuming that there are no opens on a file from that particular client).
This patch introduces infrastructure to maintain per-client opens and
delegation counters on per-file basis.
[hch: ported to the mainline pNFS support, merged various fixes from Jeff]
Signed-off-by: Sachin Bhamare <sachin.bhamare@primarydata.com>
Signed-off-by: Jeff Layton <jlayton@primarydata.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/nfsd/nfs4state.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++----
fs/nfsd/state.h | 14 +++++++
fs/nfsd/xdr4.h | 1 +
3 files changed, 125 insertions(+), 7 deletions(-)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d42786e..c7c7287 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -281,6 +281,7 @@ put_nfs4_file(struct nfs4_file *fi)
if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
hlist_del_rcu(&fi->fi_hash);
spin_unlock(&state_lock);
+ WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate));
WARN_ON_ONCE(!list_empty(&fi->fi_delegations));
call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu);
}
@@ -471,6 +472,87 @@ static void nfs4_file_put_access(struct nfs4_file *fp, u32 access)
__nfs4_file_put_access(fp, O_RDONLY);
}
+/*
+ * Allocate a new open/delegation state counter. This is needed for
+ * pNFS for proper return on close semantics. For v4.0 however, it's
+ * not needed.
+ */
+static struct nfs4_clnt_odstate *
+alloc_clnt_odstate(struct nfs4_client *clp)
+{
+ struct nfs4_clnt_odstate *co;
+
+ co = kzalloc(sizeof(struct nfs4_clnt_odstate), GFP_KERNEL);
+ if (co) {
+ co->co_client = clp;
+ atomic_set(&co->co_odcount, 1);
+ }
+ return co;
+}
+
+static void
+hash_clnt_odstate_locked(struct nfs4_clnt_odstate *co)
+{
+ struct nfs4_file *fp = co->co_file;
+
+ lockdep_assert_held(&fp->fi_lock);
+ list_add(&co->co_perfile, &fp->fi_clnt_odstate);
+}
+
+static inline void
+get_clnt_odstate(struct nfs4_clnt_odstate *co)
+{
+ /* This is always NULL in v4.0 */
+ if (co)
+ atomic_inc(&co->co_odcount);
+}
+
+static void
+put_clnt_odstate(struct nfs4_clnt_odstate *co)
+{
+ struct nfs4_file *fp;
+
+ /* This is always NULL in v4.0 */
+ if (!co)
+ return;
+
+ fp = co->co_file;
+ if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) {
+ list_del(&co->co_perfile);
+ spin_unlock(&fp->fi_lock);
+
+ nfsd4_return_all_file_layouts(co->co_client, fp);
+ kfree(co);
+ }
+}
+
+static struct nfs4_clnt_odstate *
+find_or_hash_clnt_odstate(struct nfs4_file *fp, struct nfs4_clnt_odstate *new)
+{
+ struct nfs4_clnt_odstate *co;
+ struct nfs4_client *cl;
+
+ /* This is always NULL in v4.0 */
+ if (!new)
+ return NULL;
+
+ cl = new->co_client;
+
+ spin_lock(&fp->fi_lock);
+ list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) {
+ if (co->co_client == cl) {
+ get_clnt_odstate(co);
+ goto out;
+ }
+ }
+ co = new;
+ co->co_file = fp;
+ hash_clnt_odstate_locked(new);
+out:
+ spin_unlock(&fp->fi_lock);
+ return co;
+}
+
struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
struct kmem_cache *slab)
{
@@ -606,7 +688,8 @@ static void block_delegations(struct knfsd_fh *fh)
}
static struct nfs4_delegation *
-alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
+alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
+ struct nfs4_clnt_odstate *odstate)
{
struct nfs4_delegation *dp;
long n;
@@ -631,6 +714,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
INIT_LIST_HEAD(&dp->dl_perfile);
INIT_LIST_HEAD(&dp->dl_perclnt);
INIT_LIST_HEAD(&dp->dl_recall_lru);
+ dp->dl_clnt_odstate = odstate;
+ get_clnt_odstate(odstate);
dp->dl_type = NFS4_OPEN_DELEGATE_READ;
dp->dl_retries = 1;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
@@ -714,6 +799,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
spin_lock(&state_lock);
unhash_delegation_locked(dp);
spin_unlock(&state_lock);
+ put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
nfs4_put_stid(&dp->dl_stid);
}
@@ -724,6 +810,7 @@ static void revoke_delegation(struct nfs4_delegation *dp)
WARN_ON(!list_empty(&dp->dl_recall_lru));
+ put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
if (clp->cl_minorversion == 0)
@@ -933,6 +1020,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
{
struct nfs4_ol_stateid *stp = openlockstateid(stid);
+ put_clnt_odstate(stp->st_clnt_odstate);
release_all_access(stp);
if (stp->st_stateowner)
nfs4_put_stateowner(stp->st_stateowner);
@@ -1634,6 +1722,7 @@ __destroy_client(struct nfs4_client *clp)
while (!list_empty(&reaplist)) {
dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
+ put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
nfs4_put_stid(&dp->dl_stid);
}
@@ -3057,6 +3146,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
spin_lock_init(&fp->fi_lock);
INIT_LIST_HEAD(&fp->fi_stateids);
INIT_LIST_HEAD(&fp->fi_delegations);
+ INIT_LIST_HEAD(&fp->fi_clnt_odstate);
fh_copy_shallow(&fp->fi_fhandle, fh);
fp->fi_deleg_file = NULL;
fp->fi_had_conflict = false;
@@ -3581,6 +3671,13 @@ alloc_stateid:
open->op_stp = nfs4_alloc_open_stateid(clp);
if (!open->op_stp)
return nfserr_jukebox;
+
+ if (nfsd4_has_session(cstate)) {
+ open->op_odstate = alloc_clnt_odstate(clp);
+ if (!open->op_odstate)
+ return nfserr_jukebox;
+ }
+
return nfs_ok;
}
@@ -3869,7 +3966,7 @@ out_fput:
static struct nfs4_delegation *
nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
- struct nfs4_file *fp)
+ struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
{
int status;
struct nfs4_delegation *dp;
@@ -3877,7 +3974,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
if (fp->fi_had_conflict)
return ERR_PTR(-EAGAIN);
- dp = alloc_init_deleg(clp, fh);
+ dp = alloc_init_deleg(clp, fh, odstate);
if (!dp)
return ERR_PTR(-ENOMEM);
@@ -3903,6 +4000,7 @@ out_unlock:
spin_unlock(&state_lock);
out:
if (status) {
+ put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_put_stid(&dp->dl_stid);
return ERR_PTR(status);
}
@@ -3980,7 +4078,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
default:
goto out_no_deleg;
}
- dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file);
+ dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate);
if (IS_ERR(dp))
goto out_no_deleg;
@@ -4069,6 +4167,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
release_open_stateid(stp);
goto out;
}
+
+ stp->st_clnt_odstate = find_or_hash_clnt_odstate(fp,
+ open->op_odstate);
+ if (stp->st_clnt_odstate == open->op_odstate)
+ open->op_odstate = NULL;
}
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
@@ -4129,6 +4232,8 @@ void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
kmem_cache_free(file_slab, open->op_file);
if (open->op_stp)
nfs4_put_stid(&open->op_stp->st_stid);
+ if (open->op_odstate)
+ kfree(open->op_odstate);
}
__be32
@@ -4852,9 +4957,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
- nfsd4_return_all_file_layouts(stp->st_stateowner->so_client,
- stp->st_stid.sc_file);
-
nfsd4_close_open_stateid(stp);
/* put reference from nfs4_preprocess_seqid_op */
@@ -6488,6 +6590,7 @@ nfs4_state_shutdown_net(struct net *net)
list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
list_del_init(&dp->dl_recall_lru);
+ put_clnt_odstate(dp->dl_clnt_odstate);
nfs4_put_deleg_lease(dp->dl_stid.sc_file);
nfs4_put_stid(&dp->dl_stid);
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 4f3bfeb..bde45d9 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -126,6 +126,7 @@ struct nfs4_delegation {
struct list_head dl_perfile;
struct list_head dl_perclnt;
struct list_head dl_recall_lru; /* delegation recalled */
+ struct nfs4_clnt_odstate *dl_clnt_odstate;
u32 dl_type;
time_t dl_time;
/* For recall: */
@@ -465,6 +466,17 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so)
}
/*
+ * Per-client state indicating no. of opens and outstanding delegations
+ * on a file from a particular client.'od' stands for 'open & delegation'
+ */
+struct nfs4_clnt_odstate {
+ struct nfs4_client *co_client;
+ struct nfs4_file *co_file;
+ struct list_head co_perfile;
+ atomic_t co_odcount;
+};
+
+/*
* nfs4_file: a file opened by some number of (open) nfs4_stateowners.
*
* These objects are global. nfsd keeps one instance of a nfs4_file per
@@ -485,6 +497,7 @@ struct nfs4_file {
struct list_head fi_delegations;
struct rcu_head fi_rcu;
};
+ struct list_head fi_clnt_odstate;
/* One each for O_RDONLY, O_WRONLY, O_RDWR: */
struct file * fi_fds[3];
/*
@@ -526,6 +539,7 @@ struct nfs4_ol_stateid {
struct list_head st_perstateowner;
struct list_head st_locks;
struct nfs4_stateowner * st_stateowner;
+ struct nfs4_clnt_odstate * st_clnt_odstate;
unsigned char st_access_bmap;
unsigned char st_deny_bmap;
struct nfs4_ol_stateid * st_openstp;
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 556ce2e..2c38cce 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -247,6 +247,7 @@ struct nfsd4_open {
struct nfs4_openowner *op_openowner; /* used during processing */
struct nfs4_file *op_file; /* used during processing */
struct nfs4_ol_stateid *op_stp; /* used during processing */
+ struct nfs4_clnt_odstate *op_odstate; /* used during processing */
struct nfs4_acl *op_acl;
struct xdr_netobj op_label;
};
--
1.9.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 1/2] nfsd/blocklayout: pretend we can send deviceid notifications
2015-04-27 12:50 two pNFS server fixes for 4.1, V2 Christoph Hellwig
@ 2015-04-27 12:50 ` Christoph Hellwig
2015-04-27 20:36 ` J. Bruce Fields
0 siblings, 1 reply; 5+ messages in thread
From: Christoph Hellwig @ 2015-04-27 12:50 UTC (permalink / raw)
To: J. Bruce Fields; +Cc: linux-nfs, stable
Commit df52699e4fcef ("NFSv4.1: Don't cache deviceids that have no
notifications") causes the Linux NFS client stop caching deviceid
unless a server pretends to support deviceid notifications. While
this behavior is stupid and the language around this area in rfc5661
is a mess carified by an errata that I submittted, Trond insists
on this behavior. Not caching deviceids degrades block layout
performance massively as a GETDEVICEINFO is fairly expensive.
So add this hack to make the Linux client happy again.
Cc: stable@vger.kernel.org
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
fs/nfsd/blocklayout.c | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 03d647b..cdefaa3 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -181,6 +181,17 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
}
const struct nfsd4_layout_ops bl_layout_ops = {
+ /*
+ * Pretend that we send notification to the client. This is a blatant
+ * lie to force recent Linux clients to cache our device IDs.
+ * We rarely ever change the device ID, so the harm of leaking deviceids
+ * for a while isn't too bad. Unfortunately RFC5661 is a complete mess
+ * in this regard, but I filed errata 4119 for this a while ago, and
+ * hopefully the Linux client will eventually start caching deviceids
+ * without this again.
+ */
+ .notify_types =
+ NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
.proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo,
.encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
.proc_layoutget = nfsd4_block_proc_layoutget,
--
1.9.1
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] nfsd/blocklayout: pretend we can send deviceid notifications
2015-04-27 12:50 ` [PATCH 1/2] nfsd/blocklayout: pretend we can send deviceid notifications Christoph Hellwig
@ 2015-04-27 20:36 ` J. Bruce Fields
0 siblings, 0 replies; 5+ messages in thread
From: J. Bruce Fields @ 2015-04-27 20:36 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-nfs, stable
On Mon, Apr 27, 2015 at 02:50:13PM +0200, Christoph Hellwig wrote:
> Commit df52699e4fcef ("NFSv4.1: Don't cache deviceids that have no
> notifications") causes the Linux NFS client stop caching deviceid
> unless a server pretends to support deviceid notifications. While
> this behavior is stupid and the language around this area in rfc5661
> is a mess carified by an errata that I submittted, Trond insists
> on this behavior. Not caching deviceids degrades block layout
> performance massively as a GETDEVICEINFO is fairly expensive.
>
> So add this hack to make the Linux client happy again.
OK, thanks.--b.
>
> Cc: stable@vger.kernel.org
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
> fs/nfsd/blocklayout.c | 11 +++++++++++
> 1 file changed, 11 insertions(+)
>
> diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> index 03d647b..cdefaa3 100644
> --- a/fs/nfsd/blocklayout.c
> +++ b/fs/nfsd/blocklayout.c
> @@ -181,6 +181,17 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
> }
>
> const struct nfsd4_layout_ops bl_layout_ops = {
> + /*
> + * Pretend that we send notification to the client. This is a blatant
> + * lie to force recent Linux clients to cache our device IDs.
> + * We rarely ever change the device ID, so the harm of leaking deviceids
> + * for a while isn't too bad. Unfortunately RFC5661 is a complete mess
> + * in this regard, but I filed errata 4119 for this a while ago, and
> + * hopefully the Linux client will eventually start caching deviceids
> + * without this again.
> + */
> + .notify_types =
> + NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
> .proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo,
> .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
> .proc_layoutget = nfsd4_block_proc_layoutget,
> --
> 1.9.1
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2015-04-27 20:36 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-04-26 15:48 two pNFS server fixes for 4.1 Christoph Hellwig
2015-04-26 15:48 ` [PATCH 1/2] nfsd/blocklayout: pretend we can send deviceid notifications Christoph Hellwig
2015-04-26 15:48 ` [PATCH 2/2] nfsd: fix pNFS return on close semantics Christoph Hellwig
-- strict thread matches above, loose matches on Subject: below --
2015-04-27 12:50 two pNFS server fixes for 4.1, V2 Christoph Hellwig
2015-04-27 12:50 ` [PATCH 1/2] nfsd/blocklayout: pretend we can send deviceid notifications Christoph Hellwig
2015-04-27 20:36 ` J. Bruce Fields
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).