* [PATCH 01/15] SUNRPC: Fix xs_setup_bc_tcp() @ 2010-05-13 21:08 Trond Myklebust 2010-05-13 21:08 ` [PATCH 02/15] NFSv4: Don't use GFP_KERNEL allocations in state recovery Trond Myklebust 0 siblings, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw) To: linux-nfs It is a BUG for anybody to call this function without setting args->bc_xprt. Trying to return an error value is just wrong, since the user cannot fix this: it is a programming error, not a user error. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- net/sunrpc/xprtsock.c | 3 --- 1 files changed, 0 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 3d1dcdf..beefa7a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2442,9 +2442,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) struct sock_xprt *transport; struct svc_sock *bc_sock; - if (!args->bc_xprt) - ERR_PTR(-EINVAL); - xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 02/15] NFSv4: Don't use GFP_KERNEL allocations in state recovery 2010-05-13 21:08 [PATCH 01/15] SUNRPC: Fix xs_setup_bc_tcp() Trond Myklebust @ 2010-05-13 21:08 ` Trond Myklebust 2010-05-13 21:08 ` [PATCH 03/15] NFS: Don't use GFP_KERNEL in rpcsec_gss downcalls Trond Myklebust 0 siblings, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw) To: linux-nfs We do not want to have the state recovery thread kick off and wait for a memory reclaim, since that may deadlock when the writebacks end up waiting for the state recovery thread to complete. The safe thing is therefore to use GFP_NOFS in all open, close, delegation return, lock, etc. operations that may be called by the state recovery thread. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/delegation.c | 2 +- fs/nfs/nfs4_fs.h | 4 ++-- fs/nfs/nfs4proc.c | 47 +++++++++++++++++++++++++---------------------- fs/nfs/nfs4state.c | 21 +++++++++++---------- 4 files changed, 39 insertions(+), 35 deletions(-) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index ea61d26..3016345 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -213,7 +213,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct struct nfs_delegation *freeme = NULL; int status = 0; - delegation = kmalloc(sizeof(*delegation), GFP_KERNEL); + delegation = kmalloc(sizeof(*delegation), GFP_NOFS); if (delegation == NULL) return -ENOMEM; memcpy(delegation->stateid.data, res->delegation.data, diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5099306..c538c61 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -213,7 +213,7 @@ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); -extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait); +extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait); extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); @@ -286,7 +286,7 @@ extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); -extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter); +extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid); extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9998c29..70015dd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -717,17 +717,18 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, struct nfs4_state_owner *sp, fmode_t fmode, int flags, - const struct iattr *attrs) + const struct iattr *attrs, + gfp_t gfp_mask) { struct dentry *parent = dget_parent(path->dentry); struct inode *dir = parent->d_inode; struct nfs_server *server = NFS_SERVER(dir); struct nfs4_opendata *p; - p = kzalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), gfp_mask); if (p == NULL) goto err; - p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid); + p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask); if (p->o_arg.seqid == NULL) goto err_free; path_get(path); @@ -1063,7 +1064,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context { struct nfs4_opendata *opendata; - opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL); + opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL, GFP_NOFS); if (opendata == NULL) return ERR_PTR(-ENOMEM); opendata->state = state; @@ -1651,7 +1652,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in if (path->dentry->d_inode != NULL) nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode); status = -ENOMEM; - opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr); + opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr, GFP_KERNEL); if (opendata == NULL) goto err_put_state_owner; @@ -1926,7 +1927,7 @@ static const struct rpc_call_ops nfs4_close_ops = { * * NOTE: Caller must be holding the sp->so_owner semaphore! */ -int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) +int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_closedata *calldata; @@ -1945,7 +1946,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) }; int status = -ENOMEM; - calldata = kzalloc(sizeof(*calldata), GFP_KERNEL); + calldata = kzalloc(sizeof(*calldata), gfp_mask); if (calldata == NULL) goto out; calldata->inode = state->inode; @@ -1953,7 +1954,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait) calldata->arg.fh = NFS_FH(state->inode); calldata->arg.stateid = &state->open_stateid; /* Serialization for the sequence id */ - calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid); + calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask); if (calldata->arg.seqid == NULL) goto out_free_calldata; calldata->arg.fmode = 0; @@ -3704,7 +3705,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co }; int status = 0; - data = kzalloc(sizeof(*data), GFP_KERNEL); + data = kzalloc(sizeof(*data), GFP_NOFS); if (data == NULL) return -ENOMEM; data->args.fhandle = &data->fh; @@ -3860,7 +3861,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, struct nfs4_unlockdata *p; struct inode *inode = lsp->ls_state->inode; - p = kzalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_NOFS); if (p == NULL) return NULL; p->arg.fh = NFS_FH(inode); @@ -3998,7 +3999,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * if (test_bit(NFS_DELEGATED_STATE, &state->flags)) goto out; lsp = request->fl_u.nfs4_fl.owner; - seqid = nfs_alloc_seqid(&lsp->ls_seqid); + seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL); status = -ENOMEM; if (seqid == NULL) goto out; @@ -4026,22 +4027,23 @@ struct nfs4_lockdata { }; static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, - struct nfs_open_context *ctx, struct nfs4_lock_state *lsp) + struct nfs_open_context *ctx, struct nfs4_lock_state *lsp, + gfp_t gfp_mask) { struct nfs4_lockdata *p; struct inode *inode = lsp->ls_state->inode; struct nfs_server *server = NFS_SERVER(inode); - p = kzalloc(sizeof(*p), GFP_KERNEL); + p = kzalloc(sizeof(*p), gfp_mask); if (p == NULL) return NULL; p->arg.fh = NFS_FH(inode); p->arg.fl = &p->fl; - p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid); + p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid, gfp_mask); if (p->arg.open_seqid == NULL) goto out_free; - p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid); + p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid, gfp_mask); if (p->arg.lock_seqid == NULL) goto out_free_seqid; p->arg.lock_stateid = &lsp->ls_stateid; @@ -4195,7 +4197,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f dprintk("%s: begin!\n", __func__); data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), - fl->fl_u.nfs4_fl.owner); + fl->fl_u.nfs4_fl.owner, + recovery_type == NFS_LOCK_NEW ? GFP_KERNEL : GFP_NOFS); if (data == NULL) return -ENOMEM; if (IS_SETLKW(cmd)) @@ -4684,7 +4687,7 @@ static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs, if (max_reqs != tbl->max_slots) { ret = -ENOMEM; new = kmalloc(max_reqs * sizeof(struct nfs4_slot), - GFP_KERNEL); + GFP_NOFS); if (!new) goto out; ret = 0; @@ -4749,7 +4752,7 @@ static int nfs4_init_slot_table(struct nfs4_slot_table *tbl, dprintk("--> %s: max_reqs=%u\n", __func__, max_slots); - slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL); + slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_NOFS); if (!slot) goto out; ret = 0; @@ -4798,7 +4801,7 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp) struct nfs4_session *session; struct nfs4_slot_table *tbl; - session = kzalloc(sizeof(struct nfs4_session), GFP_KERNEL); + session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS); if (!session) return NULL; @@ -5142,8 +5145,8 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, if (!atomic_inc_not_zero(&clp->cl_count)) return -EIO; - args = kzalloc(sizeof(*args), GFP_KERNEL); - res = kzalloc(sizeof(*res), GFP_KERNEL); + args = kzalloc(sizeof(*args), GFP_NOFS); + res = kzalloc(sizeof(*res), GFP_NOFS); if (!args || !res) { kfree(args); kfree(res); @@ -5244,7 +5247,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) int status = -ENOMEM; dprintk("--> %s\n", __func__); - calldata = kzalloc(sizeof(*calldata), GFP_KERNEL); + calldata = kzalloc(sizeof(*calldata), GFP_NOFS); if (calldata == NULL) goto out; calldata->clp = clp; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index cd2d904..34acf59 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -366,7 +366,7 @@ nfs4_alloc_state_owner(void) { struct nfs4_state_owner *sp; - sp = kzalloc(sizeof(*sp),GFP_KERNEL); + sp = kzalloc(sizeof(*sp),GFP_NOFS); if (!sp) return NULL; spin_lock_init(&sp->so_lock); @@ -440,7 +440,7 @@ nfs4_alloc_open_state(void) { struct nfs4_state *state; - state = kzalloc(sizeof(*state), GFP_KERNEL); + state = kzalloc(sizeof(*state), GFP_NOFS); if (!state) return NULL; atomic_set(&state->count, 1); @@ -542,7 +542,8 @@ void nfs4_put_open_state(struct nfs4_state *state) /* * Close the current file. */ -static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fmode, int wait) +static void __nfs4_close(struct path *path, struct nfs4_state *state, + fmode_t fmode, gfp_t gfp_mask, int wait) { struct nfs4_state_owner *owner = state->owner; int call_close = 0; @@ -583,17 +584,17 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fm nfs4_put_open_state(state); nfs4_put_state_owner(owner); } else - nfs4_do_close(path, state, wait); + nfs4_do_close(path, state, gfp_mask, wait); } void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode) { - __nfs4_close(path, state, fmode, 0); + __nfs4_close(path, state, fmode, GFP_NOFS, 0); } void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode) { - __nfs4_close(path, state, fmode, 1); + __nfs4_close(path, state, fmode, GFP_KERNEL, 1); } /* @@ -623,7 +624,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f struct nfs4_lock_state *lsp; struct nfs_client *clp = state->owner->so_client; - lsp = kzalloc(sizeof(*lsp), GFP_KERNEL); + lsp = kzalloc(sizeof(*lsp), GFP_NOFS); if (lsp == NULL) return NULL; rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue"); @@ -759,11 +760,11 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f nfs4_put_lock_state(lsp); } -struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter) +struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask) { struct nfs_seqid *new; - new = kmalloc(sizeof(*new), GFP_KERNEL); + new = kmalloc(sizeof(*new), gfp_mask); if (new != NULL) { new->sequence = counter; INIT_LIST_HEAD(&new->list); @@ -1352,7 +1353,7 @@ static int nfs4_recall_slot(struct nfs_client *clp) nfs4_begin_drain_session(clp); new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot), - GFP_KERNEL); + GFP_NOFS); if (!new) return -ENOMEM; -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 03/15] NFS: Don't use GFP_KERNEL in rpcsec_gss downcalls 2010-05-13 21:08 ` [PATCH 02/15] NFSv4: Don't use GFP_KERNEL allocations in state recovery Trond Myklebust @ 2010-05-13 21:08 ` Trond Myklebust 2010-05-13 21:08 ` [PATCH 04/15] NFS: Clean up nfs_create_request() Trond Myklebust 0 siblings, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw) To: linux-nfs Again, we can deadlock if the memory reclaim triggers a writeback that requires a rpcsec_gss credential lookup. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- include/linux/sunrpc/gss_api.h | 6 +++- include/linux/sunrpc/gss_krb5.h | 3 +- net/sunrpc/auth_gss/auth_gss.c | 2 +- net/sunrpc/auth_gss/gss_krb5_keys.c | 9 ++++--- net/sunrpc/auth_gss/gss_krb5_mech.c | 34 +++++++++++++++++--------------- net/sunrpc/auth_gss/gss_mech_switch.c | 7 +++-- net/sunrpc/auth_gss/gss_spkm3_mech.c | 5 ++- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- 8 files changed, 38 insertions(+), 30 deletions(-) diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index b22d7f1..5d8048b 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -35,7 +35,8 @@ int gss_import_sec_context( const void* input_token, size_t bufsize, struct gss_api_mech *mech, - struct gss_ctx **ctx_id); + struct gss_ctx **ctx_id, + gfp_t gfp_mask); u32 gss_get_mic( struct gss_ctx *ctx_id, struct xdr_buf *message, @@ -89,7 +90,8 @@ struct gss_api_ops { int (*gss_import_sec_context)( const void *input_token, size_t bufsize, - struct gss_ctx *ctx_id); + struct gss_ctx *ctx_id, + gfp_t gfp_mask); u32 (*gss_get_mic)( struct gss_ctx *ctx_id, struct xdr_buf *message, diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 5e774a5..5af2931 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -295,7 +295,8 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e, const struct xdr_netobj *inkey, struct xdr_netobj *outkey, - const struct xdr_netobj *in_constant); + const struct xdr_netobj *in_constant, + gfp_t gfp_mask); u32 gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e, diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 6654c85..48a7939 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -229,7 +229,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct p = ERR_PTR(-EFAULT); goto err; } - ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx); + ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, GFP_NOFS); if (ret < 0) { p = ERR_PTR(ret); goto err; diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c index 33b87f0..76e42e6 100644 --- a/net/sunrpc/auth_gss/gss_krb5_keys.c +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -150,7 +150,8 @@ static void krb5_nfold(u32 inbits, const u8 *in, u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e, const struct xdr_netobj *inkey, struct xdr_netobj *outkey, - const struct xdr_netobj *in_constant) + const struct xdr_netobj *in_constant, + gfp_t gfp_mask) { size_t blocksize, keybytes, keylength, n; unsigned char *inblockdata, *outblockdata, *rawkey; @@ -175,15 +176,15 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e, /* allocate and set up buffers */ ret = ENOMEM; - inblockdata = kmalloc(blocksize, GFP_KERNEL); + inblockdata = kmalloc(blocksize, gfp_mask); if (inblockdata == NULL) goto err_free_cipher; - outblockdata = kmalloc(blocksize, GFP_KERNEL); + outblockdata = kmalloc(blocksize, gfp_mask); if (outblockdata == NULL) goto err_free_in; - rawkey = kmalloc(keybytes, GFP_KERNEL); + rawkey = kmalloc(keybytes, gfp_mask); if (rawkey == NULL) goto err_free_out; diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c index 7c249a3..0326446 100644 --- a/net/sunrpc/auth_gss/gss_krb5_mech.c +++ b/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -369,7 +369,7 @@ set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed) } static int -context_derive_keys_des3(struct krb5_ctx *ctx) +context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask) { struct xdr_netobj c, keyin, keyout; u8 cdata[GSS_KRB5_K5CLENGTH]; @@ -396,7 +396,7 @@ context_derive_keys_des3(struct krb5_ctx *ctx) /* derive cksum */ set_cdata(cdata, KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM); keyout.data = ctx->cksum; - err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask); if (err) { dprintk("%s: Error %d deriving cksum key\n", __func__, err); @@ -487,7 +487,7 @@ out_err: } static int -context_derive_keys_new(struct krb5_ctx *ctx) +context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask) { struct xdr_netobj c, keyin, keyout; u8 cdata[GSS_KRB5_K5CLENGTH]; @@ -503,7 +503,7 @@ context_derive_keys_new(struct krb5_ctx *ctx) /* initiator seal encryption */ set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION); keyout.data = ctx->initiator_seal; - err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask); if (err) { dprintk("%s: Error %d deriving initiator_seal key\n", __func__, err); @@ -518,7 +518,7 @@ context_derive_keys_new(struct krb5_ctx *ctx) /* acceptor seal encryption */ set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_ENCRYPTION); keyout.data = ctx->acceptor_seal; - err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask); if (err) { dprintk("%s: Error %d deriving acceptor_seal key\n", __func__, err); @@ -533,7 +533,7 @@ context_derive_keys_new(struct krb5_ctx *ctx) /* initiator sign checksum */ set_cdata(cdata, KG_USAGE_INITIATOR_SIGN, KEY_USAGE_SEED_CHECKSUM); keyout.data = ctx->initiator_sign; - err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask); if (err) { dprintk("%s: Error %d deriving initiator_sign key\n", __func__, err); @@ -543,7 +543,7 @@ context_derive_keys_new(struct krb5_ctx *ctx) /* acceptor sign checksum */ set_cdata(cdata, KG_USAGE_ACCEPTOR_SIGN, KEY_USAGE_SEED_CHECKSUM); keyout.data = ctx->acceptor_sign; - err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask); if (err) { dprintk("%s: Error %d deriving acceptor_sign key\n", __func__, err); @@ -553,7 +553,7 @@ context_derive_keys_new(struct krb5_ctx *ctx) /* initiator seal integrity */ set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_INTEGRITY); keyout.data = ctx->initiator_integ; - err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask); if (err) { dprintk("%s: Error %d deriving initiator_integ key\n", __func__, err); @@ -563,7 +563,7 @@ context_derive_keys_new(struct krb5_ctx *ctx) /* acceptor seal integrity */ set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_INTEGRITY); keyout.data = ctx->acceptor_integ; - err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c); + err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask); if (err) { dprintk("%s: Error %d deriving acceptor_integ key\n", __func__, err); @@ -598,7 +598,8 @@ out_err: } static int -gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx) +gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx, + gfp_t gfp_mask) { int keylen; @@ -645,7 +646,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx) } ctx->mech_used.data = kmemdup(gss_kerberos_mech.gm_oid.data, - gss_kerberos_mech.gm_oid.len, GFP_KERNEL); + gss_kerberos_mech.gm_oid.len, gfp_mask); if (unlikely(ctx->mech_used.data == NULL)) { p = ERR_PTR(-ENOMEM); goto out_err; @@ -654,12 +655,12 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx) switch (ctx->enctype) { case ENCTYPE_DES3_CBC_RAW: - return context_derive_keys_des3(ctx); + return context_derive_keys_des3(ctx, gfp_mask); case ENCTYPE_ARCFOUR_HMAC: return context_derive_keys_rc4(ctx); case ENCTYPE_AES128_CTS_HMAC_SHA1_96: case ENCTYPE_AES256_CTS_HMAC_SHA1_96: - return context_derive_keys_new(ctx); + return context_derive_keys_new(ctx, gfp_mask); default: return -EINVAL; } @@ -670,20 +671,21 @@ out_err: static int gss_import_sec_context_kerberos(const void *p, size_t len, - struct gss_ctx *ctx_id) + struct gss_ctx *ctx_id, + gfp_t gfp_mask) { const void *end = (const void *)((const char *)p + len); struct krb5_ctx *ctx; int ret; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + ctx = kzalloc(sizeof(*ctx), gfp_mask); if (ctx == NULL) return -ENOMEM; if (len == 85) ret = gss_import_v1_context(p, end, ctx); else - ret = gss_import_v2_context(p, end, ctx); + ret = gss_import_v2_context(p, end, ctx, gfp_mask); if (ret == 0) ctx_id->internal_ctx_id = ctx; diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 28a84ef..2689de3 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -249,14 +249,15 @@ EXPORT_SYMBOL_GPL(gss_mech_put); int gss_import_sec_context(const void *input_token, size_t bufsize, struct gss_api_mech *mech, - struct gss_ctx **ctx_id) + struct gss_ctx **ctx_id, + gfp_t gfp_mask) { - if (!(*ctx_id = kzalloc(sizeof(**ctx_id), GFP_KERNEL))) + if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask))) return -ENOMEM; (*ctx_id)->mech_type = gss_mech_get(mech); return mech->gm_ops - ->gss_import_sec_context(input_token, bufsize, *ctx_id); + ->gss_import_sec_context(input_token, bufsize, *ctx_id, gfp_mask); } /* gss_get_mic: compute a mic over message and return mic_token. */ diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c index 035e1dd..dc3f1f5 100644 --- a/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -84,13 +84,14 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) static int gss_import_sec_context_spkm3(const void *p, size_t len, - struct gss_ctx *ctx_id) + struct gss_ctx *ctx_id, + gfp_t gfp_mask) { const void *end = (const void *)((const char *)p + len); struct spkm3_ctx *ctx; int version; - if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS))) + if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask))) goto out_err; p = simple_get_bytes(p, end, &version, sizeof(version)); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 1d9ac4a..cc385b3 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -494,7 +494,7 @@ static int rsc_parse(struct cache_detail *cd, len = qword_get(&mesg, buf, mlen); if (len < 0) goto out; - status = gss_import_sec_context(buf, len, gm, &rsci.mechctx); + status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, GFP_KERNEL); if (status) goto out; -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 04/15] NFS: Clean up nfs_create_request() 2010-05-13 21:08 ` [PATCH 03/15] NFS: Don't use GFP_KERNEL in rpcsec_gss downcalls Trond Myklebust @ 2010-05-13 21:08 ` Trond Myklebust 2010-05-13 21:08 ` [PATCH 05/15] NFS: Read requests can use GFP_KERNEL Trond Myklebust 0 siblings, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw) To: linux-nfs There is no point in looping if we're out of memory. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/pagelist.c | 14 ++++---------- 1 files changed, 4 insertions(+), 10 deletions(-) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 29d9d36..a3654e5 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -60,16 +60,10 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, { struct nfs_page *req; - for (;;) { - /* try to allocate the request struct */ - req = nfs_page_alloc(); - if (req != NULL) - break; - - if (fatal_signal_pending(current)) - return ERR_PTR(-ERESTARTSYS); - yield(); - } + /* try to allocate the request struct */ + req = nfs_page_alloc(); + if (req == NULL) + return ERR_PTR(-ENOMEM); /* Initialize the request struct. Initially, we assume a * long write-back delay. This will be adjusted in -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 05/15] NFS: Read requests can use GFP_KERNEL. 2010-05-13 21:08 ` [PATCH 04/15] NFS: Clean up nfs_create_request() Trond Myklebust @ 2010-05-13 21:08 ` Trond Myklebust 2010-05-13 21:08 ` [PATCH 06/15] SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS Trond Myklebust 0 siblings, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw) To: linux-nfs There is no danger of deadlock should the allocation trigger page writeback. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/read.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index db9b360..6e2b06e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -40,7 +40,7 @@ static mempool_t *nfs_rdata_mempool; struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) { - struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS); + struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL); if (p) { memset(p, 0, sizeof(*p)); @@ -50,7 +50,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) if (pagecount <= ARRAY_SIZE(p->page_array)) p->pagevec = p->page_array; else { - p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); + p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); if (!p->pagevec) { mempool_free(p, nfs_rdata_mempool); p = NULL; -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 06/15] SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS 2010-05-13 21:08 ` [PATCH 05/15] NFS: Read requests can use GFP_KERNEL Trond Myklebust @ 2010-05-13 21:08 ` Trond Myklebust 2010-05-13 21:08 ` [PATCH 07/15] SUNRPC: Ensure memory shrinker doesn't waste time in rpcauth_prune_expired() Trond Myklebust 0 siblings, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw) To: linux-nfs Under some circumstances, put_rpccred() can end up allocating memory, so check the gfp_mask to prevent deadlocks. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- net/sunrpc/auth.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 95afe79..03ff9f9 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -270,6 +270,8 @@ rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) LIST_HEAD(free); int res; + if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) + return -1; if (list_empty(&cred_unused)) return 0; spin_lock(&rpc_credcache_lock); -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 07/15] SUNRPC: Ensure memory shrinker doesn't waste time in rpcauth_prune_expired() 2010-05-13 21:08 ` [PATCH 06/15] SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS Trond Myklebust @ 2010-05-13 21:08 ` Trond Myklebust 2010-05-13 21:08 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Trond Myklebust 0 siblings, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw) To: linux-nfs The 'cred_unused' list, that is traversed by rpcauth_cache_shrinker is ordered by time. If we hit a credential that is under the 60 second garbage collection moratorium, we should exit because we know at that point that all successive credentials are subject to the same moratorium... Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- net/sunrpc/auth.c | 12 +++++++----- 1 files changed, 7 insertions(+), 5 deletions(-) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 03ff9f9..2213dc5 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -236,10 +236,13 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { - /* Enforce a 60 second garbage collection moratorium */ + /* + * Enforce a 60 second garbage collection moratorium + * Note that the cred_unused list must be time-ordered. + */ if (time_in_range(cred->cr_expire, expired, jiffies) && test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) - continue; + return 0; list_del_init(&cred->cr_lru); number_cred_unused--; @@ -258,7 +261,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) if (nr_to_scan == 0) break; } - return nr_to_scan; + return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; } /* @@ -275,8 +278,7 @@ rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) if (list_empty(&cred_unused)) return 0; spin_lock(&rpc_credcache_lock); - nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan); - res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure; + res = rpcauth_prune_expired(&free, nr_to_scan); spin_unlock(&rpc_credcache_lock); rpcauth_destroy_credlist(&free); return res; -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter 2010-05-13 21:08 ` [PATCH 07/15] SUNRPC: Ensure memory shrinker doesn't waste time in rpcauth_prune_expired() Trond Myklebust @ 2010-05-13 21:08 ` Trond Myklebust 2010-05-13 21:08 ` [PATCH 09/15] NFS: Don't run nfs_access_cache_shrinker() when the mask is GFP_NOFS Trond Myklebust 2010-05-14 16:34 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Chuck Lever 0 siblings, 2 replies; 21+ messages in thread From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw) To: linux-nfs Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- net/sunrpc/auth.c | 5 ++--- 1 files changed, 2 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 2213dc5..5fb02ac 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -236,6 +236,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { + if (nr_to_scan-- == 0) + break; /* * Enforce a 60 second garbage collection moratorium * Note that the cred_unused list must be time-ordered. @@ -255,11 +257,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) get_rpccred(cred); list_add_tail(&cred->cr_lru, free); rpcauth_unhash_cred_locked(cred); - nr_to_scan--; } spin_unlock(cache_lock); - if (nr_to_scan == 0) - break; } return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; } -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 09/15] NFS: Don't run nfs_access_cache_shrinker() when the mask is GFP_NOFS 2010-05-13 21:08 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Trond Myklebust @ 2010-05-13 21:08 ` Trond Myklebust 2010-05-13 21:08 ` [PATCH 10/15] NFS: Clean up nfs_access_zap_cache() Trond Myklebust 2010-05-14 16:34 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Chuck Lever 1 sibling, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw) To: linux-nfs Both iput() and put_rpccred() might allocate memory under certain circumstances, so make sure that we don't recurse and deadlock... Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/dir.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 18da8ab..bc136ef 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1703,6 +1703,8 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) struct nfs_inode *nfsi; struct nfs_access_entry *cache; + if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) + return -1; restart: spin_lock(&nfs_access_lru_lock); list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 10/15] NFS: Clean up nfs_access_zap_cache() 2010-05-13 21:08 ` [PATCH 09/15] NFS: Don't run nfs_access_cache_shrinker() when the mask is GFP_NOFS Trond Myklebust @ 2010-05-13 21:08 ` Trond Myklebust 2010-05-13 21:08 ` [PATCH 11/15] NFS: Don't call iput() in nfs_access_cache_shrinker Trond Myklebust 0 siblings, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw) To: linux-nfs Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/dir.c | 60 +++++++++++++++++++++++++++++---------------------------- 1 files changed, 31 insertions(+), 29 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bc136ef..5582e9d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1697,6 +1697,17 @@ static void nfs_access_free_entry(struct nfs_access_entry *entry) smp_mb__after_atomic_dec(); } +static void nfs_access_free_list(struct list_head *head) +{ + struct nfs_access_entry *cache; + + while (!list_empty(head)) { + cache = list_entry(head->next, struct nfs_access_entry, lru); + list_del(&cache->lru); + nfs_access_free_entry(cache); + } +} + int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) { LIST_HEAD(head); @@ -1743,52 +1754,41 @@ remove_lru_entry: goto restart; } spin_unlock(&nfs_access_lru_lock); - while (!list_empty(&head)) { - cache = list_entry(head.next, struct nfs_access_entry, lru); - list_del(&cache->lru); - nfs_access_free_entry(cache); - } + nfs_access_free_list(&head); return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure; } -static void __nfs_access_zap_cache(struct inode *inode) +static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head) { - struct nfs_inode *nfsi = NFS_I(inode); struct rb_root *root_node = &nfsi->access_cache; - struct rb_node *n, *dispose = NULL; + struct rb_node *n; struct nfs_access_entry *entry; /* Unhook entries from the cache */ while ((n = rb_first(root_node)) != NULL) { entry = rb_entry(n, struct nfs_access_entry, rb_node); rb_erase(n, root_node); - list_del(&entry->lru); - n->rb_left = dispose; - dispose = n; + list_move(&entry->lru, head); } nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; - spin_unlock(&inode->i_lock); - - /* Now kill them all! */ - while (dispose != NULL) { - n = dispose; - dispose = n->rb_left; - nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node)); - } } void nfs_access_zap_cache(struct inode *inode) { + LIST_HEAD(head); + + if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0) + return; /* Remove from global LRU init */ - if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { - spin_lock(&nfs_access_lru_lock); + spin_lock(&nfs_access_lru_lock); + if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) list_del_init(&NFS_I(inode)->access_cache_inode_lru); - spin_unlock(&nfs_access_lru_lock); - } spin_lock(&inode->i_lock); - /* This will release the spinlock */ - __nfs_access_zap_cache(inode); + __nfs_access_zap_cache(NFS_I(inode), &head); + spin_unlock(&inode->i_lock); + spin_unlock(&nfs_access_lru_lock); + nfs_access_free_list(&head); } static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred) @@ -1839,8 +1839,8 @@ out_stale: nfs_access_free_entry(cache); return -ENOENT; out_zap: - /* This will release the spinlock */ - __nfs_access_zap_cache(inode); + spin_unlock(&inode->i_lock); + nfs_access_zap_cache(inode); return -ENOENT; } @@ -1895,9 +1895,11 @@ static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *s smp_mb__after_atomic_inc(); /* Add inode to global LRU list */ - if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { + if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) { spin_lock(&nfs_access_lru_lock); - list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list); + if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) + list_add_tail(&NFS_I(inode)->access_cache_inode_lru, + &nfs_access_lru_list); spin_unlock(&nfs_access_lru_lock); } } -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 11/15] NFS: Don't call iput() in nfs_access_cache_shrinker 2010-05-13 21:08 ` [PATCH 10/15] NFS: Clean up nfs_access_zap_cache() Trond Myklebust @ 2010-05-13 21:08 ` Trond Myklebust 2010-05-13 21:08 ` [PATCH 12/15] SUNRPC: Move the task->tk_bytes_sent and tk_rtt to struct rpc_rqst Trond Myklebust 0 siblings, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw) To: linux-nfs iput() can potentially attempt to allocate memory, so we should avoid calling it in a memory shrinker. Instead, rely on the fact that iput() will call nfs_access_zap_cache(). Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- fs/nfs/dir.c | 19 ++++--------------- 1 files changed, 4 insertions(+), 15 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5582e9d..7bf20be 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1716,22 +1716,14 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) return -1; -restart: + spin_lock(&nfs_access_lru_lock); list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { - struct rw_semaphore *s_umount; struct inode *inode; if (nr_to_scan-- == 0) break; - s_umount = &nfsi->vfs_inode.i_sb->s_umount; - if (!down_read_trylock(s_umount)) - continue; - inode = igrab(&nfsi->vfs_inode); - if (inode == NULL) { - up_read(s_umount); - continue; - } + inode = &nfsi->vfs_inode; spin_lock(&inode->i_lock); if (list_empty(&nfsi->access_cache_entry_lru)) goto remove_lru_entry; @@ -1745,13 +1737,10 @@ restart: else { remove_lru_entry: list_del_init(&nfsi->access_cache_inode_lru); + smp_mb__before_clear_bit(); clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags); + smp_mb__after_clear_bit(); } - spin_unlock(&inode->i_lock); - spin_unlock(&nfs_access_lru_lock); - iput(inode); - up_read(s_umount); - goto restart; } spin_unlock(&nfs_access_lru_lock); nfs_access_free_list(&head); -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 12/15] SUNRPC: Move the task->tk_bytes_sent and tk_rtt to struct rpc_rqst 2010-05-13 21:08 ` [PATCH 11/15] NFS: Don't call iput() in nfs_access_cache_shrinker Trond Myklebust @ 2010-05-13 21:08 ` Trond Myklebust 2010-05-13 21:08 ` [PATCH 13/15] SUNRPC: Remove the 'tk_magic' debugging field Trond Myklebust 0 siblings, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw) To: linux-nfs It seems strange to maintain stats for bytes_sent in one structure, and bytes received in another. Try to assemble all the RPC request-related stats in struct rpc_rqst Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- include/linux/sunrpc/sched.h | 4 +--- include/linux/sunrpc/xprt.h | 6 ++++-- net/sunrpc/stats.c | 4 ++-- net/sunrpc/xprt.c | 4 ++-- net/sunrpc/xprtrdma/transport.c | 2 +- net/sunrpc/xprtsock.c | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 76720d2..46ebef1 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -80,9 +80,7 @@ struct rpc_task { } u; unsigned short tk_timeouts; /* maj timeouts */ - size_t tk_bytes_sent; /* total bytes sent */ - ktime_t tk_start, /* RPC task init timestamp */ - tk_rtt; /* round-trip time */ + ktime_t tk_start; /* RPC task init timestamp */ pid_t tk_owner; /* Process id for batching tasks */ unsigned char tk_priority : 2;/* Task priority */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index f885186..b514703 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -66,8 +66,6 @@ struct rpc_rqst { struct rpc_task * rq_task; /* RPC task data */ __be32 rq_xid; /* request XID */ int rq_cong; /* has incremented xprt->cong */ - int rq_reply_bytes_recvd; /* number of reply */ - /* bytes received */ u32 rq_seqno; /* gss seq no. used on req. */ int rq_enc_pages_num; struct page **rq_enc_pages; /* scratch pages for use by @@ -78,12 +76,16 @@ struct rpc_rqst { __u32 * rq_buffer; /* XDR encode buffer */ size_t rq_callsize, rq_rcvsize; + size_t rq_xmit_bytes_sent; /* total bytes sent */ + size_t rq_reply_bytes_recvd; /* total reply bytes */ + /* received */ struct xdr_buf rq_private_buf; /* The receive buffer * used in the softirq. */ unsigned long rq_majortimeo; /* major timeout alarm */ unsigned long rq_timeout; /* Current timeout value */ + ktime_t rq_rtt; /* round-trip time */ unsigned int rq_retries; /* # of retries */ unsigned int rq_connect_cookie; /* A cookie used to track the diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index aacd95f..ea1046f 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -156,13 +156,13 @@ void rpc_count_iostats(struct rpc_task *task) op_metrics->om_ntrans += req->rq_ntrans; op_metrics->om_timeouts += task->tk_timeouts; - op_metrics->om_bytes_sent += task->tk_bytes_sent; + op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent; op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd; delta = ktime_sub(req->rq_xtime, task->tk_start); op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta); - op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, task->tk_rtt); + op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt); delta = ktime_sub(ktime_get(), task->tk_start); op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 8986b1b..65fe2e4 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -780,7 +780,7 @@ static void xprt_update_rtt(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_rtt *rtt = task->tk_client->cl_rtt; unsigned timer = task->tk_msg.rpc_proc->p_timer; - long m = usecs_to_jiffies(ktime_to_us(task->tk_rtt)); + long m = usecs_to_jiffies(ktime_to_us(req->rq_rtt)); if (timer) { if (req->rq_ntrans == 1) @@ -805,7 +805,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) task->tk_pid, ntohl(req->rq_xid), copied); xprt->stat.recvs++; - task->tk_rtt = ktime_sub(ktime_get(), req->rq_xtime); + req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime); if (xprt->ops->timer != NULL) xprt_update_rtt(task); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 3f3b38c..a85e866 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -674,7 +674,7 @@ xprt_rdma_send_request(struct rpc_task *task) if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) goto drop_connection; - task->tk_bytes_sent += rqst->rq_snd_buf.len; + rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; rqst->rq_bytes_sent = 0; return 0; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index beefa7a..02fc7f0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -528,7 +528,7 @@ static int xs_udp_send_request(struct rpc_task *task) xdr->len - req->rq_bytes_sent, status); if (status >= 0) { - task->tk_bytes_sent += status; + req->rq_xmit_bytes_sent += status; if (status >= req->rq_slen) return 0; /* Still some bytes left; set up for a retry later. */ @@ -624,7 +624,7 @@ static int xs_tcp_send_request(struct rpc_task *task) /* If we've sent the entire packet, immediately * reset the count of bytes sent. */ req->rq_bytes_sent += status; - task->tk_bytes_sent += status; + req->rq_xmit_bytes_sent += status; if (likely(req->rq_bytes_sent >= req->rq_slen)) { req->rq_bytes_sent = 0; return 0; -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 13/15] SUNRPC: Remove the 'tk_magic' debugging field 2010-05-13 21:08 ` [PATCH 12/15] SUNRPC: Move the task->tk_bytes_sent and tk_rtt to struct rpc_rqst Trond Myklebust @ 2010-05-13 21:08 ` Trond Myklebust 2010-05-13 21:08 ` [PATCH 14/15] SUNRPC: Reorder the struct rpc_task fields Trond Myklebust 0 siblings, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw) To: linux-nfs It has not triggered in almost a decade. Time to get rid of it... Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- include/linux/sunrpc/sched.h | 3 --- net/sunrpc/sched.c | 11 ----------- 2 files changed, 0 insertions(+), 14 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 46ebef1..41b9f97 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -41,9 +41,6 @@ struct rpc_wait { * This is the RPC task struct */ struct rpc_task { -#ifdef RPC_DEBUG - unsigned long tk_magic; /* 0xf00baa */ -#endif atomic_t tk_count; /* Reference count */ struct list_head tk_task; /* global list of tasks */ struct rpc_clnt * tk_client; /* RPC client */ diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index aa7b07e..4a843b8 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -25,7 +25,6 @@ #ifdef RPC_DEBUG #define RPCDBG_FACILITY RPCDBG_SCHED -#define RPC_TASK_MAGIC_ID 0xf00baa #endif /* @@ -237,7 +236,6 @@ static void rpc_task_set_debuginfo(struct rpc_task *task) { static atomic_t rpc_pid; - task->tk_magic = RPC_TASK_MAGIC_ID; task->tk_pid = atomic_inc_return(&rpc_pid); } #else @@ -360,9 +358,6 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n", task->tk_pid, jiffies); -#ifdef RPC_DEBUG - BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID); -#endif /* Has the task been executed yet? If not, we cannot wake it up! */ if (!RPC_IS_ACTIVATED(task)) { printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task); @@ -916,9 +911,6 @@ EXPORT_SYMBOL_GPL(rpc_put_task); static void rpc_release_task(struct rpc_task *task) { -#ifdef RPC_DEBUG - BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID); -#endif dprintk("RPC: %5u release task\n", task->tk_pid); if (!list_empty(&task->tk_task)) { @@ -930,9 +922,6 @@ static void rpc_release_task(struct rpc_task *task) } BUG_ON (RPC_IS_QUEUED(task)); -#ifdef RPC_DEBUG - task->tk_magic = 0; -#endif /* Wake up anyone who is waiting for task completion */ rpc_mark_complete_task(task); -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 14/15] SUNRPC: Reorder the struct rpc_task fields 2010-05-13 21:08 ` [PATCH 13/15] SUNRPC: Remove the 'tk_magic' debugging field Trond Myklebust @ 2010-05-13 21:08 ` Trond Myklebust 2010-05-13 21:08 ` [PATCH 15/15] SUNRPC: Don't spam gssd with upcall requests when the kerberos key expired Trond Myklebust 0 siblings, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw) To: linux-nfs This improves the packing of the rpc_task, and ensures that on 64-bit platforms the size reduces to 216 bytes. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- include/linux/sunrpc/sched.h | 12 ++++++------ 1 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 41b9f97..7be4f3a 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -45,14 +45,11 @@ struct rpc_task { struct list_head tk_task; /* global list of tasks */ struct rpc_clnt * tk_client; /* RPC client */ struct rpc_rqst * tk_rqstp; /* RPC request */ - int tk_status; /* result of last operation */ /* * RPC call state */ struct rpc_message tk_msg; /* RPC call info */ - __u8 tk_garb_retry; - __u8 tk_cred_retry; /* * callback to be executed after waking up @@ -65,7 +62,6 @@ struct rpc_task { void * tk_calldata; unsigned long tk_timeout; /* timeout for rpc_sleep() */ - unsigned short tk_flags; /* misc flags */ unsigned long tk_runstate; /* Task run status */ struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could * be any workqueue @@ -76,15 +72,19 @@ struct rpc_task { struct rpc_wait tk_wait; /* RPC wait */ } u; - unsigned short tk_timeouts; /* maj timeouts */ ktime_t tk_start; /* RPC task init timestamp */ pid_t tk_owner; /* Process id for batching tasks */ - unsigned char tk_priority : 2;/* Task priority */ + int tk_status; /* result of last operation */ + unsigned short tk_flags; /* misc flags */ + unsigned short tk_timeouts; /* maj timeouts */ #ifdef RPC_DEBUG unsigned short tk_pid; /* debugging aid */ #endif + unsigned char tk_priority : 2,/* Task priority */ + tk_garb_retry : 2, + tk_cred_retry : 2; }; #define tk_xprt tk_client->cl_xprt -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 15/15] SUNRPC: Don't spam gssd with upcall requests when the kerberos key expired 2010-05-13 21:08 ` [PATCH 14/15] SUNRPC: Reorder the struct rpc_task fields Trond Myklebust @ 2010-05-13 21:08 ` Trond Myklebust 0 siblings, 0 replies; 21+ messages in thread From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw) To: linux-nfs Now that the rpc.gssd daemon can explicitly tell us that the key expired, we should cache that information to avoid spamming gssd. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- include/linux/sunrpc/auth.h | 1 + include/linux/sunrpc/auth_gss.h | 1 + net/sunrpc/auth_gss/auth_gss.c | 65 +++++++++++++++++++++++++++++++------- 3 files changed, 55 insertions(+), 12 deletions(-) diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h index 996df4d..87d7ec0 100644 --- a/include/linux/sunrpc/auth.h +++ b/include/linux/sunrpc/auth.h @@ -54,6 +54,7 @@ struct rpc_cred { #define RPCAUTH_CRED_NEW 0 #define RPCAUTH_CRED_UPTODATE 1 #define RPCAUTH_CRED_HASHED 2 +#define RPCAUTH_CRED_NEGATIVE 3 #define RPCAUTH_CRED_MAGIC 0x0f4aa4f0 diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h index d48d4e6..671538d 100644 --- a/include/linux/sunrpc/auth_gss.h +++ b/include/linux/sunrpc/auth_gss.h @@ -82,6 +82,7 @@ struct gss_cred { enum rpc_gss_svc gc_service; struct gss_cl_ctx *gc_ctx; struct gss_upcall_msg *gc_upcall; + unsigned long gc_upcall_timestamp; unsigned char gc_machine_cred : 1; }; diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 48a7939..8da2a0e 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -57,6 +57,9 @@ static const struct rpc_authops authgss_ops; static const struct rpc_credops gss_credops; static const struct rpc_credops gss_nullops; +#define GSS_RETRY_EXPIRED 5 +static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED; + #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH #endif @@ -350,6 +353,24 @@ gss_unhash_msg(struct gss_upcall_msg *gss_msg) } static void +gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss_msg) +{ + switch (gss_msg->msg.errno) { + case 0: + if (gss_msg->ctx == NULL) + break; + clear_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags); + gss_cred_set_ctx(&gss_cred->gc_base, gss_msg->ctx); + break; + case -EKEYEXPIRED: + set_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags); + } + gss_cred->gc_upcall_timestamp = jiffies; + gss_cred->gc_upcall = NULL; + rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); +} + +static void gss_upcall_callback(struct rpc_task *task) { struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred, @@ -358,13 +379,9 @@ gss_upcall_callback(struct rpc_task *task) struct inode *inode = &gss_msg->inode->vfs_inode; spin_lock(&inode->i_lock); - if (gss_msg->ctx) - gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx); - else - task->tk_status = gss_msg->msg.errno; - gss_cred->gc_upcall = NULL; - rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); + gss_handle_downcall_result(gss_cred, gss_msg); spin_unlock(&inode->i_lock); + task->tk_status = gss_msg->msg.errno; gss_release_msg(gss_msg); } @@ -513,18 +530,16 @@ gss_refresh_upcall(struct rpc_task *task) spin_lock(&inode->i_lock); if (gss_cred->gc_upcall != NULL) rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); - else if (gss_msg->ctx != NULL) { - gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx); - gss_cred->gc_upcall = NULL; - rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); - } else if (gss_msg->msg.errno >= 0) { + else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { task->tk_timeout = 0; gss_cred->gc_upcall = gss_msg; /* gss_upcall_callback will release the reference to gss_upcall_msg */ atomic_inc(&gss_msg->count); rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback); - } else + } else { + gss_handle_downcall_result(gss_cred, gss_msg); err = gss_msg->msg.errno; + } spin_unlock(&inode->i_lock); gss_release_msg(gss_msg); out: @@ -1123,6 +1138,23 @@ static int gss_renew_cred(struct rpc_task *task) return 0; } +static int gss_cred_is_negative_entry(struct rpc_cred *cred) +{ + if (test_bit(RPCAUTH_CRED_NEGATIVE, &cred->cr_flags)) { + unsigned long now = jiffies; + unsigned long begin, expire; + struct gss_cred *gss_cred; + + gss_cred = container_of(cred, struct gss_cred, gc_base); + begin = gss_cred->gc_upcall_timestamp; + expire = begin + gss_expired_cred_retry_delay * HZ; + + if (time_in_range_open(now, begin, expire)) + return 1; + } + return 0; +} + /* * Refresh credentials. XXX - finish */ @@ -1132,6 +1164,9 @@ gss_refresh(struct rpc_task *task) struct rpc_cred *cred = task->tk_msg.rpc_cred; int ret = 0; + if (gss_cred_is_negative_entry(cred)) + return -EKEYEXPIRED; + if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && !test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) { ret = gss_renew_cred(task); @@ -1585,5 +1620,11 @@ static void __exit exit_rpcsec_gss(void) } MODULE_LICENSE("GPL"); +module_param_named(expired_cred_retry_delay, + gss_expired_cred_retry_delay, + uint, 0644); +MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until " + "the RPC engine retries an expired credential"); + module_init(init_rpcsec_gss) module_exit(exit_rpcsec_gss) -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter 2010-05-13 21:08 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Trond Myklebust 2010-05-13 21:08 ` [PATCH 09/15] NFS: Don't run nfs_access_cache_shrinker() when the mask is GFP_NOFS Trond Myklebust @ 2010-05-14 16:34 ` Chuck Lever 2010-05-14 17:13 ` Trond Myklebust 1 sibling, 1 reply; 21+ messages in thread From: Chuck Lever @ 2010-05-14 16:34 UTC (permalink / raw) To: Trond Myklebust; +Cc: linux-nfs On 05/13/10 05:08 PM, Trond Myklebust wrote: > Signed-off-by: Trond Myklebust<Trond.Myklebust@netapp.com> > --- > net/sunrpc/auth.c | 5 ++--- > 1 files changed, 2 insertions(+), 3 deletions(-) > > diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c > index 2213dc5..5fb02ac 100644 > --- a/net/sunrpc/auth.c > +++ b/net/sunrpc/auth.c > @@ -236,6 +236,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) > > list_for_each_entry_safe(cred, next,&cred_unused, cr_lru) { > > + if (nr_to_scan-- == 0) > + break; > /* > * Enforce a 60 second garbage collection moratorium > * Note that the cred_unused list must be time-ordered. > @@ -255,11 +257,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) > get_rpccred(cred); > list_add_tail(&cred->cr_lru, free); > rpcauth_unhash_cred_locked(cred); > - nr_to_scan--; > } > spin_unlock(cache_lock); > - if (nr_to_scan == 0) > - break; > } > return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; > } It looks to me like the mm calls our cache shrinker with nr_to_scan set to zero when it just wants this return value, and nothing more. But the logic here seems to assume that nr_to_scan == 0 means shrink as much as you can. Am I reading this correctly? ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter 2010-05-14 16:34 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Chuck Lever @ 2010-05-14 17:13 ` Trond Myklebust [not found] ` <1273857200.4732.2.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> 0 siblings, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-14 17:13 UTC (permalink / raw) To: Chuck Lever; +Cc: linux-nfs On Fri, 2010-05-14 at 12:34 -0400, Chuck Lever wrote: > On 05/13/10 05:08 PM, Trond Myklebust wrote: > > Signed-off-by: Trond Myklebust<Trond.Myklebust@netapp.com> > > --- > > net/sunrpc/auth.c | 5 ++--- > > 1 files changed, 2 insertions(+), 3 deletions(-) > > > > diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c > > index 2213dc5..5fb02ac 100644 > > --- a/net/sunrpc/auth.c > > +++ b/net/sunrpc/auth.c > > @@ -236,6 +236,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) > > > > list_for_each_entry_safe(cred, next,&cred_unused, cr_lru) { > > > > + if (nr_to_scan-- == 0) > > + break; > > /* > > * Enforce a 60 second garbage collection moratorium > > * Note that the cred_unused list must be time-ordered. > > @@ -255,11 +257,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) > > get_rpccred(cred); > > list_add_tail(&cred->cr_lru, free); > > rpcauth_unhash_cred_locked(cred); > > - nr_to_scan--; > > } > > spin_unlock(cache_lock); > > - if (nr_to_scan == 0) > > - break; > > } > > return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; > > } > > It looks to me like the mm calls our cache shrinker with nr_to_scan set > to zero when it just wants this return value, and nothing more. But the > logic here seems to assume that nr_to_scan == 0 means shrink as much as > you can. Am I reading this correctly? Look more carefully: the comparison contains a post-decrement operation, so if the nr_to_scan == 0, then we immediately exit the loop (after decrementing nr_to_scan, but who cares about that)... Cheers Trond ^ permalink raw reply [flat|nested] 21+ messages in thread
[parent not found: <1273857200.4732.2.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>]
* Re: [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter [not found] ` <1273857200.4732.2.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> @ 2010-05-14 17:32 ` Chuck Lever 2010-05-14 18:07 ` Trond Myklebust 0 siblings, 1 reply; 21+ messages in thread From: Chuck Lever @ 2010-05-14 17:32 UTC (permalink / raw) To: Trond Myklebust; +Cc: linux-nfs On 05/14/10 01:13 PM, Trond Myklebust wrote: > On Fri, 2010-05-14 at 12:34 -0400, Chuck Lever wrote: >> On 05/13/10 05:08 PM, Trond Myklebust wrote: >>> Signed-off-by: Trond Myklebust<Trond.Myklebust@netapp.com> >>> --- >>> net/sunrpc/auth.c | 5 ++--- >>> 1 files changed, 2 insertions(+), 3 deletions(-) >>> >>> diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c >>> index 2213dc5..5fb02ac 100644 >>> --- a/net/sunrpc/auth.c >>> +++ b/net/sunrpc/auth.c >>> @@ -236,6 +236,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) >>> >>> list_for_each_entry_safe(cred, next,&cred_unused, cr_lru) { >>> >>> + if (nr_to_scan-- == 0) >>> + break; >>> /* >>> * Enforce a 60 second garbage collection moratorium >>> * Note that the cred_unused list must be time-ordered. >>> @@ -255,11 +257,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) >>> get_rpccred(cred); >>> list_add_tail(&cred->cr_lru, free); >>> rpcauth_unhash_cred_locked(cred); >>> - nr_to_scan--; >>> } >>> spin_unlock(cache_lock); >>> - if (nr_to_scan == 0) >>> - break; >>> } >>> return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; >>> } >> >> It looks to me like the mm calls our cache shrinker with nr_to_scan set >> to zero when it just wants this return value, and nothing more. But the >> logic here seems to assume that nr_to_scan == 0 means shrink as much as >> you can. Am I reading this correctly? > > Look more carefully: the comparison contains a post-decrement operation, > so if the nr_to_scan == 0, then we immediately exit the loop (after > decrementing nr_to_scan, but who cares about that)... When mm calls with nr_to_scan set to zero, it doesn't expect a -1 return, it just uses the returned value. mm checks for a -1 return only when a non-zero scan count argument is passed. So the check you added here (and in the access cache shrinker) to return -1 when the gfp_mask doesn't contain GFP_KERNEL could cause some trouble. It would be safer if we return -1 _after_ checking for nr_to_scan == 0. ^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter 2010-05-14 17:32 ` Chuck Lever @ 2010-05-14 18:07 ` Trond Myklebust [not found] ` <1273860432.4732.30.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> 0 siblings, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-14 18:07 UTC (permalink / raw) To: Chuck Lever; +Cc: linux-nfs On Fri, 2010-05-14 at 13:32 -0400, Chuck Lever wrote: > On 05/14/10 01:13 PM, Trond Myklebust wrote: > > On Fri, 2010-05-14 at 12:34 -0400, Chuck Lever wrote: > >> On 05/13/10 05:08 PM, Trond Myklebust wrote: > >>> Signed-off-by: Trond Myklebust<Trond.Myklebust@netapp.com> > >>> --- > >>> net/sunrpc/auth.c | 5 ++--- > >>> 1 files changed, 2 insertions(+), 3 deletions(-) > >>> > >>> diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c > >>> index 2213dc5..5fb02ac 100644 > >>> --- a/net/sunrpc/auth.c > >>> +++ b/net/sunrpc/auth.c > >>> @@ -236,6 +236,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) > >>> > >>> list_for_each_entry_safe(cred, next,&cred_unused, cr_lru) { > >>> > >>> + if (nr_to_scan-- == 0) > >>> + break; > >>> /* > >>> * Enforce a 60 second garbage collection moratorium > >>> * Note that the cred_unused list must be time-ordered. > >>> @@ -255,11 +257,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan) > >>> get_rpccred(cred); > >>> list_add_tail(&cred->cr_lru, free); > >>> rpcauth_unhash_cred_locked(cred); > >>> - nr_to_scan--; > >>> } > >>> spin_unlock(cache_lock); > >>> - if (nr_to_scan == 0) > >>> - break; > >>> } > >>> return (number_cred_unused / 100) * sysctl_vfs_cache_pressure; > >>> } > >> > >> It looks to me like the mm calls our cache shrinker with nr_to_scan set > >> to zero when it just wants this return value, and nothing more. But the > >> logic here seems to assume that nr_to_scan == 0 means shrink as much as > >> you can. Am I reading this correctly? > > > > Look more carefully: the comparison contains a post-decrement operation, > > so if the nr_to_scan == 0, then we immediately exit the loop (after > > decrementing nr_to_scan, but who cares about that)... > > When mm calls with nr_to_scan set to zero, it doesn't expect a -1 > return, it just uses the returned value. mm checks for a -1 return only > when a non-zero scan count argument is passed. > > So the check you added here (and in the access cache shrinker) to return > -1 when the gfp_mask doesn't contain GFP_KERNEL could cause some > trouble. It would be safer if we return -1 _after_ checking for > nr_to_scan == 0. Oh... You're referring to the change that was added in Patch 6/15 SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS? I got confused... I can perhaps rather add a check for nr_to_scan != 0 in that patch... ^ permalink raw reply [flat|nested] 21+ messages in thread
[parent not found: <1273860432.4732.30.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>]
* Re: [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter [not found] ` <1273860432.4732.30.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> @ 2010-05-14 18:37 ` Trond Myklebust [not found] ` <1273862242.4732.39.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> 0 siblings, 1 reply; 21+ messages in thread From: Trond Myklebust @ 2010-05-14 18:37 UTC (permalink / raw) To: Chuck Lever; +Cc: linux-nfs On Fri, 2010-05-14 at 14:07 -0400, Trond Myklebust wrote: > On Fri, 2010-05-14 at 13:32 -0400, Chuck Lever wrote: > > When mm calls with nr_to_scan set to zero, it doesn't expect a -1 > > return, it just uses the returned value. mm checks for a -1 return only > > when a non-zero scan count argument is passed. > > > > So the check you added here (and in the access cache shrinker) to return > > -1 when the gfp_mask doesn't contain GFP_KERNEL could cause some > > trouble. It would be safer if we return -1 _after_ checking for > > nr_to_scan == 0. > > Oh... You're referring to the change that was added in Patch 6/15 > SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS? I > got confused... > > I can perhaps rather add a check for nr_to_scan != 0 in that patch... OK... How about the following? (and ditto for the access cache shrinker) -------------------------------------------------------------------------------------------- >From 8048209c54b95046a23cd994b3d0520757ea5845 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Thu, 13 May 2010 12:51:03 -0400 Subject: [PATCH 06/15] SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS Under some circumstances, put_rpccred() can end up allocating memory, so check the gfp_mask to prevent deadlocks. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> --- net/sunrpc/auth.c | 2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 95afe79..0667a36 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -270,6 +270,8 @@ rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) LIST_HEAD(free); int res; + if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) + return (nr_to_scan == 0) ? 0 : -1; if (list_empty(&cred_unused)) return 0; spin_lock(&rpc_credcache_lock); -- 1.6.6.1 ^ permalink raw reply related [flat|nested] 21+ messages in thread
[parent not found: <1273862242.4732.39.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>]
* Re: [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter [not found] ` <1273862242.4732.39.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org> @ 2010-05-14 19:18 ` Chuck Lever 0 siblings, 0 replies; 21+ messages in thread From: Chuck Lever @ 2010-05-14 19:18 UTC (permalink / raw) To: Trond Myklebust; +Cc: linux-nfs On 05/14/10 02:37 PM, Trond Myklebust wrote: > On Fri, 2010-05-14 at 14:07 -0400, Trond Myklebust wrote: >> On Fri, 2010-05-14 at 13:32 -0400, Chuck Lever wrote: >>> When mm calls with nr_to_scan set to zero, it doesn't expect a -1 >>> return, it just uses the returned value. mm checks for a -1 return only >>> when a non-zero scan count argument is passed. >>> >>> So the check you added here (and in the access cache shrinker) to return >>> -1 when the gfp_mask doesn't contain GFP_KERNEL could cause some >>> trouble. It would be safer if we return -1 _after_ checking for >>> nr_to_scan == 0. >> >> Oh... You're referring to the change that was added in Patch 6/15 >> SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS? I >> got confused... Sorry. >> I can perhaps rather add a check for nr_to_scan != 0 in that patch... > > OK... How about the following? (and ditto for the access cache shrinker) A comment that explains the choice of return codes might be nice, but yeah, I guess that will work. It avoids the need to take the rpc_credcache_lock in cases where you can't do any shrinkage anyway. So, for the series: Reviewed-by: Chuck Lever <chuck.lever@oracle.com> Looked at patch 2 and 3, but my understanding of gss and v4 reclaim isn't terribly extensive. I didn't see anything egregious in those two, but I can't really comment intelligently on the high-level logic changes. > -------------------------------------------------------------------------------------------- >> From 8048209c54b95046a23cd994b3d0520757ea5845 Mon Sep 17 00:00:00 2001 > From: Trond Myklebust<Trond.Myklebust@netapp.com> > Date: Thu, 13 May 2010 12:51:03 -0400 > Subject: [PATCH 06/15] SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS > > Under some circumstances, put_rpccred() can end up allocating memory, so > check the gfp_mask to prevent deadlocks. > > Signed-off-by: Trond Myklebust<Trond.Myklebust@netapp.com> > --- > net/sunrpc/auth.c | 2 ++ > 1 files changed, 2 insertions(+), 0 deletions(-) > > diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c > index 95afe79..0667a36 100644 > --- a/net/sunrpc/auth.c > +++ b/net/sunrpc/auth.c > @@ -270,6 +270,8 @@ rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask) > LIST_HEAD(free); > int res; > > + if ((gfp_mask& GFP_KERNEL) != GFP_KERNEL) > + return (nr_to_scan == 0) ? 0 : -1; > if (list_empty(&cred_unused)) > return 0; > spin_lock(&rpc_credcache_lock); ^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2010-05-14 19:19 UTC | newest]
Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-13 21:08 [PATCH 01/15] SUNRPC: Fix xs_setup_bc_tcp() Trond Myklebust
2010-05-13 21:08 ` [PATCH 02/15] NFSv4: Don't use GFP_KERNEL allocations in state recovery Trond Myklebust
2010-05-13 21:08 ` [PATCH 03/15] NFS: Don't use GFP_KERNEL in rpcsec_gss downcalls Trond Myklebust
2010-05-13 21:08 ` [PATCH 04/15] NFS: Clean up nfs_create_request() Trond Myklebust
2010-05-13 21:08 ` [PATCH 05/15] NFS: Read requests can use GFP_KERNEL Trond Myklebust
2010-05-13 21:08 ` [PATCH 06/15] SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS Trond Myklebust
2010-05-13 21:08 ` [PATCH 07/15] SUNRPC: Ensure memory shrinker doesn't waste time in rpcauth_prune_expired() Trond Myklebust
2010-05-13 21:08 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Trond Myklebust
2010-05-13 21:08 ` [PATCH 09/15] NFS: Don't run nfs_access_cache_shrinker() when the mask is GFP_NOFS Trond Myklebust
2010-05-13 21:08 ` [PATCH 10/15] NFS: Clean up nfs_access_zap_cache() Trond Myklebust
2010-05-13 21:08 ` [PATCH 11/15] NFS: Don't call iput() in nfs_access_cache_shrinker Trond Myklebust
2010-05-13 21:08 ` [PATCH 12/15] SUNRPC: Move the task->tk_bytes_sent and tk_rtt to struct rpc_rqst Trond Myklebust
2010-05-13 21:08 ` [PATCH 13/15] SUNRPC: Remove the 'tk_magic' debugging field Trond Myklebust
2010-05-13 21:08 ` [PATCH 14/15] SUNRPC: Reorder the struct rpc_task fields Trond Myklebust
2010-05-13 21:08 ` [PATCH 15/15] SUNRPC: Don't spam gssd with upcall requests when the kerberos key expired Trond Myklebust
2010-05-14 16:34 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Chuck Lever
2010-05-14 17:13 ` Trond Myklebust
[not found] ` <1273857200.4732.2.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-05-14 17:32 ` Chuck Lever
2010-05-14 18:07 ` Trond Myklebust
[not found] ` <1273860432.4732.30.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-05-14 18:37 ` Trond Myklebust
[not found] ` <1273862242.4732.39.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-05-14 19:18 ` Chuck Lever
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).