* [PATCH 01/15] SUNRPC: Fix xs_setup_bc_tcp()
@ 2010-05-13 21:08 Trond Myklebust
2010-05-13 21:08 ` [PATCH 02/15] NFSv4: Don't use GFP_KERNEL allocations in state recovery Trond Myklebust
0 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw)
To: linux-nfs
It is a BUG for anybody to call this function without setting
args->bc_xprt. Trying to return an error value is just wrong, since the
user cannot fix this: it is a programming error, not a user error.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
net/sunrpc/xprtsock.c | 3 ---
1 files changed, 0 insertions(+), 3 deletions(-)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 3d1dcdf..beefa7a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2442,9 +2442,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
struct sock_xprt *transport;
struct svc_sock *bc_sock;
- if (!args->bc_xprt)
- ERR_PTR(-EINVAL);
-
xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
if (IS_ERR(xprt))
return xprt;
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 02/15] NFSv4: Don't use GFP_KERNEL allocations in state recovery
2010-05-13 21:08 [PATCH 01/15] SUNRPC: Fix xs_setup_bc_tcp() Trond Myklebust
@ 2010-05-13 21:08 ` Trond Myklebust
2010-05-13 21:08 ` [PATCH 03/15] NFS: Don't use GFP_KERNEL in rpcsec_gss downcalls Trond Myklebust
0 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw)
To: linux-nfs
We do not want to have the state recovery thread kick off and wait for a
memory reclaim, since that may deadlock when the writebacks end up
waiting for the state recovery thread to complete.
The safe thing is therefore to use GFP_NOFS in all open, close,
delegation return, lock, etc. operations that may be called by the
state recovery thread.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/delegation.c | 2 +-
fs/nfs/nfs4_fs.h | 4 ++--
fs/nfs/nfs4proc.c | 47 +++++++++++++++++++++++++----------------------
fs/nfs/nfs4state.c | 21 +++++++++++----------
4 files changed, 39 insertions(+), 35 deletions(-)
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index ea61d26..3016345 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -213,7 +213,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
struct nfs_delegation *freeme = NULL;
int status = 0;
- delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
+ delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
if (delegation == NULL)
return -ENOMEM;
memcpy(delegation->stateid.data, res->delegation.data,
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 5099306..c538c61 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -213,7 +213,7 @@ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait);
+extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait);
extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
@@ -286,7 +286,7 @@ extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t);
-extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter);
+extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9998c29..70015dd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -717,17 +717,18 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
struct nfs4_state_owner *sp, fmode_t fmode, int flags,
- const struct iattr *attrs)
+ const struct iattr *attrs,
+ gfp_t gfp_mask)
{
struct dentry *parent = dget_parent(path->dentry);
struct inode *dir = parent->d_inode;
struct nfs_server *server = NFS_SERVER(dir);
struct nfs4_opendata *p;
- p = kzalloc(sizeof(*p), GFP_KERNEL);
+ p = kzalloc(sizeof(*p), gfp_mask);
if (p == NULL)
goto err;
- p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
+ p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask);
if (p->o_arg.seqid == NULL)
goto err_free;
path_get(path);
@@ -1063,7 +1064,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
{
struct nfs4_opendata *opendata;
- opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL);
+ opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL, GFP_NOFS);
if (opendata == NULL)
return ERR_PTR(-ENOMEM);
opendata->state = state;
@@ -1651,7 +1652,7 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
if (path->dentry->d_inode != NULL)
nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode);
status = -ENOMEM;
- opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr);
+ opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr, GFP_KERNEL);
if (opendata == NULL)
goto err_put_state_owner;
@@ -1926,7 +1927,7 @@ static const struct rpc_call_ops nfs4_close_ops = {
*
* NOTE: Caller must be holding the sp->so_owner semaphore!
*/
-int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
+int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait)
{
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs4_closedata *calldata;
@@ -1945,7 +1946,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
};
int status = -ENOMEM;
- calldata = kzalloc(sizeof(*calldata), GFP_KERNEL);
+ calldata = kzalloc(sizeof(*calldata), gfp_mask);
if (calldata == NULL)
goto out;
calldata->inode = state->inode;
@@ -1953,7 +1954,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
calldata->arg.fh = NFS_FH(state->inode);
calldata->arg.stateid = &state->open_stateid;
/* Serialization for the sequence id */
- calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
+ calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid, gfp_mask);
if (calldata->arg.seqid == NULL)
goto out_free_calldata;
calldata->arg.fmode = 0;
@@ -3704,7 +3705,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
};
int status = 0;
- data = kzalloc(sizeof(*data), GFP_KERNEL);
+ data = kzalloc(sizeof(*data), GFP_NOFS);
if (data == NULL)
return -ENOMEM;
data->args.fhandle = &data->fh;
@@ -3860,7 +3861,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
struct nfs4_unlockdata *p;
struct inode *inode = lsp->ls_state->inode;
- p = kzalloc(sizeof(*p), GFP_KERNEL);
+ p = kzalloc(sizeof(*p), GFP_NOFS);
if (p == NULL)
return NULL;
p->arg.fh = NFS_FH(inode);
@@ -3998,7 +3999,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
if (test_bit(NFS_DELEGATED_STATE, &state->flags))
goto out;
lsp = request->fl_u.nfs4_fl.owner;
- seqid = nfs_alloc_seqid(&lsp->ls_seqid);
+ seqid = nfs_alloc_seqid(&lsp->ls_seqid, GFP_KERNEL);
status = -ENOMEM;
if (seqid == NULL)
goto out;
@@ -4026,22 +4027,23 @@ struct nfs4_lockdata {
};
static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
- struct nfs_open_context *ctx, struct nfs4_lock_state *lsp)
+ struct nfs_open_context *ctx, struct nfs4_lock_state *lsp,
+ gfp_t gfp_mask)
{
struct nfs4_lockdata *p;
struct inode *inode = lsp->ls_state->inode;
struct nfs_server *server = NFS_SERVER(inode);
- p = kzalloc(sizeof(*p), GFP_KERNEL);
+ p = kzalloc(sizeof(*p), gfp_mask);
if (p == NULL)
return NULL;
p->arg.fh = NFS_FH(inode);
p->arg.fl = &p->fl;
- p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid);
+ p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid, gfp_mask);
if (p->arg.open_seqid == NULL)
goto out_free;
- p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
+ p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid, gfp_mask);
if (p->arg.lock_seqid == NULL)
goto out_free_seqid;
p->arg.lock_stateid = &lsp->ls_stateid;
@@ -4195,7 +4197,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
dprintk("%s: begin!\n", __func__);
data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file),
- fl->fl_u.nfs4_fl.owner);
+ fl->fl_u.nfs4_fl.owner,
+ recovery_type == NFS_LOCK_NEW ? GFP_KERNEL : GFP_NOFS);
if (data == NULL)
return -ENOMEM;
if (IS_SETLKW(cmd))
@@ -4684,7 +4687,7 @@ static int nfs4_reset_slot_table(struct nfs4_slot_table *tbl, u32 max_reqs,
if (max_reqs != tbl->max_slots) {
ret = -ENOMEM;
new = kmalloc(max_reqs * sizeof(struct nfs4_slot),
- GFP_KERNEL);
+ GFP_NOFS);
if (!new)
goto out;
ret = 0;
@@ -4749,7 +4752,7 @@ static int nfs4_init_slot_table(struct nfs4_slot_table *tbl,
dprintk("--> %s: max_reqs=%u\n", __func__, max_slots);
- slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_KERNEL);
+ slot = kcalloc(max_slots, sizeof(struct nfs4_slot), GFP_NOFS);
if (!slot)
goto out;
ret = 0;
@@ -4798,7 +4801,7 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
struct nfs4_session *session;
struct nfs4_slot_table *tbl;
- session = kzalloc(sizeof(struct nfs4_session), GFP_KERNEL);
+ session = kzalloc(sizeof(struct nfs4_session), GFP_NOFS);
if (!session)
return NULL;
@@ -5142,8 +5145,8 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp,
if (!atomic_inc_not_zero(&clp->cl_count))
return -EIO;
- args = kzalloc(sizeof(*args), GFP_KERNEL);
- res = kzalloc(sizeof(*res), GFP_KERNEL);
+ args = kzalloc(sizeof(*args), GFP_NOFS);
+ res = kzalloc(sizeof(*res), GFP_NOFS);
if (!args || !res) {
kfree(args);
kfree(res);
@@ -5244,7 +5247,7 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
int status = -ENOMEM;
dprintk("--> %s\n", __func__);
- calldata = kzalloc(sizeof(*calldata), GFP_KERNEL);
+ calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
if (calldata == NULL)
goto out;
calldata->clp = clp;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index cd2d904..34acf59 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -366,7 +366,7 @@ nfs4_alloc_state_owner(void)
{
struct nfs4_state_owner *sp;
- sp = kzalloc(sizeof(*sp),GFP_KERNEL);
+ sp = kzalloc(sizeof(*sp),GFP_NOFS);
if (!sp)
return NULL;
spin_lock_init(&sp->so_lock);
@@ -440,7 +440,7 @@ nfs4_alloc_open_state(void)
{
struct nfs4_state *state;
- state = kzalloc(sizeof(*state), GFP_KERNEL);
+ state = kzalloc(sizeof(*state), GFP_NOFS);
if (!state)
return NULL;
atomic_set(&state->count, 1);
@@ -542,7 +542,8 @@ void nfs4_put_open_state(struct nfs4_state *state)
/*
* Close the current file.
*/
-static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fmode, int wait)
+static void __nfs4_close(struct path *path, struct nfs4_state *state,
+ fmode_t fmode, gfp_t gfp_mask, int wait)
{
struct nfs4_state_owner *owner = state->owner;
int call_close = 0;
@@ -583,17 +584,17 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fm
nfs4_put_open_state(state);
nfs4_put_state_owner(owner);
} else
- nfs4_do_close(path, state, wait);
+ nfs4_do_close(path, state, gfp_mask, wait);
}
void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode)
{
- __nfs4_close(path, state, fmode, 0);
+ __nfs4_close(path, state, fmode, GFP_NOFS, 0);
}
void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode)
{
- __nfs4_close(path, state, fmode, 1);
+ __nfs4_close(path, state, fmode, GFP_KERNEL, 1);
}
/*
@@ -623,7 +624,7 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
struct nfs4_lock_state *lsp;
struct nfs_client *clp = state->owner->so_client;
- lsp = kzalloc(sizeof(*lsp), GFP_KERNEL);
+ lsp = kzalloc(sizeof(*lsp), GFP_NOFS);
if (lsp == NULL)
return NULL;
rpc_init_wait_queue(&lsp->ls_sequence.wait, "lock_seqid_waitqueue");
@@ -759,11 +760,11 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f
nfs4_put_lock_state(lsp);
}
-struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
+struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask)
{
struct nfs_seqid *new;
- new = kmalloc(sizeof(*new), GFP_KERNEL);
+ new = kmalloc(sizeof(*new), gfp_mask);
if (new != NULL) {
new->sequence = counter;
INIT_LIST_HEAD(&new->list);
@@ -1352,7 +1353,7 @@ static int nfs4_recall_slot(struct nfs_client *clp)
nfs4_begin_drain_session(clp);
new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot),
- GFP_KERNEL);
+ GFP_NOFS);
if (!new)
return -ENOMEM;
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 03/15] NFS: Don't use GFP_KERNEL in rpcsec_gss downcalls
2010-05-13 21:08 ` [PATCH 02/15] NFSv4: Don't use GFP_KERNEL allocations in state recovery Trond Myklebust
@ 2010-05-13 21:08 ` Trond Myklebust
2010-05-13 21:08 ` [PATCH 04/15] NFS: Clean up nfs_create_request() Trond Myklebust
0 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw)
To: linux-nfs
Again, we can deadlock if the memory reclaim triggers a writeback that
requires a rpcsec_gss credential lookup.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
include/linux/sunrpc/gss_api.h | 6 +++-
include/linux/sunrpc/gss_krb5.h | 3 +-
net/sunrpc/auth_gss/auth_gss.c | 2 +-
net/sunrpc/auth_gss/gss_krb5_keys.c | 9 ++++---
net/sunrpc/auth_gss/gss_krb5_mech.c | 34 +++++++++++++++++---------------
net/sunrpc/auth_gss/gss_mech_switch.c | 7 +++--
net/sunrpc/auth_gss/gss_spkm3_mech.c | 5 ++-
net/sunrpc/auth_gss/svcauth_gss.c | 2 +-
8 files changed, 38 insertions(+), 30 deletions(-)
diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h
index b22d7f1..5d8048b 100644
--- a/include/linux/sunrpc/gss_api.h
+++ b/include/linux/sunrpc/gss_api.h
@@ -35,7 +35,8 @@ int gss_import_sec_context(
const void* input_token,
size_t bufsize,
struct gss_api_mech *mech,
- struct gss_ctx **ctx_id);
+ struct gss_ctx **ctx_id,
+ gfp_t gfp_mask);
u32 gss_get_mic(
struct gss_ctx *ctx_id,
struct xdr_buf *message,
@@ -89,7 +90,8 @@ struct gss_api_ops {
int (*gss_import_sec_context)(
const void *input_token,
size_t bufsize,
- struct gss_ctx *ctx_id);
+ struct gss_ctx *ctx_id,
+ gfp_t gfp_mask);
u32 (*gss_get_mic)(
struct gss_ctx *ctx_id,
struct xdr_buf *message,
diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 5e774a5..5af2931 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -295,7 +295,8 @@ u32
krb5_derive_key(const struct gss_krb5_enctype *gk5e,
const struct xdr_netobj *inkey,
struct xdr_netobj *outkey,
- const struct xdr_netobj *in_constant);
+ const struct xdr_netobj *in_constant,
+ gfp_t gfp_mask);
u32
gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 6654c85..48a7939 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -229,7 +229,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
p = ERR_PTR(-EFAULT);
goto err;
}
- ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx);
+ ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, GFP_NOFS);
if (ret < 0) {
p = ERR_PTR(ret);
goto err;
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 33b87f0..76e42e6 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -150,7 +150,8 @@ static void krb5_nfold(u32 inbits, const u8 *in,
u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
const struct xdr_netobj *inkey,
struct xdr_netobj *outkey,
- const struct xdr_netobj *in_constant)
+ const struct xdr_netobj *in_constant,
+ gfp_t gfp_mask)
{
size_t blocksize, keybytes, keylength, n;
unsigned char *inblockdata, *outblockdata, *rawkey;
@@ -175,15 +176,15 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
/* allocate and set up buffers */
ret = ENOMEM;
- inblockdata = kmalloc(blocksize, GFP_KERNEL);
+ inblockdata = kmalloc(blocksize, gfp_mask);
if (inblockdata == NULL)
goto err_free_cipher;
- outblockdata = kmalloc(blocksize, GFP_KERNEL);
+ outblockdata = kmalloc(blocksize, gfp_mask);
if (outblockdata == NULL)
goto err_free_in;
- rawkey = kmalloc(keybytes, GFP_KERNEL);
+ rawkey = kmalloc(keybytes, gfp_mask);
if (rawkey == NULL)
goto err_free_out;
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 7c249a3..0326446 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -369,7 +369,7 @@ set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed)
}
static int
-context_derive_keys_des3(struct krb5_ctx *ctx)
+context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
{
struct xdr_netobj c, keyin, keyout;
u8 cdata[GSS_KRB5_K5CLENGTH];
@@ -396,7 +396,7 @@ context_derive_keys_des3(struct krb5_ctx *ctx)
/* derive cksum */
set_cdata(cdata, KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM);
keyout.data = ctx->cksum;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+ err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving cksum key\n",
__func__, err);
@@ -487,7 +487,7 @@ out_err:
}
static int
-context_derive_keys_new(struct krb5_ctx *ctx)
+context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
{
struct xdr_netobj c, keyin, keyout;
u8 cdata[GSS_KRB5_K5CLENGTH];
@@ -503,7 +503,7 @@ context_derive_keys_new(struct krb5_ctx *ctx)
/* initiator seal encryption */
set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
keyout.data = ctx->initiator_seal;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+ err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving initiator_seal key\n",
__func__, err);
@@ -518,7 +518,7 @@ context_derive_keys_new(struct krb5_ctx *ctx)
/* acceptor seal encryption */
set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
keyout.data = ctx->acceptor_seal;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+ err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving acceptor_seal key\n",
__func__, err);
@@ -533,7 +533,7 @@ context_derive_keys_new(struct krb5_ctx *ctx)
/* initiator sign checksum */
set_cdata(cdata, KG_USAGE_INITIATOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
keyout.data = ctx->initiator_sign;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+ err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving initiator_sign key\n",
__func__, err);
@@ -543,7 +543,7 @@ context_derive_keys_new(struct krb5_ctx *ctx)
/* acceptor sign checksum */
set_cdata(cdata, KG_USAGE_ACCEPTOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
keyout.data = ctx->acceptor_sign;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+ err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving acceptor_sign key\n",
__func__, err);
@@ -553,7 +553,7 @@ context_derive_keys_new(struct krb5_ctx *ctx)
/* initiator seal integrity */
set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
keyout.data = ctx->initiator_integ;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+ err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving initiator_integ key\n",
__func__, err);
@@ -563,7 +563,7 @@ context_derive_keys_new(struct krb5_ctx *ctx)
/* acceptor seal integrity */
set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
keyout.data = ctx->acceptor_integ;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+ err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
if (err) {
dprintk("%s: Error %d deriving acceptor_integ key\n",
__func__, err);
@@ -598,7 +598,8 @@ out_err:
}
static int
-gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx)
+gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
+ gfp_t gfp_mask)
{
int keylen;
@@ -645,7 +646,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx)
}
ctx->mech_used.data = kmemdup(gss_kerberos_mech.gm_oid.data,
- gss_kerberos_mech.gm_oid.len, GFP_KERNEL);
+ gss_kerberos_mech.gm_oid.len, gfp_mask);
if (unlikely(ctx->mech_used.data == NULL)) {
p = ERR_PTR(-ENOMEM);
goto out_err;
@@ -654,12 +655,12 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx)
switch (ctx->enctype) {
case ENCTYPE_DES3_CBC_RAW:
- return context_derive_keys_des3(ctx);
+ return context_derive_keys_des3(ctx, gfp_mask);
case ENCTYPE_ARCFOUR_HMAC:
return context_derive_keys_rc4(ctx);
case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
- return context_derive_keys_new(ctx);
+ return context_derive_keys_new(ctx, gfp_mask);
default:
return -EINVAL;
}
@@ -670,20 +671,21 @@ out_err:
static int
gss_import_sec_context_kerberos(const void *p, size_t len,
- struct gss_ctx *ctx_id)
+ struct gss_ctx *ctx_id,
+ gfp_t gfp_mask)
{
const void *end = (const void *)((const char *)p + len);
struct krb5_ctx *ctx;
int ret;
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ ctx = kzalloc(sizeof(*ctx), gfp_mask);
if (ctx == NULL)
return -ENOMEM;
if (len == 85)
ret = gss_import_v1_context(p, end, ctx);
else
- ret = gss_import_v2_context(p, end, ctx);
+ ret = gss_import_v2_context(p, end, ctx, gfp_mask);
if (ret == 0)
ctx_id->internal_ctx_id = ctx;
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 28a84ef..2689de3 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -249,14 +249,15 @@ EXPORT_SYMBOL_GPL(gss_mech_put);
int
gss_import_sec_context(const void *input_token, size_t bufsize,
struct gss_api_mech *mech,
- struct gss_ctx **ctx_id)
+ struct gss_ctx **ctx_id,
+ gfp_t gfp_mask)
{
- if (!(*ctx_id = kzalloc(sizeof(**ctx_id), GFP_KERNEL)))
+ if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask)))
return -ENOMEM;
(*ctx_id)->mech_type = gss_mech_get(mech);
return mech->gm_ops
- ->gss_import_sec_context(input_token, bufsize, *ctx_id);
+ ->gss_import_sec_context(input_token, bufsize, *ctx_id, gfp_mask);
}
/* gss_get_mic: compute a mic over message and return mic_token. */
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index 035e1dd..dc3f1f5 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -84,13 +84,14 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
static int
gss_import_sec_context_spkm3(const void *p, size_t len,
- struct gss_ctx *ctx_id)
+ struct gss_ctx *ctx_id,
+ gfp_t gfp_mask)
{
const void *end = (const void *)((const char *)p + len);
struct spkm3_ctx *ctx;
int version;
- if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS)))
+ if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask)))
goto out_err;
p = simple_get_bytes(p, end, &version, sizeof(version));
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 1d9ac4a..cc385b3 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -494,7 +494,7 @@ static int rsc_parse(struct cache_detail *cd,
len = qword_get(&mesg, buf, mlen);
if (len < 0)
goto out;
- status = gss_import_sec_context(buf, len, gm, &rsci.mechctx);
+ status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, GFP_KERNEL);
if (status)
goto out;
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 04/15] NFS: Clean up nfs_create_request()
2010-05-13 21:08 ` [PATCH 03/15] NFS: Don't use GFP_KERNEL in rpcsec_gss downcalls Trond Myklebust
@ 2010-05-13 21:08 ` Trond Myklebust
2010-05-13 21:08 ` [PATCH 05/15] NFS: Read requests can use GFP_KERNEL Trond Myklebust
0 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw)
To: linux-nfs
There is no point in looping if we're out of memory.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/pagelist.c | 14 ++++----------
1 files changed, 4 insertions(+), 10 deletions(-)
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 29d9d36..a3654e5 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -60,16 +60,10 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
{
struct nfs_page *req;
- for (;;) {
- /* try to allocate the request struct */
- req = nfs_page_alloc();
- if (req != NULL)
- break;
-
- if (fatal_signal_pending(current))
- return ERR_PTR(-ERESTARTSYS);
- yield();
- }
+ /* try to allocate the request struct */
+ req = nfs_page_alloc();
+ if (req == NULL)
+ return ERR_PTR(-ENOMEM);
/* Initialize the request struct. Initially, we assume a
* long write-back delay. This will be adjusted in
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 05/15] NFS: Read requests can use GFP_KERNEL.
2010-05-13 21:08 ` [PATCH 04/15] NFS: Clean up nfs_create_request() Trond Myklebust
@ 2010-05-13 21:08 ` Trond Myklebust
2010-05-13 21:08 ` [PATCH 06/15] SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS Trond Myklebust
0 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw)
To: linux-nfs
There is no danger of deadlock should the allocation trigger page
writeback.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/read.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index db9b360..6e2b06e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -40,7 +40,7 @@ static mempool_t *nfs_rdata_mempool;
struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
{
- struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);
+ struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL);
if (p) {
memset(p, 0, sizeof(*p));
@@ -50,7 +50,7 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
if (pagecount <= ARRAY_SIZE(p->page_array))
p->pagevec = p->page_array;
else {
- p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
+ p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
if (!p->pagevec) {
mempool_free(p, nfs_rdata_mempool);
p = NULL;
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 06/15] SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS
2010-05-13 21:08 ` [PATCH 05/15] NFS: Read requests can use GFP_KERNEL Trond Myklebust
@ 2010-05-13 21:08 ` Trond Myklebust
2010-05-13 21:08 ` [PATCH 07/15] SUNRPC: Ensure memory shrinker doesn't waste time in rpcauth_prune_expired() Trond Myklebust
0 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw)
To: linux-nfs
Under some circumstances, put_rpccred() can end up allocating memory, so
check the gfp_mask to prevent deadlocks.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
net/sunrpc/auth.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 95afe79..03ff9f9 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -270,6 +270,8 @@ rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
LIST_HEAD(free);
int res;
+ if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
+ return -1;
if (list_empty(&cred_unused))
return 0;
spin_lock(&rpc_credcache_lock);
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 07/15] SUNRPC: Ensure memory shrinker doesn't waste time in rpcauth_prune_expired()
2010-05-13 21:08 ` [PATCH 06/15] SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS Trond Myklebust
@ 2010-05-13 21:08 ` Trond Myklebust
2010-05-13 21:08 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Trond Myklebust
0 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw)
To: linux-nfs
The 'cred_unused' list, that is traversed by rpcauth_cache_shrinker is
ordered by time. If we hit a credential that is under the 60 second garbage
collection moratorium, we should exit because we know at that point that
all successive credentials are subject to the same moratorium...
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
net/sunrpc/auth.c | 12 +++++++-----
1 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 03ff9f9..2213dc5 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -236,10 +236,13 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
- /* Enforce a 60 second garbage collection moratorium */
+ /*
+ * Enforce a 60 second garbage collection moratorium
+ * Note that the cred_unused list must be time-ordered.
+ */
if (time_in_range(cred->cr_expire, expired, jiffies) &&
test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
- continue;
+ return 0;
list_del_init(&cred->cr_lru);
number_cred_unused--;
@@ -258,7 +261,7 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
if (nr_to_scan == 0)
break;
}
- return nr_to_scan;
+ return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
}
/*
@@ -275,8 +278,7 @@ rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
if (list_empty(&cred_unused))
return 0;
spin_lock(&rpc_credcache_lock);
- nr_to_scan = rpcauth_prune_expired(&free, nr_to_scan);
- res = (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
+ res = rpcauth_prune_expired(&free, nr_to_scan);
spin_unlock(&rpc_credcache_lock);
rpcauth_destroy_credlist(&free);
return res;
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter
2010-05-13 21:08 ` [PATCH 07/15] SUNRPC: Ensure memory shrinker doesn't waste time in rpcauth_prune_expired() Trond Myklebust
@ 2010-05-13 21:08 ` Trond Myklebust
2010-05-13 21:08 ` [PATCH 09/15] NFS: Don't run nfs_access_cache_shrinker() when the mask is GFP_NOFS Trond Myklebust
2010-05-14 16:34 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Chuck Lever
0 siblings, 2 replies; 21+ messages in thread
From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw)
To: linux-nfs
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
net/sunrpc/auth.c | 5 ++---
1 files changed, 2 insertions(+), 3 deletions(-)
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 2213dc5..5fb02ac 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -236,6 +236,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
+ if (nr_to_scan-- == 0)
+ break;
/*
* Enforce a 60 second garbage collection moratorium
* Note that the cred_unused list must be time-ordered.
@@ -255,11 +257,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
get_rpccred(cred);
list_add_tail(&cred->cr_lru, free);
rpcauth_unhash_cred_locked(cred);
- nr_to_scan--;
}
spin_unlock(cache_lock);
- if (nr_to_scan == 0)
- break;
}
return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
}
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 09/15] NFS: Don't run nfs_access_cache_shrinker() when the mask is GFP_NOFS
2010-05-13 21:08 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Trond Myklebust
@ 2010-05-13 21:08 ` Trond Myklebust
2010-05-13 21:08 ` [PATCH 10/15] NFS: Clean up nfs_access_zap_cache() Trond Myklebust
2010-05-14 16:34 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Chuck Lever
1 sibling, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw)
To: linux-nfs
Both iput() and put_rpccred() might allocate memory under certain
circumstances, so make sure that we don't recurse and deadlock...
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/dir.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 18da8ab..bc136ef 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1703,6 +1703,8 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
struct nfs_inode *nfsi;
struct nfs_access_entry *cache;
+ if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
+ return -1;
restart:
spin_lock(&nfs_access_lru_lock);
list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 10/15] NFS: Clean up nfs_access_zap_cache()
2010-05-13 21:08 ` [PATCH 09/15] NFS: Don't run nfs_access_cache_shrinker() when the mask is GFP_NOFS Trond Myklebust
@ 2010-05-13 21:08 ` Trond Myklebust
2010-05-13 21:08 ` [PATCH 11/15] NFS: Don't call iput() in nfs_access_cache_shrinker Trond Myklebust
0 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw)
To: linux-nfs
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/dir.c | 60 +++++++++++++++++++++++++++++----------------------------
1 files changed, 31 insertions(+), 29 deletions(-)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index bc136ef..5582e9d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1697,6 +1697,17 @@ static void nfs_access_free_entry(struct nfs_access_entry *entry)
smp_mb__after_atomic_dec();
}
+static void nfs_access_free_list(struct list_head *head)
+{
+ struct nfs_access_entry *cache;
+
+ while (!list_empty(head)) {
+ cache = list_entry(head->next, struct nfs_access_entry, lru);
+ list_del(&cache->lru);
+ nfs_access_free_entry(cache);
+ }
+}
+
int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
{
LIST_HEAD(head);
@@ -1743,52 +1754,41 @@ remove_lru_entry:
goto restart;
}
spin_unlock(&nfs_access_lru_lock);
- while (!list_empty(&head)) {
- cache = list_entry(head.next, struct nfs_access_entry, lru);
- list_del(&cache->lru);
- nfs_access_free_entry(cache);
- }
+ nfs_access_free_list(&head);
return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
}
-static void __nfs_access_zap_cache(struct inode *inode)
+static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
{
- struct nfs_inode *nfsi = NFS_I(inode);
struct rb_root *root_node = &nfsi->access_cache;
- struct rb_node *n, *dispose = NULL;
+ struct rb_node *n;
struct nfs_access_entry *entry;
/* Unhook entries from the cache */
while ((n = rb_first(root_node)) != NULL) {
entry = rb_entry(n, struct nfs_access_entry, rb_node);
rb_erase(n, root_node);
- list_del(&entry->lru);
- n->rb_left = dispose;
- dispose = n;
+ list_move(&entry->lru, head);
}
nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
- spin_unlock(&inode->i_lock);
-
- /* Now kill them all! */
- while (dispose != NULL) {
- n = dispose;
- dispose = n->rb_left;
- nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node));
- }
}
void nfs_access_zap_cache(struct inode *inode)
{
+ LIST_HEAD(head);
+
+ if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0)
+ return;
/* Remove from global LRU init */
- if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
- spin_lock(&nfs_access_lru_lock);
+ spin_lock(&nfs_access_lru_lock);
+ if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
list_del_init(&NFS_I(inode)->access_cache_inode_lru);
- spin_unlock(&nfs_access_lru_lock);
- }
spin_lock(&inode->i_lock);
- /* This will release the spinlock */
- __nfs_access_zap_cache(inode);
+ __nfs_access_zap_cache(NFS_I(inode), &head);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(&nfs_access_lru_lock);
+ nfs_access_free_list(&head);
}
static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
@@ -1839,8 +1839,8 @@ out_stale:
nfs_access_free_entry(cache);
return -ENOENT;
out_zap:
- /* This will release the spinlock */
- __nfs_access_zap_cache(inode);
+ spin_unlock(&inode->i_lock);
+ nfs_access_zap_cache(inode);
return -ENOENT;
}
@@ -1895,9 +1895,11 @@ static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *s
smp_mb__after_atomic_inc();
/* Add inode to global LRU list */
- if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
+ if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
spin_lock(&nfs_access_lru_lock);
- list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list);
+ if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
+ list_add_tail(&NFS_I(inode)->access_cache_inode_lru,
+ &nfs_access_lru_list);
spin_unlock(&nfs_access_lru_lock);
}
}
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 11/15] NFS: Don't call iput() in nfs_access_cache_shrinker
2010-05-13 21:08 ` [PATCH 10/15] NFS: Clean up nfs_access_zap_cache() Trond Myklebust
@ 2010-05-13 21:08 ` Trond Myklebust
2010-05-13 21:08 ` [PATCH 12/15] SUNRPC: Move the task->tk_bytes_sent and tk_rtt to struct rpc_rqst Trond Myklebust
0 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw)
To: linux-nfs
iput() can potentially attempt to allocate memory, so we should avoid
calling it in a memory shrinker. Instead, rely on the fact that iput() will
call nfs_access_zap_cache().
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
fs/nfs/dir.c | 19 ++++---------------
1 files changed, 4 insertions(+), 15 deletions(-)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5582e9d..7bf20be 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1716,22 +1716,14 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
return -1;
-restart:
+
spin_lock(&nfs_access_lru_lock);
list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
- struct rw_semaphore *s_umount;
struct inode *inode;
if (nr_to_scan-- == 0)
break;
- s_umount = &nfsi->vfs_inode.i_sb->s_umount;
- if (!down_read_trylock(s_umount))
- continue;
- inode = igrab(&nfsi->vfs_inode);
- if (inode == NULL) {
- up_read(s_umount);
- continue;
- }
+ inode = &nfsi->vfs_inode;
spin_lock(&inode->i_lock);
if (list_empty(&nfsi->access_cache_entry_lru))
goto remove_lru_entry;
@@ -1745,13 +1737,10 @@ restart:
else {
remove_lru_entry:
list_del_init(&nfsi->access_cache_inode_lru);
+ smp_mb__before_clear_bit();
clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
+ smp_mb__after_clear_bit();
}
- spin_unlock(&inode->i_lock);
- spin_unlock(&nfs_access_lru_lock);
- iput(inode);
- up_read(s_umount);
- goto restart;
}
spin_unlock(&nfs_access_lru_lock);
nfs_access_free_list(&head);
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 12/15] SUNRPC: Move the task->tk_bytes_sent and tk_rtt to struct rpc_rqst
2010-05-13 21:08 ` [PATCH 11/15] NFS: Don't call iput() in nfs_access_cache_shrinker Trond Myklebust
@ 2010-05-13 21:08 ` Trond Myklebust
2010-05-13 21:08 ` [PATCH 13/15] SUNRPC: Remove the 'tk_magic' debugging field Trond Myklebust
0 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw)
To: linux-nfs
It seems strange to maintain stats for bytes_sent in one structure, and
bytes received in another. Try to assemble all the RPC request-related
stats in struct rpc_rqst
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
include/linux/sunrpc/sched.h | 4 +---
include/linux/sunrpc/xprt.h | 6 ++++--
net/sunrpc/stats.c | 4 ++--
net/sunrpc/xprt.c | 4 ++--
net/sunrpc/xprtrdma/transport.c | 2 +-
net/sunrpc/xprtsock.c | 4 ++--
6 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 76720d2..46ebef1 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -80,9 +80,7 @@ struct rpc_task {
} u;
unsigned short tk_timeouts; /* maj timeouts */
- size_t tk_bytes_sent; /* total bytes sent */
- ktime_t tk_start, /* RPC task init timestamp */
- tk_rtt; /* round-trip time */
+ ktime_t tk_start; /* RPC task init timestamp */
pid_t tk_owner; /* Process id for batching tasks */
unsigned char tk_priority : 2;/* Task priority */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index f885186..b514703 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -66,8 +66,6 @@ struct rpc_rqst {
struct rpc_task * rq_task; /* RPC task data */
__be32 rq_xid; /* request XID */
int rq_cong; /* has incremented xprt->cong */
- int rq_reply_bytes_recvd; /* number of reply */
- /* bytes received */
u32 rq_seqno; /* gss seq no. used on req. */
int rq_enc_pages_num;
struct page **rq_enc_pages; /* scratch pages for use by
@@ -78,12 +76,16 @@ struct rpc_rqst {
__u32 * rq_buffer; /* XDR encode buffer */
size_t rq_callsize,
rq_rcvsize;
+ size_t rq_xmit_bytes_sent; /* total bytes sent */
+ size_t rq_reply_bytes_recvd; /* total reply bytes */
+ /* received */
struct xdr_buf rq_private_buf; /* The receive buffer
* used in the softirq.
*/
unsigned long rq_majortimeo; /* major timeout alarm */
unsigned long rq_timeout; /* Current timeout value */
+ ktime_t rq_rtt; /* round-trip time */
unsigned int rq_retries; /* # of retries */
unsigned int rq_connect_cookie;
/* A cookie used to track the
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index aacd95f..ea1046f 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -156,13 +156,13 @@ void rpc_count_iostats(struct rpc_task *task)
op_metrics->om_ntrans += req->rq_ntrans;
op_metrics->om_timeouts += task->tk_timeouts;
- op_metrics->om_bytes_sent += task->tk_bytes_sent;
+ op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
delta = ktime_sub(req->rq_xtime, task->tk_start);
op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
- op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, task->tk_rtt);
+ op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);
delta = ktime_sub(ktime_get(), task->tk_start);
op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 8986b1b..65fe2e4 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -780,7 +780,7 @@ static void xprt_update_rtt(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_rtt *rtt = task->tk_client->cl_rtt;
unsigned timer = task->tk_msg.rpc_proc->p_timer;
- long m = usecs_to_jiffies(ktime_to_us(task->tk_rtt));
+ long m = usecs_to_jiffies(ktime_to_us(req->rq_rtt));
if (timer) {
if (req->rq_ntrans == 1)
@@ -805,7 +805,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
task->tk_pid, ntohl(req->rq_xid), copied);
xprt->stat.recvs++;
- task->tk_rtt = ktime_sub(ktime_get(), req->rq_xtime);
+ req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime);
if (xprt->ops->timer != NULL)
xprt_update_rtt(task);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 3f3b38c..a85e866 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -674,7 +674,7 @@ xprt_rdma_send_request(struct rpc_task *task)
if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
goto drop_connection;
- task->tk_bytes_sent += rqst->rq_snd_buf.len;
+ rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
rqst->rq_bytes_sent = 0;
return 0;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index beefa7a..02fc7f0 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -528,7 +528,7 @@ static int xs_udp_send_request(struct rpc_task *task)
xdr->len - req->rq_bytes_sent, status);
if (status >= 0) {
- task->tk_bytes_sent += status;
+ req->rq_xmit_bytes_sent += status;
if (status >= req->rq_slen)
return 0;
/* Still some bytes left; set up for a retry later. */
@@ -624,7 +624,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
/* If we've sent the entire packet, immediately
* reset the count of bytes sent. */
req->rq_bytes_sent += status;
- task->tk_bytes_sent += status;
+ req->rq_xmit_bytes_sent += status;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
req->rq_bytes_sent = 0;
return 0;
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 13/15] SUNRPC: Remove the 'tk_magic' debugging field
2010-05-13 21:08 ` [PATCH 12/15] SUNRPC: Move the task->tk_bytes_sent and tk_rtt to struct rpc_rqst Trond Myklebust
@ 2010-05-13 21:08 ` Trond Myklebust
2010-05-13 21:08 ` [PATCH 14/15] SUNRPC: Reorder the struct rpc_task fields Trond Myklebust
0 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw)
To: linux-nfs
It has not triggered in almost a decade. Time to get rid of it...
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
include/linux/sunrpc/sched.h | 3 ---
net/sunrpc/sched.c | 11 -----------
2 files changed, 0 insertions(+), 14 deletions(-)
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 46ebef1..41b9f97 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -41,9 +41,6 @@ struct rpc_wait {
* This is the RPC task struct
*/
struct rpc_task {
-#ifdef RPC_DEBUG
- unsigned long tk_magic; /* 0xf00baa */
-#endif
atomic_t tk_count; /* Reference count */
struct list_head tk_task; /* global list of tasks */
struct rpc_clnt * tk_client; /* RPC client */
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index aa7b07e..4a843b8 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -25,7 +25,6 @@
#ifdef RPC_DEBUG
#define RPCDBG_FACILITY RPCDBG_SCHED
-#define RPC_TASK_MAGIC_ID 0xf00baa
#endif
/*
@@ -237,7 +236,6 @@ static void rpc_task_set_debuginfo(struct rpc_task *task)
{
static atomic_t rpc_pid;
- task->tk_magic = RPC_TASK_MAGIC_ID;
task->tk_pid = atomic_inc_return(&rpc_pid);
}
#else
@@ -360,9 +358,6 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task
dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n",
task->tk_pid, jiffies);
-#ifdef RPC_DEBUG
- BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
-#endif
/* Has the task been executed yet? If not, we cannot wake it up! */
if (!RPC_IS_ACTIVATED(task)) {
printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
@@ -916,9 +911,6 @@ EXPORT_SYMBOL_GPL(rpc_put_task);
static void rpc_release_task(struct rpc_task *task)
{
-#ifdef RPC_DEBUG
- BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
-#endif
dprintk("RPC: %5u release task\n", task->tk_pid);
if (!list_empty(&task->tk_task)) {
@@ -930,9 +922,6 @@ static void rpc_release_task(struct rpc_task *task)
}
BUG_ON (RPC_IS_QUEUED(task));
-#ifdef RPC_DEBUG
- task->tk_magic = 0;
-#endif
/* Wake up anyone who is waiting for task completion */
rpc_mark_complete_task(task);
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 14/15] SUNRPC: Reorder the struct rpc_task fields
2010-05-13 21:08 ` [PATCH 13/15] SUNRPC: Remove the 'tk_magic' debugging field Trond Myklebust
@ 2010-05-13 21:08 ` Trond Myklebust
2010-05-13 21:08 ` [PATCH 15/15] SUNRPC: Don't spam gssd with upcall requests when the kerberos key expired Trond Myklebust
0 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw)
To: linux-nfs
This improves the packing of the rpc_task, and ensures that on 64-bit
platforms the size reduces to 216 bytes.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
include/linux/sunrpc/sched.h | 12 ++++++------
1 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 41b9f97..7be4f3a 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -45,14 +45,11 @@ struct rpc_task {
struct list_head tk_task; /* global list of tasks */
struct rpc_clnt * tk_client; /* RPC client */
struct rpc_rqst * tk_rqstp; /* RPC request */
- int tk_status; /* result of last operation */
/*
* RPC call state
*/
struct rpc_message tk_msg; /* RPC call info */
- __u8 tk_garb_retry;
- __u8 tk_cred_retry;
/*
* callback to be executed after waking up
@@ -65,7 +62,6 @@ struct rpc_task {
void * tk_calldata;
unsigned long tk_timeout; /* timeout for rpc_sleep() */
- unsigned short tk_flags; /* misc flags */
unsigned long tk_runstate; /* Task run status */
struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could
* be any workqueue
@@ -76,15 +72,19 @@ struct rpc_task {
struct rpc_wait tk_wait; /* RPC wait */
} u;
- unsigned short tk_timeouts; /* maj timeouts */
ktime_t tk_start; /* RPC task init timestamp */
pid_t tk_owner; /* Process id for batching tasks */
- unsigned char tk_priority : 2;/* Task priority */
+ int tk_status; /* result of last operation */
+ unsigned short tk_flags; /* misc flags */
+ unsigned short tk_timeouts; /* maj timeouts */
#ifdef RPC_DEBUG
unsigned short tk_pid; /* debugging aid */
#endif
+ unsigned char tk_priority : 2,/* Task priority */
+ tk_garb_retry : 2,
+ tk_cred_retry : 2;
};
#define tk_xprt tk_client->cl_xprt
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* [PATCH 15/15] SUNRPC: Don't spam gssd with upcall requests when the kerberos key expired
2010-05-13 21:08 ` [PATCH 14/15] SUNRPC: Reorder the struct rpc_task fields Trond Myklebust
@ 2010-05-13 21:08 ` Trond Myklebust
0 siblings, 0 replies; 21+ messages in thread
From: Trond Myklebust @ 2010-05-13 21:08 UTC (permalink / raw)
To: linux-nfs
Now that the rpc.gssd daemon can explicitly tell us that the key expired,
we should cache that information to avoid spamming gssd.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
include/linux/sunrpc/auth.h | 1 +
include/linux/sunrpc/auth_gss.h | 1 +
net/sunrpc/auth_gss/auth_gss.c | 65 +++++++++++++++++++++++++++++++-------
3 files changed, 55 insertions(+), 12 deletions(-)
diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 996df4d..87d7ec0 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -54,6 +54,7 @@ struct rpc_cred {
#define RPCAUTH_CRED_NEW 0
#define RPCAUTH_CRED_UPTODATE 1
#define RPCAUTH_CRED_HASHED 2
+#define RPCAUTH_CRED_NEGATIVE 3
#define RPCAUTH_CRED_MAGIC 0x0f4aa4f0
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index d48d4e6..671538d 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -82,6 +82,7 @@ struct gss_cred {
enum rpc_gss_svc gc_service;
struct gss_cl_ctx *gc_ctx;
struct gss_upcall_msg *gc_upcall;
+ unsigned long gc_upcall_timestamp;
unsigned char gc_machine_cred : 1;
};
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 48a7939..8da2a0e 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -57,6 +57,9 @@ static const struct rpc_authops authgss_ops;
static const struct rpc_credops gss_credops;
static const struct rpc_credops gss_nullops;
+#define GSS_RETRY_EXPIRED 5
+static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
+
#ifdef RPC_DEBUG
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
@@ -350,6 +353,24 @@ gss_unhash_msg(struct gss_upcall_msg *gss_msg)
}
static void
+gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss_msg)
+{
+ switch (gss_msg->msg.errno) {
+ case 0:
+ if (gss_msg->ctx == NULL)
+ break;
+ clear_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags);
+ gss_cred_set_ctx(&gss_cred->gc_base, gss_msg->ctx);
+ break;
+ case -EKEYEXPIRED:
+ set_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags);
+ }
+ gss_cred->gc_upcall_timestamp = jiffies;
+ gss_cred->gc_upcall = NULL;
+ rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+}
+
+static void
gss_upcall_callback(struct rpc_task *task)
{
struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred,
@@ -358,13 +379,9 @@ gss_upcall_callback(struct rpc_task *task)
struct inode *inode = &gss_msg->inode->vfs_inode;
spin_lock(&inode->i_lock);
- if (gss_msg->ctx)
- gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx);
- else
- task->tk_status = gss_msg->msg.errno;
- gss_cred->gc_upcall = NULL;
- rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+ gss_handle_downcall_result(gss_cred, gss_msg);
spin_unlock(&inode->i_lock);
+ task->tk_status = gss_msg->msg.errno;
gss_release_msg(gss_msg);
}
@@ -513,18 +530,16 @@ gss_refresh_upcall(struct rpc_task *task)
spin_lock(&inode->i_lock);
if (gss_cred->gc_upcall != NULL)
rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL);
- else if (gss_msg->ctx != NULL) {
- gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx);
- gss_cred->gc_upcall = NULL;
- rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
- } else if (gss_msg->msg.errno >= 0) {
+ else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
task->tk_timeout = 0;
gss_cred->gc_upcall = gss_msg;
/* gss_upcall_callback will release the reference to gss_upcall_msg */
atomic_inc(&gss_msg->count);
rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback);
- } else
+ } else {
+ gss_handle_downcall_result(gss_cred, gss_msg);
err = gss_msg->msg.errno;
+ }
spin_unlock(&inode->i_lock);
gss_release_msg(gss_msg);
out:
@@ -1123,6 +1138,23 @@ static int gss_renew_cred(struct rpc_task *task)
return 0;
}
+static int gss_cred_is_negative_entry(struct rpc_cred *cred)
+{
+ if (test_bit(RPCAUTH_CRED_NEGATIVE, &cred->cr_flags)) {
+ unsigned long now = jiffies;
+ unsigned long begin, expire;
+ struct gss_cred *gss_cred;
+
+ gss_cred = container_of(cred, struct gss_cred, gc_base);
+ begin = gss_cred->gc_upcall_timestamp;
+ expire = begin + gss_expired_cred_retry_delay * HZ;
+
+ if (time_in_range_open(now, begin, expire))
+ return 1;
+ }
+ return 0;
+}
+
/*
* Refresh credentials. XXX - finish
*/
@@ -1132,6 +1164,9 @@ gss_refresh(struct rpc_task *task)
struct rpc_cred *cred = task->tk_msg.rpc_cred;
int ret = 0;
+ if (gss_cred_is_negative_entry(cred))
+ return -EKEYEXPIRED;
+
if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) &&
!test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) {
ret = gss_renew_cred(task);
@@ -1585,5 +1620,11 @@ static void __exit exit_rpcsec_gss(void)
}
MODULE_LICENSE("GPL");
+module_param_named(expired_cred_retry_delay,
+ gss_expired_cred_retry_delay,
+ uint, 0644);
+MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until "
+ "the RPC engine retries an expired credential");
+
module_init(init_rpcsec_gss)
module_exit(exit_rpcsec_gss)
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter
2010-05-13 21:08 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Trond Myklebust
2010-05-13 21:08 ` [PATCH 09/15] NFS: Don't run nfs_access_cache_shrinker() when the mask is GFP_NOFS Trond Myklebust
@ 2010-05-14 16:34 ` Chuck Lever
2010-05-14 17:13 ` Trond Myklebust
1 sibling, 1 reply; 21+ messages in thread
From: Chuck Lever @ 2010-05-14 16:34 UTC (permalink / raw)
To: Trond Myklebust; +Cc: linux-nfs
On 05/13/10 05:08 PM, Trond Myklebust wrote:
> Signed-off-by: Trond Myklebust<Trond.Myklebust@netapp.com>
> ---
> net/sunrpc/auth.c | 5 ++---
> 1 files changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
> index 2213dc5..5fb02ac 100644
> --- a/net/sunrpc/auth.c
> +++ b/net/sunrpc/auth.c
> @@ -236,6 +236,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
>
> list_for_each_entry_safe(cred, next,&cred_unused, cr_lru) {
>
> + if (nr_to_scan-- == 0)
> + break;
> /*
> * Enforce a 60 second garbage collection moratorium
> * Note that the cred_unused list must be time-ordered.
> @@ -255,11 +257,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
> get_rpccred(cred);
> list_add_tail(&cred->cr_lru, free);
> rpcauth_unhash_cred_locked(cred);
> - nr_to_scan--;
> }
> spin_unlock(cache_lock);
> - if (nr_to_scan == 0)
> - break;
> }
> return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
> }
It looks to me like the mm calls our cache shrinker with nr_to_scan set
to zero when it just wants this return value, and nothing more. But the
logic here seems to assume that nr_to_scan == 0 means shrink as much as
you can. Am I reading this correctly?
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter
2010-05-14 16:34 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Chuck Lever
@ 2010-05-14 17:13 ` Trond Myklebust
[not found] ` <1273857200.4732.2.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
0 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-14 17:13 UTC (permalink / raw)
To: Chuck Lever; +Cc: linux-nfs
On Fri, 2010-05-14 at 12:34 -0400, Chuck Lever wrote:
> On 05/13/10 05:08 PM, Trond Myklebust wrote:
> > Signed-off-by: Trond Myklebust<Trond.Myklebust@netapp.com>
> > ---
> > net/sunrpc/auth.c | 5 ++---
> > 1 files changed, 2 insertions(+), 3 deletions(-)
> >
> > diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
> > index 2213dc5..5fb02ac 100644
> > --- a/net/sunrpc/auth.c
> > +++ b/net/sunrpc/auth.c
> > @@ -236,6 +236,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
> >
> > list_for_each_entry_safe(cred, next,&cred_unused, cr_lru) {
> >
> > + if (nr_to_scan-- == 0)
> > + break;
> > /*
> > * Enforce a 60 second garbage collection moratorium
> > * Note that the cred_unused list must be time-ordered.
> > @@ -255,11 +257,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
> > get_rpccred(cred);
> > list_add_tail(&cred->cr_lru, free);
> > rpcauth_unhash_cred_locked(cred);
> > - nr_to_scan--;
> > }
> > spin_unlock(cache_lock);
> > - if (nr_to_scan == 0)
> > - break;
> > }
> > return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
> > }
>
> It looks to me like the mm calls our cache shrinker with nr_to_scan set
> to zero when it just wants this return value, and nothing more. But the
> logic here seems to assume that nr_to_scan == 0 means shrink as much as
> you can. Am I reading this correctly?
Look more carefully: the comparison contains a post-decrement operation,
so if the nr_to_scan == 0, then we immediately exit the loop (after
decrementing nr_to_scan, but who cares about that)...
Cheers
Trond
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter
[not found] ` <1273857200.4732.2.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-05-14 17:32 ` Chuck Lever
2010-05-14 18:07 ` Trond Myklebust
0 siblings, 1 reply; 21+ messages in thread
From: Chuck Lever @ 2010-05-14 17:32 UTC (permalink / raw)
To: Trond Myklebust; +Cc: linux-nfs
On 05/14/10 01:13 PM, Trond Myklebust wrote:
> On Fri, 2010-05-14 at 12:34 -0400, Chuck Lever wrote:
>> On 05/13/10 05:08 PM, Trond Myklebust wrote:
>>> Signed-off-by: Trond Myklebust<Trond.Myklebust@netapp.com>
>>> ---
>>> net/sunrpc/auth.c | 5 ++---
>>> 1 files changed, 2 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
>>> index 2213dc5..5fb02ac 100644
>>> --- a/net/sunrpc/auth.c
>>> +++ b/net/sunrpc/auth.c
>>> @@ -236,6 +236,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
>>>
>>> list_for_each_entry_safe(cred, next,&cred_unused, cr_lru) {
>>>
>>> + if (nr_to_scan-- == 0)
>>> + break;
>>> /*
>>> * Enforce a 60 second garbage collection moratorium
>>> * Note that the cred_unused list must be time-ordered.
>>> @@ -255,11 +257,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
>>> get_rpccred(cred);
>>> list_add_tail(&cred->cr_lru, free);
>>> rpcauth_unhash_cred_locked(cred);
>>> - nr_to_scan--;
>>> }
>>> spin_unlock(cache_lock);
>>> - if (nr_to_scan == 0)
>>> - break;
>>> }
>>> return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
>>> }
>>
>> It looks to me like the mm calls our cache shrinker with nr_to_scan set
>> to zero when it just wants this return value, and nothing more. But the
>> logic here seems to assume that nr_to_scan == 0 means shrink as much as
>> you can. Am I reading this correctly?
>
> Look more carefully: the comparison contains a post-decrement operation,
> so if the nr_to_scan == 0, then we immediately exit the loop (after
> decrementing nr_to_scan, but who cares about that)...
When mm calls with nr_to_scan set to zero, it doesn't expect a -1
return, it just uses the returned value. mm checks for a -1 return only
when a non-zero scan count argument is passed.
So the check you added here (and in the access cache shrinker) to return
-1 when the gfp_mask doesn't contain GFP_KERNEL could cause some
trouble. It would be safer if we return -1 _after_ checking for
nr_to_scan == 0.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter
2010-05-14 17:32 ` Chuck Lever
@ 2010-05-14 18:07 ` Trond Myklebust
[not found] ` <1273860432.4732.30.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
0 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-14 18:07 UTC (permalink / raw)
To: Chuck Lever; +Cc: linux-nfs
On Fri, 2010-05-14 at 13:32 -0400, Chuck Lever wrote:
> On 05/14/10 01:13 PM, Trond Myklebust wrote:
> > On Fri, 2010-05-14 at 12:34 -0400, Chuck Lever wrote:
> >> On 05/13/10 05:08 PM, Trond Myklebust wrote:
> >>> Signed-off-by: Trond Myklebust<Trond.Myklebust@netapp.com>
> >>> ---
> >>> net/sunrpc/auth.c | 5 ++---
> >>> 1 files changed, 2 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
> >>> index 2213dc5..5fb02ac 100644
> >>> --- a/net/sunrpc/auth.c
> >>> +++ b/net/sunrpc/auth.c
> >>> @@ -236,6 +236,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
> >>>
> >>> list_for_each_entry_safe(cred, next,&cred_unused, cr_lru) {
> >>>
> >>> + if (nr_to_scan-- == 0)
> >>> + break;
> >>> /*
> >>> * Enforce a 60 second garbage collection moratorium
> >>> * Note that the cred_unused list must be time-ordered.
> >>> @@ -255,11 +257,8 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
> >>> get_rpccred(cred);
> >>> list_add_tail(&cred->cr_lru, free);
> >>> rpcauth_unhash_cred_locked(cred);
> >>> - nr_to_scan--;
> >>> }
> >>> spin_unlock(cache_lock);
> >>> - if (nr_to_scan == 0)
> >>> - break;
> >>> }
> >>> return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
> >>> }
> >>
> >> It looks to me like the mm calls our cache shrinker with nr_to_scan set
> >> to zero when it just wants this return value, and nothing more. But the
> >> logic here seems to assume that nr_to_scan == 0 means shrink as much as
> >> you can. Am I reading this correctly?
> >
> > Look more carefully: the comparison contains a post-decrement operation,
> > so if the nr_to_scan == 0, then we immediately exit the loop (after
> > decrementing nr_to_scan, but who cares about that)...
>
> When mm calls with nr_to_scan set to zero, it doesn't expect a -1
> return, it just uses the returned value. mm checks for a -1 return only
> when a non-zero scan count argument is passed.
>
> So the check you added here (and in the access cache shrinker) to return
> -1 when the gfp_mask doesn't contain GFP_KERNEL could cause some
> trouble. It would be safer if we return -1 _after_ checking for
> nr_to_scan == 0.
Oh... You're referring to the change that was added in Patch 6/15
SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS? I
got confused...
I can perhaps rather add a check for nr_to_scan != 0 in that patch...
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter
[not found] ` <1273860432.4732.30.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-05-14 18:37 ` Trond Myklebust
[not found] ` <1273862242.4732.39.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
0 siblings, 1 reply; 21+ messages in thread
From: Trond Myklebust @ 2010-05-14 18:37 UTC (permalink / raw)
To: Chuck Lever; +Cc: linux-nfs
On Fri, 2010-05-14 at 14:07 -0400, Trond Myklebust wrote:
> On Fri, 2010-05-14 at 13:32 -0400, Chuck Lever wrote:
> > When mm calls with nr_to_scan set to zero, it doesn't expect a -1
> > return, it just uses the returned value. mm checks for a -1 return only
> > when a non-zero scan count argument is passed.
> >
> > So the check you added here (and in the access cache shrinker) to return
> > -1 when the gfp_mask doesn't contain GFP_KERNEL could cause some
> > trouble. It would be safer if we return -1 _after_ checking for
> > nr_to_scan == 0.
>
> Oh... You're referring to the change that was added in Patch 6/15
> SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS? I
> got confused...
>
> I can perhaps rather add a check for nr_to_scan != 0 in that patch...
OK... How about the following? (and ditto for the access cache shrinker)
--------------------------------------------------------------------------------------------
>From 8048209c54b95046a23cd994b3d0520757ea5845 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 13 May 2010 12:51:03 -0400
Subject: [PATCH 06/15] SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS
Under some circumstances, put_rpccred() can end up allocating memory, so
check the gfp_mask to prevent deadlocks.
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
net/sunrpc/auth.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 95afe79..0667a36 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -270,6 +270,8 @@ rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
LIST_HEAD(free);
int res;
+ if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
+ return (nr_to_scan == 0) ? 0 : -1;
if (list_empty(&cred_unused))
return 0;
spin_lock(&rpc_credcache_lock);
--
1.6.6.1
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter
[not found] ` <1273862242.4732.39.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
@ 2010-05-14 19:18 ` Chuck Lever
0 siblings, 0 replies; 21+ messages in thread
From: Chuck Lever @ 2010-05-14 19:18 UTC (permalink / raw)
To: Trond Myklebust; +Cc: linux-nfs
On 05/14/10 02:37 PM, Trond Myklebust wrote:
> On Fri, 2010-05-14 at 14:07 -0400, Trond Myklebust wrote:
>> On Fri, 2010-05-14 at 13:32 -0400, Chuck Lever wrote:
>>> When mm calls with nr_to_scan set to zero, it doesn't expect a -1
>>> return, it just uses the returned value. mm checks for a -1 return only
>>> when a non-zero scan count argument is passed.
>>>
>>> So the check you added here (and in the access cache shrinker) to return
>>> -1 when the gfp_mask doesn't contain GFP_KERNEL could cause some
>>> trouble. It would be safer if we return -1 _after_ checking for
>>> nr_to_scan == 0.
>>
>> Oh... You're referring to the change that was added in Patch 6/15
>> SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS? I
>> got confused...
Sorry.
>> I can perhaps rather add a check for nr_to_scan != 0 in that patch...
>
> OK... How about the following? (and ditto for the access cache shrinker)
A comment that explains the choice of return codes might be nice, but
yeah, I guess that will work. It avoids the need to take the
rpc_credcache_lock in cases where you can't do any shrinkage anyway.
So, for the series:
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Looked at patch 2 and 3, but my understanding of gss and v4 reclaim
isn't terribly extensive. I didn't see anything egregious in those two,
but I can't really comment intelligently on the high-level logic changes.
> --------------------------------------------------------------------------------------------
>> From 8048209c54b95046a23cd994b3d0520757ea5845 Mon Sep 17 00:00:00 2001
> From: Trond Myklebust<Trond.Myklebust@netapp.com>
> Date: Thu, 13 May 2010 12:51:03 -0400
> Subject: [PATCH 06/15] SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS
>
> Under some circumstances, put_rpccred() can end up allocating memory, so
> check the gfp_mask to prevent deadlocks.
>
> Signed-off-by: Trond Myklebust<Trond.Myklebust@netapp.com>
> ---
> net/sunrpc/auth.c | 2 ++
> 1 files changed, 2 insertions(+), 0 deletions(-)
>
> diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
> index 95afe79..0667a36 100644
> --- a/net/sunrpc/auth.c
> +++ b/net/sunrpc/auth.c
> @@ -270,6 +270,8 @@ rpcauth_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
> LIST_HEAD(free);
> int res;
>
> + if ((gfp_mask& GFP_KERNEL) != GFP_KERNEL)
> + return (nr_to_scan == 0) ? 0 : -1;
> if (list_empty(&cred_unused))
> return 0;
> spin_lock(&rpc_credcache_lock);
^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2010-05-14 19:19 UTC | newest]
Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-13 21:08 [PATCH 01/15] SUNRPC: Fix xs_setup_bc_tcp() Trond Myklebust
2010-05-13 21:08 ` [PATCH 02/15] NFSv4: Don't use GFP_KERNEL allocations in state recovery Trond Myklebust
2010-05-13 21:08 ` [PATCH 03/15] NFS: Don't use GFP_KERNEL in rpcsec_gss downcalls Trond Myklebust
2010-05-13 21:08 ` [PATCH 04/15] NFS: Clean up nfs_create_request() Trond Myklebust
2010-05-13 21:08 ` [PATCH 05/15] NFS: Read requests can use GFP_KERNEL Trond Myklebust
2010-05-13 21:08 ` [PATCH 06/15] SUNRPC: Dont run rpcauth_cache_shrinker() when gfp_mask is GFP_NOFS Trond Myklebust
2010-05-13 21:08 ` [PATCH 07/15] SUNRPC: Ensure memory shrinker doesn't waste time in rpcauth_prune_expired() Trond Myklebust
2010-05-13 21:08 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Trond Myklebust
2010-05-13 21:08 ` [PATCH 09/15] NFS: Don't run nfs_access_cache_shrinker() when the mask is GFP_NOFS Trond Myklebust
2010-05-13 21:08 ` [PATCH 10/15] NFS: Clean up nfs_access_zap_cache() Trond Myklebust
2010-05-13 21:08 ` [PATCH 11/15] NFS: Don't call iput() in nfs_access_cache_shrinker Trond Myklebust
2010-05-13 21:08 ` [PATCH 12/15] SUNRPC: Move the task->tk_bytes_sent and tk_rtt to struct rpc_rqst Trond Myklebust
2010-05-13 21:08 ` [PATCH 13/15] SUNRPC: Remove the 'tk_magic' debugging field Trond Myklebust
2010-05-13 21:08 ` [PATCH 14/15] SUNRPC: Reorder the struct rpc_task fields Trond Myklebust
2010-05-13 21:08 ` [PATCH 15/15] SUNRPC: Don't spam gssd with upcall requests when the kerberos key expired Trond Myklebust
2010-05-14 16:34 ` [PATCH 08/15] SUNRPC: Ensure rpcauth_prune_expired() respects the nr_to_scan parameter Chuck Lever
2010-05-14 17:13 ` Trond Myklebust
[not found] ` <1273857200.4732.2.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-05-14 17:32 ` Chuck Lever
2010-05-14 18:07 ` Trond Myklebust
[not found] ` <1273860432.4732.30.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-05-14 18:37 ` Trond Myklebust
[not found] ` <1273862242.4732.39.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-05-14 19:18 ` Chuck Lever
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).