* [PATCH 2/8] pnfs-submit: clean locking infrastructure
2010-05-05 17:00 ` [PATCH 1/8] pnfs-submit: clean struct nfs_inode Alexandros Batsakis
@ 2010-05-05 17:00 ` Alexandros Batsakis
2010-06-07 14:34 ` Fred Isaman
0 siblings, 1 reply; 22+ messages in thread
From: Alexandros Batsakis @ 2010-05-05 17:00 UTC (permalink / raw)
To: linux-nfs; +Cc: bhalevy, Alexandros Batsakis
(also minor cleanup of pnfs_free_layout())
Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
---
fs/nfs/pnfs.c | 73 ++++++++++++++++++++++++++++++++++++--------------------
1 files changed, 47 insertions(+), 26 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index f32dbbb..a4031b4 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -60,6 +60,8 @@ static int pnfs_initialized;
static void pnfs_free_layout(struct pnfs_layout_type *lo,
struct nfs4_pnfs_layout_segment *range);
static enum pnfs_try_status pnfs_commit(struct nfs_write_data *data, int sync);
+static inline void lock_current_layout(struct nfs_inode *nfsi);
+static inline void unlock_current_layout(struct nfs_inode *nfsi);
/* Locking:
*
@@ -152,15 +154,15 @@ void
pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
{
dprintk("%s: has_layout=%d ctx=%p\n", __func__, has_layout(nfsi), ctx);
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
if (has_layout(nfsi) && !layoutcommit_needed(nfsi)) {
nfsi->layout.lo_cred = get_rpccred(ctx->state->owner->so_cred);
nfsi->change_attr++;
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
dprintk("%s: Set layoutcommit\n", __func__);
return;
}
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
}
/* Update last_write_offset for layoutcommit.
@@ -173,7 +175,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
{
loff_t end_pos;
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
if (offset < nfsi->layout.pnfs_write_begin_pos)
nfsi->layout.pnfs_write_begin_pos = offset;
end_pos = offset + extent - 1; /* I'm being inclusive */
@@ -185,7 +187,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
(unsigned long) offset ,
(unsigned long) nfsi->layout.pnfs_write_begin_pos,
(unsigned long) nfsi->layout.pnfs_write_end_pos);
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
}
/* Unitialize a mountpoint in a layout driver */
@@ -313,6 +315,17 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
#endif /* CONFIG_SMP */
+static inline void lock_current_layout(struct nfs_inode *nfsi)
+{
+ spin_lock(&nfsi->lo_lock);
+}
+
+static inline void unlock_current_layout(struct nfs_inode *nfsi)
+{
+ BUG_ON_UNLOCKED_LO((&nfsi->layout));
+ spin_unlock(&nfsi->lo_lock);
+}
+
/*
* get and lock nfsi->layout
*/
@@ -321,10 +334,10 @@ get_lock_current_layout(struct nfs_inode *nfsi)
{
struct pnfs_layout_type *lo;
+ lock_current_layout(nfsi);
lo = &nfsi->layout;
- spin_lock(&nfsi->lo_lock);
if (!lo->ld_data) {
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
return NULL;
}
@@ -344,7 +357,12 @@ put_unlock_current_layout(struct pnfs_layout_type *lo)
BUG_ON_UNLOCKED_LO(lo);
BUG_ON(lo->refcount <= 0);
- if (--lo->refcount == 0 && list_empty(&lo->segs)) {
+ lo->refcount--;
+
+ if (lo->refcount > 0)
+ goto out;
+
+ if (list_empty(&lo->segs)) {
struct layoutdriver_io_operations *io_ops =
PNFS_LD_IO_OPS(lo);
@@ -358,7 +376,8 @@ put_unlock_current_layout(struct pnfs_layout_type *lo)
list_del_init(&nfsi->lo_inodes);
spin_unlock(&clp->cl_lock);
}
- spin_unlock(&nfsi->lo_lock);
+out:
+ unlock_current_layout(nfsi);
}
void
@@ -367,7 +386,7 @@ pnfs_layout_release(struct pnfs_layout_type *lo, atomic_t *count,
{
struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
if (range)
pnfs_free_layout(lo, range);
atomic_dec(count);
@@ -386,6 +405,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
};
lo = get_lock_current_layout(nfsi);
+ if (!lo)
+ return;
pnfs_free_layout(lo, &range);
put_unlock_current_layout(lo);
}
@@ -663,7 +684,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
struct pnfs_layout_segment *lseg;
bool ret = false;
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
list_for_each_entry (lseg, &nfsi->layout.segs, fi_list) {
if (!should_free_lseg(lseg, range))
continue;
@@ -677,7 +698,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
}
if (atomic_read(&nfsi->layout.lgetcount))
ret = true;
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
dprintk("%s:Return %d\n", __func__, ret);
return ret;
@@ -759,7 +780,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
/* unlock w/o put rebalanced by eventual call to
* pnfs_layout_release
*/
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
if (pnfs_return_layout_barrier(nfsi, &arg)) {
dprintk("%s: waiting\n", __func__);
@@ -900,7 +921,7 @@ static int pnfs_wait_schedule(void *word)
*
* Note: If successful, nfsi->lo_lock is taken and the caller
* must put and unlock current_layout by using put_unlock_current_layout()
- * when the returned layout is released.
+ * directly or pnfs_layout_release() when the returned layout is released.
*/
static struct pnfs_layout_type *
get_lock_alloc_layout(struct inode *ino)
@@ -935,7 +956,7 @@ get_lock_alloc_layout(struct inode *ino)
struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
/* must grab the layout lock before the client lock */
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
spin_lock(&clp->cl_lock);
if (list_empty(&nfsi->lo_inodes))
@@ -1051,10 +1072,10 @@ void drain_layoutreturns(struct pnfs_layout_type *lo)
while (atomic_read(&lo->lretcount)) {
struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
dprintk("%s: waiting\n", __func__);
wait_event(nfsi->lo_waitq, (atomic_read(&lo->lretcount) == 0));
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
}
}
@@ -1093,13 +1114,13 @@ pnfs_update_layout(struct inode *ino,
/* Check to see if the layout for the given range already exists */
lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
if (lseg && !lseg->valid) {
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
if (take_ref)
put_lseg(lseg);
for (;;) {
prepare_to_wait(&nfsi->lo_waitq, &__wait,
TASK_KILLABLE);
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
if (!lseg || lseg->valid)
break;
@@ -1112,7 +1133,7 @@ pnfs_update_layout(struct inode *ino,
result = -ERESTARTSYS;
break;
}
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
schedule();
}
finish_wait(&nfsi->lo_waitq, &__wait);
@@ -1149,7 +1170,7 @@ pnfs_update_layout(struct inode *ino,
/* Matching dec is done in .rpc_release (on non-error paths) */
atomic_inc(&lo->lgetcount);
/* Lose lock, but not reference, match this with pnfs_layout_release */
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
result = get_layout(ino, ctx, &arg, lsegpp, lo);
out:
@@ -1299,7 +1320,7 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp)
*lgp->lsegpp = lseg;
}
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
pnfs_insert_layout(lo, lseg);
if (res->return_on_close) {
@@ -1310,7 +1331,7 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp)
/* Done processing layoutget. Set the layout stateid */
pnfs_set_layout_stateid(lo, &res->stateid);
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
out:
return status;
}
@@ -2140,9 +2161,9 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
if (!data)
return -ENOMEM;
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
if (!layoutcommit_needed(nfsi)) {
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
goto out_free;
}
@@ -2157,7 +2178,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
nfsi->layout.lo_cred = NULL;
pnfs_get_layout_stateid(&data->args.stateid, &nfsi->layout);
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
/* Set up layout commit args */
status = pnfs_layoutcommit_setup(inode, data, write_begin_pos,
--
1.6.2.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 2/8] pnfs-submit: clean locking infrastructure
2010-05-17 17:56 ` [PATCH 1/8] pnfs-submit: clean struct nfs_inode Alexandros Batsakis
@ 2010-05-17 17:56 ` Alexandros Batsakis
2010-05-26 8:28 ` Benny Halevy
2010-05-28 17:27 ` Fred Isaman
0 siblings, 2 replies; 22+ messages in thread
From: Alexandros Batsakis @ 2010-05-17 17:56 UTC (permalink / raw)
To: bhalevy; +Cc: linux-nfs, Alexandros Batsakis
(also minor cleanup of pnfs_free_layout())
Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
Conflicts:
fs/nfs/pnfs.c
---
fs/nfs/pnfs.c | 80 +++++++++++++++++++++++++++++++++++++-------------------
1 files changed, 53 insertions(+), 27 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b72c013..74cb998 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1,4 +1,4 @@
-/*
+ /*
* linux/fs/nfs/pnfs.c
*
* pNFS functions to call and manage layout drivers.
@@ -60,6 +60,8 @@ static int pnfs_initialized;
static void pnfs_free_layout(struct pnfs_layout_type *lo,
struct nfs4_pnfs_layout_segment *range);
static enum pnfs_try_status pnfs_commit(struct nfs_write_data *data, int sync);
+static inline void lock_current_layout(struct nfs_inode *nfsi);
+static inline void unlock_current_layout(struct nfs_inode *nfsi);
/* Locking:
*
@@ -153,16 +155,17 @@ pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
{
dprintk("%s: has_layout=%d layoutcommit_ctx=%p ctx=%p\n", __func__,
has_layout(nfsi), nfsi->layout.layoutcommit_ctx, ctx);
- spin_lock(&nfsi->lo_lock);
+
+ lock_current_layout(nfsi);
if (has_layout(nfsi) && !nfsi->layout.layoutcommit_ctx) {
nfsi->layout.layoutcommit_ctx = get_nfs_open_context(ctx);
nfsi->change_attr++;
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
dprintk("%s: Set layoutcommit_ctx=%p\n", __func__,
nfsi->layout.layoutcommit_ctx);
return;
}
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
}
/* Update last_write_offset for layoutcommit.
@@ -175,7 +178,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
{
loff_t end_pos;
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
if (offset < nfsi->layout.pnfs_write_begin_pos)
nfsi->layout.pnfs_write_begin_pos = offset;
end_pos = offset + extent - 1; /* I'm being inclusive */
@@ -187,7 +190,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
(unsigned long) offset ,
(unsigned long) nfsi->layout.pnfs_write_begin_pos,
(unsigned long) nfsi->layout.pnfs_write_end_pos);
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
}
/* Unitialize a mountpoint in a layout driver */
@@ -296,12 +299,27 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
* pNFS client layout cache
*/
#if defined(CONFIG_SMP)
+#define BUG_ON_LOCKED_LO(lo) \
+ BUG_ON(spin_is_locked(&PNFS_NFS_INODE(lo)->lo_lock))
#define BUG_ON_UNLOCKED_LO(lo) \
BUG_ON(!spin_is_locked(&PNFS_NFS_INODE(lo)->lo_lock))
#else /* CONFIG_SMP */
+#define BUG_ON_LOCKED_LO(lo) do {} while (0)
#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
#endif /* CONFIG_SMP */
+static inline void lock_current_layout(struct nfs_inode *nfsi)
+{
+ BUG_ON_LOCKED_LO((&nfsi->layout));
+ spin_lock(&nfsi->lo_lock);
+}
+
+static inline void unlock_current_layout(struct nfs_inode *nfsi)
+{
+ BUG_ON_UNLOCKED_LO((&nfsi->layout));
+ spin_unlock(&nfsi->lo_lock);
+}
+
/*
* get and lock nfsi->layout
*/
@@ -310,10 +328,10 @@ get_lock_current_layout(struct nfs_inode *nfsi)
{
struct pnfs_layout_type *lo;
+ lock_current_layout(nfsi);
lo = &nfsi->layout;
- spin_lock(&nfsi->lo_lock);
if (!lo->ld_data) {
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
return NULL;
}
@@ -333,7 +351,12 @@ put_unlock_current_layout(struct pnfs_layout_type *lo)
BUG_ON_UNLOCKED_LO(lo);
BUG_ON(lo->refcount <= 0);
- if (--lo->refcount == 0 && list_empty(&lo->segs)) {
+ lo->refcount--;
+
+ if (lo->refcount > 0)
+ goto out;
+
+ if (list_empty(&lo->segs)) {
struct layoutdriver_io_operations *io_ops =
PNFS_LD_IO_OPS(lo);
@@ -347,7 +370,8 @@ put_unlock_current_layout(struct pnfs_layout_type *lo)
list_del_init(&nfsi->lo_inodes);
spin_unlock(&clp->cl_lock);
}
- spin_unlock(&nfsi->lo_lock);
+out:
+ unlock_current_layout(nfsi);
}
void
@@ -356,7 +380,7 @@ pnfs_layout_release(struct pnfs_layout_type *lo, atomic_t *count,
{
struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
if (range)
pnfs_free_layout(lo, range);
atomic_dec(count);
@@ -375,6 +399,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
};
lo = get_lock_current_layout(nfsi);
+ if (!lo)
+ return;
pnfs_free_layout(lo, &range);
put_unlock_current_layout(lo);
}
@@ -652,7 +678,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
struct pnfs_layout_segment *lseg;
bool ret = false;
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
list_for_each_entry (lseg, &nfsi->layout.segs, fi_list) {
if (!should_free_lseg(lseg, range))
continue;
@@ -666,7 +692,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
}
if (atomic_read(&nfsi->layout.lgetcount))
ret = true;
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
dprintk("%s:Return %d\n", __func__, ret);
return ret;
@@ -756,7 +782,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
/* unlock w/o put rebalanced by eventual call to
* pnfs_layout_release
*/
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
if (pnfs_return_layout_barrier(nfsi, &arg)) {
dprintk("%s: waiting\n", __func__);
@@ -887,7 +913,7 @@ static int pnfs_wait_schedule(void *word)
*
* Note: If successful, nfsi->lo_lock is taken and the caller
* must put and unlock current_layout by using put_unlock_current_layout()
- * when the returned layout is released.
+ * directly or pnfs_layout_release() when the returned layout is released.
*/
static struct pnfs_layout_type *
get_lock_alloc_layout(struct inode *ino)
@@ -922,7 +948,7 @@ get_lock_alloc_layout(struct inode *ino)
struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
/* must grab the layout lock before the client lock */
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
spin_lock(&clp->cl_lock);
if (list_empty(&nfsi->lo_inodes))
@@ -1038,10 +1064,10 @@ void drain_layoutreturns(struct pnfs_layout_type *lo)
while (atomic_read(&lo->lretcount)) {
struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
dprintk("%s: waiting\n", __func__);
wait_event(nfsi->lo_waitq, (atomic_read(&lo->lretcount) == 0));
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
}
}
@@ -1080,13 +1106,13 @@ pnfs_update_layout(struct inode *ino,
/* Check to see if the layout for the given range already exists */
lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
if (lseg && !lseg->valid) {
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
if (take_ref)
put_lseg(lseg);
for (;;) {
prepare_to_wait(&nfsi->lo_waitq, &__wait,
TASK_KILLABLE);
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
if (!lseg || lseg->valid)
break;
@@ -1099,7 +1125,7 @@ pnfs_update_layout(struct inode *ino,
result = -ERESTARTSYS;
break;
}
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
schedule();
}
finish_wait(&nfsi->lo_waitq, &__wait);
@@ -1136,7 +1162,7 @@ pnfs_update_layout(struct inode *ino,
/* Matching dec is done in .rpc_release (on non-error paths) */
atomic_inc(&lo->lgetcount);
/* Lose lock, but not reference, match this with pnfs_layout_release */
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
result = get_layout(ino, ctx, &arg, lsegpp, lo);
out:
@@ -1286,7 +1312,7 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp)
*lgp->lsegpp = lseg;
}
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
pnfs_insert_layout(lo, lseg);
if (res->return_on_close) {
@@ -1297,7 +1323,7 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp)
/* Done processing layoutget. Set the layout stateid */
pnfs_set_layout_stateid(lo, &res->stateid);
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
out:
return status;
}
@@ -2212,7 +2238,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
if (!data)
return -ENOMEM;
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
if (!nfsi->layout.layoutcommit_ctx)
goto out_unlock;
@@ -2233,7 +2259,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
nfsi->layout.layoutcommit_ctx = NULL;
/* release lock on pnfs layoutcommit attrs */
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
data->is_sync = sync;
status = pnfs4_proc_layoutcommit(data);
@@ -2242,7 +2268,7 @@ out:
return status;
out_unlock:
pnfs_layoutcommit_free(data);
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
goto out;
}
--
1.6.2.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH 2/8] pnfs-submit: clean locking infrastructure
2010-05-17 17:56 ` [PATCH 2/8] pnfs-submit: clean locking infrastructure Alexandros Batsakis
@ 2010-05-26 8:28 ` Benny Halevy
2010-05-28 17:27 ` Fred Isaman
1 sibling, 0 replies; 22+ messages in thread
From: Benny Halevy @ 2010-05-26 8:28 UTC (permalink / raw)
To: Alexandros Batsakis; +Cc: linux-nfs
On May. 17, 2010, 20:56 +0300, Alexandros Batsakis <batsakis@netapp.com> wrote:
> (also minor cleanup of pnfs_free_layout())
>
> Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
>
> Conflicts:
>
> fs/nfs/pnfs.c
> ---
> fs/nfs/pnfs.c | 80 +++++++++++++++++++++++++++++++++++++-------------------
> 1 files changed, 53 insertions(+), 27 deletions(-)
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index b72c013..74cb998 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1,4 +1,4 @@
> -/*
> + /*
just picking nit...
Benny
> * linux/fs/nfs/pnfs.c
> *
> * pNFS functions to call and manage layout drivers.
> @@ -60,6 +60,8 @@ static int pnfs_initialized;
> static void pnfs_free_layout(struct pnfs_layout_type *lo,
> struct nfs4_pnfs_layout_segment *range);
> static enum pnfs_try_status pnfs_commit(struct nfs_write_data *data, int sync);
> +static inline void lock_current_layout(struct nfs_inode *nfsi);
> +static inline void unlock_current_layout(struct nfs_inode *nfsi);
>
> /* Locking:
> *
> @@ -153,16 +155,17 @@ pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
> {
> dprintk("%s: has_layout=%d layoutcommit_ctx=%p ctx=%p\n", __func__,
> has_layout(nfsi), nfsi->layout.layoutcommit_ctx, ctx);
> - spin_lock(&nfsi->lo_lock);
> +
> + lock_current_layout(nfsi);
> if (has_layout(nfsi) && !nfsi->layout.layoutcommit_ctx) {
> nfsi->layout.layoutcommit_ctx = get_nfs_open_context(ctx);
> nfsi->change_attr++;
> - spin_unlock(&nfsi->lo_lock);
> + unlock_current_layout(nfsi);
> dprintk("%s: Set layoutcommit_ctx=%p\n", __func__,
> nfsi->layout.layoutcommit_ctx);
> return;
> }
> - spin_unlock(&nfsi->lo_lock);
> + unlock_current_layout(nfsi);
> }
>
> /* Update last_write_offset for layoutcommit.
> @@ -175,7 +178,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
> {
> loff_t end_pos;
>
> - spin_lock(&nfsi->lo_lock);
> + lock_current_layout(nfsi);
> if (offset < nfsi->layout.pnfs_write_begin_pos)
> nfsi->layout.pnfs_write_begin_pos = offset;
> end_pos = offset + extent - 1; /* I'm being inclusive */
> @@ -187,7 +190,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
> (unsigned long) offset ,
> (unsigned long) nfsi->layout.pnfs_write_begin_pos,
> (unsigned long) nfsi->layout.pnfs_write_end_pos);
> - spin_unlock(&nfsi->lo_lock);
> + unlock_current_layout(nfsi);
> }
>
> /* Unitialize a mountpoint in a layout driver */
> @@ -296,12 +299,27 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
> * pNFS client layout cache
> */
> #if defined(CONFIG_SMP)
> +#define BUG_ON_LOCKED_LO(lo) \
> + BUG_ON(spin_is_locked(&PNFS_NFS_INODE(lo)->lo_lock))
> #define BUG_ON_UNLOCKED_LO(lo) \
> BUG_ON(!spin_is_locked(&PNFS_NFS_INODE(lo)->lo_lock))
> #else /* CONFIG_SMP */
> +#define BUG_ON_LOCKED_LO(lo) do {} while (0)
> #define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
> #endif /* CONFIG_SMP */
>
> +static inline void lock_current_layout(struct nfs_inode *nfsi)
> +{
> + BUG_ON_LOCKED_LO((&nfsi->layout));
> + spin_lock(&nfsi->lo_lock);
> +}
> +
> +static inline void unlock_current_layout(struct nfs_inode *nfsi)
> +{
> + BUG_ON_UNLOCKED_LO((&nfsi->layout));
> + spin_unlock(&nfsi->lo_lock);
> +}
> +
> /*
> * get and lock nfsi->layout
> */
> @@ -310,10 +328,10 @@ get_lock_current_layout(struct nfs_inode *nfsi)
> {
> struct pnfs_layout_type *lo;
>
> + lock_current_layout(nfsi);
> lo = &nfsi->layout;
> - spin_lock(&nfsi->lo_lock);
> if (!lo->ld_data) {
> - spin_unlock(&nfsi->lo_lock);
> + unlock_current_layout(nfsi);
> return NULL;
> }
>
> @@ -333,7 +351,12 @@ put_unlock_current_layout(struct pnfs_layout_type *lo)
> BUG_ON_UNLOCKED_LO(lo);
> BUG_ON(lo->refcount <= 0);
>
> - if (--lo->refcount == 0 && list_empty(&lo->segs)) {
> + lo->refcount--;
> +
> + if (lo->refcount > 0)
> + goto out;
> +
> + if (list_empty(&lo->segs)) {
> struct layoutdriver_io_operations *io_ops =
> PNFS_LD_IO_OPS(lo);
>
> @@ -347,7 +370,8 @@ put_unlock_current_layout(struct pnfs_layout_type *lo)
> list_del_init(&nfsi->lo_inodes);
> spin_unlock(&clp->cl_lock);
> }
> - spin_unlock(&nfsi->lo_lock);
> +out:
> + unlock_current_layout(nfsi);
> }
>
> void
> @@ -356,7 +380,7 @@ pnfs_layout_release(struct pnfs_layout_type *lo, atomic_t *count,
> {
> struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
>
> - spin_lock(&nfsi->lo_lock);
> + lock_current_layout(nfsi);
> if (range)
> pnfs_free_layout(lo, range);
> atomic_dec(count);
> @@ -375,6 +399,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
> };
>
> lo = get_lock_current_layout(nfsi);
> + if (!lo)
> + return;
> pnfs_free_layout(lo, &range);
> put_unlock_current_layout(lo);
> }
> @@ -652,7 +678,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
> struct pnfs_layout_segment *lseg;
> bool ret = false;
>
> - spin_lock(&nfsi->lo_lock);
> + lock_current_layout(nfsi);
> list_for_each_entry (lseg, &nfsi->layout.segs, fi_list) {
> if (!should_free_lseg(lseg, range))
> continue;
> @@ -666,7 +692,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
> }
> if (atomic_read(&nfsi->layout.lgetcount))
> ret = true;
> - spin_unlock(&nfsi->lo_lock);
> + unlock_current_layout(nfsi);
>
> dprintk("%s:Return %d\n", __func__, ret);
> return ret;
> @@ -756,7 +782,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
> /* unlock w/o put rebalanced by eventual call to
> * pnfs_layout_release
> */
> - spin_unlock(&nfsi->lo_lock);
> + unlock_current_layout(nfsi);
>
> if (pnfs_return_layout_barrier(nfsi, &arg)) {
> dprintk("%s: waiting\n", __func__);
> @@ -887,7 +913,7 @@ static int pnfs_wait_schedule(void *word)
> *
> * Note: If successful, nfsi->lo_lock is taken and the caller
> * must put and unlock current_layout by using put_unlock_current_layout()
> - * when the returned layout is released.
> + * directly or pnfs_layout_release() when the returned layout is released.
> */
> static struct pnfs_layout_type *
> get_lock_alloc_layout(struct inode *ino)
> @@ -922,7 +948,7 @@ get_lock_alloc_layout(struct inode *ino)
> struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
>
> /* must grab the layout lock before the client lock */
> - spin_lock(&nfsi->lo_lock);
> + lock_current_layout(nfsi);
>
> spin_lock(&clp->cl_lock);
> if (list_empty(&nfsi->lo_inodes))
> @@ -1038,10 +1064,10 @@ void drain_layoutreturns(struct pnfs_layout_type *lo)
> while (atomic_read(&lo->lretcount)) {
> struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
>
> - spin_unlock(&nfsi->lo_lock);
> + unlock_current_layout(nfsi);
> dprintk("%s: waiting\n", __func__);
> wait_event(nfsi->lo_waitq, (atomic_read(&lo->lretcount) == 0));
> - spin_lock(&nfsi->lo_lock);
> + lock_current_layout(nfsi);
> }
> }
>
> @@ -1080,13 +1106,13 @@ pnfs_update_layout(struct inode *ino,
> /* Check to see if the layout for the given range already exists */
> lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
> if (lseg && !lseg->valid) {
> - spin_unlock(&nfsi->lo_lock);
> + unlock_current_layout(nfsi);
> if (take_ref)
> put_lseg(lseg);
> for (;;) {
> prepare_to_wait(&nfsi->lo_waitq, &__wait,
> TASK_KILLABLE);
> - spin_lock(&nfsi->lo_lock);
> + lock_current_layout(nfsi);
> lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
> if (!lseg || lseg->valid)
> break;
> @@ -1099,7 +1125,7 @@ pnfs_update_layout(struct inode *ino,
> result = -ERESTARTSYS;
> break;
> }
> - spin_unlock(&nfsi->lo_lock);
> + unlock_current_layout(nfsi);
> schedule();
> }
> finish_wait(&nfsi->lo_waitq, &__wait);
> @@ -1136,7 +1162,7 @@ pnfs_update_layout(struct inode *ino,
> /* Matching dec is done in .rpc_release (on non-error paths) */
> atomic_inc(&lo->lgetcount);
> /* Lose lock, but not reference, match this with pnfs_layout_release */
> - spin_unlock(&nfsi->lo_lock);
> + unlock_current_layout(nfsi);
>
> result = get_layout(ino, ctx, &arg, lsegpp, lo);
> out:
> @@ -1286,7 +1312,7 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp)
> *lgp->lsegpp = lseg;
> }
>
> - spin_lock(&nfsi->lo_lock);
> + lock_current_layout(nfsi);
> pnfs_insert_layout(lo, lseg);
>
> if (res->return_on_close) {
> @@ -1297,7 +1323,7 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp)
>
> /* Done processing layoutget. Set the layout stateid */
> pnfs_set_layout_stateid(lo, &res->stateid);
> - spin_unlock(&nfsi->lo_lock);
> + unlock_current_layout(nfsi);
> out:
> return status;
> }
> @@ -2212,7 +2238,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
> if (!data)
> return -ENOMEM;
>
> - spin_lock(&nfsi->lo_lock);
> + lock_current_layout(nfsi);
> if (!nfsi->layout.layoutcommit_ctx)
> goto out_unlock;
>
> @@ -2233,7 +2259,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
> nfsi->layout.layoutcommit_ctx = NULL;
>
> /* release lock on pnfs layoutcommit attrs */
> - spin_unlock(&nfsi->lo_lock);
> + unlock_current_layout(nfsi);
>
> data->is_sync = sync;
> status = pnfs4_proc_layoutcommit(data);
> @@ -2242,7 +2268,7 @@ out:
> return status;
> out_unlock:
> pnfs_layoutcommit_free(data);
> - spin_unlock(&nfsi->lo_lock);
> + unlock_current_layout(nfsi);
> goto out;
> }
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 2/8] pnfs-submit: clean locking infrastructure
2010-05-17 17:56 ` [PATCH 2/8] pnfs-submit: clean locking infrastructure Alexandros Batsakis
2010-05-26 8:28 ` Benny Halevy
@ 2010-05-28 17:27 ` Fred Isaman
[not found] ` <AANLkTinsHI0fHYdpUlq-MsMX0BmsLGvdAbrKx7M5ydjw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
1 sibling, 1 reply; 22+ messages in thread
From: Fred Isaman @ 2010-05-28 17:27 UTC (permalink / raw)
To: Alexandros Batsakis; +Cc: bhalevy, linux-nfs
On Mon, May 17, 2010 at 1:56 PM, Alexandros Batsakis
<batsakis@netapp.com> wrote:
> (also minor cleanup of pnfs_free_layout())
>
> Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
>
> Conflicts:
>
>        fs/nfs/pnfs.c
> ---
>  fs/nfs/pnfs.c |   80 +++++++++++++++++++++++++++++++++++++-------------------
>  1 files changed, 53 insertions(+), 27 deletions(-)
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index b72c013..74cb998 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1,4 +1,4 @@
> -/*
> + /*
>  *  linux/fs/nfs/pnfs.c
>  *
>  *  pNFS functions to call and manage layout drivers.
> @@ -60,6 +60,8 @@ static int pnfs_initialized;
>  static void pnfs_free_layout(struct pnfs_layout_type *lo,
>                             struct nfs4_pnfs_layout_segment *range);
>  static enum pnfs_try_status pnfs_commit(struct nfs_write_data *data, int sync);
> +static inline void lock_current_layout(struct nfs_inode *nfsi);
> +static inline void unlock_current_layout(struct nfs_inode *nfsi);
>
>  /* Locking:
>  *
> @@ -153,16 +155,17 @@ pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
>  {
>        dprintk("%s: has_layout=%d layoutcommit_ctx=%p ctx=%p\n", __func__,
>                has_layout(nfsi), nfsi->layout.layoutcommit_ctx, ctx);
> -       spin_lock(&nfsi->lo_lock);
> +
> +       lock_current_layout(nfsi);
>        if (has_layout(nfsi) && !nfsi->layout.layoutcommit_ctx) {
>                nfsi->layout.layoutcommit_ctx = get_nfs_open_context(ctx);
>                nfsi->change_attr++;
> -               spin_unlock(&nfsi->lo_lock);
> +               unlock_current_layout(nfsi);
>                dprintk("%s: Set layoutcommit_ctx=%p\n", __func__,
>                        nfsi->layout.layoutcommit_ctx);
>                return;
>        }
> -       spin_unlock(&nfsi->lo_lock);
> +       unlock_current_layout(nfsi);
>  }
>
> =A0/* Update last_write_offset for layoutcommit.
> @@ -175,7 +178,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, lo=
ff_t offset, size_t extent)
> =A0{
> =A0 =A0 =A0 =A0loff_t end_pos;
>
> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> + =A0 =A0 =A0 lock_current_layout(nfsi);
> =A0 =A0 =A0 =A0if (offset < nfsi->layout.pnfs_write_begin_pos)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0nfsi->layout.pnfs_write_begin_pos =3D =
offset;
> =A0 =A0 =A0 =A0end_pos =3D offset + extent - 1; /* I'm being inclusiv=
e */
> @@ -187,7 +190,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, lo=
ff_t offset, size_t extent)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0(unsigned long) offset ,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0(unsigned long) nfsi->layout.pnfs_writ=
e_begin_pos,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0(unsigned long) nfsi->layout.pnfs_writ=
e_end_pos);
> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 unlock_current_layout(nfsi);
> =A0}
>
> =A0/* Unitialize a mountpoint in a layout driver */
> @@ -296,12 +299,27 @@ pnfs_unregister_layoutdriver(struct pnfs_layout=
driver_type *ld_type)
> =A0* pNFS client layout cache
> =A0*/
> =A0#if defined(CONFIG_SMP)
> +#define BUG_ON_LOCKED_LO(lo) \
> + =A0 =A0 =A0 BUG_ON(spin_is_locked(&PNFS_NFS_INODE(lo)->lo_lock))
> =A0#define BUG_ON_UNLOCKED_LO(lo) \
> =A0 =A0 =A0 =A0BUG_ON(!spin_is_locked(&PNFS_NFS_INODE(lo)->lo_lock))
> =A0#else /* CONFIG_SMP */
> +#define BUG_ON_LOCKED_LO(lo) do {} while (0)
> =A0#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
> =A0#endif /* CONFIG_SMP */
>
> +static inline void lock_current_layout(struct nfs_inode *nfsi)
> +{
> + =A0 =A0 =A0 BUG_ON_LOCKED_LO((&nfsi->layout));
I just ran into this in testing. This check causes problems. If you
know it is already unlocked, you wouldn't have to "spin".
Fred
> + =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> +}
> +
> +static inline void unlock_current_layout(struct nfs_inode *nfsi)
> +{
> + =A0 =A0 =A0 BUG_ON_UNLOCKED_LO((&nfsi->layout));
> + =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> +}
> +
> =A0/*
> =A0* get and lock nfsi->layout
> =A0*/
> @@ -310,10 +328,10 @@ get_lock_current_layout(struct nfs_inode *nfsi)
> =A0{
> =A0 =A0 =A0 =A0struct pnfs_layout_type *lo;
>
> + =A0 =A0 =A0 lock_current_layout(nfsi);
> =A0 =A0 =A0 =A0lo =3D &nfsi->layout;
> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> =A0 =A0 =A0 =A0if (!lo->ld_data) {
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return NULL;
> =A0 =A0 =A0 =A0}
>
> @@ -333,7 +351,12 @@ put_unlock_current_layout(struct pnfs_layout_typ=
e *lo)
> =A0 =A0 =A0 =A0BUG_ON_UNLOCKED_LO(lo);
> =A0 =A0 =A0 =A0BUG_ON(lo->refcount <=3D 0);
>
> - =A0 =A0 =A0 if (--lo->refcount =3D=3D 0 && list_empty(&lo->segs)) {
> + =A0 =A0 =A0 lo->refcount--;
> +
> + =A0 =A0 =A0 if (lo->refcount > 0)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
> +
> + =A0 =A0 =A0 if (list_empty(&lo->segs)) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct layoutdriver_io_operations *io_=
ops =3D
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0PNFS_LD_IO_OPS(lo);
>
> @@ -347,7 +370,8 @@ put_unlock_current_layout(struct pnfs_layout_type=
*lo)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0list_del_init(&nfsi->lo_inodes);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0spin_unlock(&clp->cl_lock);
> =A0 =A0 =A0 =A0}
> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> +out:
> + =A0 =A0 =A0 unlock_current_layout(nfsi);
> =A0}
>
> =A0void
> @@ -356,7 +380,7 @@ pnfs_layout_release(struct pnfs_layout_type *lo, =
atomic_t *count,
> =A0{
> =A0 =A0 =A0 =A0struct nfs_inode *nfsi =3D PNFS_NFS_INODE(lo);
>
> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> + =A0 =A0 =A0 lock_current_layout(nfsi);
> =A0 =A0 =A0 =A0if (range)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0pnfs_free_layout(lo, range);
> =A0 =A0 =A0 =A0atomic_dec(count);
> @@ -375,6 +399,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
> =A0 =A0 =A0 =A0};
>
> =A0 =A0 =A0 =A0lo =3D get_lock_current_layout(nfsi);
> + =A0 =A0 =A0 if (!lo)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return;
> =A0 =A0 =A0 =A0pnfs_free_layout(lo, &range);
> =A0 =A0 =A0 =A0put_unlock_current_layout(lo);
> =A0}
> @@ -652,7 +678,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi=
,
> =A0 =A0 =A0 =A0struct pnfs_layout_segment *lseg;
> =A0 =A0 =A0 =A0bool ret =3D false;
>
> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> + =A0 =A0 =A0 lock_current_layout(nfsi);
> =A0 =A0 =A0 =A0list_for_each_entry (lseg, &nfsi->layout.segs, fi_list=
) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (!should_free_lseg(lseg, range))
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0continue;
> @@ -666,7 +692,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi=
,
> =A0 =A0 =A0 =A0}
> =A0 =A0 =A0 =A0if (atomic_read(&nfsi->layout.lgetcount))
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0ret =3D true;
> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 unlock_current_layout(nfsi);
>
> =A0 =A0 =A0 =A0dprintk("%s:Return %d\n", __func__, ret);
> =A0 =A0 =A0 =A0return ret;
> @@ -756,7 +782,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs=
4_pnfs_layout_segment *range,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0/* unlock w/o put rebalanced by eventu=
al call to
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 * pnfs_layout_release
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 */
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
>
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (pnfs_return_layout_barrier(nfsi, &=
arg)) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0dprintk("%s: waiting\n=
", __func__);
> @@ -887,7 +913,7 @@ static int pnfs_wait_schedule(void *word)
> =A0*
> =A0* Note: If successful, nfsi->lo_lock is taken and the caller
> =A0* must put and unlock current_layout by using put_unlock_current_l=
ayout()
> - * when the returned layout is released.
> + * directly or pnfs_layout_release() when the returned layout is rel=
eased.
> =A0*/
> =A0static struct pnfs_layout_type *
> =A0get_lock_alloc_layout(struct inode *ino)
> @@ -922,7 +948,7 @@ get_lock_alloc_layout(struct inode *ino)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct nfs_client *clp=
=3D NFS_SERVER(ino)->nfs_client;
>
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0/* must grab the layou=
t lock before the client lock */
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_lock(&nfsi->lo_loc=
k);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 lock_current_layout(nfs=
i);
>
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0spin_lock(&clp->cl_loc=
k);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (list_empty(&nfsi->=
lo_inodes))
> @@ -1038,10 +1064,10 @@ void drain_layoutreturns(struct pnfs_layout_t=
ype *lo)
> =A0 =A0 =A0 =A0while (atomic_read(&lo->lretcount)) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct nfs_inode *nfsi =3D PNFS_NFS_IN=
ODE(lo);
>
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0dprintk("%s: waiting\n", __func__);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0wait_event(nfsi->lo_waitq, (atomic_rea=
d(&lo->lretcount) =3D=3D 0));
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 lock_current_layout(nfsi);
> =A0 =A0 =A0 =A0}
> =A0}
>
> @@ -1080,13 +1106,13 @@ pnfs_update_layout(struct inode *ino,
> =A0 =A0 =A0 =A0/* Check to see if the layout for the given range alre=
ady exists */
> =A0 =A0 =A0 =A0lseg =3D pnfs_has_layout(lo, &arg, take_ref, !take_ref=
);
> =A0 =A0 =A0 =A0if (lseg && !lseg->valid) {
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (take_ref)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0put_lseg(lseg);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0for (;;) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0prepare_to_wait(&nfsi-=
>lo_waitq, &__wait,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0TASK_KILLABLE);
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_lock(&nfsi->lo_loc=
k);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 lock_current_layout(nfs=
i);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0lseg =3D pnfs_has_layo=
ut(lo, &arg, take_ref, !take_ref);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (!lseg || lseg->val=
id)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0break;
> @@ -1099,7 +1125,7 @@ pnfs_update_layout(struct inode *ino,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0result=
=3D -ERESTARTSYS;
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0break;
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0}
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_l=
ock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(n=
fsi);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0schedule();
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0}
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0finish_wait(&nfsi->lo_waitq, &__wait);
> @@ -1136,7 +1162,7 @@ pnfs_update_layout(struct inode *ino,
> =A0 =A0 =A0 =A0/* Matching dec is done in .rpc_release (on non-error =
paths) */
> =A0 =A0 =A0 =A0atomic_inc(&lo->lgetcount);
> =A0 =A0 =A0 =A0/* Lose lock, but not reference, match this with pnfs_=
layout_release */
> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 unlock_current_layout(nfsi);
>
> =A0 =A0 =A0 =A0result =3D get_layout(ino, ctx, &arg, lsegpp, lo);
> =A0out:
> @@ -1286,7 +1312,7 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget =
*lgp)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0*lgp->lsegpp =3D lseg;
> =A0 =A0 =A0 =A0}
>
> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> + =A0 =A0 =A0 lock_current_layout(nfsi);
> =A0 =A0 =A0 =A0pnfs_insert_layout(lo, lseg);
>
> =A0 =A0 =A0 =A0if (res->return_on_close) {
> @@ -1297,7 +1323,7 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget =
*lgp)
>
> =A0 =A0 =A0 =A0/* Done processing layoutget. Set the layout stateid *=
/
> =A0 =A0 =A0 =A0pnfs_set_layout_stateid(lo, &res->stateid);
> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 unlock_current_layout(nfsi);
> =A0out:
> =A0 =A0 =A0 =A0return status;
> =A0}
> @@ -2212,7 +2238,7 @@ pnfs_layoutcommit_inode(struct inode *inode, in=
t sync)
> =A0 =A0 =A0 =A0if (!data)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return -ENOMEM;
>
> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> + =A0 =A0 =A0 lock_current_layout(nfsi);
> =A0 =A0 =A0 =A0if (!nfsi->layout.layoutcommit_ctx)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0goto out_unlock;
>
> @@ -2233,7 +2259,7 @@ pnfs_layoutcommit_inode(struct inode *inode, in=
t sync)
> =A0 =A0 =A0 =A0nfsi->layout.layoutcommit_ctx =3D NULL;
>
> =A0 =A0 =A0 =A0/* release lock on pnfs layoutcommit attrs */
> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 unlock_current_layout(nfsi);
>
> =A0 =A0 =A0 =A0data->is_sync =3D sync;
> =A0 =A0 =A0 =A0status =3D pnfs4_proc_layoutcommit(data);
> @@ -2242,7 +2268,7 @@ out:
> =A0 =A0 =A0 =A0return status;
> =A0out_unlock:
> =A0 =A0 =A0 =A0pnfs_layoutcommit_free(data);
> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 unlock_current_layout(nfsi);
> =A0 =A0 =A0 =A0goto out;
> =A0}
>
> --
> 1.6.2.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 2/8] pnfs-submit: clean locking infrastructure
[not found] ` <AANLkTinsHI0fHYdpUlq-MsMX0BmsLGvdAbrKx7M5ydjw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2010-05-28 18:27 ` Alexandros Batsakis
0 siblings, 0 replies; 22+ messages in thread
From: Alexandros Batsakis @ 2010-05-28 18:27 UTC (permalink / raw)
To: Fred Isaman; +Cc: Alexandros Batsakis, bhalevy, linux-nfs
On Fri, May 28, 2010 at 10:27 AM, Fred Isaman <iisaman@citi.umich.edu> wrote:
> On Mon, May 17, 2010 at 1:56 PM, Alexandros Batsakis
> <batsakis@netapp.com> wrote:
>> (also minor cleanup of pnfs_free_layout())
>>
>> Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
>>
>> Conflicts:
>>
>> =A0 =A0 =A0 =A0fs/nfs/pnfs.c
>> ---
>> =A0fs/nfs/pnfs.c | =A0 80 +++++++++++++++++++++++++++++++++++++-----=
--------------
>> =A01 files changed, 53 insertions(+), 27 deletions(-)
>>
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index b72c013..74cb998 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -1,4 +1,4 @@
>> -/*
>> + /*
>> =A0* =A0linux/fs/nfs/pnfs.c
>> =A0*
>> =A0* =A0pNFS functions to call and manage layout drivers.
>> @@ -60,6 +60,8 @@ static int pnfs_initialized;
>> =A0static void pnfs_free_layout(struct pnfs_layout_type *lo,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 struct nfs4_=
pnfs_layout_segment *range);
>> =A0static enum pnfs_try_status pnfs_commit(struct nfs_write_data *da=
ta, int sync);
>> +static inline void lock_current_layout(struct nfs_inode *nfsi);
>> +static inline void unlock_current_layout(struct nfs_inode *nfsi);
>>
>> =A0/* Locking:
>> =A0*
>> @@ -153,16 +155,17 @@ pnfs_need_layoutcommit(struct nfs_inode *nfsi,=
struct nfs_open_context *ctx)
>> =A0{
>> =A0 =A0 =A0 =A0dprintk("%s: has_layout=3D%d layoutcommit_ctx=3D%p ct=
x=3D%p\n", __func__,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0has_layout(nfsi), nfsi->layout.layout=
commit_ctx, ctx);
>> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
>> +
>> + =A0 =A0 =A0 lock_current_layout(nfsi);
>> =A0 =A0 =A0 =A0if (has_layout(nfsi) && !nfsi->layout.layoutcommit_ct=
x) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0nfsi->layout.layoutcommit_ctx =3D get=
_nfs_open_context(ctx);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0nfsi->change_attr++;
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0dprintk("%s: Set layoutcommit_ctx=3D%=
p\n", __func__,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0nfsi->layout.layoutco=
mmit_ctx);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return;
>> =A0 =A0 =A0 =A0}
>> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 unlock_current_layout(nfsi);
>> =A0}
>>
>> =A0/* Update last_write_offset for layoutcommit.
>> @@ -175,7 +178,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, l=
off_t offset, size_t extent)
>> =A0{
>> =A0 =A0 =A0 =A0loff_t end_pos;
>>
>> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 lock_current_layout(nfsi);
>> =A0 =A0 =A0 =A0if (offset < nfsi->layout.pnfs_write_begin_pos)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0nfsi->layout.pnfs_write_begin_pos =3D=
offset;
>> =A0 =A0 =A0 =A0end_pos =3D offset + extent - 1; /* I'm being inclusi=
ve */
>> @@ -187,7 +190,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, l=
off_t offset, size_t extent)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0(unsigned long) offset ,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0(unsigned long) nfsi->layout.pnfs_wri=
te_begin_pos,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0(unsigned long) nfsi->layout.pnfs_wri=
te_end_pos);
>> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 unlock_current_layout(nfsi);
>> =A0}
>>
>> =A0/* Unitialize a mountpoint in a layout driver */
>> @@ -296,12 +299,27 @@ pnfs_unregister_layoutdriver(struct pnfs_layou=
tdriver_type *ld_type)
>> =A0* pNFS client layout cache
>> =A0*/
>> =A0#if defined(CONFIG_SMP)
>> +#define BUG_ON_LOCKED_LO(lo) \
>> + =A0 =A0 =A0 BUG_ON(spin_is_locked(&PNFS_NFS_INODE(lo)->lo_lock))
>> =A0#define BUG_ON_UNLOCKED_LO(lo) \
>> =A0 =A0 =A0 =A0BUG_ON(!spin_is_locked(&PNFS_NFS_INODE(lo)->lo_lock))
>> =A0#else /* CONFIG_SMP */
>> +#define BUG_ON_LOCKED_LO(lo) do {} while (0)
>> =A0#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
>> =A0#endif /* CONFIG_SMP */
>>
>> +static inline void lock_current_layout(struct nfs_inode *nfsi)
>> +{
>> + =A0 =A0 =A0 BUG_ON_LOCKED_LO((&nfsi->layout));
>
> I just ran into this in testing. This check causes problems.  If you
> know it is already unlocked, you wouldn't have to "spin".
>
Yeah I saw that too. I fixed it in the new version that is coming up.
-alexandros
> Fred
>
>> + =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
>> +}
>> +
>> +static inline void unlock_current_layout(struct nfs_inode *nfsi)
>> +{
>> + =A0 =A0 =A0 BUG_ON_UNLOCKED_LO((&nfsi->layout));
>> + =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
>> +}
>> +
>> =A0/*
>> =A0* get and lock nfsi->layout
>> =A0*/
>> @@ -310,10 +328,10 @@ get_lock_current_layout(struct nfs_inode *nfsi=
)
>> =A0{
>> =A0 =A0 =A0 =A0struct pnfs_layout_type *lo;
>>
>> + =A0 =A0 =A0 lock_current_layout(nfsi);
>> =A0 =A0 =A0 =A0lo =3D &nfsi->layout;
>> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
>> =A0 =A0 =A0 =A0if (!lo->ld_data) {
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return NULL;
>> =A0 =A0 =A0 =A0}
>>
>> @@ -333,7 +351,12 @@ put_unlock_current_layout(struct pnfs_layout_ty=
pe *lo)
>> =A0 =A0 =A0 =A0BUG_ON_UNLOCKED_LO(lo);
>> =A0 =A0 =A0 =A0BUG_ON(lo->refcount <=3D 0);
>>
>> - =A0 =A0 =A0 if (--lo->refcount =3D=3D 0 && list_empty(&lo->segs)) =
{
>> + =A0 =A0 =A0 lo->refcount--;
>> +
>> + =A0 =A0 =A0 if (lo->refcount > 0)
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
>> +
>> + =A0 =A0 =A0 if (list_empty(&lo->segs)) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct layoutdriver_io_operations *io=
_ops =3D
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0PNFS_LD_IO_OPS(lo);
>>
>> @@ -347,7 +370,8 @@ put_unlock_current_layout(struct pnfs_layout_typ=
e *lo)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0list_del_init(&nfsi->lo_inodes);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0spin_unlock(&clp->cl_lock);
>> =A0 =A0 =A0 =A0}
>> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
>> +out:
>> + =A0 =A0 =A0 unlock_current_layout(nfsi);
>> =A0}
>>
>> =A0void
>> @@ -356,7 +380,7 @@ pnfs_layout_release(struct pnfs_layout_type *lo,=
atomic_t *count,
>> =A0{
>> =A0 =A0 =A0 =A0struct nfs_inode *nfsi =3D PNFS_NFS_INODE(lo);
>>
>> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 lock_current_layout(nfsi);
>> =A0 =A0 =A0 =A0if (range)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0pnfs_free_layout(lo, range);
>> =A0 =A0 =A0 =A0atomic_dec(count);
>> @@ -375,6 +399,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
>> =A0 =A0 =A0 =A0};
>>
>> =A0 =A0 =A0 =A0lo =3D get_lock_current_layout(nfsi);
>> + =A0 =A0 =A0 if (!lo)
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return;
>> =A0 =A0 =A0 =A0pnfs_free_layout(lo, &range);
>> =A0 =A0 =A0 =A0put_unlock_current_layout(lo);
>> =A0}
>> @@ -652,7 +678,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfs=
i,
>> =A0 =A0 =A0 =A0struct pnfs_layout_segment *lseg;
>> =A0 =A0 =A0 =A0bool ret =3D false;
>>
>> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 lock_current_layout(nfsi);
>> =A0 =A0 =A0 =A0list_for_each_entry (lseg, &nfsi->layout.segs, fi_lis=
t) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (!should_free_lseg(lseg, range))
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0continue;
>> @@ -666,7 +692,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfs=
i,
>> =A0 =A0 =A0 =A0}
>> =A0 =A0 =A0 =A0if (atomic_read(&nfsi->layout.lgetcount))
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0ret =3D true;
>> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 unlock_current_layout(nfsi);
>>
>> =A0 =A0 =A0 =A0dprintk("%s:Return %d\n", __func__, ret);
>> =A0 =A0 =A0 =A0return ret;
>> @@ -756,7 +782,7 @@ _pnfs_return_layout(struct inode *ino, struct nf=
s4_pnfs_layout_segment *range,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0/* unlock w/o put rebalanced by event=
ual call to
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 * pnfs_layout_release
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 */
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
>>
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (pnfs_return_layout_barrier(nfsi, =
&arg)) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0dprintk("%s: waiting\=
n", __func__);
>> @@ -887,7 +913,7 @@ static int pnfs_wait_schedule(void *word)
>> =A0*
>> =A0* Note: If successful, nfsi->lo_lock is taken and the caller
>> =A0* must put and unlock current_layout by using put_unlock_current_=
layout()
>> - * when the returned layout is released.
>> + * directly or pnfs_layout_release() when the returned layout is re=
leased.
>> =A0*/
>> =A0static struct pnfs_layout_type *
>> =A0get_lock_alloc_layout(struct inode *ino)
>> @@ -922,7 +948,7 @@ get_lock_alloc_layout(struct inode *ino)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct nfs_client *cl=
p =3D NFS_SERVER(ino)->nfs_client;
>>
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0/* must grab the layo=
ut lock before the client lock */
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_lock(&nfsi->lo_lo=
ck);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 lock_current_layout(nf=
si);
>>
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0spin_lock(&clp->cl_lo=
ck);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (list_empty(&nfsi-=
>lo_inodes))
>> @@ -1038,10 +1064,10 @@ void drain_layoutreturns(struct pnfs_layout_=
type *lo)
>> =A0 =A0 =A0 =A0while (atomic_read(&lo->lretcount)) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct nfs_inode *nfsi =3D PNFS_NFS_I=
NODE(lo);
>>
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0dprintk("%s: waiting\n", __func__);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0wait_event(nfsi->lo_waitq, (atomic_re=
ad(&lo->lretcount) =3D=3D 0));
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 lock_current_layout(nfsi);
>> =A0 =A0 =A0 =A0}
>> =A0}
>>
>> @@ -1080,13 +1106,13 @@ pnfs_update_layout(struct inode *ino,
>> =A0 =A0 =A0 =A0/* Check to see if the layout for the given range alr=
eady exists */
>> =A0 =A0 =A0 =A0lseg =3D pnfs_has_layout(lo, &arg, take_ref, !take_re=
f);
>> =A0 =A0 =A0 =A0if (lseg && !lseg->valid) {
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (take_ref)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0put_lseg(lseg);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0for (;;) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0prepare_to_wait(&nfsi=
->lo_waitq, &__wait,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =
=A0 =A0 =A0TASK_KILLABLE);
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_lock(&nfsi->lo_lo=
ck);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 lock_current_layout(nf=
si);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0lseg =3D pnfs_has_lay=
out(lo, &arg, take_ref, !take_ref);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (!lseg || lseg->va=
lid)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0break=
;
>> @@ -1099,7 +1125,7 @@ pnfs_update_layout(struct inode *ino,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0resul=
t =3D -ERESTARTSYS;
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0break=
;
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0}
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_=
lock);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(=
nfsi);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0schedule();
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0}
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0finish_wait(&nfsi->lo_waitq, &__wait)=
;
>> @@ -1136,7 +1162,7 @@ pnfs_update_layout(struct inode *ino,
>> =A0 =A0 =A0 =A0/* Matching dec is done in .rpc_release (on non-error=
paths) */
>> =A0 =A0 =A0 =A0atomic_inc(&lo->lgetcount);
>> =A0 =A0 =A0 =A0/* Lose lock, but not reference, match this with pnfs=
_layout_release */
>> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 unlock_current_layout(nfsi);
>>
>> =A0 =A0 =A0 =A0result =3D get_layout(ino, ctx, &arg, lsegpp, lo);
>> =A0out:
>> @@ -1286,7 +1312,7 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget=
*lgp)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0*lgp->lsegpp =3D lseg;
>> =A0 =A0 =A0 =A0}
>>
>> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 lock_current_layout(nfsi);
>> =A0 =A0 =A0 =A0pnfs_insert_layout(lo, lseg);
>>
>> =A0 =A0 =A0 =A0if (res->return_on_close) {
>> @@ -1297,7 +1323,7 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget=
*lgp)
>>
>> =A0 =A0 =A0 =A0/* Done processing layoutget. Set the layout stateid =
*/
>> =A0 =A0 =A0 =A0pnfs_set_layout_stateid(lo, &res->stateid);
>> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 unlock_current_layout(nfsi);
>> =A0out:
>> =A0 =A0 =A0 =A0return status;
>> =A0}
>> @@ -2212,7 +2238,7 @@ pnfs_layoutcommit_inode(struct inode *inode, i=
nt sync)
>> =A0 =A0 =A0 =A0if (!data)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return -ENOMEM;
>>
>> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 lock_current_layout(nfsi);
>> =A0 =A0 =A0 =A0if (!nfsi->layout.layoutcommit_ctx)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0goto out_unlock;
>>
>> @@ -2233,7 +2259,7 @@ pnfs_layoutcommit_inode(struct inode *inode, i=
nt sync)
>> =A0 =A0 =A0 =A0nfsi->layout.layoutcommit_ctx =3D NULL;
>>
>> =A0 =A0 =A0 =A0/* release lock on pnfs layoutcommit attrs */
>> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 unlock_current_layout(nfsi);
>>
>> =A0 =A0 =A0 =A0data->is_sync =3D sync;
>> =A0 =A0 =A0 =A0status =3D pnfs4_proc_layoutcommit(data);
>> @@ -2242,7 +2268,7 @@ out:
>> =A0 =A0 =A0 =A0return status;
>> =A0out_unlock:
>> =A0 =A0 =A0 =A0pnfs_layoutcommit_free(data);
>> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
>> + =A0 =A0 =A0 unlock_current_layout(nfsi);
>> =A0 =A0 =A0 =A0goto out;
>> =A0}
>>
>> --
>> 1.6.2.5
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 2/8] pnfs-submit: clean locking infrastructure
2010-05-05 17:00 ` [PATCH 2/8] pnfs-submit: clean locking infrastructure Alexandros Batsakis
@ 2010-06-07 14:34 ` Fred Isaman
0 siblings, 0 replies; 22+ messages in thread
From: Fred Isaman @ 2010-06-07 14:34 UTC (permalink / raw)
To: Alexandros Batsakis; +Cc: linux-nfs, bhalevy
On Wed, May 5, 2010 at 1:00 PM, Alexandros Batsakis <batsakis-HgOvQuBEEgRhl2p70BpVqQ@public.gmane.org=
m> wrote:
> (also minor cleanup of pnfs_free_layout())
>
> Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
> ---
> =A0fs/nfs/pnfs.c | =A0 73 ++++++++++++++++++++++++++++++++++++-------=
-------------
> =A01 files changed, 47 insertions(+), 26 deletions(-)
>
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index f32dbbb..a4031b4 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -60,6 +60,8 @@ static int pnfs_initialized;
> =A0static void pnfs_free_layout(struct pnfs_layout_type *lo,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 struct nfs4_p=
nfs_layout_segment *range);
> =A0static enum pnfs_try_status pnfs_commit(struct nfs_write_data *dat=
a, int sync);
> +static inline void lock_current_layout(struct nfs_inode *nfsi);
> +static inline void unlock_current_layout(struct nfs_inode *nfsi);
>
> =A0/* Locking:
> =A0*
> @@ -152,15 +154,15 @@ void
> =A0pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_con=
text *ctx)
> =A0{
> =A0 =A0 =A0 =A0dprintk("%s: has_layout=3D%d ctx=3D%p\n", __func__, ha=
s_layout(nfsi), ctx);
> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> + =A0 =A0 =A0 lock_current_layout(nfsi);
> =A0 =A0 =A0 =A0if (has_layout(nfsi) && !layoutcommit_needed(nfsi)) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0nfsi->layout.lo_cred =3D get_rpccred(c=
tx->state->owner->so_cred);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0nfsi->change_attr++;
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0dprintk("%s: Set layoutcommit\n", __fu=
nc__);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return;
> =A0 =A0 =A0 =A0}
> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 unlock_current_layout(nfsi);
> =A0}
>
> =A0/* Update last_write_offset for layoutcommit.
> @@ -173,7 +175,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, lo=
ff_t offset, size_t extent)
> =A0{
> =A0 =A0 =A0 =A0loff_t end_pos;
>
> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> + =A0 =A0 =A0 lock_current_layout(nfsi);
> =A0 =A0 =A0 =A0if (offset < nfsi->layout.pnfs_write_begin_pos)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0nfsi->layout.pnfs_write_begin_pos =3D =
offset;
> =A0 =A0 =A0 =A0end_pos =3D offset + extent - 1; /* I'm being inclusiv=
e */
> @@ -185,7 +187,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, lo=
ff_t offset, size_t extent)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0(unsigned long) offset ,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0(unsigned long) nfsi->layout.pnfs_writ=
e_begin_pos,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0(unsigned long) nfsi->layout.pnfs_writ=
e_end_pos);
> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 unlock_current_layout(nfsi);
> =A0}
>
> =A0/* Unitialize a mountpoint in a layout driver */
> @@ -313,6 +315,17 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutd=
river_type *ld_type)
> =A0#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
> =A0#endif /* CONFIG_SMP */
>
> +static inline void lock_current_layout(struct nfs_inode *nfsi)
> +{
> + =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> +}
> +
> +static inline void unlock_current_layout(struct nfs_inode *nfsi)
> +{
> + =A0 =A0 =A0 BUG_ON_UNLOCKED_LO((&nfsi->layout));
> + =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> +}
> +
> =A0/*
> =A0* get and lock nfsi->layout
> =A0*/
> @@ -321,10 +334,10 @@ get_lock_current_layout(struct nfs_inode *nfsi)
> =A0{
> =A0 =A0 =A0 =A0struct pnfs_layout_type *lo;
>
> + =A0 =A0 =A0 lock_current_layout(nfsi);
> =A0 =A0 =A0 =A0lo =3D &nfsi->layout;
> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> =A0 =A0 =A0 =A0if (!lo->ld_data) {
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return NULL;
> =A0 =A0 =A0 =A0}
>
> @@ -344,7 +357,12 @@ put_unlock_current_layout(struct pnfs_layout_typ=
e *lo)
> =A0 =A0 =A0 =A0BUG_ON_UNLOCKED_LO(lo);
> =A0 =A0 =A0 =A0BUG_ON(lo->refcount <=3D 0);
>
> - =A0 =A0 =A0 if (--lo->refcount =3D=3D 0 && list_empty(&lo->segs)) {
> + =A0 =A0 =A0 lo->refcount--;
> +
> + =A0 =A0 =A0 if (lo->refcount > 0)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
> +
> + =A0 =A0 =A0 if (list_empty(&lo->segs)) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct layoutdriver_io_operations *io_=
ops =3D
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0PNFS_LD_IO_OPS(lo);
>
> @@ -358,7 +376,8 @@ put_unlock_current_layout(struct pnfs_layout_type=
*lo)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0list_del_init(&nfsi->lo_inodes);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0spin_unlock(&clp->cl_lock);
> =A0 =A0 =A0 =A0}
> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> +out:
> + =A0 =A0 =A0 unlock_current_layout(nfsi);
> =A0}
>
> =A0void
> @@ -367,7 +386,7 @@ pnfs_layout_release(struct pnfs_layout_type *lo, =
atomic_t *count,
> =A0{
> =A0 =A0 =A0 =A0struct nfs_inode *nfsi =3D PNFS_NFS_INODE(lo);
>
> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> + =A0 =A0 =A0 lock_current_layout(nfsi);
> =A0 =A0 =A0 =A0if (range)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0pnfs_free_layout(lo, range);
> =A0 =A0 =A0 =A0atomic_dec(count);
> @@ -386,6 +405,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
> =A0 =A0 =A0 =A0};
>
> =A0 =A0 =A0 =A0lo =3D get_lock_current_layout(nfsi);
> + =A0 =A0 =A0 if (!lo)
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 return;
> =A0 =A0 =A0 =A0pnfs_free_layout(lo, &range);
> =A0 =A0 =A0 =A0put_unlock_current_layout(lo);
> =A0}
> @@ -663,7 +684,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi=
,
> =A0 =A0 =A0 =A0struct pnfs_layout_segment *lseg;
> =A0 =A0 =A0 =A0bool ret =3D false;
>
> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> + =A0 =A0 =A0 lock_current_layout(nfsi);
> =A0 =A0 =A0 =A0list_for_each_entry (lseg, &nfsi->layout.segs, fi_list=
) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (!should_free_lseg(lseg, range))
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0continue;
> @@ -677,7 +698,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi=
,
> =A0 =A0 =A0 =A0}
> =A0 =A0 =A0 =A0if (atomic_read(&nfsi->layout.lgetcount))
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0ret =3D true;
> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 unlock_current_layout(nfsi);
>
> =A0 =A0 =A0 =A0dprintk("%s:Return %d\n", __func__, ret);
> =A0 =A0 =A0 =A0return ret;
> @@ -759,7 +780,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs=
4_pnfs_layout_segment *range,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0/* unlock w/o put rebalanced by eventu=
al call to
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 * pnfs_layout_release
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 */
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
>
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (pnfs_return_layout_barrier(nfsi, &=
arg)) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0dprintk("%s: waiting\n=
", __func__);
> @@ -900,7 +921,7 @@ static int pnfs_wait_schedule(void *word)
> =A0*
> =A0* Note: If successful, nfsi->lo_lock is taken and the caller
> =A0* must put and unlock current_layout by using put_unlock_current_l=
ayout()
> - * when the returned layout is released.
> + * directly or pnfs_layout_release() when the returned layout is rel=
eased.
> =A0*/
> =A0static struct pnfs_layout_type *
> =A0get_lock_alloc_layout(struct inode *ino)
> @@ -935,7 +956,7 @@ get_lock_alloc_layout(struct inode *ino)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct nfs_client *clp=
=3D NFS_SERVER(ino)->nfs_client;
>
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0/* must grab the layou=
t lock before the client lock */
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_lock(&nfsi->lo_loc=
k);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 lock_current_layout(nfs=
i);
>
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0spin_lock(&clp->cl_loc=
k);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (list_empty(&nfsi->=
lo_inodes))
> @@ -1051,10 +1072,10 @@ void drain_layoutreturns(struct pnfs_layout_t=
ype *lo)
> =A0 =A0 =A0 =A0while (atomic_read(&lo->lretcount)) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0struct nfs_inode *nfsi =3D PNFS_NFS_IN=
ODE(lo);
>
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0dprintk("%s: waiting\n", __func__);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0wait_event(nfsi->lo_waitq, (atomic_rea=
d(&lo->lretcount) =3D=3D 0));
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 lock_current_layout(nfsi);
> =A0 =A0 =A0 =A0}
> =A0}
>
> @@ -1093,13 +1114,13 @@ pnfs_update_layout(struct inode *ino,
> =A0 =A0 =A0 =A0/* Check to see if the layout for the given range alre=
ady exists */
> =A0 =A0 =A0 =A0lseg =3D pnfs_has_layout(lo, &arg, take_ref, !take_ref=
);
> =A0 =A0 =A0 =A0if (lseg && !lseg->valid) {
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (take_ref)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0put_lseg(lseg);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0for (;;) {
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0prepare_to_wait(&nfsi-=
>lo_waitq, &__wait,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0TASK_KILLABLE);
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_lock(&nfsi->lo_loc=
k);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 lock_current_layout(nfs=
i);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0lseg =3D pnfs_has_layo=
ut(lo, &arg, take_ref, !take_ref);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0if (!lseg || lseg->val=
id)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0break;
> @@ -1112,7 +1133,7 @@ pnfs_update_layout(struct inode *ino,
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0result=
=3D -ERESTARTSYS;
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0break;
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0}
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_l=
ock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(n=
fsi);
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0schedule();
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0}
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0finish_wait(&nfsi->lo_waitq, &__wait);
> @@ -1149,7 +1170,7 @@ pnfs_update_layout(struct inode *ino,
> =A0 =A0 =A0 =A0/* Matching dec is done in .rpc_release (on non-error =
paths) */
> =A0 =A0 =A0 =A0atomic_inc(&lo->lgetcount);
> =A0 =A0 =A0 =A0/* Lose lock, but not reference, match this with pnfs_=
layout_release */
> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 unlock_current_layout(nfsi);
>
> =A0 =A0 =A0 =A0result =3D get_layout(ino, ctx, &arg, lsegpp, lo);
> =A0out:
> @@ -1299,7 +1320,7 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget =
*lgp)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0*lgp->lsegpp =3D lseg;
> =A0 =A0 =A0 =A0}
>
> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> + =A0 =A0 =A0 lock_current_layout(nfsi);
> =A0 =A0 =A0 =A0pnfs_insert_layout(lo, lseg);
>
> =A0 =A0 =A0 =A0if (res->return_on_close) {
> @@ -1310,7 +1331,7 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget =
*lgp)
>
> =A0 =A0 =A0 =A0/* Done processing layoutget. Set the layout stateid *=
/
> =A0 =A0 =A0 =A0pnfs_set_layout_stateid(lo, &res->stateid);
> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 unlock_current_layout(nfsi);
> =A0out:
> =A0 =A0 =A0 =A0return status;
> =A0}
> @@ -2140,9 +2161,9 @@ pnfs_layoutcommit_inode(struct inode *inode, in=
t sync)
> =A0 =A0 =A0 =A0if (!data)
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0return -ENOMEM;
>
> - =A0 =A0 =A0 spin_lock(&nfsi->lo_lock);
> + =A0 =A0 =A0 lock_current_layout(nfsi);
> =A0 =A0 =A0 =A0if (!layoutcommit_needed(nfsi)) {
> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 lock_current_layout(nfsi);
This should be unlock_current_layout
Fred
> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0goto out_free;
> =A0 =A0 =A0 =A0}
>
> @@ -2157,7 +2178,7 @@ pnfs_layoutcommit_inode(struct inode *inode, in=
t sync)
> =A0 =A0 =A0 =A0nfsi->layout.lo_cred =3D NULL;
> =A0 =A0 =A0 =A0pnfs_get_layout_stateid(&data->args.stateid, &nfsi->la=
yout);
>
> - =A0 =A0 =A0 spin_unlock(&nfsi->lo_lock);
> + =A0 =A0 =A0 unlock_current_layout(nfsi);
>
> =A0 =A0 =A0 =A0/* Set up layout commit args */
> =A0 =A0 =A0 =A0status =3D pnfs_layoutcommit_setup(inode, data, write_=
begin_pos,
> --
> 1.6.2.5
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" =
in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at =A0http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH 0/8] forgetful client v2
@ 2010-06-07 21:11 Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 1/8] pnfs-submit: clean struct nfs_inode Alexandros Batsakis
0 siblings, 1 reply; 22+ messages in thread
From: Alexandros Batsakis @ 2010-06-07 21:11 UTC (permalink / raw)
To: linux-nfs; +Cc: bhalevy, Alexandros Batsakis
This set of patches (2.6.35-rc2) includes a first attempt to implement
the forgetful client model for the pNFS client. The model
is explained in patch 7.
It also includes some minor cleanups in the layout management code
that help to improve the maintainability of the current code.
Passed cthon tests against the pyNFS server, and against a modified
version of pyNFS server that randomly issues layout recalls after opens.
Alexandros Batsakis (8):
pnfs-submit: clean struct nfs_inode
pnfs-submit: clean locking infrastructure
pnfs-submit: remove lgetcount, lretcount
pnfs-submit: change stateid to be a union
pnfs-submit: request whole-file layouts only
pnfs-submit: change layout list to be similar to other state lists
pnfs-submit: forgetful client (layouts)
pnfs-submit: support for CB_RECALL_ANY (layouts)
fs/nfs/callback.h | 7 +
fs/nfs/callback_proc.c | 231 +++++++++++++++++++++++++++++---------
fs/nfs/callback_xdr.c | 2 +-
fs/nfs/client.c | 2 +-
fs/nfs/delegation.c | 19 ++--
fs/nfs/inode.c | 12 +-
fs/nfs/nfs4_fs.h | 1 +
fs/nfs/nfs4proc.c | 46 +++++---
fs/nfs/nfs4state.c | 4 +-
fs/nfs/nfs4xdr.c | 38 ++++---
fs/nfs/pnfs.c | 276 +++++++++++++++++++++------------------------
fs/nfs/pnfs.h | 3 +-
fs/nfsd/nfs4callback.c | 1 -
include/linux/nfs4.h | 16 +++-
include/linux/nfs4_pnfs.h | 2 +-
include/linux/nfs_fs.h | 28 ++---
include/linux/nfs_fs_sb.h | 2 +-
17 files changed, 414 insertions(+), 276 deletions(-)
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH 1/8] pnfs-submit: clean struct nfs_inode
2010-06-07 21:11 [PATCH 0/8] forgetful client v2 Alexandros Batsakis
@ 2010-06-07 21:11 ` Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 2/8] pnfs-submit: clean locking infrastructure Alexandros Batsakis
0 siblings, 1 reply; 22+ messages in thread
From: Alexandros Batsakis @ 2010-06-07 21:11 UTC (permalink / raw)
To: linux-nfs; +Cc: bhalevy, Alexandros Batsakis
by moving layout specific fields from nfs_inode to struct pnfs_layout_type
Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
---
fs/nfs/inode.c | 8 +++---
fs/nfs/pnfs.c | 55 ++++++++++++++++++++++++--------------------
include/linux/nfs4_pnfs.h | 2 +-
include/linux/nfs_fs.h | 22 +++++++++---------
4 files changed, 46 insertions(+), 41 deletions(-)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index b33d1a1..d43f2c5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1366,12 +1366,12 @@ void nfs4_clear_inode(struct inode *inode)
static void pnfs_alloc_init_inode(struct nfs_inode *nfsi)
{
#ifdef CONFIG_NFS_V4_1
- nfsi->pnfs_layout_state = 0;
+ nfsi->layout.pnfs_layout_state = 0;
memset(&nfsi->layout.stateid, 0, NFS4_STATEID_SIZE);
nfsi->layout.roc_iomode = 0;
- nfsi->lo_cred = NULL;
- nfsi->pnfs_write_begin_pos = 0;
- nfsi->pnfs_write_end_pos = 0;
+ nfsi->layout.lo_cred = NULL;
+ nfsi->layout.pnfs_write_begin_pos = 0;
+ nfsi->layout.pnfs_write_end_pos = 0;
#endif /* CONFIG_NFS_V4_1 */
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8cc4412..8620f68 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -154,7 +154,7 @@ pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
dprintk("%s: has_layout=%d ctx=%p\n", __func__, has_layout(nfsi), ctx);
spin_lock(&nfsi->lo_lock);
if (has_layout(nfsi) && !layoutcommit_needed(nfsi)) {
- nfsi->lo_cred = get_rpccred(ctx->state->owner->so_cred);
+ nfsi->layout.lo_cred = get_rpccred(ctx->state->owner->so_cred);
nfsi->change_attr++;
spin_unlock(&nfsi->lo_lock);
dprintk("%s: Set layoutcommit\n", __func__);
@@ -174,17 +174,17 @@ pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
loff_t end_pos;
spin_lock(&nfsi->lo_lock);
- if (offset < nfsi->pnfs_write_begin_pos)
- nfsi->pnfs_write_begin_pos = offset;
+ if (offset < nfsi->layout.pnfs_write_begin_pos)
+ nfsi->layout.pnfs_write_begin_pos = offset;
end_pos = offset + extent - 1; /* I'm being inclusive */
- if (end_pos > nfsi->pnfs_write_end_pos)
- nfsi->pnfs_write_end_pos = end_pos;
+ if (end_pos > nfsi->layout.pnfs_write_end_pos)
+ nfsi->layout.pnfs_write_end_pos = end_pos;
dprintk("%s: Wrote %lu@%lu bpos %lu, epos: %lu\n",
__func__,
(unsigned long) extent,
(unsigned long) offset ,
- (unsigned long) nfsi->pnfs_write_begin_pos,
- (unsigned long) nfsi->pnfs_write_end_pos);
+ (unsigned long) nfsi->layout.pnfs_write_begin_pos,
+ (unsigned long) nfsi->layout.pnfs_write_end_pos);
spin_unlock(&nfsi->lo_lock);
}
@@ -915,7 +915,8 @@ get_lock_alloc_layout(struct inode *ino)
* wait until bit is cleared if we lost this race.
*/
res = wait_on_bit_lock(
- &nfsi->pnfs_layout_state, NFS_INO_LAYOUT_ALLOC,
+ &nfsi->layout.pnfs_layout_state,
+ NFS_INO_LAYOUT_ALLOC,
pnfs_wait_schedule, TASK_KILLABLE);
if (res) {
lo = ERR_PTR(res);
@@ -943,8 +944,10 @@ get_lock_alloc_layout(struct inode *ino)
lo = ERR_PTR(-ENOMEM);
/* release the NFS_INO_LAYOUT_ALLOC bit and wake up waiters */
- clear_bit_unlock(NFS_INO_LAYOUT_ALLOC, &nfsi->pnfs_layout_state);
- wake_up_bit(&nfsi->pnfs_layout_state, NFS_INO_LAYOUT_ALLOC);
+ clear_bit_unlock(NFS_INO_LAYOUT_ALLOC,
+ &nfsi->layout.pnfs_layout_state);
+ wake_up_bit(&nfsi->layout.pnfs_layout_state,
+ NFS_INO_LAYOUT_ALLOC);
break;
}
@@ -1104,13 +1107,13 @@ pnfs_update_layout(struct inode *ino,
}
/* if get layout already failed once goto out */
- if (test_bit(lo_fail_bit(iomode), &nfsi->pnfs_layout_state)) {
- if (unlikely(nfsi->pnfs_layout_suspend &&
- get_seconds() >= nfsi->pnfs_layout_suspend)) {
+ if (test_bit(lo_fail_bit(iomode), &nfsi->layout.pnfs_layout_state)) {
+ if (unlikely(nfsi->layout.pnfs_layout_suspend &&
+ get_seconds() >= nfsi->layout.pnfs_layout_suspend)) {
dprintk("%s: layout_get resumed\n", __func__);
clear_bit(lo_fail_bit(iomode),
- &nfsi->pnfs_layout_state);
- nfsi->pnfs_layout_suspend = 0;
+ &nfsi->layout.pnfs_layout_state);
+ nfsi->layout.pnfs_layout_suspend = 0;
} else {
result = 1;
goto out_put;
@@ -1126,7 +1129,8 @@ pnfs_update_layout(struct inode *ino,
result = get_layout(ino, ctx, &arg, lsegpp, lo);
out:
dprintk("%s end (err:%d) state 0x%lx lseg %p\n",
- __func__, result, nfsi->pnfs_layout_state, lseg);
+ __func__, result, nfsi->layout.pnfs_layout_state,
+ lseg);
return result;
out_put:
if (lsegpp)
@@ -1231,13 +1235,14 @@ pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)
get_out:
/* remember that get layout failed and suspend trying */
- nfsi->pnfs_layout_suspend = suspend;
- set_bit(lo_fail_bit(lgp->args.lseg.iomode), &nfsi->pnfs_layout_state);
+ nfsi->layout.pnfs_layout_suspend = suspend;
+ set_bit(lo_fail_bit(lgp->args.lseg.iomode),
+ &nfsi->layout.pnfs_layout_state);
dprintk("%s: layout_get suspended until %ld\n",
__func__, suspend);
out:
dprintk("%s end (err:%d) state 0x%lx lseg %p\n",
- __func__, lgp->status, nfsi->pnfs_layout_state, lseg);
+ __func__, lgp->status, nfsi->layout.pnfs_layout_state, lseg);
return;
}
@@ -2009,12 +2014,12 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
/* Clear layoutcommit properties in the inode so
* new lc info can be generated
*/
- write_begin_pos = nfsi->pnfs_write_begin_pos;
- write_end_pos = nfsi->pnfs_write_end_pos;
- data->cred = nfsi->lo_cred;
- nfsi->pnfs_write_begin_pos = 0;
- nfsi->pnfs_write_end_pos = 0;
- nfsi->lo_cred = NULL;
+ write_begin_pos = nfsi->layout.pnfs_write_begin_pos;
+ write_end_pos = nfsi->layout.pnfs_write_end_pos;
+ data->cred = nfsi->layout.lo_cred;
+ nfsi->layout.pnfs_write_begin_pos = 0;
+ nfsi->layout.pnfs_write_end_pos = 0;
+ nfsi->layout.lo_cred = NULL;
pnfs_get_layout_stateid(&data->args.stateid, &nfsi->layout);
spin_unlock(&nfsi->lo_lock);
diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
index 84d2e95..53626d4 100644
--- a/include/linux/nfs4_pnfs.h
+++ b/include/linux/nfs4_pnfs.h
@@ -83,7 +83,7 @@ has_layout(struct nfs_inode *nfsi)
static inline bool
layoutcommit_needed(struct nfs_inode *nfsi)
{
- return nfsi->lo_cred != NULL;
+ return nfsi->layout.lo_cred != NULL;
}
#endif /* CONFIG_NFS_V4_1 */
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 2762b2c..45846c5 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -106,6 +106,17 @@ struct pnfs_layout_type {
seqlock_t seqlock; /* Protects the stateid */
nfs4_stateid stateid;
void *ld_data; /* layout driver private data */
+ unsigned long pnfs_layout_state;
+ #define NFS_INO_RO_LAYOUT_FAILED 0 /* get ro layout failed stop trying */
+ #define NFS_INO_RW_LAYOUT_FAILED 1 /* get rw layout failed stop trying */
+ #define NFS_INO_LAYOUT_ALLOC 2 /* bit lock for layout allocation */
+ time_t pnfs_layout_suspend;
+ struct rpc_cred *lo_cred; /* layoutcommit credential */
+ /* DH: These vars keep track of the maximum write range
+ * so the values can be used for layoutcommit.
+ */
+ loff_t pnfs_write_begin_pos;
+ loff_t pnfs_write_end_pos;
};
/*
@@ -198,20 +209,9 @@ struct nfs_inode {
/* Inodes having layouts */
struct list_head lo_inodes;
- unsigned long pnfs_layout_state;
-#define NFS_INO_RO_LAYOUT_FAILED 0 /* get ro layout failed stop trying */
-#define NFS_INO_RW_LAYOUT_FAILED 1 /* get rw layout failed stop trying */
-#define NFS_INO_LAYOUT_ALLOC 2 /* bit lock for layout allocation */
- time_t pnfs_layout_suspend;
- struct rpc_cred *lo_cred; /* layoutcommit credential */
wait_queue_head_t lo_waitq;
spinlock_t lo_lock;
struct pnfs_layout_type layout;
- /* DH: These vars keep track of the maximum write range
- * so the values can be used for layoutcommit.
- */
- loff_t pnfs_write_begin_pos;
- loff_t pnfs_write_end_pos;
#endif /* CONFIG_NFS_V4_1 */
#endif /* CONFIG_NFS_V4*/
#ifdef CONFIG_NFS_FSCACHE
--
1.6.2.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 2/8] pnfs-submit: clean locking infrastructure
2010-06-07 21:11 ` [PATCH 1/8] pnfs-submit: clean struct nfs_inode Alexandros Batsakis
@ 2010-06-07 21:11 ` Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 3/8] pnfs-submit: remove lgetcount, lretcount Alexandros Batsakis
2010-06-08 7:30 ` [PATCH 2/8] pnfs-submit: clean locking infrastructure Christoph Hellwig
0 siblings, 2 replies; 22+ messages in thread
From: Alexandros Batsakis @ 2010-06-07 21:11 UTC (permalink / raw)
To: linux-nfs; +Cc: bhalevy, Alexandros Batsakis, Fred Isaman
(also minor cleanup of pnfs_free_layout())
Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
Signed-off-by: Fred Isaman <iisaman@netapp.com>
---
fs/nfs/pnfs.c | 73 ++++++++++++++++++++++++++++++++++++--------------------
1 files changed, 47 insertions(+), 26 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8620f68..b0a4bca 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -60,6 +60,8 @@ static int pnfs_initialized;
static void pnfs_free_layout(struct pnfs_layout_type *lo,
struct nfs4_pnfs_layout_segment *range);
static enum pnfs_try_status pnfs_commit(struct nfs_write_data *data, int sync);
+static inline void lock_current_layout(struct nfs_inode *nfsi);
+static inline void unlock_current_layout(struct nfs_inode *nfsi);
/* Locking:
*
@@ -152,15 +154,15 @@ void
pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
{
dprintk("%s: has_layout=%d ctx=%p\n", __func__, has_layout(nfsi), ctx);
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
if (has_layout(nfsi) && !layoutcommit_needed(nfsi)) {
nfsi->layout.lo_cred = get_rpccred(ctx->state->owner->so_cred);
nfsi->change_attr++;
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
dprintk("%s: Set layoutcommit\n", __func__);
return;
}
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
}
/* Update last_write_offset for layoutcommit.
@@ -173,7 +175,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
{
loff_t end_pos;
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
if (offset < nfsi->layout.pnfs_write_begin_pos)
nfsi->layout.pnfs_write_begin_pos = offset;
end_pos = offset + extent - 1; /* I'm being inclusive */
@@ -185,7 +187,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
(unsigned long) offset ,
(unsigned long) nfsi->layout.pnfs_write_begin_pos,
(unsigned long) nfsi->layout.pnfs_write_end_pos);
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
}
/* Unitialize a mountpoint in a layout driver */
@@ -312,6 +314,17 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
#define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
#endif /* CONFIG_SMP */
+static inline void lock_current_layout(struct nfs_inode *nfsi)
+{
+ spin_lock(&nfsi->lo_lock);
+}
+
+static inline void unlock_current_layout(struct nfs_inode *nfsi)
+{
+ BUG_ON_UNLOCKED_LO((&nfsi->layout));
+ spin_unlock(&nfsi->lo_lock);
+}
+
/*
* get and lock nfsi->layout
*/
@@ -320,10 +333,10 @@ get_lock_current_layout(struct nfs_inode *nfsi)
{
struct pnfs_layout_type *lo;
+ lock_current_layout(nfsi);
lo = &nfsi->layout;
- spin_lock(&nfsi->lo_lock);
if (!lo->ld_data) {
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
return NULL;
}
@@ -343,7 +356,12 @@ put_unlock_current_layout(struct pnfs_layout_type *lo)
BUG_ON_UNLOCKED_LO(lo);
BUG_ON(lo->refcount <= 0);
- if (--lo->refcount == 0 && list_empty(&lo->segs)) {
+ lo->refcount--;
+
+ if (lo->refcount > 0)
+ goto out;
+
+ if (list_empty(&lo->segs)) {
struct layoutdriver_io_operations *io_ops =
PNFS_LD_IO_OPS(lo);
@@ -357,7 +375,8 @@ put_unlock_current_layout(struct pnfs_layout_type *lo)
list_del_init(&nfsi->lo_inodes);
spin_unlock(&clp->cl_lock);
}
- spin_unlock(&nfsi->lo_lock);
+out:
+ unlock_current_layout(nfsi);
}
void
@@ -366,7 +385,7 @@ pnfs_layout_release(struct pnfs_layout_type *lo, atomic_t *count,
{
struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
if (range)
pnfs_free_layout(lo, range);
atomic_dec(count);
@@ -385,6 +404,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
};
lo = get_lock_current_layout(nfsi);
+ if (!lo)
+ return;
pnfs_free_layout(lo, &range);
put_unlock_current_layout(lo);
}
@@ -662,7 +683,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
struct pnfs_layout_segment *lseg;
bool ret = false;
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
list_for_each_entry (lseg, &nfsi->layout.segs, fi_list) {
if (!should_free_lseg(lseg, range))
continue;
@@ -676,7 +697,7 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
}
if (atomic_read(&nfsi->layout.lgetcount))
ret = true;
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
dprintk("%s:Return %d\n", __func__, ret);
return ret;
@@ -758,7 +779,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
/* unlock w/o put rebalanced by eventual call to
* pnfs_layout_release
*/
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
if (pnfs_return_layout_barrier(nfsi, &arg)) {
dprintk("%s: waiting\n", __func__);
@@ -899,7 +920,7 @@ static int pnfs_wait_schedule(void *word)
*
* Note: If successful, nfsi->lo_lock is taken and the caller
* must put and unlock current_layout by using put_unlock_current_layout()
- * when the returned layout is released.
+ * directly or pnfs_layout_release() when the returned layout is released.
*/
static struct pnfs_layout_type *
get_lock_alloc_layout(struct inode *ino)
@@ -934,7 +955,7 @@ get_lock_alloc_layout(struct inode *ino)
struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
/* must grab the layout lock before the client lock */
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
spin_lock(&clp->cl_lock);
if (list_empty(&nfsi->lo_inodes))
@@ -1026,10 +1047,10 @@ void drain_layoutreturns(struct pnfs_layout_type *lo)
while (atomic_read(&lo->lretcount)) {
struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
dprintk("%s: waiting\n", __func__);
wait_event(nfsi->lo_waitq, (atomic_read(&lo->lretcount) == 0));
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
}
}
@@ -1068,13 +1089,13 @@ pnfs_update_layout(struct inode *ino,
/* Check to see if the layout for the given range already exists */
lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
if (lseg && !lseg->valid) {
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
if (take_ref)
put_lseg(lseg);
for (;;) {
prepare_to_wait(&nfsi->lo_waitq, &__wait,
TASK_KILLABLE);
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
if (!lseg || lseg->valid)
break;
@@ -1087,7 +1108,7 @@ pnfs_update_layout(struct inode *ino,
result = -ERESTARTSYS;
break;
}
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
schedule();
}
finish_wait(&nfsi->lo_waitq, &__wait);
@@ -1124,7 +1145,7 @@ pnfs_update_layout(struct inode *ino,
/* Matching dec is done in .rpc_release (on non-error paths) */
atomic_inc(&lo->lgetcount);
/* Lose lock, but not reference, match this with pnfs_layout_release */
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
result = get_layout(ino, ctx, &arg, lsegpp, lo);
out:
@@ -1274,7 +1295,7 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp)
*lgp->lsegpp = lseg;
}
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
pnfs_insert_layout(lo, lseg);
if (res->return_on_close) {
@@ -1285,7 +1306,7 @@ pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp)
/* Done processing layoutget. Set the layout stateid */
pnfs_set_layout_stateid(lo, &res->stateid);
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
out:
return status;
}
@@ -2005,9 +2026,9 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
if (!data)
return -ENOMEM;
- spin_lock(&nfsi->lo_lock);
+ lock_current_layout(nfsi);
if (!layoutcommit_needed(nfsi)) {
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
goto out_free;
}
@@ -2022,7 +2043,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
nfsi->layout.lo_cred = NULL;
pnfs_get_layout_stateid(&data->args.stateid, &nfsi->layout);
- spin_unlock(&nfsi->lo_lock);
+ unlock_current_layout(nfsi);
/* Set up layout commit args */
status = pnfs_layoutcommit_setup(inode, data, write_begin_pos,
--
1.6.2.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 3/8] pnfs-submit: remove lgetcount, lretcount
2010-06-07 21:11 ` [PATCH 2/8] pnfs-submit: clean locking infrastructure Alexandros Batsakis
@ 2010-06-07 21:11 ` Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 4/8] pnfs-submit: change stateid to be a union Alexandros Batsakis
2010-06-08 7:30 ` [PATCH 2/8] pnfs-submit: clean locking infrastructure Christoph Hellwig
1 sibling, 1 reply; 22+ messages in thread
From: Alexandros Batsakis @ 2010-06-07 21:11 UTC (permalink / raw)
To: linux-nfs; +Cc: bhalevy, Alexandros Batsakis
This prepares for the forgetful client. There is no need to explicitly count the number of outstanding layout operations, as the protocol already has a provision for it (the seqid of the stateid -- e.g. section 12.5.5.2.1.2). As long as no requests for intersecting layouts are issued, LAYOUTGETs/LAYOUTRETURNs can be sent in parallel.
Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
---
fs/nfs/nfs4proc.c | 5 ++---
fs/nfs/pnfs.c | 46 ++++++++++++----------------------------------
fs/nfs/pnfs.h | 3 +--
include/linux/nfs_fs.h | 2 --
4 files changed, 15 insertions(+), 41 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f962f92..bf854fe 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -5540,7 +5540,7 @@ static void nfs4_pnfs_layoutget_release(void *calldata)
struct nfs4_pnfs_layoutget *lgp = calldata;
dprintk("--> %s\n", __func__);
- pnfs_layout_release(lgp->lo, &lgp->lo->lgetcount, NULL);
+ pnfs_layout_release(lgp->lo, NULL);
if (lgp->res.layout.buf != NULL)
free_page((unsigned long) lgp->res.layout.buf);
kfree(calldata);
@@ -5761,8 +5761,7 @@ static void nfs4_pnfs_layoutreturn_release(void *calldata)
if (lrp->lo && (lrp->args.return_type == RETURN_FILE)) {
if (!lrp->res.lrs_present)
pnfs_set_layout_stateid(lrp->lo, &zero_stateid);
- pnfs_layout_release(lrp->lo, &lrp->lo->lretcount,
- &lrp->args.lseg);
+ pnfs_layout_release(lrp->lo, &lrp->args.lseg);
}
kfree(calldata);
dprintk("<-- %s\n", __func__);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b0a4bca..8df4d75 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -380,7 +380,7 @@ out:
}
void
-pnfs_layout_release(struct pnfs_layout_type *lo, atomic_t *count,
+pnfs_layout_release(struct pnfs_layout_type *lo,
struct nfs4_pnfs_layout_segment *range)
{
struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
@@ -388,7 +388,6 @@ pnfs_layout_release(struct pnfs_layout_type *lo, atomic_t *count,
lock_current_layout(nfsi);
if (range)
pnfs_free_layout(lo, range);
- atomic_dec(count);
put_unlock_current_layout(lo);
wake_up_all(&nfsi->lo_waitq);
}
@@ -573,7 +572,7 @@ get_layout(struct inode *ino,
lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
if (lgp == NULL) {
- pnfs_layout_release(lo, &lo->lgetcount, NULL);
+ pnfs_layout_release(lo, NULL);
return -ENOMEM;
}
lgp->lo = lo;
@@ -647,6 +646,13 @@ has_layout_to_return(struct pnfs_layout_type *lo,
return out;
}
+static inline bool
+_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
+{
+ return atomic_read(&lseg->kref.refcount) == 1;
+}
+
+
static void
pnfs_free_layout(struct pnfs_layout_type *lo,
struct nfs4_pnfs_layout_segment *range)
@@ -657,7 +663,8 @@ pnfs_free_layout(struct pnfs_layout_type *lo,
BUG_ON_UNLOCKED_LO(lo);
list_for_each_entry_safe (lseg, next, &lo->segs, fi_list) {
- if (!should_free_lseg(lseg, range))
+ if (!should_free_lseg(lseg, range) ||
+ !_pnfs_can_return_lseg(lseg))
continue;
dprintk("%s: freeing lseg %p iomode %d "
"offset %llu length %llu\n", __func__,
@@ -670,12 +677,6 @@ pnfs_free_layout(struct pnfs_layout_type *lo,
dprintk("%s:Return\n", __func__);
}
-static inline bool
-_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
-{
- return atomic_read(&lseg->kref.refcount) == 1;
-}
-
static bool
pnfs_return_layout_barrier(struct nfs_inode *nfsi,
struct nfs4_pnfs_layout_segment *range)
@@ -695,8 +696,6 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi,
ret = true;
}
}
- if (atomic_read(&nfsi->layout.lgetcount))
- ret = true;
unlock_current_layout(nfsi);
dprintk("%s:Return %d\n", __func__, ret);
@@ -718,7 +717,7 @@ return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
if (lrp == NULL) {
if (lo && (type == RETURN_FILE))
- pnfs_layout_release(lo, &lo->lretcount, NULL);
+ pnfs_layout_release(lo, NULL);
goto out;
}
lrp->args.reclaim = 0;
@@ -773,9 +772,6 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
goto out;
}
- /* Matching dec is done in .rpc_release (on non-error paths) */
- atomic_inc(&lo->lretcount);
-
/* unlock w/o put rebalanced by eventual call to
* pnfs_layout_release
*/
@@ -900,8 +896,6 @@ alloc_init_layout(struct inode *ino)
seqlock_init(&lo->seqlock);
memset(&lo->stateid, 0, NFS4_STATEID_SIZE);
lo->refcount = 1;
- atomic_set(&lo->lgetcount, 0);
- atomic_set(&lo->lretcount, 0);
INIT_LIST_HEAD(&lo->segs);
lo->roc_iomode = 0;
return lo;
@@ -1041,19 +1035,6 @@ pnfs_has_layout(struct pnfs_layout_type *lo,
return ret;
}
-/* Called with spin lock held */
-void drain_layoutreturns(struct pnfs_layout_type *lo)
-{
- while (atomic_read(&lo->lretcount)) {
- struct nfs_inode *nfsi = PNFS_NFS_INODE(lo);
-
- unlock_current_layout(nfsi);
- dprintk("%s: waiting\n", __func__);
- wait_event(nfsi->lo_waitq, (atomic_read(&lo->lretcount) == 0));
- lock_current_layout(nfsi);
- }
-}
-
/* Update the file's layout for the given range and iomode.
* Layout is retreived from the server if needed.
* If lsegpp is given, the appropriate layout segment is referenced and
@@ -1141,9 +1122,6 @@ pnfs_update_layout(struct inode *ino,
}
}
- drain_layoutreturns(lo);
- /* Matching dec is done in .rpc_release (on non-error paths) */
- atomic_inc(&lo->lgetcount);
/* Lose lock, but not reference, match this with pnfs_layout_release */
unlock_current_layout(nfsi);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index c89be78..a71145e 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -61,8 +61,7 @@ void pnfs_update_layout_commit(struct inode *, struct list_head *, pgoff_t, unsi
ssize_t pnfs_file_write(struct file *, const char __user *, size_t, loff_t *);
void pnfs_get_layout_done(struct nfs4_pnfs_layoutget *, int rpc_status);
int pnfs_layout_process(struct nfs4_pnfs_layoutget *lgp);
-void pnfs_layout_release(struct pnfs_layout_type *, atomic_t *,
- struct nfs4_pnfs_layout_segment *range);
+void pnfs_layout_release(struct pnfs_layout_type *, struct nfs4_pnfs_layout_segment *range);
void pnfs_set_layout_stateid(struct pnfs_layout_type *lo,
const nfs4_stateid *stateid);
void pnfs_destroy_layout(struct nfs_inode *);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 45846c5..f6e3e20 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -99,8 +99,6 @@ struct posix_acl;
struct pnfs_layout_type {
int refcount;
- atomic_t lretcount; /* Layoutreturns outstanding */
- atomic_t lgetcount; /* Layoutgets outstanding */
struct list_head segs; /* layout segments list */
int roc_iomode; /* iomode to return on close, 0=none */
seqlock_t seqlock; /* Protects the stateid */
--
1.6.2.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 4/8] pnfs-submit: change stateid to be a union
2010-06-07 21:11 ` [PATCH 3/8] pnfs-submit: remove lgetcount, lretcount Alexandros Batsakis
@ 2010-06-07 21:11 ` Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 5/8] pnfs-submit: request whole-file layouts only Alexandros Batsakis
0 siblings, 1 reply; 22+ messages in thread
From: Alexandros Batsakis @ 2010-06-07 21:11 UTC (permalink / raw)
To: linux-nfs; +Cc: bhalevy, Alexandros Batsakis
In NFSv4.1 the stateid consists of the other and seqid fields. For layout
processing we need to numerically compare the seqid value of layout stateids.
To do so, introduce a union to nfs4_stateid to switch between opaque(16 bytes)
and opaque(12 bytes) / __be32.
Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
---
fs/nfs/callback_proc.c | 13 +++++++------
fs/nfs/callback_xdr.c | 2 +-
fs/nfs/delegation.c | 19 +++++++++++--------
fs/nfs/nfs4proc.c | 41 +++++++++++++++++++++++++----------------
fs/nfs/nfs4state.c | 4 ++--
fs/nfs/nfs4xdr.c | 38 +++++++++++++++++++++-----------------
fs/nfs/pnfs.c | 11 ++++++-----
fs/nfsd/nfs4callback.c | 1 -
include/linux/nfs4.h | 16 ++++++++++++++--
9 files changed, 87 insertions(+), 58 deletions(-)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 0053fbb..8752239 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -121,8 +121,9 @@ out:
int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nfs4_stateid *stateid)
{
- if (delegation == NULL || memcmp(delegation->stateid.data, stateid->data,
- sizeof(delegation->stateid.data)) != 0)
+ if (delegation == NULL || memcmp(delegation->stateid.u.data,
+ stateid->u.data,
+ sizeof(delegation->stateid.u.data)))
return 0;
return 1;
}
@@ -331,11 +332,11 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n
if (delegation == NULL)
return 0;
- /* seqid is 4-bytes long */
- if (((u32 *) &stateid->data)[0] != 0)
+ if (stateid->u.stateid.seqid != 0)
return 0;
- if (memcmp(&delegation->stateid.data[4], &stateid->data[4],
- sizeof(stateid->data)-4))
+ if (memcmp(&delegation->stateid.u.stateid.other,
+ &stateid->u.stateid.other,
+ NFS4_STATEID_OTHER_SIZE))
return 0;
return 1;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 1856181..b88db4d 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -137,7 +137,7 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
p = read_buf(xdr, 16);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
- memcpy(stateid->data, p, 16);
+ memcpy(stateid->u.data, p, 16);
return 0;
}
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 3016345..13b3e8d 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -104,7 +104,8 @@ again:
continue;
if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
continue;
- if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
+ if (memcmp(state->stateid.u.data, stateid->u.data,
+ sizeof(state->stateid.u.data)) != 0)
continue;
get_nfs_open_context(ctx);
spin_unlock(&inode->i_lock);
@@ -133,8 +134,8 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
if (delegation != NULL) {
spin_lock(&delegation->lock);
if (delegation->inode != NULL) {
- memcpy(delegation->stateid.data, res->delegation.data,
- sizeof(delegation->stateid.data));
+ memcpy(delegation->stateid.u.data, res->delegation.u.data,
+ sizeof(delegation->stateid.u.data));
delegation->type = res->delegation_type;
delegation->maxsize = res->maxsize;
oldcred = delegation->cred;
@@ -187,8 +188,9 @@ static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfs
if (delegation == NULL)
goto nomatch;
spin_lock(&delegation->lock);
- if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
- sizeof(delegation->stateid.data)) != 0)
+ if (stateid != NULL && memcmp(delegation->stateid.u.data,
+ stateid->u.data,
+ sizeof(delegation->stateid.u.data)) != 0)
goto nomatch_unlock;
list_del_rcu(&delegation->super_list);
delegation->inode = NULL;
@@ -216,8 +218,8 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
delegation = kmalloc(sizeof(*delegation), GFP_NOFS);
if (delegation == NULL)
return -ENOMEM;
- memcpy(delegation->stateid.data, res->delegation.data,
- sizeof(delegation->stateid.data));
+ memcpy(delegation->stateid.u.data, res->delegation.u.data,
+ sizeof(delegation->stateid.u.data));
delegation->type = res->delegation_type;
delegation->maxsize = res->maxsize;
delegation->change_attr = nfsi->change_attr;
@@ -562,7 +564,8 @@ int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
rcu_read_lock();
delegation = rcu_dereference(nfsi->delegation);
if (delegation != NULL) {
- memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
+ memcpy(dst->u.data, delegation->stateid.u.data,
+ sizeof(dst->u.data));
ret = 1;
}
rcu_read_unlock();
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index bf854fe..ee3e3bc 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -867,8 +867,10 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode)
static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
{
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
- memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
- memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
+ memcpy(state->stateid.u.data, stateid->u.data,
+ sizeof(state->stateid.u.data));
+ memcpy(state->open_stateid.u.data, stateid->u.data,
+ sizeof(state->open_stateid.u.data));
switch (fmode) {
case FMODE_READ:
set_bit(NFS_O_RDONLY_STATE, &state->flags);
@@ -896,7 +898,8 @@ static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_s
*/
write_seqlock(&state->seqlock);
if (deleg_stateid != NULL) {
- memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
+ memcpy(state->stateid.u.data, deleg_stateid->u.data,
+ sizeof(state->stateid.u.data));
set_bit(NFS_DELEGATED_STATE, &state->flags);
}
if (open_stateid != NULL)
@@ -927,7 +930,8 @@ static int update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stat
if (delegation == NULL)
delegation = &deleg_cur->stateid;
- else if (memcmp(deleg_cur->stateid.data, delegation->data, NFS4_STATEID_SIZE) != 0)
+ else if (memcmp(deleg_cur->stateid.u.data, delegation->u.data,
+ NFS4_STATEID_SIZE) != 0)
goto no_delegation_unlock;
nfs_mark_delegation_referenced(deleg_cur);
@@ -989,7 +993,8 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
break;
}
/* Save the delegation */
- memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
+ memcpy(stateid.u.data, delegation->stateid.u.data,
+ sizeof(stateid.u.data));
rcu_read_unlock();
ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode);
if (ret != 0)
@@ -1155,10 +1160,13 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
* Check if we need to update the current stateid.
*/
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
- memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
+ memcmp(state->stateid.u.data, state->open_stateid.u.data,
+ sizeof(state->stateid.u.data)) != 0) {
write_seqlock(&state->seqlock);
if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
- memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
+ memcpy(state->stateid.u.data,
+ state->open_stateid.u.data,
+ sizeof(state->stateid.u.data));
write_sequnlock(&state->seqlock);
}
pnfs4_layout_reclaim(state);
@@ -1229,8 +1237,8 @@ static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs
if (IS_ERR(opendata))
return PTR_ERR(opendata);
opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
- memcpy(opendata->o_arg.u.delegation.data, stateid->data,
- sizeof(opendata->o_arg.u.delegation.data));
+ memcpy(opendata->o_arg.u.delegation.u.data, stateid->u.data,
+ sizeof(opendata->o_arg.u.delegation.u.data));
ret = nfs4_open_recover(opendata, state);
nfs4_opendata_put(opendata);
return ret;
@@ -1288,8 +1296,8 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
if (RPC_ASSASSINATED(task))
return;
if (data->rpc_status == 0) {
- memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
- sizeof(data->o_res.stateid.data));
+ memcpy(data->o_res.stateid.u.data, data->c_res.stateid.u.data,
+ sizeof(data->o_res.stateid.u.data));
nfs_confirm_seqid(&data->owner->so_seqid, 0);
renew_lease(data->o_res.server, data->timestamp);
data->rpc_done = 1;
@@ -4132,9 +4140,10 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
return;
switch (task->tk_status) {
case 0:
- memcpy(calldata->lsp->ls_stateid.data,
- calldata->res.stateid.data,
- sizeof(calldata->lsp->ls_stateid.data));
+ memcpy(calldata->lsp->ls_stateid.u.data,
+ calldata->res.stateid.u.data,
+ sizeof(calldata->lsp->ls_stateid.u.
+ data));
renew_lease(calldata->server, calldata->timestamp);
break;
case -NFS4ERR_BAD_STATEID:
@@ -4348,8 +4357,8 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
goto out;
}
if (data->rpc_status == 0) {
- memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
- sizeof(data->lsp->ls_stateid.data));
+ memcpy(data->lsp->ls_stateid.u.data, data->res.stateid.u.data,
+ sizeof(data->lsp->ls_stateid.u.data));
data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
}
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8185c1e..0f44704 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1055,8 +1055,8 @@ restart:
* Open state on this file cannot be recovered
* All we can do is revert to using the zero stateid.
*/
- memset(state->stateid.data, 0,
- sizeof(state->stateid.data));
+ memset(state->stateid.u.data, 0,
+ sizeof(state->stateid.u.data));
/* Mark the file as being 'closed' */
state->state = 0;
break;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 31a4b89..301ae14 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -986,7 +986,7 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(OP_CLOSE);
*p++ = cpu_to_be32(arg->seqid->sequence->counter);
- xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
hdr->nops++;
hdr->replen += decode_close_maxsz;
}
@@ -1160,7 +1160,8 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
if (args->new_lock_owner){
p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32);
*p++ = cpu_to_be32(args->open_seqid->sequence->counter);
- p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, args->open_stateid->u.data,
+ NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
p = xdr_encode_hyper(p, args->lock_owner.clientid);
*p++ = cpu_to_be32(16);
@@ -1169,7 +1170,7 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
}
else {
p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
- p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, args->lock_stateid->u.data, NFS4_STATEID_SIZE);
*p = cpu_to_be32(args->lock_seqid->sequence->counter);
}
hdr->nops++;
@@ -1201,7 +1202,8 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar
*p++ = cpu_to_be32(OP_LOCKU);
*p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
*p++ = cpu_to_be32(args->seqid->sequence->counter);
- p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, args->stateid->u.data,
+ NFS4_STATEID_SIZE);
p = xdr_encode_hyper(p, args->fl->fl_start);
xdr_encode_hyper(p, nfs4_lock_length(args->fl));
hdr->nops++;
@@ -1351,7 +1353,7 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
- xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
encode_string(xdr, name->len, name->name);
}
@@ -1382,7 +1384,7 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co
p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
*p++ = cpu_to_be32(OP_OPEN_CONFIRM);
- p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
*p = cpu_to_be32(arg->seqid->sequence->counter);
hdr->nops++;
hdr->replen += decode_open_confirm_maxsz;
@@ -1394,7 +1396,7 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close
p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
*p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
- p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, arg->stateid->u.data, NFS4_STATEID_SIZE);
*p = cpu_to_be32(arg->seqid->sequence->counter);
encode_share_access(xdr, arg->fmode);
hdr->nops++;
@@ -1432,9 +1434,10 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
p = reserve_space(xdr, NFS4_STATEID_SIZE);
if (ctx->state != NULL) {
nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
- xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, stateid.u.data,
+ NFS4_STATEID_SIZE);
} else
- xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
}
static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
@@ -1548,7 +1551,7 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(OP_SETATTR);
- xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, zero_stateid.u.data, NFS4_STATEID_SIZE);
p = reserve_space(xdr, 2*4);
*p++ = cpu_to_be32(1);
*p = cpu_to_be32(FATTR4_WORD0_ACL);
@@ -1579,7 +1582,7 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(OP_SETATTR);
- xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, arg->stateid.u.data, NFS4_STATEID_SIZE);
hdr->nops++;
hdr->replen += decode_setattr_maxsz;
encode_attrs(xdr, arg->iap, server);
@@ -1642,7 +1645,7 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state
p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(OP_DELEGRETURN);
- xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
+ xdr_encode_opaque_fixed(p, stateid->u.data, NFS4_STATEID_SIZE);
hdr->nops++;
hdr->replen += decode_delegreturn_maxsz;
}
@@ -1833,7 +1836,8 @@ encode_layoutget(struct xdr_stream *xdr,
p = xdr_encode_hyper(p, args->lseg.offset);
p = xdr_encode_hyper(p, args->lseg.length);
p = xdr_encode_hyper(p, args->minlength);
- p = xdr_encode_opaque_fixed(p, &args->stateid.data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, &args->stateid.u.data,
+ NFS4_STATEID_SIZE);
*p = cpu_to_be32(args->maxcount);
dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n",
@@ -1865,7 +1869,7 @@ encode_layoutcommit(struct xdr_stream *xdr,
p = xdr_encode_hyper(p, args->lseg.offset);
p = xdr_encode_hyper(p, args->lseg.length);
*p++ = cpu_to_be32(0); /* reclaim */
- p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE);
+ p = xdr_encode_opaque_fixed(p, args->stateid.u.data, NFS4_STATEID_SIZE);
*p++ = cpu_to_be32(1); /* newoffset = TRUE */
p = xdr_encode_hyper(p, args->lastbytewritten);
*p = cpu_to_be32(args->time_modify_changed != 0);
@@ -1912,7 +1916,7 @@ encode_layoutreturn(struct xdr_stream *xdr,
p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
p = xdr_encode_hyper(p, args->lseg.offset);
p = xdr_encode_hyper(p, args->lseg.length);
- p = xdr_encode_opaque_fixed(p, &args->stateid.data,
+ p = xdr_encode_opaque_fixed(p, &args->stateid.u.data,
NFS4_STATEID_SIZE);
dprintk("%s: call %pF\n", __func__,
@@ -3935,7 +3939,7 @@ static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
{
- return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
+ return decode_opaque_fixed(xdr, stateid->u.data, NFS4_STATEID_SIZE);
}
static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
@@ -5183,7 +5187,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
if (unlikely(!p))
goto out_overflow;
res->return_on_close = be32_to_cpup(p++);
- p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE);
+ p = xdr_decode_opaque_fixed(p, res->stateid.u.data, NFS4_STATEID_SIZE);
layout_count = be32_to_cpup(p);
if (!layout_count) {
dprintk("%s: server responded with empty layout array\n",
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8df4d75..dd7ad26 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -510,7 +510,7 @@ pnfs_set_layout_stateid(struct pnfs_layout_type *lo,
const nfs4_stateid *stateid)
{
write_seqlock(&lo->seqlock);
- memcpy(lo->stateid.data, stateid->data, sizeof(lo->stateid.data));
+ memcpy(lo->stateid.u.data, stateid->u.data, sizeof(lo->stateid.u.data));
write_sequnlock(&lo->seqlock);
}
@@ -523,7 +523,8 @@ pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_type *lo)
do {
seq = read_seqbegin(&lo->seqlock);
- memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
+ memcpy(dst->u.data, lo->stateid.u.data,
+ sizeof(lo->stateid.u.data));
} while (read_seqretry(&lo->seqlock, seq));
dprintk("<-- %s\n", __func__);
@@ -538,8 +539,8 @@ pnfs_layout_from_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
do {
seq = read_seqbegin(&state->seqlock);
- memcpy(dst->data, state->stateid.data,
- sizeof(state->stateid.data));
+ memcpy(dst->u.data, state->stateid.u.data,
+ sizeof(state->stateid.u.data));
} while (read_seqretry(&state->seqlock, seq));
dprintk("<-- %s\n", __func__);
@@ -585,7 +586,7 @@ get_layout(struct inode *ino,
lgp->args.inode = ino;
lgp->lsegpp = lsegpp;
- if (!memcmp(lo->stateid.data, &zero_stateid, NFS4_STATEID_SIZE)) {
+ if (!memcmp(lo->stateid.u.data, &zero_stateid, NFS4_STATEID_SIZE)) {
struct nfs_open_context *oldctx = ctx;
if (!oldctx) {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index eb78e7e..d80356a 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -41,7 +41,6 @@
#define NFSPROC4_CB_NULL 0
#define NFSPROC4_CB_COMPOUND 1
-#define NFS4_STATEID_SIZE 16
/* Index of predefined Linux callback client operations */
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 219e6b4..46bd627 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -17,7 +17,9 @@
#define NFS4_BITMAP_SIZE 2
#define NFS4_VERIFIER_SIZE 8
-#define NFS4_STATEID_SIZE 16
+#define NFS4_STATEID_SEQID_SIZE 4
+#define NFS4_STATEID_OTHER_SIZE 12
+#define NFS4_STATEID_SIZE (NFS4_STATEID_SEQID_SIZE + NFS4_STATEID_OTHER_SIZE)
#define NFS4_FHSIZE 128
#define NFS4_MAXPATHLEN PATH_MAX
#define NFS4_MAXNAMLEN NAME_MAX
@@ -174,7 +176,17 @@ struct nfs4_acl {
};
typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier;
-typedef struct { char data[NFS4_STATEID_SIZE]; } nfs4_stateid;
+
+struct nfs41_stateid {
+ __be32 seqid;
+ char other[NFS4_STATEID_OTHER_SIZE];
+} __attribute__ ((packed));
+typedef struct {
+ union {
+ char data[NFS4_STATEID_SIZE];
+ struct nfs41_stateid stateid;
+ } u;
+} nfs4_stateid;
enum nfs_opnum4 {
OP_ACCESS = 3,
--
1.6.2.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 5/8] pnfs-submit: request whole-file layouts only
2010-06-07 21:11 ` [PATCH 4/8] pnfs-submit: change stateid to be a union Alexandros Batsakis
@ 2010-06-07 21:11 ` Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 6/8] pnfs-submit: change layout list to be similar to other state lists Alexandros Batsakis
2010-06-08 7:14 ` [PATCH 5/8] pnfs-submit: request whole-file layouts only Benny Halevy
0 siblings, 2 replies; 22+ messages in thread
From: Alexandros Batsakis @ 2010-06-07 21:11 UTC (permalink / raw)
To: linux-nfs; +Cc: bhalevy, Alexandros Batsakis
In the first iteration of the pNFS code, we support only whole file layouts.
To facilitate the move to multiple segments, we keep the segment processing
code, but the segment list should always contain at most one segment per I/O type.
Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
---
fs/nfs/callback_proc.c | 7 ++++---
fs/nfs/pnfs.c | 25 ++++++++-----------------
2 files changed, 12 insertions(+), 20 deletions(-)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 8752239..16b4510 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -213,6 +213,10 @@ static int pnfs_recall_layout(void *data)
then return layouts, resume after layoutreturns complete
*/
+ /* support whole file layouts only */
+ rl.cbl_seg.offset = 0;
+ rl.cbl_seg.length = NFS4_MAX_UINT64;
+
if (rl.cbl_recall_type == RETURN_FILE) {
status = pnfs_return_layout(inode, &rl.cbl_seg, &rl.cbl_stateid,
RETURN_FILE, true);
@@ -221,9 +225,6 @@ static int pnfs_recall_layout(void *data)
goto out;
}
- rl.cbl_seg.offset = 0;
- rl.cbl_seg.length = NFS4_MAX_UINT64;
-
/* FIXME: This loop is inefficient, running in O(|s_inodes|^2) */
while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
/* XXX need to check status on pnfs_return_layout */
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index dd7ad26..8985e9f 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -551,12 +551,6 @@ pnfs_layout_from_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
* for now, assume that whole file layouts are requested.
* arg->offset: 0
* arg->length: all ones
-*
-* for now, assume the LAYOUTGET operation is triggered by an I/O request.
-* the count field is the count in the I/O request, and will be used
-* as the minlength. for the file operation that piggy-backs
-* the LAYOUTGET operation with an OPEN, s
-* arg->minlength = count.
*/
static int
get_layout(struct inode *ino,
@@ -577,11 +571,11 @@ get_layout(struct inode *ino,
return -ENOMEM;
}
lgp->lo = lo;
- lgp->args.minlength = PAGE_CACHE_SIZE;
+ lgp->args.minlength = NFS4_MAX_UINT64;
lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
lgp->args.lseg.iomode = range->iomode;
- lgp->args.lseg.offset = range->offset;
- lgp->args.lseg.length = max(range->length, lgp->args.minlength);
+ lgp->args.lseg.offset = 0;
+ lgp->args.lseg.length = NFS4_MAX_UINT64;
lgp->args.type = server->pnfs_curr_ld->id;
lgp->args.inode = ino;
lgp->lsegpp = lsegpp;
@@ -756,7 +750,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
else {
arg.iomode = IOMODE_ANY;
arg.offset = 0;
- arg.length = ~0;
+ arg.length = NFS4_MAX_UINT64;
}
if (type == RETURN_FILE) {
lo = get_lock_current_layout(nfsi);
@@ -1051,8 +1045,8 @@ pnfs_update_layout(struct inode *ino,
{
struct nfs4_pnfs_layout_segment arg = {
.iomode = iomode,
- .offset = pos,
- .length = count
+ .offset = 0,
+ .length = ~0
};
struct nfs_inode *nfsi = NFS_I(ino);
struct pnfs_layout_type *lo;
@@ -1142,7 +1136,6 @@ out_put:
void
pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)
{
- struct nfs4_pnfs_layoutget_res *res = &lgp->res;
struct pnfs_layout_segment *lseg = NULL;
struct nfs_inode *nfsi = PNFS_NFS_INODE(lgp->lo);
time_t suspend = 0;
@@ -1151,11 +1144,10 @@ pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)
lgp->status = rpc_status;
if (likely(!rpc_status)) {
- if (unlikely(res->layout.len <= 0)) {
+ if (unlikely(lgp->res.layout.len < 0)) {
printk(KERN_ERR
- "%s: ERROR! Layout size is ZERO!\n", __func__);
+ "%s: ERROR Returned layout size is ZERO\n", __func__);
lgp->status = -EIO;
- goto get_out;
}
goto out;
}
@@ -1233,7 +1225,6 @@ pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)
break;
}
-get_out:
/* remember that get layout failed and suspend trying */
nfsi->layout.pnfs_layout_suspend = suspend;
set_bit(lo_fail_bit(lgp->args.lseg.iomode),
--
1.6.2.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 6/8] pnfs-submit: change layout list to be similar to other state lists
2010-06-07 21:11 ` [PATCH 5/8] pnfs-submit: request whole-file layouts only Alexandros Batsakis
@ 2010-06-07 21:11 ` Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 7/8] pnfs-submit: forgetful client (layouts) Alexandros Batsakis
2010-06-08 7:14 ` [PATCH 5/8] pnfs-submit: request whole-file layouts only Benny Halevy
1 sibling, 1 reply; 22+ messages in thread
From: Alexandros Batsakis @ 2010-06-07 21:11 UTC (permalink / raw)
To: linux-nfs; +Cc: bhalevy, Alexandros Batsakis
The current design keeps a list (nfs_client) of inodes having layouts.
In order to make that code more similar to delegation handling (and in general to the rest of the NFS code),
this patch changes the list element to layouts directly.
No backpointer from the layout to the inode is needed, as the inode can be accessed by a container_of() call.
Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
---
fs/nfs/callback_proc.c | 9 +++++++--
fs/nfs/client.c | 2 +-
fs/nfs/inode.c | 4 ++--
fs/nfs/pnfs.c | 10 ++++------
include/linux/nfs_fs.h | 4 +---
include/linux/nfs_fs_sb.h | 2 +-
6 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 16b4510..3bae785 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -76,7 +76,6 @@ static int (*nfs_validate_delegation_stateid(struct nfs_client *clp))(struct nfs
return nfs4_validate_delegation_stateid;
}
-
__be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
{
struct nfs_client *clp;
@@ -140,6 +139,7 @@ nfs_layoutrecall_find_inode(struct nfs_client *clp,
const struct cb_pnfs_layoutrecallargs *args)
{
struct nfs_inode *nfsi;
+ struct pnfs_layout_type *layout;
struct nfs_server *server;
struct inode *ino = NULL;
@@ -147,9 +147,14 @@ nfs_layoutrecall_find_inode(struct nfs_client *clp,
__func__, args->cbl_recall_type, clp);
spin_lock(&clp->cl_lock);
- list_for_each_entry(nfsi, &clp->cl_lo_inodes, lo_inodes) {
+ list_for_each_entry(layout, &clp->cl_layouts, lo_layouts) {
+ nfsi = PNFS_NFS_INODE(layout);
+ if (!nfsi)
+ continue;
+
dprintk("%s: Searching inode=%lu\n",
__func__, nfsi->vfs_inode.i_ino);
+
if (args->cbl_recall_type == RETURN_FILE) {
if (nfs_compare_fh(&args->cbl_fh, &nfsi->fh))
continue;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 09ab4ea..b9abf15 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -157,7 +157,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
if (!IS_ERR(cred))
clp->cl_machine_cred = cred;
#if defined(CONFIG_NFS_V4_1)
- INIT_LIST_HEAD(&clp->cl_lo_inodes);
+ INIT_LIST_HEAD(&clp->cl_layouts);
#endif
nfs_fscache_get_client_cookie(clp);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index d43f2c5..b632ba7 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1400,7 +1400,7 @@ static void pnfs_destroy_inode(struct nfs_inode *nfsi)
if (!list_empty(&nfsi->layout.segs))
pnfs_destroy_layout(nfsi);
- BUG_ON(!list_empty(&nfsi->lo_inodes));
+ BUG_ON(!list_empty(&nfsi->layout.lo_layouts));
BUG_ON(!list_empty(&nfsi->layout.segs));
BUG_ON(nfsi->layout.refcount);
BUG_ON(nfsi->layout.ld_data);
@@ -1418,10 +1418,10 @@ void nfs_destroy_inode(struct inode *inode)
static void pnfs_init_once(struct nfs_inode *nfsi)
{
#ifdef CONFIG_NFS_V4_1
- INIT_LIST_HEAD(&nfsi->lo_inodes);
init_waitqueue_head(&nfsi->lo_waitq);
spin_lock_init(&nfsi->lo_lock);
seqlock_init(&nfsi->layout.seqlock);
+ INIT_LIST_HEAD(&nfsi->layout.lo_layouts);
INIT_LIST_HEAD(&nfsi->layout.segs);
nfsi->layout.refcount = 0;
nfsi->layout.ld_data = NULL;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8985e9f..d0b45bf 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -369,10 +369,10 @@ put_unlock_current_layout(struct pnfs_layout_type *lo)
io_ops->free_layout(lo->ld_data);
lo->ld_data = NULL;
- /* Unlist the inode. */
+ /* Unlist the layout. */
clp = NFS_SERVER(&nfsi->vfs_inode)->nfs_client;
spin_lock(&clp->cl_lock);
- list_del_init(&nfsi->lo_inodes);
+ list_del_init(&lo->lo_layouts);
spin_unlock(&clp->cl_lock);
}
out:
@@ -888,10 +888,8 @@ alloc_init_layout(struct inode *ino)
BUG_ON(lo->ld_data != NULL);
lo->ld_data = ld_data;
- seqlock_init(&lo->seqlock);
memset(&lo->stateid, 0, NFS4_STATEID_SIZE);
lo->refcount = 1;
- INIT_LIST_HEAD(&lo->segs);
lo->roc_iomode = 0;
return lo;
}
@@ -947,8 +945,8 @@ get_lock_alloc_layout(struct inode *ino)
lock_current_layout(nfsi);
spin_lock(&clp->cl_lock);
- if (list_empty(&nfsi->lo_inodes))
- list_add_tail(&nfsi->lo_inodes, &clp->cl_lo_inodes);
+ if (list_empty(&lo->lo_layouts))
+ list_add_tail(&lo->lo_layouts, &clp->cl_layouts);
spin_unlock(&clp->cl_lock);
} else
lo = ERR_PTR(-ENOMEM);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index f6e3e20..ee45eac 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -99,6 +99,7 @@ struct posix_acl;
struct pnfs_layout_type {
int refcount;
+ struct list_head lo_layouts; /* other client layouts */
struct list_head segs; /* layout segments list */
int roc_iomode; /* iomode to return on close, 0=none */
seqlock_t seqlock; /* Protects the stateid */
@@ -204,9 +205,6 @@ struct nfs_inode {
/* pNFS layout information */
#if defined(CONFIG_NFS_V4_1)
- /* Inodes having layouts */
- struct list_head lo_inodes;
-
wait_queue_head_t lo_waitq;
spinlock_t lo_lock;
struct pnfs_layout_type layout;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 4d58efd..b6a23e6 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -85,7 +85,7 @@ struct nfs_client {
/* The flags used for obtaining the clientid during EXCHANGE_ID */
u32 cl_exchange_flags;
struct nfs4_session *cl_session; /* sharred session */
- struct list_head cl_lo_inodes; /* Inodes having layouts */
+ struct list_head cl_layouts;
struct nfs4_deviceid_cache *cl_devid_cache; /* pNFS deviceid cache */
#endif /* CONFIG_NFS_V4_1 */
--
1.6.2.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 7/8] pnfs-submit: forgetful client (layouts)
2010-06-07 21:11 ` [PATCH 6/8] pnfs-submit: change layout list to be similar to other state lists Alexandros Batsakis
@ 2010-06-07 21:11 ` Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 8/8] pnfs-submit: support for CB_RECALL_ANY (layouts) Alexandros Batsakis
2010-06-08 7:23 ` [PATCH 7/8] pnfs-submit: forgetful client (layouts) Benny Halevy
0 siblings, 2 replies; 22+ messages in thread
From: Alexandros Batsakis @ 2010-06-07 21:11 UTC (permalink / raw)
To: linux-nfs; +Cc: bhalevy, Alexandros Batsakis
Forgetful client model:
If we receive a CB_LAYOUTRECALL
- we spawn a thread to handle the recall
(xxx: now only one recall can be active at a time, else NFS4ERR_DELAY)
- we check the stateid seqid
if it does not match we return NFS4ERR_DELAY
- we check for pending I/O
if there is any, we return NFS4ERR_DELAY
Else we return NO_MATCHING_LAYOUT.
Note that for whole file layouts there is no need to serialize LAYOUTGETs/LAYOUTRETURNs
For bulk layouts, if there is a layout active, we return NFS4_OK and we start
cleaning the layouts asynchronously. At the end we send a bulk LAYOUTRETURN.
Note that there is no need to prevent any new LAYOUTGETs explicitly as the server should reject them.
Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
---
fs/nfs/callback_proc.c | 146 ++++++++++++++++++++++++++++++++++--------------
fs/nfs/nfs4_fs.h | 1 +
fs/nfs/pnfs.c | 70 ++++++++++-------------
3 files changed, 136 insertions(+), 81 deletions(-)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 3bae785..af7a01d 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -129,6 +129,38 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
#if defined(CONFIG_NFS_V4_1)
+static bool
+pnfs_is_next_layout_stateid(const struct pnfs_layout_type *lo,
+ const nfs4_stateid stateid)
+{
+ int seqlock;
+ bool res;
+ u32 oldseqid, newseqid;
+
+ do {
+ seqlock = read_seqbegin(&lo->seqlock);
+ oldseqid = be32_to_cpu(lo->stateid.u.stateid.seqid);
+ newseqid = be32_to_cpu(stateid.u.stateid.seqid);
+ res = !memcmp(lo->stateid.u.stateid.other,
+ stateid.u.stateid.other,
+ NFS4_STATEID_OTHER_SIZE);
+ if (res) { /* comparing layout stateids */
+ if (oldseqid == ~0)
+ res = (newseqid == 1);
+ else
+ res = (newseqid == oldseqid + 1);
+ } else { /* open stateid */
+ res = !memcmp(lo->stateid.u.data,
+ &zero_stateid,
+ NFS4_STATEID_SIZE);
+ if (res)
+ res = (newseqid == 1);
+ }
+ } while (read_seqretry(&lo->seqlock, seqlock));
+
+ return res;
+}
+
/*
* Retrieve an inode based on layout recall parameters
*
@@ -191,9 +223,10 @@ static int pnfs_recall_layout(void *data)
struct inode *inode, *ino;
struct nfs_client *clp;
struct cb_pnfs_layoutrecallargs rl;
+ struct nfs4_pnfs_layoutreturn *lrp;
struct recall_layout_threadargs *args =
(struct recall_layout_threadargs *)data;
- int status;
+ int status = 0;
daemonize("nfsv4-layoutreturn");
@@ -204,47 +237,59 @@ static int pnfs_recall_layout(void *data)
clp = args->clp;
inode = args->inode;
rl = *args->rl;
- args->result = 0;
- complete(&args->started);
- args = NULL;
- /* Note: args must not be used after this point!!! */
-
-/* FIXME: need barrier here:
- pause I/O to data servers
- pause layoutgets
- drain all outstanding writes to storage devices
- wait for any outstanding layoutreturns and layoutgets mentioned in
- cb_sequence.
- then return layouts, resume after layoutreturns complete
- */
/* support whole file layouts only */
rl.cbl_seg.offset = 0;
rl.cbl_seg.length = NFS4_MAX_UINT64;
if (rl.cbl_recall_type == RETURN_FILE) {
- status = pnfs_return_layout(inode, &rl.cbl_seg, &rl.cbl_stateid,
- RETURN_FILE, true);
+ if (pnfs_is_next_layout_stateid(&NFS_I(inode)->layout,
+ rl.cbl_stateid))
+ status = pnfs_return_layout(inode, &rl.cbl_seg,
+ &rl.cbl_stateid, RETURN_FILE,
+ false);
+ else
+ status = cpu_to_be32(NFS4ERR_DELAY);
if (status)
dprintk("%s RETURN_FILE error: %d\n", __func__, status);
+ else
+ status = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
+ args->result = status;
+ complete(&args->started);
goto out;
}
- /* FIXME: This loop is inefficient, running in O(|s_inodes|^2) */
+ status = cpu_to_be32(NFS4_OK);
+ args->result = status;
+ complete(&args->started);
+ args = NULL;
+
+ /* IMPROVEME: This loop is inefficient, running in O(|s_inodes|^2) */
while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
- /* XXX need to check status on pnfs_return_layout */
- pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE, true);
+ /* FIXME: need to check status on pnfs_return_layout */
+ pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE, false);
iput(ino);
}
+ lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
+ if (!lrp) {
+ dprintk("%s: allocation failed. Cannot send last LAYOUTRETURN\n",
+ __func__);
+ goto out;
+ }
+
/* send final layoutreturn */
- status = pnfs_return_layout(inode, &rl.cbl_seg, NULL,
- rl.cbl_recall_type, true);
- if (status)
- printk(KERN_INFO "%s: ignoring pnfs_return_layout status=%d\n",
- __func__, status);
+ lrp->args.reclaim = 0;
+ lrp->args.layout_type = rl.cbl_layout_type;
+ lrp->args.return_type = rl.cbl_recall_type;
+ lrp->args.lseg = rl.cbl_seg;
+ lrp->args.inode = inode;
+ lrp->lo = NULL;
+ pnfs4_proc_layoutreturn(lrp, true);
+
out:
- iput(inode);
+ clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
+ nfs_put_client(clp);
module_put_and_exit(0);
dprintk("%s: exit status %d\n", __func__, 0);
return 0;
@@ -262,15 +307,18 @@ static int pnfs_async_return_layout(struct nfs_client *clp, struct inode *inode,
.rl = rl,
};
struct task_struct *t;
- int status;
-
- /* should have returned NFS4ERR_NOMATCHING_LAYOUT... */
- BUG_ON(inode == NULL);
+ int status = -EAGAIN;
dprintk("%s: -->\n", __func__);
+ /* FIXME: do not allow two concurrent layout recalls */
+ if (test_and_set_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state))
+ return status;
+
init_completion(&data.started);
__module_get(THIS_MODULE);
+ if (!atomic_inc_not_zero(&clp->cl_count))
+ goto out_put_no_client;
t = kthread_run(pnfs_recall_layout, &data, "%s", "pnfs_recall_layout");
if (IS_ERR(t)) {
@@ -284,6 +332,9 @@ static int pnfs_async_return_layout(struct nfs_client *clp, struct inode *inode,
wait_for_completion(&data.started);
return data.result;
out_module_put:
+ nfs_put_client(clp);
+out_put_no_client:
+ clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
module_put(THIS_MODULE);
return status;
}
@@ -294,35 +345,46 @@ __be32 pnfs_cb_layoutrecall(struct cb_pnfs_layoutrecallargs *args,
struct nfs_client *clp;
struct inode *inode = NULL;
__be32 res;
+ int status;
unsigned int num_client = 0;
dprintk("%s: -->\n", __func__);
- res = htonl(NFS4ERR_INVAL);
- clp = nfs_find_client(args->cbl_addr, 4);
+ res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
+ clp = nfs_find_client(args->cbl_addr, 4);
if (clp == NULL) {
dprintk("%s: no client for addr %u.%u.%u.%u\n",
__func__, NIPQUAD(args->cbl_addr));
goto out;
}
- res = htonl(NFS4ERR_NOMATCHING_LAYOUT);
+ res = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
do {
struct nfs_client *prev = clp;
num_client++;
- inode = nfs_layoutrecall_find_inode(clp, args);
- if (inode != NULL) {
- if (PNFS_LD(&NFS_I(inode)->layout)->id ==
- args->cbl_layout_type) {
- /* Set up a helper thread to actually
- * return the delegation */
- res = pnfs_async_return_layout(clp, inode, args);
- if (res != 0)
- res = htonl(NFS4ERR_RESOURCE);
- break;
+ /* the callback must come from the MDS personality */
+ if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
+ goto loop;
+ if (args->cbl_recall_type == RETURN_FILE) {
+ inode = nfs_layoutrecall_find_inode(clp, args);
+ if (inode != NULL) {
+ status = pnfs_async_return_layout(clp, inode,
+ args);
+ if (status == -EAGAIN)
+ res = cpu_to_be32(NFS4ERR_DELAY);
+ iput(inode);
}
+ } else { /* _ALL or _FSID */
+ /* we need the inode to get the nfs_server struct */
+ inode = nfs_layoutrecall_find_inode(clp, args);
+ if (!inode)
+ goto loop;
+ status = pnfs_async_return_layout(clp, inode, args);
+ if (status == -EAGAIN)
+ res = cpu_to_be32(NFS4ERR_DELAY);
iput(inode);
}
+loop:
clp = nfs_find_client_next(prev);
nfs_put_client(prev);
} while (clp != NULL);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index ebc9b3b..2f7974b 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -47,6 +47,7 @@ enum nfs4_client_state {
NFS4CLNT_SESSION_RESET,
NFS4CLNT_SESSION_DRAINING,
NFS4CLNT_RECALL_SLOT,
+ NFS4CLNT_LAYOUT_RECALL,
};
/*
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d0b45bf..2006926 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -709,6 +709,8 @@ return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
dprintk("--> %s\n", __func__);
+ BUG_ON(type != RETURN_FILE);
+
lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
if (lrp == NULL) {
if (lo && (type == RETURN_FILE))
@@ -745,13 +747,11 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
dprintk("--> %s type %d\n", __func__, type);
- if (range)
- arg = *range;
- else {
- arg.iomode = IOMODE_ANY;
- arg.offset = 0;
- arg.length = NFS4_MAX_UINT64;
- }
+
+ arg.iomode = range ? range->iomode : IOMODE_ANY;
+ arg.offset = 0;
+ arg.length = NFS4_MAX_UINT64;
+
if (type == RETURN_FILE) {
lo = get_lock_current_layout(nfsi);
if (lo && !has_layout_to_return(lo, &arg)) {
@@ -760,11 +760,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
}
if (!lo) {
dprintk("%s: no layout segments to return\n", __func__);
- /* must send the LAYOUTRETURN in response to recall */
- if (stateid)
- goto send_return;
- else
- goto out;
+ goto out;
}
/* unlock w/o put rebalanced by eventual call to
@@ -773,12 +769,23 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
unlock_current_layout(nfsi);
if (pnfs_return_layout_barrier(nfsi, &arg)) {
+ if (stateid) { /* callback */
+ status = -EAGAIN;
+ lock_current_layout(nfsi);
+ put_unlock_current_layout(lo);
+ goto out;
+ }
dprintk("%s: waiting\n", __func__);
wait_event(nfsi->lo_waitq,
- !pnfs_return_layout_barrier(nfsi, &arg));
+ !pnfs_return_layout_barrier(nfsi, &arg));
}
if (layoutcommit_needed(nfsi)) {
+ if (stateid && !wait) { /* callback */
+ dprintk("%s: layoutcommit pending\n", __func__);
+ status = -EAGAIN;
+ goto out;
+ }
status = pnfs_layoutcommit_inode(ino, wait);
if (status) {
dprintk("%s: layoutcommit failed, status=%d. "
@@ -787,9 +794,13 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
status = 0;
}
}
+
+ if (stateid && wait)
+ status = return_layout(ino, &arg, stateid, type,
+ lo, wait);
+ else
+ pnfs_layout_release(lo, &arg);
}
-send_return:
- status = return_layout(ino, &arg, stateid, type, lo, wait);
out:
dprintk("<-- %s status: %d\n", __func__, status);
return status;
@@ -1044,7 +1055,7 @@ pnfs_update_layout(struct inode *ino,
struct nfs4_pnfs_layout_segment arg = {
.iomode = iomode,
.offset = 0,
- .length = ~0
+ .length = NFS4_MAX_UINT64,
};
struct nfs_inode *nfsi = NFS_I(ino);
struct pnfs_layout_type *lo;
@@ -1063,31 +1074,12 @@ pnfs_update_layout(struct inode *ino,
/* Check to see if the layout for the given range already exists */
lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
if (lseg && !lseg->valid) {
- unlock_current_layout(nfsi);
if (take_ref)
put_lseg(lseg);
- for (;;) {
- prepare_to_wait(&nfsi->lo_waitq, &__wait,
- TASK_KILLABLE);
- lock_current_layout(nfsi);
- lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
- if (!lseg || lseg->valid)
- break;
- dprintk("%s: invalid lseg %p ref %d\n", __func__,
- lseg, atomic_read(&lseg->kref.refcount)-1);
- if (take_ref)
- put_lseg(lseg);
- if (signal_pending(current)) {
- lseg = NULL;
- result = -ERESTARTSYS;
- break;
- }
- unlock_current_layout(nfsi);
- schedule();
- }
- finish_wait(&nfsi->lo_waitq, &__wait);
- if (result)
- goto out_put;
+
+ /* someone is cleaning the layout */
+ result = -EAGAIN;
+ goto out_put;
}
if (lseg) {
--
1.6.2.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* [PATCH 8/8] pnfs-submit: support for CB_RECALL_ANY (layouts)
2010-06-07 21:11 ` [PATCH 7/8] pnfs-submit: forgetful client (layouts) Alexandros Batsakis
@ 2010-06-07 21:11 ` Alexandros Batsakis
2010-06-08 7:23 ` [PATCH 7/8] pnfs-submit: forgetful client (layouts) Benny Halevy
1 sibling, 0 replies; 22+ messages in thread
From: Alexandros Batsakis @ 2010-06-07 21:11 UTC (permalink / raw)
To: linux-nfs; +Cc: bhalevy, Alexandros Batsakis
CB_RECALL_ANY serves as a hint to the client to return some server state.
We reply immediately and we clean the layouts asynchronously.
FIXME: currently we return _all_ layouts
FIXME: eventually we should treat layouts as delegations, mark them expired
and fire the state manager to clean them.
Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
---
fs/nfs/callback.h | 7 +++++
fs/nfs/callback_proc.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 63 insertions(+), 2 deletions(-)
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index 73f21bc..b39ac86 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -115,6 +115,13 @@ extern int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation,
#define RCA4_TYPE_MASK_RDATA_DLG 0
#define RCA4_TYPE_MASK_WDATA_DLG 1
+#define RCA4_TYPE_MASK_DIR_DLG 2
+#define RCA4_TYPE_MASK_FILE_LAYOUT 3
+#define RCA4_TYPE_MASK_BLK_LAYOUT 4
+#define RCA4_TYPE_MASK_OBJ_LAYOUT_MIN 8
+#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX 9
+#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
+#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
struct cb_recallanyargs {
struct sockaddr *craa_addr;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index af7a01d..4bff9b0 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -339,6 +339,27 @@ out_put_no_client:
return status;
}
+static int pnfs_recall_all_layouts(struct nfs_client *clp)
+{
+ struct cb_pnfs_layoutrecallargs rl;
+ struct inode *inode;
+ int status = 0;
+
+ rl.cbl_recall_type = RETURN_ALL;
+ rl.cbl_seg.iomode = IOMODE_ANY;
+ rl.cbl_seg.offset = 0;
+ rl.cbl_seg.length = NFS4_MAX_UINT64;
+
+ /* we need the inode to get the nfs_server struct */
+ inode = nfs_layoutrecall_find_inode(clp, &rl);
+ if (!inode)
+ return status;
+ status = pnfs_async_return_layout(clp, inode, &rl);
+ iput(inode);
+
+ return status;
+}
+
__be32 pnfs_cb_layoutrecall(struct cb_pnfs_layoutrecallargs *args,
void *dummy)
{
@@ -606,13 +627,37 @@ out:
return status;
}
+static inline bool
+validate_bitmap_values(const unsigned long *mask)
+{
+ int i;
+
+ if (*mask == 0)
+ return true;
+ if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, mask) ||
+ test_bit(RCA4_TYPE_MASK_WDATA_DLG, mask) ||
+ test_bit(RCA4_TYPE_MASK_DIR_DLG, mask) ||
+ test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, mask) ||
+ test_bit(RCA4_TYPE_MASK_BLK_LAYOUT, mask))
+ return true;
+ for (i = RCA4_TYPE_MASK_OBJ_LAYOUT_MIN;
+ i <= RCA4_TYPE_MASK_OBJ_LAYOUT_MAX; i++)
+ if (test_bit(i, mask))
+ return true;
+ for (i = RCA4_TYPE_MASK_OTHER_LAYOUT_MIN;
+ i <= RCA4_TYPE_MASK_OTHER_LAYOUT_MAX; i++)
+ if (test_bit(i, mask))
+ return true;
+ return false;
+}
+
__be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
{
struct nfs_client *clp;
__be32 status;
fmode_t flags = 0;
- status = htonl(NFS4ERR_OP_NOT_IN_SESSION);
+ status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
clp = nfs_find_client(args->craa_addr, 4);
if (clp == NULL)
goto out;
@@ -620,16 +665,25 @@ __be32 nfs4_callback_recallany(struct cb_recallanyargs *args, void *dummy)
dprintk("NFS: RECALL_ANY callback request from %s\n",
rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+ status = cpu_to_be32(NFS4ERR_INVAL);
+ if (!validate_bitmap_values((const unsigned long *)
+ &args->craa_type_mask))
+ return status;
+
+ status = cpu_to_be32(NFS4_OK);
if (test_bit(RCA4_TYPE_MASK_RDATA_DLG, (const unsigned long *)
&args->craa_type_mask))
flags = FMODE_READ;
if (test_bit(RCA4_TYPE_MASK_WDATA_DLG, (const unsigned long *)
&args->craa_type_mask))
flags |= FMODE_WRITE;
+ if (test_bit(RCA4_TYPE_MASK_FILE_LAYOUT, (const unsigned long *)
+ &args->craa_type_mask))
+ if (pnfs_recall_all_layouts(clp) == -EAGAIN)
+ status = cpu_to_be32(NFS4ERR_DELAY);
if (flags)
nfs_expire_all_delegation_types(clp, flags);
- status = htonl(NFS4_OK);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
--
1.6.2.5
^ permalink raw reply related [flat|nested] 22+ messages in thread
* Re: [PATCH 5/8] pnfs-submit: request whole-file layouts only
2010-06-07 21:11 ` [PATCH 5/8] pnfs-submit: request whole-file layouts only Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 6/8] pnfs-submit: change layout list to be similar to other state lists Alexandros Batsakis
@ 2010-06-08 7:14 ` Benny Halevy
2010-06-08 7:33 ` Alexandros Batsakis
1 sibling, 1 reply; 22+ messages in thread
From: Benny Halevy @ 2010-06-08 7:14 UTC (permalink / raw)
To: Alexandros Batsakis; +Cc: linux-nfs
On 2010-06-08 00:11, Alexandros Batsakis wrote:
> In the first iteration of the pNFS code, we support only whole file layouts.
> To facilitate the move to multiple-segments, we keep the segment processing
> code, but the segment list should always contain at most one segment per I/O type
>
> Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
> ---
> fs/nfs/callback_proc.c | 7 ++++---
> fs/nfs/pnfs.c | 25 ++++++++-----------------
> 2 files changed, 12 insertions(+), 20 deletions(-)
>
> diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
> index 8752239..16b4510 100644
> --- a/fs/nfs/callback_proc.c
> +++ b/fs/nfs/callback_proc.c
> @@ -213,6 +213,10 @@ static int pnfs_recall_layout(void *data)
> then return layouts, resume after layoutreturns complete
> */
>
> + /* support whole file layouts only */
> + rl.cbl_seg.offset = 0;
> + rl.cbl_seg.length = NFS4_MAX_UINT64;
> +
> if (rl.cbl_recall_type == RETURN_FILE) {
> status = pnfs_return_layout(inode, &rl.cbl_seg, &rl.cbl_stateid,
> RETURN_FILE, true);
> @@ -221,9 +225,6 @@ static int pnfs_recall_layout(void *data)
> goto out;
> }
>
> - rl.cbl_seg.offset = 0;
> - rl.cbl_seg.length = NFS4_MAX_UINT64;
> -
> /* FIXME: This loop is inefficient, running in O(|s_inodes|^2) */
> while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
> /* XXX need to check status on pnfs_return_layout */
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index dd7ad26..8985e9f 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -551,12 +551,6 @@ pnfs_layout_from_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
> * for now, assume that whole file layouts are requested.
> * arg->offset: 0
> * arg->length: all ones
> -*
> -* for now, assume the LAYOUTGET operation is triggered by an I/O request.
> -* the count field is the count in the I/O request, and will be used
> -* as the minlength. for the file operation that piggy-backs
> -* the LAYOUTGET operation with an OPEN, s
> -* arg->minlength = count.
> */
> static int
> get_layout(struct inode *ino,
> @@ -577,11 +571,11 @@ get_layout(struct inode *ino,
> return -ENOMEM;
> }
> lgp->lo = lo;
> - lgp->args.minlength = PAGE_CACHE_SIZE;
> + lgp->args.minlength = NFS4_MAX_UINT64;
> lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
> lgp->args.lseg.iomode = range->iomode;
> - lgp->args.lseg.offset = range->offset;
> - lgp->args.lseg.length = max(range->length, lgp->args.minlength);
> + lgp->args.lseg.offset = 0;
> + lgp->args.lseg.length = NFS4_MAX_UINT64;
I'm not sure how well this will work for pnfs-obj writes.
We might have to revert it in the pnfs post-submit branch.
Benny
> lgp->args.type = server->pnfs_curr_ld->id;
> lgp->args.inode = ino;
> lgp->lsegpp = lsegpp;
> @@ -756,7 +750,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
> else {
> arg.iomode = IOMODE_ANY;
> arg.offset = 0;
> - arg.length = ~0;
> + arg.length = NFS4_MAX_UINT64;
> }
> if (type == RETURN_FILE) {
> lo = get_lock_current_layout(nfsi);
> @@ -1051,8 +1045,8 @@ pnfs_update_layout(struct inode *ino,
> {
> struct nfs4_pnfs_layout_segment arg = {
> .iomode = iomode,
> - .offset = pos,
> - .length = count
> + .offset = 0,
> + .length = ~0
> };
> struct nfs_inode *nfsi = NFS_I(ino);
> struct pnfs_layout_type *lo;
> @@ -1142,7 +1136,6 @@ out_put:
> void
> pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)
> {
> - struct nfs4_pnfs_layoutget_res *res = &lgp->res;
> struct pnfs_layout_segment *lseg = NULL;
> struct nfs_inode *nfsi = PNFS_NFS_INODE(lgp->lo);
> time_t suspend = 0;
> @@ -1151,11 +1144,10 @@ pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)
>
> lgp->status = rpc_status;
> if (likely(!rpc_status)) {
> - if (unlikely(res->layout.len <= 0)) {
> + if (unlikely(lgp->res.layout.len < 0)) {
> printk(KERN_ERR
> - "%s: ERROR! Layout size is ZERO!\n", __func__);
> + "%s: ERROR Returned layout size is ZERO\n", __func__);
> lgp->status = -EIO;
> - goto get_out;
> }
> goto out;
> }
> @@ -1233,7 +1225,6 @@ pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_status)
> break;
> }
>
> -get_out:
> /* remember that get layout failed and suspend trying */
> nfsi->layout.pnfs_layout_suspend = suspend;
> set_bit(lo_fail_bit(lgp->args.lseg.iomode),
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 7/8] pnfs-submit: forgetful client (layouts)
2010-06-07 21:11 ` [PATCH 7/8] pnfs-submit: forgetful client (layouts) Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 8/8] pnfs-submit: support for CB_RECALL_ANY (layouts) Alexandros Batsakis
@ 2010-06-08 7:23 ` Benny Halevy
2010-06-08 7:51 ` Alexandros Batsakis
1 sibling, 1 reply; 22+ messages in thread
From: Benny Halevy @ 2010-06-08 7:23 UTC (permalink / raw)
To: Alexandros Batsakis; +Cc: linux-nfs
On Jun. 08, 2010, 0:11 +0300, Alexandros Batsakis <batsakis@netapp.com> wrote:
> Forgetful client model:
>
> If we receive a CB_LAYOUTRECALL
> - we spawn a thread to handle the recall
> (xxx: now only one recall can be active at a time, else NFS4ERR_DELAY)
> - we check the stateid seqid
> if it does not match we return NFS4ERR_DELAY
> - we check for pending I/O
> if there is we return NFS4ERR_DELAY
> Else we return NO_MATCHING_LAYOUT.
> Note that for whole file layouts there is no need to serialize LAYOUTGETs/LAYOUTRETURNs
> For bulk layouts, if there is a layout active, we return NFS4_OK and we start
> cleaning the layouts asynchronously. At the end we send a bulk LAYOUTRETURN.
> Note that there is no need to prevent any new LAYOUTGETs explicitly as the server should reject them.
>
> Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
> ---
> fs/nfs/callback_proc.c | 146 ++++++++++++++++++++++++++++++++++--------------
> fs/nfs/nfs4_fs.h | 1 +
> fs/nfs/pnfs.c | 70 ++++++++++-------------
> 3 files changed, 136 insertions(+), 81 deletions(-)
>
> diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
> index 3bae785..af7a01d 100644
> --- a/fs/nfs/callback_proc.c
> +++ b/fs/nfs/callback_proc.c
> @@ -129,6 +129,38 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
>
> #if defined(CONFIG_NFS_V4_1)
>
> +static bool
> +pnfs_is_next_layout_stateid(const struct pnfs_layout_type *lo,
> + const nfs4_stateid stateid)
> +{
> + int seqlock;
> + bool res;
> + u32 oldseqid, newseqid;
> +
> + do {
> + seqlock = read_seqbegin(&lo->seqlock);
> + oldseqid = be32_to_cpu(lo->stateid.u.stateid.seqid);
> + newseqid = be32_to_cpu(stateid.u.stateid.seqid);
> + res = !memcmp(lo->stateid.u.stateid.other,
> + stateid.u.stateid.other,
> + NFS4_STATEID_OTHER_SIZE);
> + if (res) { /* comparing layout stateids */
> + if (oldseqid == ~0)
> + res = (newseqid == 1);
> + else
> + res = (newseqid == oldseqid + 1);
> + } else { /* open stateid */
> + res = !memcmp(lo->stateid.u.data,
> + &zero_stateid,
> + NFS4_STATEID_SIZE);
> + if (res)
> + res = (newseqid == 1);
> + }
> + } while (read_seqretry(&lo->seqlock, seqlock));
> +
> + return res;
> +}
> +
> /*
> * Retrieve an inode based on layout recall parameters
> *
> @@ -191,9 +223,10 @@ static int pnfs_recall_layout(void *data)
> struct inode *inode, *ino;
> struct nfs_client *clp;
> struct cb_pnfs_layoutrecallargs rl;
> + struct nfs4_pnfs_layoutreturn *lrp;
> struct recall_layout_threadargs *args =
> (struct recall_layout_threadargs *)data;
> - int status;
> + int status = 0;
>
> daemonize("nfsv4-layoutreturn");
>
> @@ -204,47 +237,59 @@ static int pnfs_recall_layout(void *data)
> clp = args->clp;
> inode = args->inode;
> rl = *args->rl;
> - args->result = 0;
> - complete(&args->started);
> - args = NULL;
> - /* Note: args must not be used after this point!!! */
> -
> -/* FIXME: need barrier here:
> - pause I/O to data servers
> - pause layoutgets
> - drain all outstanding writes to storage devices
> - wait for any outstanding layoutreturns and layoutgets mentioned in
> - cb_sequence.
> - then return layouts, resume after layoutreturns complete
> - */
>
> /* support whole file layouts only */
> rl.cbl_seg.offset = 0;
> rl.cbl_seg.length = NFS4_MAX_UINT64;
>
> if (rl.cbl_recall_type == RETURN_FILE) {
> - status = pnfs_return_layout(inode, &rl.cbl_seg, &rl.cbl_stateid,
> - RETURN_FILE, true);
> + if (pnfs_is_next_layout_stateid(&NFS_I(inode)->layout,
> + rl.cbl_stateid))
> + status = pnfs_return_layout(inode, &rl.cbl_seg,
> + &rl.cbl_stateid, RETURN_FILE,
> + false);
> + else
> + status = cpu_to_be32(NFS4ERR_DELAY);
> if (status)
> dprintk("%s RETURN_FILE error: %d\n", __func__, status);
> + else
> + status = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
> + args->result = status;
> + complete(&args->started);
> goto out;
> }
>
> - /* FIXME: This loop is inefficient, running in O(|s_inodes|^2) */
> + status = cpu_to_be32(NFS4_OK);
> + args->result = status;
> + complete(&args->started);
> + args = NULL;
> +
> + /* IMPROVEME: This loop is inefficient, running in O(|s_inodes|^2) */
> while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) {
> - /* XXX need to check status on pnfs_return_layout */
> - pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE, true);
> + /* FIXME: need to check status on pnfs_return_layout */
> + pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE, false);
> iput(ino);
> }
>
> + lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
> + if (!lrp) {
> + dprintk("%s: allocation failed. Cannot send last LAYOUTRETURN\n",
> + __func__);
> + goto out;
> + }
> +
> /* send final layoutreturn */
> - status = pnfs_return_layout(inode, &rl.cbl_seg, NULL,
> - rl.cbl_recall_type, true);
> - if (status)
> - printk(KERN_INFO "%s: ignoring pnfs_return_layout status=%d\n",
> - __func__, status);
> + lrp->args.reclaim = 0;
> + lrp->args.layout_type = rl.cbl_layout_type;
> + lrp->args.return_type = rl.cbl_recall_type;
> + lrp->args.lseg = rl.cbl_seg;
> + lrp->args.inode = inode;
> + lrp->lo = NULL;
> + pnfs4_proc_layoutreturn(lrp, true);
> +
> out:
> - iput(inode);
> + clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
> + nfs_put_client(clp);
> module_put_and_exit(0);
> dprintk("%s: exit status %d\n", __func__, 0);
> return 0;
> @@ -262,15 +307,18 @@ static int pnfs_async_return_layout(struct nfs_client *clp, struct inode *inode,
> .rl = rl,
> };
> struct task_struct *t;
> - int status;
> -
> - /* should have returned NFS4ERR_NOMATCHING_LAYOUT... */
> - BUG_ON(inode == NULL);
> + int status = -EAGAIN;
>
> dprintk("%s: -->\n", __func__);
>
> + /* FIXME: do not allow two concurrent layout recalls */
> + if (test_and_set_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state))
> + return status;
> +
> init_completion(&data.started);
> __module_get(THIS_MODULE);
> + if (!atomic_inc_not_zero(&clp->cl_count))
> + goto out_put_no_client;
>
> t = kthread_run(pnfs_recall_layout, &data, "%s", "pnfs_recall_layout");
> if (IS_ERR(t)) {
> @@ -284,6 +332,9 @@ static int pnfs_async_return_layout(struct nfs_client *clp, struct inode *inode,
> wait_for_completion(&data.started);
> return data.result;
> out_module_put:
> + nfs_put_client(clp);
> +out_put_no_client:
> + clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
> module_put(THIS_MODULE);
> return status;
> }
> @@ -294,35 +345,46 @@ __be32 pnfs_cb_layoutrecall(struct cb_pnfs_layoutrecallargs *args,
> struct nfs_client *clp;
> struct inode *inode = NULL;
> __be32 res;
> + int status;
> unsigned int num_client = 0;
>
> dprintk("%s: -->\n", __func__);
>
> - res = htonl(NFS4ERR_INVAL);
> - clp = nfs_find_client(args->cbl_addr, 4);
> + res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
> + clp = nfs_find_client(args->cbl_addr, 4);
> if (clp == NULL) {
> dprintk("%s: no client for addr %u.%u.%u.%u\n",
> __func__, NIPQUAD(args->cbl_addr));
> goto out;
> }
>
> - res = htonl(NFS4ERR_NOMATCHING_LAYOUT);
> + res = cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
> do {
> struct nfs_client *prev = clp;
> num_client++;
> - inode = nfs_layoutrecall_find_inode(clp, args);
> - if (inode != NULL) {
> - if (PNFS_LD(&NFS_I(inode)->layout)->id ==
> - args->cbl_layout_type) {
> - /* Set up a helper thread to actually
> - * return the delegation */
> - res = pnfs_async_return_layout(clp, inode, args);
> - if (res != 0)
> - res = htonl(NFS4ERR_RESOURCE);
> - break;
> + /* the callback must come from the MDS personality */
> + if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
> + goto loop;
> + if (args->cbl_recall_type == RETURN_FILE) {
> + inode = nfs_layoutrecall_find_inode(clp, args);
> + if (inode != NULL) {
> + status = pnfs_async_return_layout(clp, inode,
> + args);
> + if (status == -EAGAIN)
> + res = cpu_to_be32(NFS4ERR_DELAY);
what about other errors?
> + iput(inode);
> }
> + } else { /* _ALL or _FSID */
> + /* we need the inode to get the nfs_server struct */
> + inode = nfs_layoutrecall_find_inode(clp, args);
> + if (!inode)
> + goto loop;
> + status = pnfs_async_return_layout(clp, inode, args);
> + if (status == -EAGAIN)
> + res = cpu_to_be32(NFS4ERR_DELAY);
ditto
> iput(inode);
> }
> +loop:
> clp = nfs_find_client_next(prev);
> nfs_put_client(prev);
> } while (clp != NULL);
> diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
> index ebc9b3b..2f7974b 100644
> --- a/fs/nfs/nfs4_fs.h
> +++ b/fs/nfs/nfs4_fs.h
> @@ -47,6 +47,7 @@ enum nfs4_client_state {
> NFS4CLNT_SESSION_RESET,
> NFS4CLNT_SESSION_DRAINING,
> NFS4CLNT_RECALL_SLOT,
> + NFS4CLNT_LAYOUT_RECALL,
> };
>
> /*
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index d0b45bf..2006926 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -709,6 +709,8 @@ return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
>
> dprintk("--> %s\n", __func__);
>
> + BUG_ON(type != RETURN_FILE);
> +
> lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
> if (lrp == NULL) {
> if (lo && (type == RETURN_FILE))
> @@ -745,13 +747,11 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
>
> dprintk("--> %s type %d\n", __func__, type);
>
> - if (range)
> - arg = *range;
> - else {
> - arg.iomode = IOMODE_ANY;
> - arg.offset = 0;
> - arg.length = NFS4_MAX_UINT64;
> - }
> +
> + arg.iomode = range ? range->iomode : IOMODE_ANY;
> + arg.offset = 0;
> + arg.length = NFS4_MAX_UINT64;
> +
> if (type == RETURN_FILE) {
> lo = get_lock_current_layout(nfsi);
> if (lo && !has_layout_to_return(lo, &arg)) {
> @@ -760,11 +760,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
> }
> if (!lo) {
> dprintk("%s: no layout segments to return\n", __func__);
> - /* must send the LAYOUTRETURN in response to recall */
> - if (stateid)
> - goto send_return;
> - else
> - goto out;
> + goto out;
> }
>
> /* unlock w/o put rebalanced by eventual call to
> @@ -773,12 +769,23 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
> unlock_current_layout(nfsi);
>
> if (pnfs_return_layout_barrier(nfsi, &arg)) {
> + if (stateid) { /* callback */
> + status = -EAGAIN;
> + lock_current_layout(nfsi);
> + put_unlock_current_layout(lo);
> + goto out;
> + }
> dprintk("%s: waiting\n", __func__);
> wait_event(nfsi->lo_waitq,
> - !pnfs_return_layout_barrier(nfsi, &arg));
> + !pnfs_return_layout_barrier(nfsi, &arg));
> }
>
> if (layoutcommit_needed(nfsi)) {
> + if (stateid && !wait) { /* callback */
> + dprintk("%s: layoutcommit pending\n", __func__);
> + status = -EAGAIN;
> + goto out;
> + }
> status = pnfs_layoutcommit_inode(ino, wait);
> if (status) {
> dprintk("%s: layoutcommit failed, status=%d. "
> @@ -787,9 +794,13 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
> status = 0;
> }
> }
> +
> + if (stateid && wait)
> + status = return_layout(ino, &arg, stateid, type,
> + lo, wait);
> + else
> + pnfs_layout_release(lo, &arg);
> }
> -send_return:
> - status = return_layout(ino, &arg, stateid, type, lo, wait);
> out:
> dprintk("<-- %s status: %d\n", __func__, status);
> return status;
> @@ -1044,7 +1055,7 @@ pnfs_update_layout(struct inode *ino,
> struct nfs4_pnfs_layout_segment arg = {
> .iomode = iomode,
> .offset = 0,
> - .length = ~0
> + .length = NFS4_MAX_UINT64,
why do you have to ask for whole file layouts?
Isn't it enough to always return the whole layout
but potentially having more than one layout segment?
Benny
> };
> struct nfs_inode *nfsi = NFS_I(ino);
> struct pnfs_layout_type *lo;
> @@ -1063,31 +1074,12 @@ pnfs_update_layout(struct inode *ino,
> /* Check to see if the layout for the given range already exists */
> lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
> if (lseg && !lseg->valid) {
> - unlock_current_layout(nfsi);
> if (take_ref)
> put_lseg(lseg);
> - for (;;) {
> - prepare_to_wait(&nfsi->lo_waitq, &__wait,
> - TASK_KILLABLE);
> - lock_current_layout(nfsi);
> - lseg = pnfs_has_layout(lo, &arg, take_ref, !take_ref);
> - if (!lseg || lseg->valid)
> - break;
> - dprintk("%s: invalid lseg %p ref %d\n", __func__,
> - lseg, atomic_read(&lseg->kref.refcount)-1);
> - if (take_ref)
> - put_lseg(lseg);
> - if (signal_pending(current)) {
> - lseg = NULL;
> - result = -ERESTARTSYS;
> - break;
> - }
> - unlock_current_layout(nfsi);
> - schedule();
> - }
> - finish_wait(&nfsi->lo_waitq, &__wait);
> - if (result)
> - goto out_put;
> +
> + /* someone is cleaning the layout */
> + result = -EAGAIN;
> + goto out_put;
> }
>
> if (lseg) {
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 2/8] pnfs-submit: clean locking infrastructure
2010-06-07 21:11 ` [PATCH 2/8] pnfs-submit: clean locking infrastructure Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 3/8] pnfs-submit: remove lgetcount, lretcount Alexandros Batsakis
@ 2010-06-08 7:30 ` Christoph Hellwig
2010-06-08 7:34 ` Benny Halevy
1 sibling, 1 reply; 22+ messages in thread
From: Christoph Hellwig @ 2010-06-08 7:30 UTC (permalink / raw)
To: Alexandros Batsakis; +Cc: linux-nfs, bhalevy, Fred Isaman
On Mon, Jun 07, 2010 at 02:11:47PM -0700, Alexandros Batsakis wrote:
> +static inline void lock_current_layout(struct nfs_inode *nfsi)
> +{
> + spin_lock(&nfsi->lo_lock);
> +}
> +
> +static inline void unlock_current_layout(struct nfs_inode *nfsi)
> +{
> + BUG_ON_UNLOCKED_LO((&nfsi->layout));
> + spin_unlock(&nfsi->lo_lock);
> +}
Adding wrappers for these is nothing but obfuscation. No need
for the BUG_ON above, the spinlock code asserts that already if
building with spinlock debugging.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 5/8] pnfs-submit: request whole-file layouts only
2010-06-08 7:14 ` [PATCH 5/8] pnfs-submit: request whole-file layouts only Benny Halevy
@ 2010-06-08 7:33 ` Alexandros Batsakis
0 siblings, 0 replies; 22+ messages in thread
From: Alexandros Batsakis @ 2010-06-08 7:33 UTC (permalink / raw)
To: Benny Halevy; +Cc: Alexandros Batsakis, linux-nfs
On Tue, Jun 8, 2010 at 12:14 AM, Benny Halevy <bhalevy@panasas.com> wrote:
> On 2010-06-08 00:11, Alexandros Batsakis wrote:
>> In the first iteration of the pNFS code, we support only whole file layouts.
>> To facilitate the move to multiple-segments, we keep the segment processing
>> code, but the segment list should always contain at most one segment per I/O type
>>
>> Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
>> ---
>>  fs/nfs/callback_proc.c |    7 ++++---
>>  fs/nfs/pnfs.c          |   25 ++++++++-----------------
>>  2 files changed, 12 insertions(+), 20 deletions(-)
>>
>> diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
>> index 8752239..16b4510 100644
>> --- a/fs/nfs/callback_proc.c
>> +++ b/fs/nfs/callback_proc.c
>> @@ -213,6 +213,10 @@ static int pnfs_recall_layout(void *data)
>> =A0 =A0 then return layouts, resume after layoutreturns complete
>> =A0 */
>>
>> + =A0 =A0 /* support whole file layouts only */
>> + =A0 =A0 rl.cbl_seg.offset =3D 0;
>> + =A0 =A0 rl.cbl_seg.length =3D NFS4_MAX_UINT64;
>> +
>> =A0 =A0 =A0 if (rl.cbl_recall_type =3D=3D RETURN_FILE) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 status =3D pnfs_return_layout(inode, &rl=
=2Ecbl_seg, &rl.cbl_stateid,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =
=A0 =A0 =A0 =A0 RETURN_FILE, true);
>> @@ -221,9 +225,6 @@ static int pnfs_recall_layout(void *data)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
>> =A0 =A0 =A0 }
>>
>> - =A0 =A0 rl.cbl_seg.offset =3D 0;
>> - =A0 =A0 rl.cbl_seg.length =3D NFS4_MAX_UINT64;
>> -
>> =A0 =A0 =A0 /* FIXME: This loop is inefficient, running in O(|s_inod=
es|^2) */
>> =A0 =A0 =A0 while ((ino =3D nfs_layoutrecall_find_inode(clp, &rl)) !=
=3D NULL) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 /* XXX need to check status on pnfs_retu=
rn_layout */
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index dd7ad26..8985e9f 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -551,12 +551,6 @@ pnfs_layout_from_open_stateid(nfs4_stateid *dst=
, struct nfs4_state *state)
>> =A0* =A0 =A0for now, assume that whole file layouts are requested.
>> =A0* =A0 =A0arg->offset: 0
>> =A0* =A0 =A0arg->length: all ones
>> -*
>> -* =A0 =A0for now, assume the LAYOUTGET operation is triggered by an=
I/O request.
>> -* =A0 =A0the count field is the count in the I/O request, and will =
be used
>> -* =A0 =A0as the minlength. for the file operation that piggy-backs
>> -* =A0 =A0the LAYOUTGET operation with an OPEN, s
>> -* =A0 =A0arg->minlength =3D count.
>> =A0*/
>> =A0static int
>> =A0get_layout(struct inode *ino,
>> @@ -577,11 +571,11 @@ get_layout(struct inode *ino,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 return -ENOMEM;
>> =A0 =A0 =A0 }
>> =A0 =A0 =A0 lgp->lo =3D lo;
>> - =A0 =A0 lgp->args.minlength =3D PAGE_CACHE_SIZE;
>> + =A0 =A0 lgp->args.minlength =3D NFS4_MAX_UINT64;
>> =A0 =A0 =A0 lgp->args.maxcount =3D PNFS_LAYOUT_MAXSIZE;
>> =A0 =A0 =A0 lgp->args.lseg.iomode =3D range->iomode;
>> - =A0 =A0 lgp->args.lseg.offset =3D range->offset;
>> - =A0 =A0 lgp->args.lseg.length =3D max(range->length, lgp->args.min=
length);
>> + =A0 =A0 lgp->args.lseg.offset =3D 0;
>> + =A0 =A0 lgp->args.lseg.length =3D NFS4_MAX_UINT64;
>
> I'm not sure how well this will work for pnfs-obj writes.
> We might have to revert it in the pnfs post-submit branch.
>
> Benny
>
Agreed... but bear in mind that there are other things that need to
change too to properly (and efficiently) support partial layouts (e.g.
layout operation sequencing). So maybe a fresh patch will be more
appropriate, but it's up to you of course.
-alexandros
>> =A0 =A0 =A0 lgp->args.type =3D server->pnfs_curr_ld->id;
>> =A0 =A0 =A0 lgp->args.inode =3D ino;
>> =A0 =A0 =A0 lgp->lsegpp =3D lsegpp;
>> @@ -756,7 +750,7 @@ _pnfs_return_layout(struct inode *ino, struct nf=
s4_pnfs_layout_segment *range,
>> =A0 =A0 =A0 else {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 arg.iomode =3D IOMODE_ANY;
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 arg.offset =3D 0;
>> - =A0 =A0 =A0 =A0 =A0 =A0 arg.length =3D ~0;
>> + =A0 =A0 =A0 =A0 =A0 =A0 arg.length =3D NFS4_MAX_UINT64;
>> =A0 =A0 =A0 }
>> =A0 =A0 =A0 if (type =3D=3D RETURN_FILE) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 lo =3D get_lock_current_layout(nfsi);
>> @@ -1051,8 +1045,8 @@ pnfs_update_layout(struct inode *ino,
>> =A0{
>> =A0 =A0 =A0 struct nfs4_pnfs_layout_segment arg =3D {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 .iomode =3D iomode,
>> - =A0 =A0 =A0 =A0 =A0 =A0 .offset =3D pos,
>> - =A0 =A0 =A0 =A0 =A0 =A0 .length =3D count
>> + =A0 =A0 =A0 =A0 =A0 =A0 .offset =3D 0,
>> + =A0 =A0 =A0 =A0 =A0 =A0 .length =3D ~0
>> =A0 =A0 =A0 };
>> =A0 =A0 =A0 struct nfs_inode *nfsi =3D NFS_I(ino);
>> =A0 =A0 =A0 struct pnfs_layout_type *lo;
>> @@ -1142,7 +1136,6 @@ out_put:
>> =A0void
>> =A0pnfs_get_layout_done(struct nfs4_pnfs_layoutget *lgp, int rpc_sta=
tus)
>> =A0{
>> - =A0 =A0 struct nfs4_pnfs_layoutget_res *res =3D &lgp->res;
>> =A0 =A0 =A0 struct pnfs_layout_segment *lseg =3D NULL;
>> =A0 =A0 =A0 struct nfs_inode *nfsi =3D PNFS_NFS_INODE(lgp->lo);
>> =A0 =A0 =A0 time_t suspend =3D 0;
>> @@ -1151,11 +1144,10 @@ pnfs_get_layout_done(struct nfs4_pnfs_layout=
get *lgp, int rpc_status)
>>
>> =A0 =A0 =A0 lgp->status =3D rpc_status;
>> =A0 =A0 =A0 if (likely(!rpc_status)) {
>> - =A0 =A0 =A0 =A0 =A0 =A0 if (unlikely(res->layout.len <=3D 0)) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 if (unlikely(lgp->res.layout.len < 0)) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 printk(KERN_ERR
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0"%s: ERROR!=
=A0Layout size is ZERO!\n", __func__);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0"%s: ERROR =
Returned layout size is ZERO\n", __func__);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 lgp->status =3D -EIO;
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto get_out;
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
>> =A0 =A0 =A0 }
>> @@ -1233,7 +1225,6 @@ pnfs_get_layout_done(struct nfs4_pnfs_layoutge=
t *lgp, int rpc_status)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 break;
>> =A0 =A0 =A0 }
>>
>> -get_out:
>> =A0 =A0 =A0 /* remember that get layout failed and suspend trying */
>> =A0 =A0 =A0 nfsi->layout.pnfs_layout_suspend =3D suspend;
>> =A0 =A0 =A0 set_bit(lo_fail_bit(lgp->args.lseg.iomode),
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 2/8] pnfs-submit: clean locking infrastructure
2010-06-08 7:30 ` [PATCH 2/8] pnfs-submit: clean locking infrastructure Christoph Hellwig
@ 2010-06-08 7:34 ` Benny Halevy
0 siblings, 0 replies; 22+ messages in thread
From: Benny Halevy @ 2010-06-08 7:34 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: Alexandros Batsakis, linux-nfs, Fred Isaman
On 2010-06-08 10:30, Christoph Hellwig wrote:
> On Mon, Jun 07, 2010 at 02:11:47PM -0700, Alexandros Batsakis wrote:
>> +static inline void lock_current_layout(struct nfs_inode *nfsi)
>> +{
>> + spin_lock(&nfsi->lo_lock);
>> +}
>> +
>> +static inline void unlock_current_layout(struct nfs_inode *nfsi)
>> +{
>> + BUG_ON_UNLOCKED_LO((&nfsi->layout));
>> + spin_unlock(&nfsi->lo_lock);
>> +}
>
> Adding wrappers for these is nothing but obfuscation. No need
> for the BUG_ON above, the spinlock code asserts that already if
> building with spinlock debugging.
Good point.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 7/8] pnfs-submit: forgetful client (layouts)
2010-06-08 7:23 ` [PATCH 7/8] pnfs-submit: forgetful client (layouts) Benny Halevy
@ 2010-06-08 7:51 ` Alexandros Batsakis
2010-06-08 9:15 ` Benny Halevy
0 siblings, 1 reply; 22+ messages in thread
From: Alexandros Batsakis @ 2010-06-08 7:51 UTC (permalink / raw)
To: Benny Halevy; +Cc: Alexandros Batsakis, linux-nfs
On Tue, Jun 8, 2010 at 12:23 AM, Benny Halevy <bhalevy@panasas.com> wrote:
> On Jun. 08, 2010, 0:11 +0300, Alexandros Batsakis <batsakis@netapp.com> wrote:
>> Forgetful client model:
>>
>> If we receive a CB_LAYOUTRECALL
>>         - we spawn a thread to handle the recall
>>         (xxx: now only one recall can be active at a time, else NFS4ERR_DELAY)
>>         - we check the stateid seqid
>>         if it does not match we return NFS4ERR_DELAY
>>         - we check for pending I/O
>>         if there is we return NFS4ERR_DELAY
>>         Else we return NO_MATCHING_LAYOUT.
>>         Note that for whole file layouts there is no need to serialize LAYOUTGETs/LAYOUTRETURNs
>> For bulk layouts, if there is a layout active, we return NFS4_OK and we start
>> cleaning the layouts asynchronously. At the end we send a bulk LAYOUTRETURN.
>> Note that there is no need to prevent any new LAYOUTGETs explicitly as the server should reject them.
>>
>> Signed-off-by: Alexandros Batsakis <batsakis@netapp.com>
>> ---
>>  fs/nfs/callback_proc.c |  146 ++++++++++++++++++++++++++++++++++--------------
>>  fs/nfs/nfs4_fs.h       |    1 +
>>  fs/nfs/pnfs.c          |   70 ++++++++++-------------
>>  3 files changed, 136 insertions(+), 81 deletions(-)
>>
>> diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
>> index 3bae785..af7a01d 100644
>> --- a/fs/nfs/callback_proc.c
>> +++ b/fs/nfs/callback_proc.c
>> @@ -129,6 +129,38 @@ int nfs4_validate_delegation_stateid(struct nfs=
_delegation *delegation, const nf
>>
>> =A0#if defined(CONFIG_NFS_V4_1)
>>
>> +static bool
>> +pnfs_is_next_layout_stateid(const struct pnfs_layout_type *lo,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 const nfs4_stateid=
stateid)
>> +{
>> + =A0 =A0 int seqlock;
>> + =A0 =A0 bool res;
>> + =A0 =A0 u32 oldseqid, newseqid;
>> +
>> + =A0 =A0 do {
>> + =A0 =A0 =A0 =A0 =A0 =A0 seqlock =3D read_seqbegin(&lo->seqlock);
>> + =A0 =A0 =A0 =A0 =A0 =A0 oldseqid =3D be32_to_cpu(lo->stateid.u.sta=
teid.seqid);
>> + =A0 =A0 =A0 =A0 =A0 =A0 newseqid =3D be32_to_cpu(stateid.u.stateid=
=2Eseqid);
>> + =A0 =A0 =A0 =A0 =A0 =A0 res =3D !memcmp(lo->stateid.u.stateid.othe=
r,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 stateid.u.stat=
eid.other,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 NFS4_STATEID_O=
THER_SIZE);
>> + =A0 =A0 =A0 =A0 =A0 =A0 if (res) { /* comparing layout stateids */
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (oldseqid =3D=3D ~0)
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 res =3D (n=
ewseqid =3D=3D 1);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 else
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 res =3D (n=
ewseqid =3D=3D oldseqid + 1);
>> + =A0 =A0 =A0 =A0 =A0 =A0 } else { /* open stateid */
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 res =3D !memcmp(lo->statei=
d.u.data,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
&zero_stateid,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
NFS4_STATEID_SIZE);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (res)
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 res =3D (n=
ewseqid =3D=3D 1);
>> + =A0 =A0 =A0 =A0 =A0 =A0 }
>> + =A0 =A0 } while (read_seqretry(&lo->seqlock, seqlock));
>> +
>> + =A0 =A0 return res;
>> +}
>> +
>> =A0/*
>> =A0 * Retrieve an inode based on layout recall parameters
>> =A0 *
>> @@ -191,9 +223,10 @@ static int pnfs_recall_layout(void *data)
>> =A0 =A0 =A0 struct inode *inode, *ino;
>> =A0 =A0 =A0 struct nfs_client *clp;
>> =A0 =A0 =A0 struct cb_pnfs_layoutrecallargs rl;
>> + =A0 =A0 struct nfs4_pnfs_layoutreturn *lrp;
>> =A0 =A0 =A0 struct recall_layout_threadargs *args =3D
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 (struct recall_layout_threadargs *)data;
>> - =A0 =A0 int status;
>> + =A0 =A0 int status =3D 0;
>>
>> =A0 =A0 =A0 daemonize("nfsv4-layoutreturn");
>>
>> @@ -204,47 +237,59 @@ static int pnfs_recall_layout(void *data)
>> =A0 =A0 =A0 clp =3D args->clp;
>> =A0 =A0 =A0 inode =3D args->inode;
>> =A0 =A0 =A0 rl =3D *args->rl;
>> - =A0 =A0 args->result =3D 0;
>> - =A0 =A0 complete(&args->started);
>> - =A0 =A0 args =3D NULL;
>> - =A0 =A0 /* Note: args must not be used after this point!!! */
>> -
>> -/* FIXME: need barrier here:
>> - =A0 pause I/O to data servers
>> - =A0 pause layoutgets
>> - =A0 drain all outstanding writes to storage devices
>> - =A0 wait for any outstanding layoutreturns and layoutgets mentione=
d in
>> - =A0 cb_sequence.
>> - =A0 then return layouts, resume after layoutreturns complete
>> - */
>>
>> =A0 =A0 =A0 /* support whole file layouts only */
>> =A0 =A0 =A0 rl.cbl_seg.offset =3D 0;
>> =A0 =A0 =A0 rl.cbl_seg.length =3D NFS4_MAX_UINT64;
>>
>> =A0 =A0 =A0 if (rl.cbl_recall_type =3D=3D RETURN_FILE) {
>> - =A0 =A0 =A0 =A0 =A0 =A0 status =3D pnfs_return_layout(inode, &rl.c=
bl_seg, &rl.cbl_stateid,
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0 =A0 RETURN_FILE, true);
>> + =A0 =A0 =A0 =A0 =A0 =A0 if (pnfs_is_next_layout_stateid(&NFS_I(ino=
de)->layout,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0 =A0 =A0 =A0 rl.cbl_stateid))
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 status =3D pnfs_return_lay=
out(inode, &rl.cbl_seg,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0 =A0 =A0 =A0 =A0 =A0 &rl.cbl_stateid, RETURN_FILE,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0 =A0 =A0 =A0 =A0 =A0 false);
>> + =A0 =A0 =A0 =A0 =A0 =A0 else
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 status =3D cpu_to_be32(NFS=
4ERR_DELAY);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (status)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 dprintk("%s RETURN_FILE =
error: %d\n", __func__, status);
>> + =A0 =A0 =A0 =A0 =A0 =A0 else
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 status =3D =A0cpu_to_be32(=
NFS4ERR_NOMATCHING_LAYOUT);
>> + =A0 =A0 =A0 =A0 =A0 =A0 args->result =3D status;
>> + =A0 =A0 =A0 =A0 =A0 =A0 complete(&args->started);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
>> =A0 =A0 =A0 }
>>
>> - =A0 =A0 /* FIXME: This loop is inefficient, running in O(|s_inodes=
|^2) */
>> + =A0 =A0 status =3D cpu_to_be32(NFS4_OK);
>> + =A0 =A0 args->result =3D status;
>> + =A0 =A0 complete(&args->started);
>> + =A0 =A0 args =3D NULL;
>> +
>> + =A0 =A0 /* IMPROVEME: This loop is inefficient, running in O(|s_in=
odes|^2) */
>> =A0 =A0 =A0 while ((ino =3D nfs_layoutrecall_find_inode(clp, &rl)) !=
=3D NULL) {
>> - =A0 =A0 =A0 =A0 =A0 =A0 /* XXX need to check status on pnfs_return=
_layout */
>> - =A0 =A0 =A0 =A0 =A0 =A0 pnfs_return_layout(ino, &rl.cbl_seg, NULL,=
RETURN_FILE, true);
>> + =A0 =A0 =A0 =A0 =A0 =A0 /* FIXME: need to check status on pnfs_ret=
urn_layout */
>> + =A0 =A0 =A0 =A0 =A0 =A0 pnfs_return_layout(ino, &rl.cbl_seg, NULL,=
RETURN_FILE, false);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 iput(ino);
>> =A0 =A0 =A0 }
>>
>> + =A0 =A0 lrp =3D kzalloc(sizeof(*lrp), GFP_KERNEL);
>> + =A0 =A0 if (!lrp) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 dprintk("%s: allocation failed. Cannot sen=
d last LAYOUTRETURN\n",
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 __func__);
>> + =A0 =A0 =A0 =A0 =A0 =A0 goto out;
>> + =A0 =A0 }
>> +
>> =A0 =A0 =A0 /* send final layoutreturn */
>> - =A0 =A0 status =3D pnfs_return_layout(inode, &rl.cbl_seg, NULL,
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 rl=
=2Ecbl_recall_type, true);
>> - =A0 =A0 if (status)
>> - =A0 =A0 =A0 =A0 =A0 =A0 printk(KERN_INFO "%s: ignoring pnfs_return=
_layout status=3D%d\n",
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 __func__, =
status);
>> + =A0 =A0 lrp->args.reclaim =3D 0;
>> + =A0 =A0 lrp->args.layout_type =3D rl.cbl_layout_type;
>> + =A0 =A0 lrp->args.return_type =3D rl.cbl_recall_type;
>> + =A0 =A0 lrp->args.lseg =3D rl.cbl_seg;
>> + =A0 =A0 lrp->args.inode =3D inode;
>> + =A0 =A0 lrp->lo =3D NULL;
>> + =A0 =A0 pnfs4_proc_layoutreturn(lrp, true);
>> +
>> =A0out:
>> - =A0 =A0 iput(inode);
>> + =A0 =A0 clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
>> + =A0 =A0 nfs_put_client(clp);
>> =A0 =A0 =A0 module_put_and_exit(0);
>> =A0 =A0 =A0 dprintk("%s: exit status %d\n", __func__, 0);
>> =A0 =A0 =A0 return 0;
>> @@ -262,15 +307,18 @@ static int pnfs_async_return_layout(struct nfs=
_client *clp, struct inode *inode,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 .rl =3D rl,
>> =A0 =A0 =A0 };
>> =A0 =A0 =A0 struct task_struct *t;
>> - =A0 =A0 int status;
>> -
>> - =A0 =A0 /* should have returned NFS4ERR_NOMATCHING_LAYOUT... */
>> - =A0 =A0 BUG_ON(inode =3D=3D NULL);
>> + =A0 =A0 int status =3D -EAGAIN;
>>
>> =A0 =A0 =A0 dprintk("%s: -->\n", __func__);
>>
>> + =A0 =A0 /* FIXME: do not allow two concurrent layout recalls */
>> + =A0 =A0 if (test_and_set_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_stat=
e))
>> + =A0 =A0 =A0 =A0 =A0 =A0 return status;
>> +
>> =A0 =A0 =A0 init_completion(&data.started);
>> =A0 =A0 =A0 __module_get(THIS_MODULE);
>> + =A0 =A0 if (!atomic_inc_not_zero(&clp->cl_count))
>> + =A0 =A0 =A0 =A0 =A0 =A0 goto out_put_no_client;
>>
>> =A0 =A0 =A0 t =3D kthread_run(pnfs_recall_layout, &data, "%s", "pnfs=
_recall_layout");
>> =A0 =A0 =A0 if (IS_ERR(t)) {
>> @@ -284,6 +332,9 @@ static int pnfs_async_return_layout(struct nfs_c=
lient *clp, struct inode *inode,
>> =A0 =A0 =A0 wait_for_completion(&data.started);
>> =A0 =A0 =A0 return data.result;
>> =A0out_module_put:
>> + =A0 =A0 nfs_put_client(clp);
>> +out_put_no_client:
>> + =A0 =A0 clear_bit(NFS4CLNT_LAYOUT_RECALL, &clp->cl_state);
>> =A0 =A0 =A0 module_put(THIS_MODULE);
>> =A0 =A0 =A0 return status;
>> =A0}
>> @@ -294,35 +345,46 @@ __be32 pnfs_cb_layoutrecall(struct cb_pnfs_lay=
outrecallargs *args,
>> =A0 =A0 =A0 struct nfs_client *clp;
>> =A0 =A0 =A0 struct inode *inode =3D NULL;
>> =A0 =A0 =A0 __be32 res;
>> + =A0 =A0 int status;
>> =A0 =A0 =A0 unsigned int num_client =3D 0;
>>
>> =A0 =A0 =A0 dprintk("%s: -->\n", __func__);
>>
>> - =A0 =A0 res =3D htonl(NFS4ERR_INVAL);
>> - =A0 =A0 clp =3D nfs_find_client(args->cbl_addr, 4);
>> + =A0 =A0 res =3D cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
>> + =A0 =A0 clp =A0=3D nfs_find_client(args->cbl_addr, 4);
>> =A0 =A0 =A0 if (clp =3D=3D NULL) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 dprintk("%s: no client for addr %u.%u.%u=
=2E%u\n",
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 __func__, NIPQUAD(args->=
cbl_addr));
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
>> =A0 =A0 =A0 }
>>
>> - =A0 =A0 res =3D htonl(NFS4ERR_NOMATCHING_LAYOUT);
>> + =A0 =A0 res =3D cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT);
>> =A0 =A0 =A0 do {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 struct nfs_client *prev =3D clp;
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 num_client++;
>> - =A0 =A0 =A0 =A0 =A0 =A0 inode =3D nfs_layoutrecall_find_inode(clp,=
args);
>> - =A0 =A0 =A0 =A0 =A0 =A0 if (inode !=3D NULL) {
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (PNFS_LD(&NFS_I(inode)-=
>layout)->id =3D=3D
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 args->cbl_layout_t=
ype) {
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 /* Set up =
a helper thread to actually
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0* retur=
n the delegation */
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 res =3D pn=
fs_async_return_layout(clp, inode, args);
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (res !=3D=
0)
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 res =3D htonl(NFS4ERR_RESOURCE);
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 break;
>> + =A0 =A0 =A0 =A0 =A0 =A0 /* the callback must come from the MDS per=
sonality */
>> + =A0 =A0 =A0 =A0 =A0 =A0 if (!(clp->cl_exchange_flags & EXCHGID4_FL=
AG_USE_PNFS_MDS))
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto loop;
>> + =A0 =A0 =A0 =A0 =A0 =A0 if (args->cbl_recall_type =3D=3D RETURN_FI=
LE) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 inode =3D nfs_layoutrecall=
_find_inode(clp, args);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (inode !=3D NULL) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 status =3D=
pnfs_async_return_layout(clp, inode,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 args);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (status=
=3D=3D -EAGAIN)
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 res =3D cpu_to_be32(NFS4ERR_DELAY);
>
> what about other errors?
>
pnfs_async_return_layout does not send any RPCs, so it's either EAGAIN
or an "out of memory" error, in which case I guess it's better to
return NFS4ERR_RESOURCE than NFS4ERR_NOMATCHING_LAYOUT. So you are
right, I'll send a fix.
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 iput(inode=
);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> + =A0 =A0 =A0 =A0 =A0 =A0 } else { /* _ALL or _FSID */
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 /* we need the inode to ge=
t the nfs_server struct */
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 inode =3D nfs_layoutrecall=
_find_inode(clp, args);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (!inode)
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto loop;
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 status =3D pnfs_async_retu=
rn_layout(clp, inode, args);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (status =3D=3D -EAGAIN)
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 res =3D cp=
u_to_be32(NFS4ERR_DELAY);
>
> ditto
>
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 iput(inode);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> +loop:
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 clp =3D nfs_find_client_next(prev);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 nfs_put_client(prev);
>> =A0 =A0 =A0 } while (clp !=3D NULL);
>> diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
>> index ebc9b3b..2f7974b 100644
>> --- a/fs/nfs/nfs4_fs.h
>> +++ b/fs/nfs/nfs4_fs.h
>> @@ -47,6 +47,7 @@ enum nfs4_client_state {
>> =A0 =A0 =A0 NFS4CLNT_SESSION_RESET,
>> =A0 =A0 =A0 NFS4CLNT_SESSION_DRAINING,
>> =A0 =A0 =A0 NFS4CLNT_RECALL_SLOT,
>> + =A0 =A0 NFS4CLNT_LAYOUT_RECALL,
>> =A0};
>>
>> =A0/*
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index d0b45bf..2006926 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -709,6 +709,8 @@ return_layout(struct inode *ino, struct nfs4_pnf=
s_layout_segment *range,
>>
>> =A0 =A0 =A0 dprintk("--> %s\n", __func__);
>>
>> + =A0 =A0 BUG_ON(type !=3D RETURN_FILE);
>> +
>> =A0 =A0 =A0 lrp =3D kzalloc(sizeof(*lrp), GFP_KERNEL);
>> =A0 =A0 =A0 if (lrp =3D=3D NULL) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (lo && (type =3D=3D RETURN_FILE))
>> @@ -745,13 +747,11 @@ _pnfs_return_layout(struct inode *ino, struct =
nfs4_pnfs_layout_segment *range,
>>
>> =A0 =A0 =A0 dprintk("--> %s type %d\n", __func__, type);
>>
>> - =A0 =A0 if (range)
>> - =A0 =A0 =A0 =A0 =A0 =A0 arg =3D *range;
>> - =A0 =A0 else {
>> - =A0 =A0 =A0 =A0 =A0 =A0 arg.iomode =3D IOMODE_ANY;
>> - =A0 =A0 =A0 =A0 =A0 =A0 arg.offset =3D 0;
>> - =A0 =A0 =A0 =A0 =A0 =A0 arg.length =3D NFS4_MAX_UINT64;
>> - =A0 =A0 }
>> +
>> + =A0 =A0 arg.iomode =3D range ? range->iomode : IOMODE_ANY;
>> + =A0 =A0 arg.offset =3D 0;
>> + =A0 =A0 arg.length =3D NFS4_MAX_UINT64;
>> +
>> =A0 =A0 =A0 if (type =3D=3D RETURN_FILE) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 lo =3D get_lock_current_layout(nfsi);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (lo && !has_layout_to_return(lo, &arg=
)) {
>> @@ -760,11 +760,7 @@ _pnfs_return_layout(struct inode *ino, struct n=
fs4_pnfs_layout_segment *range,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (!lo) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 dprintk("%s: no layout s=
egments to return\n", __func__);
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 /* must send the LAYOUTRET=
URN in response to recall */
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (stateid)
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto send_=
return;
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 else
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>>
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 /* unlock w/o put rebalanced by eventual=
call to
>> @@ -773,12 +769,23 @@ _pnfs_return_layout(struct inode *ino, struct =
nfs4_pnfs_layout_segment *range,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
>>
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (pnfs_return_layout_barrier(nfsi, &ar=
g)) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (stateid) { /* callback=
*/
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 status =3D=
-EAGAIN;
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 lock_curre=
nt_layout(nfsi);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 put_unlock=
_current_layout(lo);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 dprintk("%s: waiting\n",=
__func__);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 wait_event(nfsi->lo_wait=
q,
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 !pnfs_retu=
rn_layout_barrier(nfsi, &arg));
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0!pn=
fs_return_layout_barrier(nfsi, &arg));
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>>
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (layoutcommit_needed(nfsi)) {
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (stateid && !wait) { /*=
callback */
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 dprintk("%=
s: layoutcommit pending\n", __func__);
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 status =3D=
-EAGAIN;
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out;
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 status =3D pnfs_layoutco=
mmit_inode(ino, wait);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (status) {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 dprintk(=
"%s: layoutcommit failed, status=3D%d. "
>> @@ -787,9 +794,13 @@ _pnfs_return_layout(struct inode *ino, struct n=
fs4_pnfs_layout_segment *range,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 status =3D=
0;
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> +
>> + =A0 =A0 =A0 =A0 =A0 =A0 if (stateid && wait)
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 status =3D return_layout(i=
no, &arg, stateid, type,
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 =A0 =A0 =A0 =A0lo, wait);
>> + =A0 =A0 =A0 =A0 =A0 =A0 else
>> + =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 pnfs_layout_release(lo, &a=
rg);
>> =A0 =A0 =A0 }
>> -send_return:
>> - =A0 =A0 status =3D return_layout(ino, &arg, stateid, type, lo, wai=
t);
>> =A0out:
>> =A0 =A0 =A0 dprintk("<-- %s status: %d\n", __func__, status);
>> =A0 =A0 =A0 return status;
>> @@ -1044,7 +1055,7 @@ pnfs_update_layout(struct inode *ino,
>> =A0 =A0 =A0 struct nfs4_pnfs_layout_segment arg =3D {
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 .iomode =3D iomode,
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 .offset =3D 0,
>> - =A0 =A0 =A0 =A0 =A0 =A0 .length =3D ~0
>> + =A0 =A0 =A0 =A0 =A0 =A0 .length =3D NFS4_MAX_UINT64,
>
> why do you have to ask for whole file layouts?
> Isn't it enough to always return the whole layout
> but potentially having more than one layout segment?
>
Supposedly version A will not support multiple segments. Is this what
you mean? I guarantee it by setting "minlength" equal to "length" in
Layoutget. I just wanted to enforce it here too.
-alexandros
> Benny
>
>> =A0 =A0 =A0 };
>> =A0 =A0 =A0 struct nfs_inode *nfsi =3D NFS_I(ino);
>> =A0 =A0 =A0 struct pnfs_layout_type *lo;
>> @@ -1063,31 +1074,12 @@ pnfs_update_layout(struct inode *ino,
>> =A0 =A0 =A0 /* Check to see if the layout for the given range alread=
y exists */
>> =A0 =A0 =A0 lseg =3D pnfs_has_layout(lo, &arg, take_ref, !take_ref);
>> =A0 =A0 =A0 if (lseg && !lseg->valid) {
>> - =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi);
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (take_ref)
>> =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 put_lseg(lseg);
>> - =A0 =A0 =A0 =A0 =A0 =A0 for (;;) {
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 prepare_to_wait(&nfsi->lo_=
waitq, &__wait,
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0=
=A0 TASK_KILLABLE);
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 lock_current_layout(nfsi);
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 lseg =3D pnfs_has_layout(l=
o, &arg, take_ref, !take_ref);
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (!lseg || lseg->valid)
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 break;
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 dprintk("%s: invalid lseg =
%p ref %d\n", __func__,
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 lseg, atom=
ic_read(&lseg->kref.refcount)-1);
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (take_ref)
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 put_lseg(l=
seg);
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 if (signal_pending(current=
)) {
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 lseg =3D N=
ULL;
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 result =3D=
-ERESTARTSYS;
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 break;
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 }
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 unlock_current_layout(nfsi=
);
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 schedule();
>> - =A0 =A0 =A0 =A0 =A0 =A0 }
>> - =A0 =A0 =A0 =A0 =A0 =A0 finish_wait(&nfsi->lo_waitq, &__wait);
>> - =A0 =A0 =A0 =A0 =A0 =A0 if (result)
>> - =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0 goto out_put;
>> +
>> + =A0 =A0 =A0 =A0 =A0 =A0 /* someone is cleaning the layout */
>> + =A0 =A0 =A0 =A0 =A0 =A0 result =3D -EAGAIN;
>> + =A0 =A0 =A0 =A0 =A0 =A0 goto out_put;
>> =A0 =A0 =A0 }
>>
>> =A0 =A0 =A0 if (lseg) {
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH 7/8] pnfs-submit: forgetful client (layouts)
2010-06-08 7:51 ` Alexandros Batsakis
@ 2010-06-08 9:15 ` Benny Halevy
0 siblings, 0 replies; 22+ messages in thread
From: Benny Halevy @ 2010-06-08 9:15 UTC (permalink / raw)
To: Alexandros Batsakis; +Cc: Alexandros Batsakis, linux-nfs
On 2010-06-08 10:51, Alexandros Batsakis wrote:
> On Tue, Jun 8, 2010 at 12:23 AM, Benny Halevy <bhalevy@panasas.com> wrote:
>> On Jun. 08, 2010, 0:11 +0300, Alexandros Batsakis <batsakis@netapp.com> wrote:
>>> + status = pnfs_async_return_layout(clp, inode,
>>> + args);
>>> + if (status == -EAGAIN)
>>> + res = cpu_to_be32(NFS4ERR_DELAY);
>>
>> what about other errors?
>>
>
> pnfs_async_return_layout does not send any RPCs, so it's either EAGAIN
> or an "out of memory" error, in which case I guess it's better to
> return NFS4ERR_RESOURCE than NFS4ERR_NOMATCHING_LAYOUT. So you are
> right, I'll send a fix.
>
Note that NFS4ERR_RESOURCE is no longer a valid status
in NFSv4.1; NFS4ERR_DELAY might be a better choice for -ENOMEM.
NFS4ERR_NOMATCHING_LAYOUT should never be returned unless
you're sure you have no layout to return or you had one
and forgot everything about it.
For any other error I'd BUG() and return NFS4ERR_DELAY.
>>> + iput(inode);
>>> }
>>> + } else { /* _ALL or _FSID */
>>> + /* we need the inode to get the nfs_server struct */
>>> + inode = nfs_layoutrecall_find_inode(clp, args);
>>> + if (!inode)
>>> + goto loop;
>>> + status = pnfs_async_return_layout(clp, inode, args);
>>> + if (status == -EAGAIN)
>>> + res = cpu_to_be32(NFS4ERR_DELAY);
>>
>> ditto
>>
<snip>
>>> @@ -1044,7 +1055,7 @@ pnfs_update_layout(struct inode *ino,
>>> struct nfs4_pnfs_layout_segment arg = {
>>> .iomode = iomode,
>>> .offset = 0,
>>> - .length = ~0
>>> + .length = NFS4_MAX_UINT64,
sorry, this is just cosmetic (my bad)
Benny
^ permalink raw reply [flat|nested] 22+ messages in thread
end of thread, other threads:[~2010-06-08 9:15 UTC | newest]
Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-06-07 21:11 [PATCH 0/8] forgetful client v2 Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 1/8] pnfs-submit: clean struct nfs_inode Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 2/8] pnfs-submit: clean locking infrastructure Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 3/8] pnfs-submit: remove lgetcount, lretcount Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 4/8] pnfs-submit: change stateid to be a union Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 5/8] pnfs-submit: request whole-file layouts only Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 6/8] pnfs-submit: change layout list to be similar to other state lists Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 7/8] pnfs-submit: forgetful client (layouts) Alexandros Batsakis
2010-06-07 21:11 ` [PATCH 8/8] pnfs-submit: support for CB_RECALL_ANY (layouts) Alexandros Batsakis
2010-06-08 7:23 ` [PATCH 7/8] pnfs-submit: forgetful client (layouts) Benny Halevy
2010-06-08 7:51 ` Alexandros Batsakis
2010-06-08 9:15 ` Benny Halevy
2010-06-08 7:14 ` [PATCH 5/8] pnfs-submit: request whole-file layouts only Benny Halevy
2010-06-08 7:33 ` Alexandros Batsakis
2010-06-08 7:30 ` [PATCH 2/8] pnfs-submit: clean locking infrastructure Christoph Hellwig
2010-06-08 7:34 ` Benny Halevy
-- strict thread matches above, loose matches on Subject: below --
2010-05-17 17:56 [PATCH 0/8] pnfs-submit: Forgetful cleint and some layout cleanups Alexandros Batsakis
2010-05-17 17:56 ` [PATCH 1/8] pnfs-submit: clean struct nfs_inode Alexandros Batsakis
2010-05-17 17:56 ` [PATCH 2/8] pnfs-submit: clean locking infrastructure Alexandros Batsakis
2010-05-26 8:28 ` Benny Halevy
2010-05-28 17:27 ` Fred Isaman
[not found] ` <AANLkTinsHI0fHYdpUlq-MsMX0BmsLGvdAbrKx7M5ydjw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-05-28 18:27 ` Alexandros Batsakis
2010-05-05 17:00 [PATCH 0/8] pnfs-submit: forgetful client v2 Alexandros Batsakis
2010-05-05 17:00 ` [PATCH 1/8] pnfs-submit: clean struct nfs_inode Alexandros Batsakis
2010-05-05 17:00 ` [PATCH 2/8] pnfs-submit: clean locking infrastructure Alexandros Batsakis
2010-06-07 14:34 ` Fred Isaman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).