* [PATCH v3 1/1] PNFS fix dangling DS mount
@ 2017-06-30 19:52 Olga Kornievskaia
2017-07-05 13:52 ` Steve Dickson
2017-07-20 19:56 ` Trond Myklebust
0 siblings, 2 replies; 3+ messages in thread
From: Olga Kornievskaia @ 2017-06-30 19:52 UTC (permalink / raw)
To: linux-fsdevel, linux-nfs
There is a regression by commit 8d40b0f14846 ("NFS filelayout:call
GETDEVICEINFO after pnfs_layout_process completes"). It leaves the
DS mount dangling.
Previously, filelayout_alloc_sec() would call filelayout_check_layout()
which would call nfs4_find_get_deviceid which ups the count on the
device_id. It's only called once and it's matched by the
filelayout_free_lseg() that calls nfs4_fl_put_deviceid().
After that patch, each read/write ends up calling nfs4_find_get_deviceid
and there is no balance for that. Instead, do nfs4_fl_put_deviceid()
in the filelayout's .pg_cleanup and remove it from filelayout_free_lseg.
But we still need a reference to hold over the lifetime of the segment.
For every new lseg that's created we need to take a reference on deviceid
that uses it. It will be released in the "free_lseg" routine.
Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
---
fs/nfs/filelayout/filelayout.c | 21 +++++++++++++++++----
fs/nfs/flexfilelayout/flexfilelayout.c | 9 ++++++---
fs/nfs/pnfs.c | 13 ++++++++++---
fs/nfs/pnfs.h | 3 ++-
4 files changed, 35 insertions(+), 11 deletions(-)
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 1cf85d6..86d694e 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -909,9 +909,10 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
struct pnfs_layout_hdr *lo;
struct nfs4_filelayout_segment *fl;
int status;
+ bool new_layout = false;
lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode,
- gfp_flags);
+ gfp_flags, &new_layout);
if (!lseg)
lseg = ERR_PTR(-ENOMEM);
if (IS_ERR(lseg))
@@ -924,7 +925,8 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
if (status) {
pnfs_put_lseg(lseg);
lseg = ERR_PTR(status);
- }
+ } else if (new_layout)
+ nfs4_get_deviceid(&fl->dsaddr->id_node);
out:
return lseg;
}
@@ -991,18 +993,29 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
nfs_pageio_reset_write_mds(pgio);
}
+static void filelayout_pg_cleanup(struct nfs_pageio_descriptor *desc)
+{
+ if (desc->pg_lseg) {
+ struct nfs4_filelayout_segment *fl =
+ FILELAYOUT_LSEG(desc->pg_lseg);
+
+ nfs4_fl_put_deviceid(fl->dsaddr);
+ }
+ pnfs_generic_pg_cleanup(desc);
+}
+
static const struct nfs_pageio_ops filelayout_pg_read_ops = {
.pg_init = filelayout_pg_init_read,
.pg_test = filelayout_pg_test,
.pg_doio = pnfs_generic_pg_readpages,
- .pg_cleanup = pnfs_generic_pg_cleanup,
+ .pg_cleanup = filelayout_pg_cleanup,
};
static const struct nfs_pageio_ops filelayout_pg_write_ops = {
.pg_init = filelayout_pg_init_write,
.pg_test = filelayout_pg_test,
.pg_doio = pnfs_generic_pg_writepages,
- .pg_cleanup = pnfs_generic_pg_cleanup,
+ .pg_cleanup = filelayout_pg_cleanup,
};
static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 23542dc..53a4a19 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -820,7 +820,8 @@ static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
NFS4_MAX_UINT64,
IOMODE_READ,
strict_iomode,
- GFP_KERNEL);
+ GFP_KERNEL,
+ NULL);
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -904,7 +905,8 @@ static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
NFS4_MAX_UINT64,
IOMODE_RW,
false,
- GFP_NOFS);
+ GFP_NOFS,
+ NULL);
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -960,7 +962,8 @@ static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
NFS4_MAX_UINT64,
IOMODE_RW,
false,
- GFP_NOFS);
+ GFP_NOFS,
+ NULL);
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index c383d09..fb011c6 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1698,7 +1698,8 @@ struct pnfs_layout_segment *
u64 count,
enum pnfs_iomode iomode,
bool strict_iomode,
- gfp_t gfp_flags)
+ gfp_t gfp_flags,
+ bool *new)
{
struct pnfs_layout_range arg = {
.iomode = iomode,
@@ -1764,8 +1765,12 @@ struct pnfs_layout_segment *
if (lseg) {
trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
PNFS_UPDATE_LAYOUT_FOUND_CACHED);
+ if (new)
+ *new = false;
goto out_unlock;
}
+ if (new)
+ *new = true;
if (!nfs4_valid_open_stateid(ctx->state)) {
trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
@@ -2126,7 +2131,8 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
rd_size,
IOMODE_READ,
false,
- GFP_KERNEL);
+ GFP_KERNEL,
+ NULL);
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
@@ -2153,7 +2159,8 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
wb_size,
IOMODE_RW,
false,
- GFP_NOFS);
+ GFP_NOFS,
+ NULL);
if (IS_ERR(pgio->pg_lseg)) {
pgio->pg_error = PTR_ERR(pgio->pg_lseg);
pgio->pg_lseg = NULL;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 99731e3..978fab0 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -291,7 +291,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
u64 count,
enum pnfs_iomode iomode,
bool strict_iomode,
- gfp_t gfp_flags);
+ gfp_t gfp_flags,
+ bool *new);
void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
const nfs4_stateid *arg_stateid,
const struct pnfs_layout_range *range,
--
1.8.3.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH v3 1/1] PNFS fix dangling DS mount
2017-06-30 19:52 [PATCH v3 1/1] PNFS fix dangling DS mount Olga Kornievskaia
@ 2017-07-05 13:52 ` Steve Dickson
2017-07-20 19:56 ` Trond Myklebust
1 sibling, 0 replies; 3+ messages in thread
From: Steve Dickson @ 2017-07-05 13:52 UTC (permalink / raw)
To: Olga Kornievskaia, linux-fsdevel, linux-nfs
On 06/30/2017 03:52 PM, Olga Kornievskaia wrote:
> There is a regression by commit 8d40b0f14846 ("NFS filelayout:call
> GETDEVICEINFO after pnfs_layout_process completes"). It leaves the
> DS mount dangling.
>
> Previously, filelayout_alloc_sec() would call filelayout_check_layout()
> which would call nfs4_find_get_deviceid which ups the count on the
> device_id. It's only called once and it's matched by the
> filelayout_free_lseg() that calls nfs4_fl_put_deviceid().
>
> After that patch, each read/write ends up calling nfs4_find_get_deviceid
> and there is no balance for that. Instead, do nfs4_fl_put_deviceid()
> in the filelayout's .pg_cleanup and remove it from filelayout_free_lseg.
>
> But we still need a reference to hold over the lifetime of the segment.
> For every new lseg that's created we need to take a reference on deviceid
> that uses it. It will be released in the "free_lseg" routine.
>
> Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Tested-by: Steve Dickson <steved@redhat.com>
steved.
> ---
> fs/nfs/filelayout/filelayout.c | 21 +++++++++++++++++----
> fs/nfs/flexfilelayout/flexfilelayout.c | 9 ++++++---
> fs/nfs/pnfs.c | 13 ++++++++++---
> fs/nfs/pnfs.h | 3 ++-
> 4 files changed, 35 insertions(+), 11 deletions(-)
>
> diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
> index 1cf85d6..86d694e 100644
> --- a/fs/nfs/filelayout/filelayout.c
> +++ b/fs/nfs/filelayout/filelayout.c
> @@ -909,9 +909,10 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
> struct pnfs_layout_hdr *lo;
> struct nfs4_filelayout_segment *fl;
> int status;
> + bool new_layout = false;
>
> lseg = pnfs_update_layout(ino, ctx, pos, count, iomode, strict_iomode,
> - gfp_flags);
> + gfp_flags, &new_layout);
> if (!lseg)
> lseg = ERR_PTR(-ENOMEM);
> if (IS_ERR(lseg))
> @@ -924,7 +925,8 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
> if (status) {
> pnfs_put_lseg(lseg);
> lseg = ERR_PTR(status);
> - }
> + } else if (new_layout)
> + nfs4_get_deviceid(&fl->dsaddr->id_node);
> out:
> return lseg;
> }
> @@ -991,18 +993,29 @@ static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
> nfs_pageio_reset_write_mds(pgio);
> }
>
> +static void filelayout_pg_cleanup(struct nfs_pageio_descriptor *desc)
> +{
> + if (desc->pg_lseg) {
> + struct nfs4_filelayout_segment *fl =
> + FILELAYOUT_LSEG(desc->pg_lseg);
> +
> + nfs4_fl_put_deviceid(fl->dsaddr);
> + }
> + pnfs_generic_pg_cleanup(desc);
> +}
> +
> static const struct nfs_pageio_ops filelayout_pg_read_ops = {
> .pg_init = filelayout_pg_init_read,
> .pg_test = filelayout_pg_test,
> .pg_doio = pnfs_generic_pg_readpages,
> - .pg_cleanup = pnfs_generic_pg_cleanup,
> + .pg_cleanup = filelayout_pg_cleanup,
> };
>
> static const struct nfs_pageio_ops filelayout_pg_write_ops = {
> .pg_init = filelayout_pg_init_write,
> .pg_test = filelayout_pg_test,
> .pg_doio = pnfs_generic_pg_writepages,
> - .pg_cleanup = pnfs_generic_pg_cleanup,
> + .pg_cleanup = filelayout_pg_cleanup,
> };
>
> static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
> diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
> index 23542dc..53a4a19 100644
> --- a/fs/nfs/flexfilelayout/flexfilelayout.c
> +++ b/fs/nfs/flexfilelayout/flexfilelayout.c
> @@ -820,7 +820,8 @@ static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
> NFS4_MAX_UINT64,
> IOMODE_READ,
> strict_iomode,
> - GFP_KERNEL);
> + GFP_KERNEL,
> + NULL);
> if (IS_ERR(pgio->pg_lseg)) {
> pgio->pg_error = PTR_ERR(pgio->pg_lseg);
> pgio->pg_lseg = NULL;
> @@ -904,7 +905,8 @@ static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
> NFS4_MAX_UINT64,
> IOMODE_RW,
> false,
> - GFP_NOFS);
> + GFP_NOFS,
> + NULL);
> if (IS_ERR(pgio->pg_lseg)) {
> pgio->pg_error = PTR_ERR(pgio->pg_lseg);
> pgio->pg_lseg = NULL;
> @@ -960,7 +962,8 @@ static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout)
> NFS4_MAX_UINT64,
> IOMODE_RW,
> false,
> - GFP_NOFS);
> + GFP_NOFS,
> + NULL);
> if (IS_ERR(pgio->pg_lseg)) {
> pgio->pg_error = PTR_ERR(pgio->pg_lseg);
> pgio->pg_lseg = NULL;
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index c383d09..fb011c6 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -1698,7 +1698,8 @@ struct pnfs_layout_segment *
> u64 count,
> enum pnfs_iomode iomode,
> bool strict_iomode,
> - gfp_t gfp_flags)
> + gfp_t gfp_flags,
> + bool *new)
> {
> struct pnfs_layout_range arg = {
> .iomode = iomode,
> @@ -1764,8 +1765,12 @@ struct pnfs_layout_segment *
> if (lseg) {
> trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
> PNFS_UPDATE_LAYOUT_FOUND_CACHED);
> + if (new)
> + *new = false;
> goto out_unlock;
> }
> + if (new)
> + *new = true;
>
> if (!nfs4_valid_open_stateid(ctx->state)) {
> trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
> @@ -2126,7 +2131,8 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
> rd_size,
> IOMODE_READ,
> false,
> - GFP_KERNEL);
> + GFP_KERNEL,
> + NULL);
> if (IS_ERR(pgio->pg_lseg)) {
> pgio->pg_error = PTR_ERR(pgio->pg_lseg);
> pgio->pg_lseg = NULL;
> @@ -2153,7 +2159,8 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
> wb_size,
> IOMODE_RW,
> false,
> - GFP_NOFS);
> + GFP_NOFS,
> + NULL);
> if (IS_ERR(pgio->pg_lseg)) {
> pgio->pg_error = PTR_ERR(pgio->pg_lseg);
> pgio->pg_lseg = NULL;
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 99731e3..978fab0 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -291,7 +291,8 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
> u64 count,
> enum pnfs_iomode iomode,
> bool strict_iomode,
> - gfp_t gfp_flags);
> + gfp_t gfp_flags,
> + bool *new);
> void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo,
> const nfs4_stateid *arg_stateid,
> const struct pnfs_layout_range *range,
>
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH v3 1/1] PNFS fix dangling DS mount
2017-06-30 19:52 [PATCH v3 1/1] PNFS fix dangling DS mount Olga Kornievskaia
2017-07-05 13:52 ` Steve Dickson
@ 2017-07-20 19:56 ` Trond Myklebust
1 sibling, 0 replies; 3+ messages in thread
From: Trond Myklebust @ 2017-07-20 19:56 UTC (permalink / raw)
To: linux-nfs@vger.kernel.org, kolga@netapp.com,
linux-fsdevel@vger.kernel.org
Hi Olga,
Apologies for missing this patch. It was hiding in my 'linux-fsdevel'
mailbox, so I didn't recognise it as a NFS patch.
On Fri, 2017-06-30 at 15:52 -0400, Olga Kornievskaia wrote:
> There is a regression by commit 8d40b0f14846 ("NFS filelayout:call
> GETDEVICEINFO after pnfs_layout_process completes"). It leaves the
> DS mount dangling.
>
> Previously, filelayout_alloc_sec() would call
> filelayout_check_layout()
> which would call nfs4_find_get_deviceid which ups the count on the
> device_id. It's only called once and it's matched by the
> filelayout_free_lseg() that calls nfs4_fl_put_deviceid().
>
> After that patch, each read/write ends up calling
> nfs4_find_get_deviceid
> and there is no balance for that. Instead, do nfs4_fl_put_deviceid()
> in the filelayout's .pg_cleanup and remove it from
> filelayout_free_lseg.
>
> But we still need a reference to hold over the lifetime of the
> segment.
> For every new lseg that's created we need to take a reference on
> deviceid
> that uses it. It will be released in the "free_lseg" routine.
This is what I'm not understanding. If you have a reference in the
layout segment, then why do you need to call nfs4_find_get_deviceid()
in the read/write code?
Isn't it sufficient to change the "pg_init" calls to check whether or
not the struct nfs4_filelayout_segment has set a value for dsaddr (that
needs to be done with care to avoid races - cmpxchg() is your friend),
and then rely on that reference being set for the remainder of the
layout segment lifetime?
--
Trond Myklebust
Linux NFS client maintainer, PrimaryData
trond.myklebust@primarydata.com
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2017-07-20 19:56 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-06-30 19:52 [PATCH v3 1/1] PNFS fix dangling DS mount Olga Kornievskaia
2017-07-05 13:52 ` Steve Dickson
2017-07-20 19:56 ` Trond Myklebust
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).