All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCHSET 0/3] pnfs-obj: Fixes left-out from 3.5 for the 3.6 merge window
@ 2012-08-02 12:32 Boaz Harrosh
  2012-08-02 12:34 ` [PATCH 1/3] NFS41: add pg_layout_private to nfs_pageio_descriptor Boaz Harrosh
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Boaz Harrosh @ 2012-08-02 12:32 UTC (permalink / raw)
  To: Trond Myklebust, NFS list, open-osd; +Cc: Peng Tao

Hi Trond

These fixes were meant for the 3.5 kernel; sorry, I forgot about them. In any
case they should be pushed as 3.6 fixes.
(Should we pursue stable@ for them?)

This is a re-post, except the 3rd one which is a new BUG fix.

list of patches:
[PATCH 1/3] NFS41: add pg_layout_private to nfs_pageio_descriptor
[PATCH 2/3] pnfs-obj: Better IO pattern in case of unaligned offset
[PATCH 3/3] pnfs: Don't BUG on info received from Server

Thanks
Boaz

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 1/3] NFS41: add pg_layout_private to nfs_pageio_descriptor
  2012-08-02 12:32 [PATCHSET 0/3] pnfs-obj: Fixes left-out from 3.5 for the 3.6 merge window Boaz Harrosh
@ 2012-08-02 12:34 ` Boaz Harrosh
  2012-08-02 12:36 ` [PATCH 1/3 repost] " Boaz Harrosh
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Boaz Harrosh @ 2012-08-02 12:34 UTC (permalink / raw)
  To: Trond Myklebust, NFS list, open-osd; +Cc: Peng Tao


To allow layout driver to pass private information around
pg_init/pg_doio.

Signed-off-by: Peng Tao <tao.peng@emc.com>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/nfs/pagelist.c        | 2 ++
 include/linux/nfs_page.h | 1 +
 include/linux/nfs_xdr.h  | 1 +
 3 files changed, 4 insertions(+)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index aed913c..342ca5e 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 	hdr->io_start = req_offset(hdr->req);
 	hdr->good_bytes = desc->pg_count;
 	hdr->dreq = desc->pg_dreq;
+	hdr->layout_private = desc->pg_layout_private;
 	hdr->release = release;
 	hdr->completion_ops = desc->pg_completion_ops;
 	if (hdr->completion_ops->init_hdr)
@@ -267,6 +268,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
 	desc->pg_dreq = NULL;
+	desc->pg_layout_private = NULL;
 }
 
 /**
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 88d166b..63093b1 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -69,6 +69,7 @@ struct nfs_pageio_descriptor {
 	const struct nfs_pgio_completion_ops *pg_completion_ops;
 	struct pnfs_layout_segment *pg_lseg;
 	struct nfs_direct_req	*pg_dreq;
+	void			*pg_layout_private;
 };
 
 #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 8aadd90..58023cd 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1244,6 +1244,7 @@ struct nfs_pgio_header {
 	void (*release) (struct nfs_pgio_header *hdr);
 	const struct nfs_pgio_completion_ops *completion_ops;
 	struct nfs_direct_req	*dreq;
+	void			*layout_private;
 	spinlock_t		lock;
 	/* fields protected by lock */
 	int			pnfs_error;
-- 
1.7.10.2.677.gb6bc67f



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 1/3 repost] NFS41: add pg_layout_private to nfs_pageio_descriptor
  2012-08-02 12:32 [PATCHSET 0/3] pnfs-obj: Fixes left-out from 3.5 for the 3.6 merge window Boaz Harrosh
  2012-08-02 12:34 ` [PATCH 1/3] NFS41: add pg_layout_private to nfs_pageio_descriptor Boaz Harrosh
@ 2012-08-02 12:36 ` Boaz Harrosh
  2012-08-02 13:06   ` Boaz Harrosh
  2012-08-02 12:38 ` [PATCH 2/3] pnfs-obj: Better IO pattern in case of unaligned offset Boaz Harrosh
  2012-08-02 12:40 ` [PATCH 3/3] pnfs: Don't BUG on info received from Server Boaz Harrosh
  3 siblings, 1 reply; 6+ messages in thread
From: Boaz Harrosh @ 2012-08-02 12:36 UTC (permalink / raw)
  To: Trond Myklebust, NFS list, open-osd; +Cc: Peng Tao

From: Peng Tao <bergwolf@gmail.com>

To allow layout driver to pass private information around
pg_init/pg_doio.

Signed-off-by: Peng Tao <tao.peng@emc.com>
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/nfs/pagelist.c        | 2 ++
 include/linux/nfs_page.h | 1 +
 include/linux/nfs_xdr.h  | 1 +
 3 files changed, 4 insertions(+)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index aed913c..342ca5e 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
 	hdr->io_start = req_offset(hdr->req);
 	hdr->good_bytes = desc->pg_count;
 	hdr->dreq = desc->pg_dreq;
+	hdr->layout_private = desc->pg_layout_private;
 	hdr->release = release;
 	hdr->completion_ops = desc->pg_completion_ops;
 	if (hdr->completion_ops->init_hdr)
@@ -267,6 +268,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
 	desc->pg_error = 0;
 	desc->pg_lseg = NULL;
 	desc->pg_dreq = NULL;
+	desc->pg_layout_private = NULL;
 }
 
 /**
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 88d166b..63093b1 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -69,6 +69,7 @@ struct nfs_pageio_descriptor {
 	const struct nfs_pgio_completion_ops *pg_completion_ops;
 	struct pnfs_layout_segment *pg_lseg;
 	struct nfs_direct_req	*pg_dreq;
+	void			*pg_layout_private;
 };
 
 #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 8aadd90..58023cd 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1244,6 +1244,7 @@ struct nfs_pgio_header {
 	void (*release) (struct nfs_pgio_header *hdr);
 	const struct nfs_pgio_completion_ops *completion_ops;
 	struct nfs_direct_req	*dreq;
+	void			*layout_private;
 	spinlock_t		lock;
 	/* fields protected by lock */
 	int			pnfs_error;
-- 
1.7.10.2.677.gb6bc67f



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 2/3] pnfs-obj: Better IO pattern in case of unaligned offset
  2012-08-02 12:32 [PATCHSET 0/3] pnfs-obj: Fixes left-out from 3.5 for the 3.6 merge window Boaz Harrosh
  2012-08-02 12:34 ` [PATCH 1/3] NFS41: add pg_layout_private to nfs_pageio_descriptor Boaz Harrosh
  2012-08-02 12:36 ` [PATCH 1/3 repost] " Boaz Harrosh
@ 2012-08-02 12:38 ` Boaz Harrosh
  2012-08-02 12:40 ` [PATCH 3/3] pnfs: Don't BUG on info received from Server Boaz Harrosh
  3 siblings, 0 replies; 6+ messages in thread
From: Boaz Harrosh @ 2012-08-02 12:38 UTC (permalink / raw)
  To: Trond Myklebust, NFS list, open-osd; +Cc: Peng Tao


Depending on layout and ARCH, ORE has some limits on max IO sizes,
which are communicated in (what else) ore_layout->max_io_length,
which is always stripe aligned.
This was used as the pg_test boundary for splitting and starting
a new IO.

But in the case of a long IO where the start offset is not aligned
what would happen is that both end of IO[N] and start of IO[N+1]
would be unaligned, causing each IO boundary parity unit to be
calculated and written twice.

So what we do in this patch is split the very start of an unaligned
IO, up to a stripe boundary, and then the following IOs can continue
fully aligned until the end.

We might be sacrificing the case where the full unaligned IO would
fit within a single max_io_length, but the sacrifice is well worth
the elimination of the double calculation and double IO of parity units.
Actually the sacrifice is marginal and almost unmeasurable.

TODO:
	If we know the total expected linear segment that will
	be received, at pg_init, we could use that information
	in many places:
	1. blocks-layout get_layout write segment size
	2. Better mds-threshold
	3. In above situation for a better clean split

	I will do this in a future submission.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/nfs/objlayout/objio_osd.c | 55 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 52 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index f50d3e8..ea6d111 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -570,17 +570,66 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
 		return false;
 
 	return pgio->pg_count + req->wb_bytes <=
-			OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
+			(unsigned long)pgio->pg_layout_private;
+}
+
+void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+	pnfs_generic_pg_init_read(pgio, req);
+	if (unlikely(pgio->pg_lseg == NULL))
+		return; /* Not pNFS */
+
+	pgio->pg_layout_private = (void *)
+				OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
+}
+
+static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
+				   unsigned long *stripe_end)
+{
+	u32 stripe_off;
+	unsigned stripe_size;
+
+	if (layout->raid_algorithm == PNFS_OSD_RAID_0)
+		return true;
+
+	stripe_size = layout->stripe_unit *
+				(layout->group_width - layout->parity);
+
+	div_u64_rem(offset, stripe_size, &stripe_off);
+	if (!stripe_off)
+		return true;
+
+	*stripe_end = stripe_size - stripe_off;
+	return false;
+}
+
+void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+	unsigned long stripe_end = 0;
+
+	pnfs_generic_pg_init_write(pgio, req);
+	if (unlikely(pgio->pg_lseg == NULL))
+		return; /* Not pNFS */
+
+	if (req->wb_offset ||
+	    !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE,
+			       &OBJIO_LSEG(pgio->pg_lseg)->layout,
+			       &stripe_end)) {
+		pgio->pg_layout_private = (void *)stripe_end;
+	} else {
+		pgio->pg_layout_private = (void *)
+				OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
+	}
 }
 
 static const struct nfs_pageio_ops objio_pg_read_ops = {
-	.pg_init = pnfs_generic_pg_init_read,
+	.pg_init = objio_init_read,
 	.pg_test = objio_pg_test,
 	.pg_doio = pnfs_generic_pg_readpages,
 };
 
 static const struct nfs_pageio_ops objio_pg_write_ops = {
-	.pg_init = pnfs_generic_pg_init_write,
+	.pg_init = objio_init_write,
 	.pg_test = objio_pg_test,
 	.pg_doio = pnfs_generic_pg_writepages,
 };
-- 
1.7.10.2.677.gb6bc67f



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH 3/3] pnfs: Don't BUG on info received from Server
  2012-08-02 12:32 [PATCHSET 0/3] pnfs-obj: Fixes left-out from 3.5 for the 3.6 merge window Boaz Harrosh
                   ` (2 preceding siblings ...)
  2012-08-02 12:38 ` [PATCH 2/3] pnfs-obj: Better IO pattern in case of unaligned offset Boaz Harrosh
@ 2012-08-02 12:40 ` Boaz Harrosh
  3 siblings, 0 replies; 6+ messages in thread
From: Boaz Harrosh @ 2012-08-02 12:40 UTC (permalink / raw)
  To: Trond Myklebust, NFS list, open-osd; +Cc: Peng Tao


In nfs4_layoutreturn_done() there is a BUG_ON on a Server
returned member, when received in what the client thinks is
an impossible situation.

[The server returned ! lrp->res.lrs_present, but client
 still had more segments]

This BUG really hit me with the Linux pNFS server, but
regardless of who is at fault here, server or client, the client
must not crash, even with a buggy server.

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
---
 fs/nfs/nfs4proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 15fc7e4..e25b686 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6249,7 +6249,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
 		if (lrp->res.lrs_present) {
 			pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
 		} else
-			BUG_ON(!list_empty(&lo->plh_segs));
+			WARN_ON(!list_empty(&lo->plh_segs));
 	}
 	lo->plh_block_lgets--;
 	spin_unlock(&lo->plh_inode->i_lock);
-- 
1.7.10.2.677.gb6bc67f



^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3 repost] NFS41: add pg_layout_private to nfs_pageio_descriptor
  2012-08-02 12:36 ` [PATCH 1/3 repost] " Boaz Harrosh
@ 2012-08-02 13:06   ` Boaz Harrosh
  0 siblings, 0 replies; 6+ messages in thread
From: Boaz Harrosh @ 2012-08-02 13:06 UTC (permalink / raw)
  To: Trond Myklebust, NFS list, open-osd; +Cc: Peng Tao

On 08/02/2012 03:36 PM, Boaz Harrosh wrote:

> From: Peng Tao <bergwolf@gmail.com>
> 


Trond Hi

I forgot the "From: Peng ..." line on the first post; please use this
one.

Thanks
Boaz

> To allow layout driver to pass private information around
> pg_init/pg_doio.
> 
> Signed-off-by: Peng Tao <tao.peng@emc.com>
> Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
> ---
>  fs/nfs/pagelist.c        | 2 ++
>  include/linux/nfs_page.h | 1 +
>  include/linux/nfs_xdr.h  | 1 +
>  3 files changed, 4 insertions(+)
> 
> diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
> index aed913c..342ca5e 100644
> --- a/fs/nfs/pagelist.c
> +++ b/fs/nfs/pagelist.c
> @@ -49,6 +49,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
>  	hdr->io_start = req_offset(hdr->req);
>  	hdr->good_bytes = desc->pg_count;
>  	hdr->dreq = desc->pg_dreq;
> +	hdr->layout_private = desc->pg_layout_private;
>  	hdr->release = release;
>  	hdr->completion_ops = desc->pg_completion_ops;
>  	if (hdr->completion_ops->init_hdr)
> @@ -267,6 +268,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
>  	desc->pg_error = 0;
>  	desc->pg_lseg = NULL;
>  	desc->pg_dreq = NULL;
> +	desc->pg_layout_private = NULL;
>  }
>  
>  /**
> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
> index 88d166b..63093b1 100644
> --- a/include/linux/nfs_page.h
> +++ b/include/linux/nfs_page.h
> @@ -69,6 +69,7 @@ struct nfs_pageio_descriptor {
>  	const struct nfs_pgio_completion_ops *pg_completion_ops;
>  	struct pnfs_layout_segment *pg_lseg;
>  	struct nfs_direct_req	*pg_dreq;
> +	void			*pg_layout_private;
>  };
>  
>  #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
> diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
> index 8aadd90..58023cd 100644
> --- a/include/linux/nfs_xdr.h
> +++ b/include/linux/nfs_xdr.h
> @@ -1244,6 +1244,7 @@ struct nfs_pgio_header {
>  	void (*release) (struct nfs_pgio_header *hdr);
>  	const struct nfs_pgio_completion_ops *completion_ops;
>  	struct nfs_direct_req	*dreq;
> +	void			*layout_private;
>  	spinlock_t		lock;
>  	/* fields protected by lock */
>  	int			pnfs_error;



^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2012-08-02 13:06 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-08-02 12:32 [PATCHSET 0/3] pnfs-obj: Fixes left-out from 3.5 for the 3.6 merge window Boaz Harrosh
2012-08-02 12:34 ` [PATCH 1/3] NFS41: add pg_layout_private to nfs_pageio_descriptor Boaz Harrosh
2012-08-02 12:36 ` [PATCH 1/3 repost] " Boaz Harrosh
2012-08-02 13:06   ` Boaz Harrosh
2012-08-02 12:38 ` [PATCH 2/3] pnfs-obj: Better IO pattern in case of unaligned offset Boaz Harrosh
2012-08-02 12:40 ` [PATCH 3/3] pnfs: Don't BUG on info received from Server Boaz Harrosh

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.