* [PATCH] pnfsblock: init pg_bsize properly @ 2011-08-13 1:04 Peng Tao 2011-08-16 21:05 ` Boaz Harrosh 0 siblings, 1 reply; 12+ messages in thread From: Peng Tao @ 2011-08-13 1:04 UTC (permalink / raw) To: benny; +Cc: linux-nfs, Peng Tao pg_bsize is server->wsize/rsize by default. We would want to use the lseg length. Signed-off-by: Peng Tao <peng_tao@emc.com> --- fs/nfs/blocklayout/blocklayout.c | 20 ++++++++++++++++++-- 1 files changed, 18 insertions(+), 2 deletions(-) diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 36648e1..9143e61 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -919,14 +919,30 @@ bl_clear_layoutdriver(struct nfs_server *server) return 0; } +static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + pnfs_generic_pg_init_read(pgio, req); + if (pgio->pg_lseg) + pgio->pg_bsize = pgio->pg_lseg->pls_range.length; +} + +static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + pnfs_generic_pg_init_write(pgio, req); + if (pgio->pg_lseg) + pgio->pg_bsize = pgio->pg_lseg->pls_range.length; +} + static const struct nfs_pageio_ops bl_pg_read_ops = { - .pg_init = pnfs_generic_pg_init_read, + .pg_init = bl_pg_init_read, .pg_test = pnfs_generic_pg_test, .pg_doio = pnfs_generic_pg_readpages, }; static const struct nfs_pageio_ops bl_pg_write_ops = { - .pg_init = pnfs_generic_pg_init_write, + .pg_init = bl_pg_init_write, .pg_test = pnfs_generic_pg_test, .pg_doio = pnfs_generic_pg_writepages, }; -- 1.7.1.262.g5ef3d ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH] pnfsblock: init pg_bsize properly 2011-08-13 1:04 [PATCH] pnfsblock: init pg_bsize properly Peng Tao @ 2011-08-16 21:05 ` Boaz Harrosh 2011-08-17 7:15 ` Benny Halevy 0 siblings, 1 reply; 12+ messages in thread From: Boaz Harrosh @ 2011-08-16 21:05 UTC (permalink / raw) To: Peng Tao; +Cc: benny, linux-nfs, Peng Tao On 08/12/2011 06:04 PM, Peng Tao wrote: > pg_bsize is server->wsize/rsize by default. We would want to use the lseg length. > Hi What is the problem you are trying to solve with this patch? >From what I understand the only place that actually cares about pg_bsize is nfs_generic_pg_test() which is only used in MDS read/write. In the pNFS RW, the LD and pnfs has it's own .pg_test() check that should not concern with pg_bsize (Unless for pnfs-files which does). So the idea is that pg_bsize is the maximum set by MDS server in regard to IO through MDS. And it should not be changed by client. If it is not what you see then we should fix it. But should never override MDS wsize/rsize. > Signed-off-by: Peng Tao <peng_tao@emc.com> > --- > fs/nfs/blocklayout/blocklayout.c | 20 ++++++++++++++++++-- > 1 files changed, 18 insertions(+), 2 deletions(-) > > diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c > index 36648e1..9143e61 100644 > --- a/fs/nfs/blocklayout/blocklayout.c > +++ b/fs/nfs/blocklayout/blocklayout.c > @@ -919,14 +919,30 @@ bl_clear_layoutdriver(struct nfs_server *server) > return 0; > } > > +static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio, > + struct nfs_page *req) > +{ > + pnfs_generic_pg_init_read(pgio, req); > + if (pgio->pg_lseg) > + pgio->pg_bsize = pgio->pg_lseg->pls_range.length; > +} > + > +static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio, > + struct nfs_page *req) > +{ > + pnfs_generic_pg_init_write(pgio, req); > + if (pgio->pg_lseg) > + pgio->pg_bsize = pgio->pg_lseg->pls_range.length; > +} > + > static const struct nfs_pageio_ops bl_pg_read_ops = { > - .pg_init = pnfs_generic_pg_init_read, > + .pg_init = bl_pg_init_read, > .pg_test = pnfs_generic_pg_test, I see here that you do not override .pg_test. This is your problem look at objio_osd::objio_pg_test() it checks for similar boundaries at the objects side. This is where you need to do these checks for blocks as well. > .pg_doio = pnfs_generic_pg_readpages, > }; > > static const struct nfs_pageio_ops bl_pg_write_ops = { > - .pg_init = pnfs_generic_pg_init_write, > + .pg_init = bl_pg_init_write, > .pg_test = pnfs_generic_pg_test, Same here > .pg_doio = pnfs_generic_pg_writepages, > }; Thanks Boaz ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] pnfsblock: init pg_bsize properly 2011-08-16 21:05 ` Boaz Harrosh @ 2011-08-17 7:15 ` Benny Halevy 2011-08-17 9:35 ` Peng Tao 0 siblings, 1 reply; 12+ messages in thread From: Benny Halevy @ 2011-08-17 7:15 UTC (permalink / raw) To: Boaz Harrosh; +Cc: Peng Tao, benny, linux-nfs, Peng Tao On 2011-08-17 00:05, Boaz Harrosh wrote: > On 08/12/2011 06:04 PM, Peng Tao wrote: >> pg_bsize is server->wsize/rsize by default. We would want to use the lseg length. >> > > Hi > > What is the problem you are trying to solve with this patch? > > From what I understand the only place that actually cares about > pg_bsize is nfs_generic_pg_test() which is only used in MDS > read/write. In the pNFS RW, the LD and pnfs has it's own .pg_test() > check that should not concern with pg_bsize (Unless for pnfs-files > which does). So the idea is that pg_bsize is the maximum set by > MDS server in regard to IO through MDS. And it should not be changed > by client. > > If it is not what you see then we should fix it. But should never > override MDS wsize/rsize. I second that. Benny > >> Signed-off-by: Peng Tao <peng_tao@emc.com> >> --- >> fs/nfs/blocklayout/blocklayout.c | 20 ++++++++++++++++++-- >> 1 files changed, 18 insertions(+), 2 deletions(-) >> >> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c >> index 36648e1..9143e61 100644 >> --- a/fs/nfs/blocklayout/blocklayout.c >> +++ b/fs/nfs/blocklayout/blocklayout.c >> @@ -919,14 +919,30 @@ bl_clear_layoutdriver(struct nfs_server *server) >> return 0; >> } >> >> +static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio, >> + struct nfs_page *req) >> +{ >> + pnfs_generic_pg_init_read(pgio, req); >> + if (pgio->pg_lseg) >> + pgio->pg_bsize = pgio->pg_lseg->pls_range.length; >> +} >> + >> +static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio, >> + struct nfs_page *req) >> +{ >> + pnfs_generic_pg_init_write(pgio, req); >> + if (pgio->pg_lseg) >> + pgio->pg_bsize = pgio->pg_lseg->pls_range.length; >> +} >> + >> static const struct nfs_pageio_ops bl_pg_read_ops = { >> - .pg_init = pnfs_generic_pg_init_read, >> + .pg_init = bl_pg_init_read, >> .pg_test = pnfs_generic_pg_test, > > I see here that you do not override .pg_test. This is your problem > look at objio_osd::objio_pg_test() it checks for similar boundaries > at the objects side. This is where you need to do these checks > for blocks as well. > >> .pg_doio = pnfs_generic_pg_readpages, >> }; >> >> static const struct nfs_pageio_ops bl_pg_write_ops = { >> - .pg_init = pnfs_generic_pg_init_write, >> + .pg_init = bl_pg_init_write, >> .pg_test = pnfs_generic_pg_test, > > Same here > >> .pg_doio = pnfs_generic_pg_writepages, >> }; > > Thanks > Boaz > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] pnfsblock: init pg_bsize properly 2011-08-17 7:15 ` Benny Halevy @ 2011-08-17 9:35 ` Peng Tao 2011-08-17 16:27 ` Benny Halevy 2011-08-22 23:52 ` Boaz Harrosh 0 siblings, 2 replies; 12+ messages in thread From: Peng Tao @ 2011-08-17 9:35 UTC (permalink / raw) To: Benny Halevy, Boaz Harrosh; +Cc: linux-nfs, Peng Tao Hi, Benny and Boaz, On Wed, Aug 17, 2011 at 3:15 PM, Benny Halevy <bhalevy@tonian.com> wrote: > > On 2011-08-17 00:05, Boaz Harrosh wrote: >> On 08/12/2011 06:04 PM, Peng Tao wrote: >>> pg_bsize is server->wsize/rsize by default. We would want to use the lseg length. >>> >> >> Hi >> >> What is the problem you are trying to solve with this patch? >> >> From what I understand the only place that actually cares about >> pg_bsize is nfs_generic_pg_test() which is only used in MDS >> read/write. In the pNFS RW, the LD and pnfs has it's own .pg_test() >> check that should not concern with pg_bsize (Unless for pnfs-files >> which does). So the idea is that pg_bsize is the maximum set by >> MDS server in regard to IO through MDS. And it should not be changed >> by client. >> >> If it is not what you see then we should fix it. But should never >> override MDS wsize/rsize. In pnfs_do_multiple_reads/pnfs_do_multiple_writes, data->mds_ops will be set as desc->pg_rpc_callops, which is determined in nfs_generic_flush/nfs_generic_pagein according to desc->pg_bsize. For blocklayout, we wouldn't want to set data->mds_ops to partial_read/write ops, so I write the patch to use lseg length as pg_bsize. LD can override pg_bsize in pg_init because nfs_pageio_reset_read_mds/nfs_pageio_reset_write_mds will reset it to server rsize/wsize if pnfs is not tried. Sorry that I didn't explain it clearly in the commit log... > > I second that. > > Benny > >> >>> Signed-off-by: Peng Tao <peng_tao@emc.com> >>> --- >>> fs/nfs/blocklayout/blocklayout.c | 20 ++++++++++++++++++-- >>> 1 files changed, 18 insertions(+), 2 deletions(-) >>> >>> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c >>> index 36648e1..9143e61 100644 >>> --- a/fs/nfs/blocklayout/blocklayout.c >>> +++ b/fs/nfs/blocklayout/blocklayout.c >>> @@ -919,14 +919,30 @@ bl_clear_layoutdriver(struct nfs_server *server) >>> return 0; >>> } >>> >>> +static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio, >>> + struct nfs_page *req) >>> +{ >>> + pnfs_generic_pg_init_read(pgio, req); >>> + if (pgio->pg_lseg) >>> + pgio->pg_bsize = pgio->pg_lseg->pls_range.length; >>> +} >>> + >>> +static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio, >>> + struct nfs_page *req) >>> +{ >>> + pnfs_generic_pg_init_write(pgio, req); >>> + if (pgio->pg_lseg) >>> + pgio->pg_bsize = pgio->pg_lseg->pls_range.length; >>> +} >>> + >>> static const struct nfs_pageio_ops bl_pg_read_ops = { >>> - .pg_init = pnfs_generic_pg_init_read, >>> + .pg_init = bl_pg_init_read, >>> .pg_test = pnfs_generic_pg_test, >> >> I see here that you do not override .pg_test. This is your problem >> look at objio_osd::objio_pg_test() it checks for similar boundaries >> at the objects side. This is where you need to do these checks >> for blocks as well. For blocklayout, we don't need to force each IO under a certain size. Currently (w/ and w/o this patch) the lseg coverage is the only constraint for pagelist length. So pnfs_generic_pg_test is enough for blocklayout. Thanks, Tao >> >>> .pg_doio = pnfs_generic_pg_readpages, >>> }; >>> >>> static const struct nfs_pageio_ops bl_pg_write_ops = { >>> - .pg_init = pnfs_generic_pg_init_write, >>> + .pg_init = bl_pg_init_write, >>> .pg_test = pnfs_generic_pg_test, >> >> Same here >> >>> .pg_doio = pnfs_generic_pg_writepages, >>> }; >> >> Thanks >> Boaz >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] pnfsblock: init pg_bsize properly 2011-08-17 9:35 ` Peng Tao @ 2011-08-17 16:27 ` Benny Halevy 2011-08-18 14:34 ` Peng Tao 2011-08-22 23:52 ` Boaz Harrosh 1 sibling, 1 reply; 12+ messages in thread From: Benny Halevy @ 2011-08-17 16:27 UTC (permalink / raw) To: Peng Tao; +Cc: Boaz Harrosh, linux-nfs, Peng Tao On 2011-08-17 12:35, Peng Tao wrote: > Hi, Benny and Boaz, > > On Wed, Aug 17, 2011 at 3:15 PM, Benny Halevy <bhalevy@tonian.com> wrote: >> >> On 2011-08-17 00:05, Boaz Harrosh wrote: >>> On 08/12/2011 06:04 PM, Peng Tao wrote: >>>> pg_bsize is server->wsize/rsize by default. We would want to use the lseg length. >>>> >>> >>> Hi >>> >>> What is the problem you are trying to solve with this patch? >>> >>> From what I understand the only place that actually cares about >>> pg_bsize is nfs_generic_pg_test() which is only used in MDS >>> read/write. In the pNFS RW, the LD and pnfs has it's own .pg_test() >>> check that should not concern with pg_bsize (Unless for pnfs-files >>> which does). So the idea is that pg_bsize is the maximum set by >>> MDS server in regard to IO through MDS. And it should not be changed >>> by client. >>> >>> If it is not what you see then we should fix it. But should never >>> override MDS wsize/rsize. > In pnfs_do_multiple_reads/pnfs_do_multiple_writes, data->mds_ops will > be set as desc->pg_rpc_callops, which is determined in > nfs_generic_flush/nfs_generic_pagein according to desc->pg_bsize. For > blocklayout, we wouldn't want to set data->mds_ops to > partial_read/write ops, so I write the patch to use lseg length as > pg_bsize. > > LD can override pg_bsize in pg_init because > nfs_pageio_reset_read_mds/nfs_pageio_reset_write_mds will reset it to > server rsize/wsize if pnfs is not tried. > > Sorry that I didn't explain it clearly in the commit log... > > To reflect that maybe we should also rename pg_bsize to pg_iosize. Benny >> >> I second that. >> >> Benny >> >>> >>>> Signed-off-by: Peng Tao <peng_tao@emc.com> >>>> --- >>>> fs/nfs/blocklayout/blocklayout.c | 20 ++++++++++++++++++-- >>>> 1 files changed, 18 insertions(+), 2 deletions(-) >>>> >>>> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c >>>> index 36648e1..9143e61 100644 >>>> --- a/fs/nfs/blocklayout/blocklayout.c >>>> +++ b/fs/nfs/blocklayout/blocklayout.c >>>> @@ -919,14 +919,30 @@ bl_clear_layoutdriver(struct nfs_server *server) >>>> return 0; >>>> } >>>> >>>> +static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio, >>>> + struct nfs_page *req) >>>> +{ >>>> + pnfs_generic_pg_init_read(pgio, req); >>>> + if (pgio->pg_lseg) >>>> + pgio->pg_bsize = pgio->pg_lseg->pls_range.length; >>>> +} >>>> + >>>> +static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio, >>>> + struct nfs_page *req) >>>> +{ >>>> + pnfs_generic_pg_init_write(pgio, req); >>>> + if (pgio->pg_lseg) >>>> + pgio->pg_bsize = pgio->pg_lseg->pls_range.length; >>>> +} >>>> + >>>> static const struct nfs_pageio_ops bl_pg_read_ops = { >>>> - .pg_init = pnfs_generic_pg_init_read, >>>> + .pg_init = bl_pg_init_read, >>>> .pg_test = pnfs_generic_pg_test, >>> >>> I see here that you do not override .pg_test. This is your problem >>> look at objio_osd::objio_pg_test() it checks for similar boundaries >>> at the objects side. This is where you need to do these checks >>> for blocks as well. > For blocklayout, we don't need to force each IO under a certain size. > Currently (w/ and w/o this patch) the lseg coverage is the only > constraint for pagelist length. So pnfs_generic_pg_test is enough for > blocklayout. > > Thanks, > Tao > >>> >>>> .pg_doio = pnfs_generic_pg_readpages, >>>> }; >>>> >>>> static const struct nfs_pageio_ops bl_pg_write_ops = { >>>> - .pg_init = pnfs_generic_pg_init_write, >>>> + .pg_init = bl_pg_init_write, >>>> .pg_test = pnfs_generic_pg_test, >>> >>> Same here >>> >>>> .pg_doio = pnfs_generic_pg_writepages, >>>> }; >>> >>> Thanks >>> Boaz >>> -- >>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >>> the body of a message to majordomo@vger.kernel.org >>> More majordomo info at http://vger.kernel.org/majordomo-info.html >> > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] pnfsblock: init pg_bsize properly 2011-08-17 16:27 ` Benny Halevy @ 2011-08-18 14:34 ` Peng Tao 0 siblings, 0 replies; 12+ messages in thread From: Peng Tao @ 2011-08-18 14:34 UTC (permalink / raw) To: Benny Halevy; +Cc: Boaz Harrosh, linux-nfs, Peng Tao On Thu, Aug 18, 2011 at 12:27 AM, Benny Halevy <bhalevy@tonian.com> wrote: > On 2011-08-17 12:35, Peng Tao wrote: >> Hi, Benny and Boaz, >> >> On Wed, Aug 17, 2011 at 3:15 PM, Benny Halevy <bhalevy@tonian.com> wrote: >>> >>> On 2011-08-17 00:05, Boaz Harrosh wrote: >>>> On 08/12/2011 06:04 PM, Peng Tao wrote: >>>>> pg_bsize is server->wsize/rsize by default. We would want to use the lseg length. >>>>> >>>> >>>> Hi >>>> >>>> What is the problem you are trying to solve with this patch? >>>> >>>> From what I understand the only place that actually cares about >>>> pg_bsize is nfs_generic_pg_test() which is only used in MDS >>>> read/write. In the pNFS RW, the LD and pnfs has it's own .pg_test() >>>> check that should not concern with pg_bsize (Unless for pnfs-files >>>> which does). So the idea is that pg_bsize is the maximum set by >>>> MDS server in regard to IO through MDS. And it should not be changed >>>> by client. >>>> >>>> If it is not what you see then we should fix it. But should never >>>> override MDS wsize/rsize. >> In pnfs_do_multiple_reads/pnfs_do_multiple_writes, data->mds_ops will >> be set as desc->pg_rpc_callops, which is determined in >> nfs_generic_flush/nfs_generic_pagein according to desc->pg_bsize. For >> blocklayout, we wouldn't want to set data->mds_ops to >> partial_read/write ops, so I write the patch to use lseg length as >> pg_bsize. >> >> LD can override pg_bsize in pg_init because >> nfs_pageio_reset_read_mds/nfs_pageio_reset_write_mds will reset it to >> server rsize/wsize if pnfs is not tried. >> >> Sorry that I didn't explain it clearly in the commit log... >> >> > > To reflect that maybe we should also rename pg_bsize to pg_iosize. For pnfs, in fact we are not using pg_bsize as the iosize limit. It's just that if pg_bsize is smaller than PAGE_CACHE_SIZE, partial read/write ops will be used. I'm afraid that if we rename pg_bsize to pg_iosize, people would really think it is the limit for read/write iosize, which it really isn't. :) Thanks, Tao > > Benny > >>> >>> I second that. >>> >>> Benny >>> >>>> >>>>> Signed-off-by: Peng Tao <peng_tao@emc.com> >>>>> --- >>>>> fs/nfs/blocklayout/blocklayout.c | 20 ++++++++++++++++++-- >>>>> 1 files changed, 18 insertions(+), 2 deletions(-) >>>>> >>>>> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c >>>>> index 36648e1..9143e61 100644 >>>>> --- a/fs/nfs/blocklayout/blocklayout.c >>>>> +++ b/fs/nfs/blocklayout/blocklayout.c >>>>> @@ -919,14 +919,30 @@ bl_clear_layoutdriver(struct nfs_server *server) >>>>> return 0; >>>>> } >>>>> >>>>> +static void bl_pg_init_read(struct nfs_pageio_descriptor *pgio, >>>>> + struct nfs_page *req) >>>>> +{ >>>>> + pnfs_generic_pg_init_read(pgio, req); >>>>> + if (pgio->pg_lseg) >>>>> + pgio->pg_bsize = pgio->pg_lseg->pls_range.length; >>>>> +} >>>>> + >>>>> +static void bl_pg_init_write(struct nfs_pageio_descriptor *pgio, >>>>> + struct nfs_page *req) >>>>> +{ >>>>> + pnfs_generic_pg_init_write(pgio, req); >>>>> + if (pgio->pg_lseg) >>>>> + pgio->pg_bsize = pgio->pg_lseg->pls_range.length; >>>>> +} >>>>> + >>>>> static const struct nfs_pageio_ops bl_pg_read_ops = { >>>>> - .pg_init = pnfs_generic_pg_init_read, >>>>> + .pg_init = bl_pg_init_read, >>>>> .pg_test = pnfs_generic_pg_test, >>>> >>>> I see here that you do not override .pg_test. This is your problem >>>> look at objio_osd::objio_pg_test() it checks for similar boundaries >>>> at the objects side. This is where you need to do these checks >>>> for blocks as well. >> For blocklayout, we don't need to force each IO under a certain size. >> Currently (w/ and w/o this patch) the lseg coverage is the only >> constraint for pagelist length. So pnfs_generic_pg_test is enough for >> blocklayout. >> >> Thanks, >> Tao >> >>>> >>>>> .pg_doio = pnfs_generic_pg_readpages, >>>>> }; >>>>> >>>>> static const struct nfs_pageio_ops bl_pg_write_ops = { >>>>> - .pg_init = pnfs_generic_pg_init_write, >>>>> + .pg_init = bl_pg_init_write, >>>>> .pg_test = pnfs_generic_pg_test, >>>> >>>> Same here >>>> >>>>> .pg_doio = pnfs_generic_pg_writepages, >>>>> }; >>>> >>>> Thanks >>>> Boaz >>>> -- >>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >>>> the body of a message to majordomo@vger.kernel.org >>>> More majordomo info at http://vger.kernel.org/majordomo-info.html >>> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html > -- Thanks, -Bergwolf ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] pnfsblock: init pg_bsize properly 2011-08-17 9:35 ` Peng Tao 2011-08-17 16:27 ` Benny Halevy @ 2011-08-22 23:52 ` Boaz Harrosh 2011-08-23 0:00 ` Myklebust, Trond 1 sibling, 1 reply; 12+ messages in thread From: Boaz Harrosh @ 2011-08-22 23:52 UTC (permalink / raw) To: Peng Tao; +Cc: Benny Halevy, linux-nfs, Peng Tao, Trond Myklebust, Fred Isaman On 08/17/2011 02:35 AM, Peng Tao wrote: > Hi, Benny and Boaz, > <snip> > In pnfs_do_multiple_reads/pnfs_do_multiple_writes, data->mds_ops will > be set as desc->pg_rpc_callops, which is determined in > nfs_generic_flush/nfs_generic_pagein according to desc->pg_bsize. For > blocklayout, we wouldn't want to set data->mds_ops to > partial_read/write ops, so I write the patch to use lseg length as > pg_bsize. > Do you mean in the case where MDS sets (pg_bsize < PAGE_SIZE) ? Right, that is a problem. (Theoretically, because the pNFSD-Linux server does not do that. Do you have a Server that does?) > LD can override pg_bsize in pg_init because > nfs_pageio_reset_read_mds/nfs_pageio_reset_write_mds will reset it to > server rsize/wsize if pnfs is not tried. > So if it is the "pg_bsize < PAGE_SIZE" but pNFS-IO case then I don't like your patch, at all. We should fix the generic code to behave properly, and not let LDs hack their way out. (For example what about objects and files LDs) There is a few ways you can fix the generic code. One is override the desc->pg_rpc_callops for the pNFS case to always be the same one. Or override the test for (pg_bsize < PAGE_SIZE) in the pNFS case if we have a lseg. Or some other clean way. But please don't fix it like that, inside each LD driver. [ Trond Fred One thing I do not understand about the files-layout operations. You have explained in the passed that r/wsize sent from the MDS is also the same one for each DS. So if we take an example of rsize beeing 2MB and there is a stripping of 2 DS for that layout.(Say strip_unit==rsize) Then we need to read 1/2 of that page from one DS and the 2/2 half from the second. Will current partial_read/write work if going through files-LD? ] Thanks Boaz ^ permalink raw reply [flat|nested] 12+ messages in thread
* RE: [PATCH] pnfsblock: init pg_bsize properly 2011-08-22 23:52 ` Boaz Harrosh @ 2011-08-23 0:00 ` Myklebust, Trond 2011-08-23 15:01 ` Peng Tao 0 siblings, 1 reply; 12+ messages in thread From: Myklebust, Trond @ 2011-08-23 0:00 UTC (permalink / raw) To: Boaz Harrosh, Peng Tao; +Cc: Benny Halevy, linux-nfs, Peng Tao, Isaman, Fred PiAtLS0tLU9yaWdpbmFsIE1lc3NhZ2UtLS0tLQ0KPiBGcm9tOiBCb2F6IEhhcnJvc2ggW21haWx0 bzpiaGFycm9zaEBwYW5hc2FzLmNvbV0NCj4gU2VudDogTW9uZGF5LCBBdWd1c3QgMjIsIDIwMTEg Nzo1MiBQTQ0KPiBUbzogUGVuZyBUYW8NCj4gQ2M6IEJlbm55IEhhbGV2eTsgbGludXgtbmZzQHZn ZXIua2VybmVsLm9yZzsgUGVuZyBUYW87IE15a2xlYnVzdCwNCj4gVHJvbmQ7IElzYW1hbiwgRnJl ZA0KPiBTdWJqZWN0OiBSZTogW1BBVENIXSBwbmZzYmxvY2s6IGluaXQgcGdfYnNpemUgcHJvcGVy bHkNCj4gDQo+IE9uIDA4LzE3LzIwMTEgMDI6MzUgQU0sIFBlbmcgVGFvIHdyb3RlOg0KPiA+IEhp LCBCZW5ueSBhbmQgQm9heiwNCj4gPg0KPiA8c25pcD4NCj4gDQo+ID4gSW4gcG5mc19kb19tdWx0 aXBsZV9yZWFkcy9wbmZzX2RvX211bHRpcGxlX3dyaXRlcywgZGF0YS0+bWRzX29wcyB3aWxsDQo+ ID4gYmUgc2V0IGFzIGRlc2MtPnBnX3JwY19jYWxsb3BzLCB3aGljaCBpcyBkZXRlcm1pbmVkIGlu DQo+ID4gbmZzX2dlbmVyaWNfZmx1c2gvbmZzX2dlbmVyaWNfcGFnZWluIGFjY29yZGluZyB0byBk ZXNjLT5wZ19ic2l6ZS4gRm9yDQo+ID4gYmxvY2tsYXlvdXQsIHdlIHdvdWxkbid0IHdhbnQgdG8g c2V0IGRhdGEtPm1kc19vcHMgdG8NCj4gPiBwYXJ0aWFsX3JlYWQvd3JpdGUgb3BzLCBzbyBJIHdy aXRlIHRoZSBwYXRjaCB0byB1c2UgbHNlZyBsZW5ndGggYXMNCj4gPiBwZ19ic2l6ZS4NCj4gPg0K PiANCj4gRG8geW91IG1lYW4gaW4gdGhlIGNhc2Ugd2hlcmUgTURTIHNldHMgKHBnX2JzaXplIDwg UEFHRV9TSVpFKSA/DQo+IA0KPiBSaWdodCwgdGhhdCBpcyBhIHByb2JsZW0uIChUaGVvcmV0aWNh bGx5LCBiZWNhdXNlIHRoZSBwTkZTRC1MaW51eA0KPiBzZXJ2ZXINCj4gZG9lcyBub3QgZG8gdGhh dC4gRG8geW91IGhhdmUgYSBTZXJ2ZXIgdGhhdCBkb2VzPykNCj4gDQo+ID4gTEQgY2FuIG92ZXJy aWRlIHBnX2JzaXplIGluIHBnX2luaXQgYmVjYXVzZQ0KPiA+IG5mc19wYWdlaW9fcmVzZXRfcmVh ZF9tZHMvbmZzX3BhZ2Vpb19yZXNldF93cml0ZV9tZHMgd2lsbCByZXNldCBpdCB0bw0KPiA+IHNl cnZlciByc2l6ZS93c2l6ZSBpZiBwbmZzIGlzIG5vdCB0cmllZC4NCj4gPg0KPiANCj4gU28gaWYg aXQgaXMgdGhlICJwZ19ic2l6ZSA8IFBBR0VfU0laRSIgYnV0IHBORlMtSU8gY2FzZSB0aGVuIEkg ZG9uJ3QNCj4gbGlrZSB5b3VyIHBhdGNoLCBhdCBhbGwuIFdlIHNob3VsZCBmaXggdGhlIGdlbmVy aWMgY29kZSB0byBiZWhhdmUNCj4gcHJvcGVybHksIGFuZCBub3QgbGV0IExEcyBoYWNrIHRoZWly IHdheSBvdXQuIChGb3IgZXhhbXBsZSB3aGF0IGFib3V0DQo+IG9iamVjdHMgYW5kIGZpbGVzIExE cykNCj4gDQo+IFRoZXJlIGlzIGEgZmV3IHdheXMgeW91IGNhbiBmaXggdGhlIGdlbmVyaWMgY29k ZS4gT25lIGlzIG92ZXJyaWRlIHRoZQ0KPiBkZXNjLT5wZ19ycGNfY2FsbG9wcyBmb3IgdGhlIHBO RlMgY2FzZSB0byBhbHdheXMgYmUgdGhlIHNhbWUgb25lLiBPcg0KPiBvdmVycmlkZSB0aGUgdGVz dCBmb3IgKHBnX2JzaXplIDwgUEFHRV9TSVpFKSBpbiB0aGUgcE5GUyBjYXNlIGlmIHdlDQo+IGhh dmUNCj4gYSBsc2VnLiBPciBzb21lIG90aGVyIGNsZWFuIHdheS4NCj4gDQo+IEJ1dCBwbGVhc2Ug ZG9uJ3QgZml4IGl0IGxpa2UgdGhhdCwgaW5zaWRlIGVhY2ggTEQgZHJpdmVyLg0KPiANCj4gWyBU cm9uZCBGcmVkDQo+ICAgT25lIHRoaW5nIEkgZG8gbm90IHVuZGVyc3RhbmQgYWJvdXQgdGhlIGZp bGVzLWxheW91dCBvcGVyYXRpb25zLiBZb3UNCj4gICBoYXZlIGV4cGxhaW5lZCBpbiB0aGUgcGFz c2VkIHRoYXQgci93c2l6ZSBzZW50IGZyb20gdGhlIE1EUyBpcyBhbHNvDQo+IHRoZQ0KPiAgIHNh bWUgb25lIGZvciBlYWNoIERTLiBTbyBpZiB3ZSB0YWtlIGFuIGV4YW1wbGUgb2YgcnNpemUgYmVl aW5nIDJNQg0KPiAgIGFuZCB0aGVyZSBpcyBhIHN0cmlwcGluZyBvZiAyIERTIGZvciB0aGF0IGxh eW91dC4oU2F5DQo+IHN0cmlwX3VuaXQ9PXJzaXplKQ0KPiAgIFRoZW4gd2UgbmVlZCB0byByZWFk IDEvMiBvZiB0aGF0IHBhZ2UgZnJvbSBvbmUgRFMgYW5kIHRoZSAyLzIgaGFsZg0KPiBmcm9tIHRo ZQ0KPiAgIHNlY29uZC4gV2lsbCBjdXJyZW50IHBhcnRpYWxfcmVhZC93cml0ZSB3b3JrIGlmIGdv aW5nIHRocm91Z2ggZmlsZXMtDQo+IExEPw0KPiBdDQoNCk5vLiBUaGUgc3RyaXBlIHNpemUgbWF5 IGJlIHNtYWxsZXIgdGhhbiB0aGUgci93c2l6ZSwgaW4gd2hpY2ggY2FzZSB3ZSdyZSBpbiB0aGUg c2FtZSBib2F0IGFzIHRoZSBibG9ja3MgYW5kIG9iamVjdHMuDQoNCkNoZWVycw0KICBUcm9uZA0K Tu+/ve+/ve+/ve+/ve+/vXLvv73vv71577+977+977+9Yu+/vVjvv73vv73Hp3bvv71e77+9Kd66 ey5u77+9K++/ve+/ve+/ve+/vXvvv73vv73vv70i77+977+9Xm7vv71y77+977+977+9eu+/vRrv v73vv71o77+977+977+977+9Ju+/ve+/vR7vv71H77+977+977+9aO+/vQMo77+96ZqO77+93aJq Iu+/ve+/vRrvv70bbe+/ve+/ve+/ve+/ve+/vXrvv73elu+/ve+/ve+/vWbvv73vv73vv71o77+9 77+977+9fu+/vW3vv70= ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] pnfsblock: init pg_bsize properly 2011-08-23 0:00 ` Myklebust, Trond @ 2011-08-23 15:01 ` Peng Tao 2011-08-23 21:19 ` Boaz Harrosh 0 siblings, 1 reply; 12+ messages in thread From: Peng Tao @ 2011-08-23 15:01 UTC (permalink / raw) To: Myklebust, Trond, Boaz Harrosh Cc: Benny Halevy, linux-nfs, Peng Tao, Isaman, Fred Hi, Trond and Boaz, On Tue, Aug 23, 2011 at 8:00 AM, Myklebust, Trond <Trond.Myklebust@netapp.com> wrote: >> -----Original Message----- >> From: Boaz Harrosh [mailto:bharrosh@panasas.com] >> Sent: Monday, August 22, 2011 7:52 PM >> To: Peng Tao >> Cc: Benny Halevy; linux-nfs@vger.kernel.org; Peng Tao; Myklebust, >> Trond; Isaman, Fred >> Subject: Re: [PATCH] pnfsblock: init pg_bsize properly >> >> On 08/17/2011 02:35 AM, Peng Tao wrote: >> > Hi, Benny and Boaz, >> > >> <snip> >> >> > In pnfs_do_multiple_reads/pnfs_do_multiple_writes, data->mds_ops will >> > be set as desc->pg_rpc_callops, which is determined in >> > nfs_generic_flush/nfs_generic_pagein according to desc->pg_bsize. For >> > blocklayout, we wouldn't want to set data->mds_ops to >> > partial_read/write ops, so I write the patch to use lseg length as >> > pg_bsize. >> > >> >> Do you mean in the case where MDS sets (pg_bsize < PAGE_SIZE) ? >> >> Right, that is a problem. (Theoretically, because the pNFSD-Linux >> server >> does not do that. Do you have a Server that does?) No, I don't have a server does that. But it is a server config option and we can't force users not to change it. So better fix it at client side. >> >> > LD can override pg_bsize in pg_init because >> > nfs_pageio_reset_read_mds/nfs_pageio_reset_write_mds will reset it to >> > server rsize/wsize if pnfs is not tried. >> > >> >> So if it is the "pg_bsize < PAGE_SIZE" but pNFS-IO case then I don't >> like your patch, at all. We should fix the generic code to behave >> properly, and not let LDs hack their way out. (For example what about >> objects and files LDs) >> >> There is a few ways you can fix the generic code. One is override the >> desc->pg_rpc_callops for the pNFS case to always be the same one. Or >> override the test for (pg_bsize < PAGE_SIZE) in the pNFS case if we >> have >> a lseg. Or some other clean way. I was under the impression that for object and file layouts, partial read/write rpc ops are still needed for DS IO when DS r/wsize is smaller than PAGE_SIZE... >> >> But please don't fix it like that, inside each LD driver. >> >> [ Trond Fred >> One thing I do not understand about the files-layout operations. You >> have explained in the passed that r/wsize sent from the MDS is also >> the >> same one for each DS. So if we take an example of rsize beeing 2MB >> and there is a stripping of 2 DS for that layout.(Say >> strip_unit==rsize) >> Then we need to read 1/2 of that page from one DS and the 2/2 half >> from the >> second. Will current partial_read/write work if going through files- >> LD? >> ] > > No. The stripe size may be smaller than the r/wsize, in which case we're in the same boat as the blocks and objects. So this is a generic issue. For file and object layout, do you need to use partial read/write rpc ops in any case? For block layout, we would like to never use it in LD. But I'm not sure about file and object case. Could you confirm? Thanks, Tao ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] pnfsblock: init pg_bsize properly 2011-08-23 15:01 ` Peng Tao @ 2011-08-23 21:19 ` Boaz Harrosh 2011-08-25 20:15 ` Jim Rees 0 siblings, 1 reply; 12+ messages in thread From: Boaz Harrosh @ 2011-08-23 21:19 UTC (permalink / raw) To: Peng Tao, Isaman, Fred, Andy Adamson Cc: Myklebust, Trond, Benny Halevy, linux-nfs, Peng Tao On 08/23/2011 08:01 AM, Peng Tao wrote: > So this is a generic issue. For file and object layout, do you need to > use partial read/write rpc ops in any case? For block layout, we would > like to never use it in LD. But I'm not sure about file and object > case. Could you confirm? > For objects it is like blocks. NEVER (ever) use partial rpc ops. r/wsize are not relevant for obj-LD IO. (As I understand from Trond also with files the LD should inspect the situation and have the final disposition. But someone will need to write some files-LD code for that, Fred, Andy?) > Thanks, > Tao Thanks Boaz ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] pnfsblock: init pg_bsize properly 2011-08-23 21:19 ` Boaz Harrosh @ 2011-08-25 20:15 ` Jim Rees 2011-08-26 0:16 ` Boaz Harrosh 0 siblings, 1 reply; 12+ messages in thread From: Jim Rees @ 2011-08-25 20:15 UTC (permalink / raw) To: Boaz Harrosh Cc: Peng Tao, Isaman, Fred, Andy Adamson, Myklebust, Trond, Benny Halevy, linux-nfs, Peng Tao Boaz Harrosh wrote: On 08/23/2011 08:01 AM, Peng Tao wrote: > So this is a generic issue. For file and object layout, do you need to > use partial read/write rpc ops in any case? For block layout, we would > like to never use it in LD. But I'm not sure about file and object > case. Could you confirm? > For objects it is like blocks. NEVER (ever) use partial rpc ops. r/wsize are not relevant for obj-LD IO. (As I understand from Trond also with files the LD should inspect the situation and have the final disposition. But someone will need to write some files-LD code for that, Fred, Andy?) > Thanks, > Tao We discussed this on the call today. Boaz is going to write a brief description of how to fix this in the generic layer, then I'm going to implement it. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH] pnfsblock: init pg_bsize properly 2011-08-25 20:15 ` Jim Rees @ 2011-08-26 0:16 ` Boaz Harrosh 0 siblings, 0 replies; 12+ messages in thread From: Boaz Harrosh @ 2011-08-26 0:16 UTC (permalink / raw) To: Jim Rees Cc: Peng Tao, Isaman, Fred, Andy Adamson, Myklebust, Trond, Benny Halevy, linux-nfs, Peng Tao On 08/25/2011 01:15 PM, Jim Rees wrote: > > We discussed this on the call today. Boaz is going to write a brief > description of how to fix this in the generic layer, then I'm going to > implement it. For blocks and objects we need something like the below [1]. (Done only for reads) But I suspect I now broke files-LD. For files-LD what it actually needs, (As I understood from trond) Is the same code like today but with a similar patch as Peng's but for files-LD that sets pg_bsize to the minimum of w/rsize and stripe_unit. This is mainly because it needs that nfs_readpage_result/release_partial which waits for all RPCs before it actually calls nfs_end_page_writeback (PageUpTodate for reads) on that page that was shared between multiple requests. So I guess we need to do [2] option below (Only done for writes). + With added code to set this bit_flag in objects and blocks. (Just like PNFS_LAYOUTRET_ON_SETATTR) + files-LD code to override pnfs_generic_pg_init_read/write and set pg_bsize to min(pg_bsize, stripe_unit). (Can be its own patch) + Define empty pnfs_ld_ignore_rwsize() for !CONFIG_NFS_V4_1 --------------------------------------------------------------- [1] (only reads) Do not use nfs_pagein_multi() for the pNFS case ... ----- diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ab12913..a4d0191 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -296,7 +296,7 @@ extern int nfs_access_cache_shrinker(struct shrinker *shrink, extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); -extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, +extern int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *head); extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index e550e88..b7e3e41 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1356,7 +1356,7 @@ struct pnfs_layout_segment * LIST_HEAD(head); int ret; - ret = nfs_generic_pagein(desc, &head); + ret = nfs_pagein_one(desc, &head); if (ret != 0) { put_lseg(desc->pg_lseg); desc->pg_lseg = NULL; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 2171c04..ce5982a 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -336,7 +336,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head return -ENOMEM; } -static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) +int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req; struct page **pages; @@ -369,19 +369,15 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head * return ret; } -int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) -{ - if (desc->pg_bsize < PAGE_CACHE_SIZE) - return nfs_pagein_multi(desc, head); - return nfs_pagein_one(desc, head); -} - static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { LIST_HEAD(head); int ret; - ret = nfs_generic_pagein(desc, &head); + if (desc->pg_bsize < PAGE_CACHE_SIZE) + ret = nfs_pagein_multi(desc, &head); + else + ret = nfs_pagein_one(desc, &head); if (ret == 0) ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); return ret; --------------------------------------------------------------- [2] (only writes) Do not use nfs_pagein_multi() for layout drivers that must not use it. (Objects and Blocks) ... ------- diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 01cbfd5..d32538a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -68,6 +68,8 @@ enum { enum layoutdriver_policy_flags { /* Should the pNFS client commit and return the layout upon a setattr */ PNFS_LAYOUTRET_ON_SETATTR = 1 << 0, + /* Do not use nfs_xxx_partial_ops */ + PNFS_IGNOR_RWSIZE = 2 << 0, }; struct nfs4_deviceid_node; @@ -315,6 +317,15 @@ static inline void pnfs_clear_request_commit(struct nfs_page *req) PNFS_LAYOUTRET_ON_SETATTR; } +static inline bool +pnfs_ld_ignore_rwsize(struct inode *inode) +{ + if (!pnfs_enabled_sb(NFS_SERVER(inode))) + return false; + return NFS_SERVER(inode)->pnfs_curr_ld->flags & + PNFS_IGNOR_RWSIZE; +} + static inline int pnfs_return_layout(struct inode *ino) { struct nfs_inode *nfsi = NFS_I(ino); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b39b37f..6b25073 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1029,7 +1029,8 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *r int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) { - if (desc->pg_bsize < PAGE_CACHE_SIZE) + if (!pnfs_ld_ignore_rwsize(desc->pg_inode) && + desc->pg_bsize < PAGE_CACHE_SIZE) return nfs_flush_multi(desc, head); return nfs_flush_one(desc, head); } ^ permalink raw reply related [flat|nested] 12+ messages in thread
end of thread, other threads:[~2011-08-26 0:17 UTC | newest] Thread overview: 12+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2011-08-13 1:04 [PATCH] pnfsblock: init pg_bsize properly Peng Tao 2011-08-16 21:05 ` Boaz Harrosh 2011-08-17 7:15 ` Benny Halevy 2011-08-17 9:35 ` Peng Tao 2011-08-17 16:27 ` Benny Halevy 2011-08-18 14:34 ` Peng Tao 2011-08-22 23:52 ` Boaz Harrosh 2011-08-23 0:00 ` Myklebust, Trond 2011-08-23 15:01 ` Peng Tao 2011-08-23 21:19 ` Boaz Harrosh 2011-08-25 20:15 ` Jim Rees 2011-08-26 0:16 ` Boaz Harrosh
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).