From: Benny Halevy <bhalevy@tonian.com>
To: Peng Tao <bergwolf@gmail.com>
Cc: linux-nfs@vger.kernel.org, Trond.Myklebust@netapp.com,
Peng Tao <peng_tao@emc.com>
Subject: Re: [PATCH 8/8] pnfsblock: alloc short extent before submit bio
Date: Thu, 10 Nov 2011 11:22:55 +0200 [thread overview]
Message-ID: <4EBB97EF.1050006@tonian.com> (raw)
In-Reply-To: <1320851766-1834-9-git-send-email-bergwolf@gmail.com>
Hi Tao,
I like the direction in this patch however
there are a couple nits below.
Thanks,
Benny
On 2011-11-09 17:16, Peng Tao wrote:
> As discussed earlier, it is better for block client to allocate memoroy for
nit: "memory"
> tracking extents state before submitting bio. So the patch does it by allocating
> a short_extent for every INVALID extent touched by write pagelist and for
> every zeroing page we created, saving them in layout header.
> Then in end_io we can just use them to create commit list items and avoid
> memory allocation there.
>
> Signed-off-by: Peng Tao <peng_tao@emc.com>
> ---
> fs/nfs/blocklayout/blocklayout.c | 70 +++++++++++++++++++++++++++++---------
> fs/nfs/blocklayout/blocklayout.h | 3 +-
> fs/nfs/blocklayout/extents.c | 13 ++-----
> 3 files changed, 60 insertions(+), 26 deletions(-)
>
> diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
> index cb4ff0f..53cd332 100644
> --- a/fs/nfs/blocklayout/blocklayout.c
> +++ b/fs/nfs/blocklayout/blocklayout.c
> @@ -90,8 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect)
> */
> struct parallel_io {
> struct kref refcnt;
> - void (*pnfs_callback) (void *data);
> + void (*pnfs_callback) (void *data, int num_se);
> void *data;
> + int bse_count;
> };
>
> static inline struct parallel_io *alloc_parallel(void *data)
> @@ -102,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data)
> if (rv) {
> rv->data = data;
> kref_init(&rv->refcnt);
> + rv->bse_count = 0;
> }
> return rv;
> }
> @@ -116,7 +118,7 @@ static void destroy_parallel(struct kref *kref)
> struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
>
> dprintk("%s enter\n", __func__);
> - p->pnfs_callback(p->data);
> + p->pnfs_callback(p->data, p->bse_count);
> kfree(p);
> }
>
> @@ -211,7 +213,7 @@ static void bl_read_cleanup(struct work_struct *work)
> }
>
> static void
> -bl_end_par_io_read(void *data)
> +bl_end_par_io_read(void *data, int unused)
> {
> struct nfs_read_data *rdata = data;
>
> @@ -312,6 +314,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
> {
> sector_t isect, end;
> struct pnfs_block_extent *be;
> + struct pnfs_block_short_extent *se;
>
> dprintk("%s(%llu, %u)\n", __func__, offset, count);
> if (count == 0)
> @@ -324,8 +327,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
> be = bl_find_get_extent(bl, isect, NULL);
> BUG_ON(!be); /* FIXME */
> len = min(end, be->be_f_offset + be->be_length) - isect;
> - if (be->be_state == PNFS_BLOCK_INVALID_DATA)
> - bl_mark_for_commit(be, isect, len); /* What if fails? */
> + if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
> + se = bl_pop_short_extent(be->be_inval, 1);
> + BUG_ON(!se);
> + bl_mark_for_commit(be, isect, len, se);
> + }
> isect += len;
> bl_put_extent(be);
> }
> @@ -347,7 +353,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
> end_page_writeback(page);
> page_cache_release(page);
> } while (bvec >= bio->bi_io_vec);
> - if (!uptodate) {
> +
> + if (unlikely(!uptodate)) {
> if (!wdata->pnfs_error)
> wdata->pnfs_error = -EIO;
> pnfs_set_lo_fail(wdata->lseg);
> @@ -356,7 +363,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
> put_parallel(par);
> }
>
> -/* This is basically copied from mpage_end_io_read */
> static void bl_end_io_write(struct bio *bio, int err)
> {
> struct parallel_io *par = bio->bi_private;
> @@ -382,7 +388,7 @@ static void bl_write_cleanup(struct work_struct *work)
> dprintk("%s enter\n", __func__);
> task = container_of(work, struct rpc_task, u.tk_work);
> wdata = container_of(task, struct nfs_write_data, task);
> - if (!wdata->pnfs_error) {
> + if (likely(!wdata->pnfs_error)) {
> /* Marks for LAYOUTCOMMIT */
> mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
> wdata->args.offset, wdata->args.count);
> @@ -391,9 +397,16 @@ static void bl_write_cleanup(struct work_struct *work)
> }
>
> /* Called when last of bios associated with a bl_write_pagelist call finishes */
> -static void bl_end_par_io_write(void *data)
> +static void bl_end_par_io_write(void *data, int num_se)
> {
> struct nfs_write_data *wdata = data;
> + struct pnfs_block_short_extent *se;
> +
> + if (unlikely(wdata->pnfs_error)) {
> + se = bl_pop_short_extent(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
> + num_se);
> + kfree(se);
> + }
>
> wdata->task.tk_status = wdata->pnfs_error;
> wdata->verf.committed = NFS_FILE_SYNC;
> @@ -548,7 +561,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
> */
> par = alloc_parallel(wdata);
> if (!par)
> - return PNFS_NOT_ATTEMPTED;
> + goto out_mds;
> par->pnfs_callback = bl_end_par_io_write;
> /* At this point, have to be more careful with error handling */
>
> @@ -556,12 +569,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
> be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
> if (!be || !is_writable(be, isect)) {
> dprintk("%s no matching extents!\n", __func__);
> - wdata->pnfs_error = -EINVAL;
> - goto out;
> + goto out_mds;
> }
>
> /* First page inside INVALID extent */
> if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
> + if (likely(!bl_push_one_short_extent(be->be_inval)))
> + par->bse_count++;
> + else
> + goto out_mds;
> temp = offset >> PAGE_CACHE_SHIFT;
> npg_zero = do_div(temp, npg_per_block);
> isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
> @@ -598,6 +614,19 @@ fill_invalid_ext:
> wdata->pnfs_error = ret;
> goto out;
> }
> + if (likely(!bl_push_one_short_extent(be->be_inval)))
> + par->bse_count++;
> + else {
> + end_page_writeback(page);
> + page_cache_release(page);
> + wdata->pnfs_error = -ENOMEM;
> + goto out;
> + }
> + /* FIXME: This should be done in bi_end_io */
> + mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
> + page->index << PAGE_CACHE_SHIFT,
> + PAGE_CACHE_SIZE);
> +
> bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
> isect, page, be,
> bl_end_io_write_zero, par);
> @@ -606,10 +635,6 @@ fill_invalid_ext:
> bio = NULL;
> goto out;
> }
> - /* FIXME: This should be done in bi_end_io */
> - mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
> - page->index << PAGE_CACHE_SHIFT,
> - PAGE_CACHE_SIZE);
> next_page:
> isect += PAGE_CACHE_SECTORS;
> extent_length -= PAGE_CACHE_SECTORS;
> @@ -633,6 +658,14 @@ next_page:
> wdata->pnfs_error = -EINVAL;
> goto out;
> }
> + if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
> + if (likely(!bl_push_one_short_extent(be->be_inval)))
checkpatch nit: please use tabs, not spaces, for indent...
Benny
> + par->bse_count++;
> + else {
> + wdata->pnfs_error = -ENOMEM;
> + goto out;
> + }
> + }
> extent_length = be->be_length -
> (isect - be->be_f_offset);
> }
> @@ -680,6 +713,11 @@ out:
> bl_submit_bio(WRITE, bio);
> put_parallel(par);
> return PNFS_ATTEMPTED;
> +out_mds:
> + bl_put_extent(be);
> + if (par)
> + kfree(par);
> + return PNFS_NOT_ATTEMPTED;
> }
>
> /* FIXME - range ignored */
> diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
> index df0e0fb..4986c23 100644
> --- a/fs/nfs/blocklayout/blocklayout.h
> +++ b/fs/nfs/blocklayout/blocklayout.h
> @@ -201,7 +201,8 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
> int bl_add_merge_extent(struct pnfs_block_layout *bl,
> struct pnfs_block_extent *new);
> int bl_mark_for_commit(struct pnfs_block_extent *be,
> - sector_t offset, sector_t length);
> + sector_t offset, sector_t length,
> + struct pnfs_block_short_extent *new);
> int bl_push_one_short_extent(struct pnfs_inval_markings *marks);
> struct pnfs_block_short_extent*
> bl_pop_short_extent(struct pnfs_inval_markings *marks, int num_to_pop);
> diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
> index 72c7fa1..21da3a3 100644
> --- a/fs/nfs/blocklayout/extents.c
> +++ b/fs/nfs/blocklayout/extents.c
> @@ -370,20 +370,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl,
>
> /* Note the range described by offset, length is guaranteed to be contained
> * within be.
> + * new will be freed, either by this function or add_to_commitlist if they
> + * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist.
> */
> int bl_mark_for_commit(struct pnfs_block_extent *be,
> - sector_t offset, sector_t length)
> + sector_t offset, sector_t length,
> + struct pnfs_block_short_extent *new)
> {
> sector_t new_end, end = offset + length;
> - struct pnfs_block_short_extent *new;
> struct pnfs_block_layout *bl = container_of(be->be_inval,
> struct pnfs_block_layout,
> bl_inval);
>
> - new = kmalloc(sizeof(*new), GFP_NOFS);
> - if (!new)
> - return -ENOMEM;
> -
> mark_written_sectors(be->be_inval, offset, length);
> /* We want to add the range to commit list, but it must be
> * block-normalized, and verified that the normalized range has
> @@ -413,9 +411,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be,
> new->bse_mdev = be->be_mdev;
>
> spin_lock(&bl->bl_ext_lock);
> - /* new will be freed, either by add_to_commitlist if it decides not
> - * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
> - */
> add_to_commitlist(bl, new);
> spin_unlock(&bl->bl_ext_lock);
> return 0;
next prev parent reply other threads:[~2011-11-10 9:23 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-11-09 15:15 [PATCH 0/8] pnfsblock cleanup and fixes Peng Tao
2011-11-09 15:15 ` [PATCH 1/8] pnfsblock: cleanup bl_mark_sectors_init Peng Tao
2011-11-09 15:16 ` [PATCH 3/8] pnfsblock: move find lock page logic out of bl_write_pagelist Peng Tao
2011-11-10 8:32 ` Benny Halevy
2011-11-10 8:49 ` tao.peng
2011-11-10 9:31 ` Benny Halevy
2011-11-10 9:40 ` tao.peng
2011-11-09 15:16 ` [PATCH 4/8] pnfsblock: set read/write tk_status to pnfs_error Peng Tao
2011-11-09 15:16 ` [PATCH 5/8] pnfsblock: remove rpc_call_ops from struct parallel_io Peng Tao
2011-11-09 15:16 ` [PATCH 6/8] pnfsblock: clean up _add_entry Peng Tao
2011-11-10 8:44 ` Benny Halevy
2011-11-10 8:56 ` tao.peng
2011-11-10 9:11 ` tao.peng
2011-11-09 15:16 ` [PATCH 7/8] pnfsblock: add im_extents to pnfs_inval_markings Peng Tao
2011-11-10 8:54 ` Benny Halevy
2011-11-10 9:08 ` tao.peng
2011-11-10 9:37 ` Benny Halevy
2011-11-10 9:20 ` Benny Halevy
2011-11-10 9:37 ` tao.peng
2011-11-10 9:39 ` Benny Halevy
2011-11-09 15:16 ` [PATCH 8/8] pnfsblock: alloc short extent before submit bio Peng Tao
2011-11-10 9:22 ` Benny Halevy [this message]
2011-11-10 2:09 ` [PATCH 0/8] pnfsblock cleanup and fixes tao.peng
2011-11-10 7:06 ` Benny Halevy
2011-11-10 8:37 ` Benny Halevy
2011-11-10 8:51 ` tao.peng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4EBB97EF.1050006@tonian.com \
--to=bhalevy@tonian.com \
--cc=Trond.Myklebust@netapp.com \
--cc=bergwolf@gmail.com \
--cc=linux-nfs@vger.kernel.org \
--cc=peng_tao@emc.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox