From: Peng Tao <bergwolf@gmail.com>
To: linux-nfs@vger.kernel.org
Cc: Trond.Myklebust@netapp.com, bhalevy@tonian.com,
Peng Tao <peng_tao@emc.com>
Subject: [PATCH 8/8] pnfsblock: alloc short extent before submit bio
Date: Wed, 9 Nov 2011 07:16:06 -0800 [thread overview]
Message-ID: <1320851766-1834-9-git-send-email-bergwolf@gmail.com> (raw)
In-Reply-To: <1320851766-1834-1-git-send-email-bergwolf@gmail.com>
As discussed earlier, it is better for block client to allocate memoroy for
tracking extents state before submitting bio. So the patch does it by allocating
a short_extent for every INVALID extent touched by write pagelist and for
every zeroing page we created, saving them in layout header.
Then in end_io we can just use them to create commit list items and avoid
memory allocation there.
Signed-off-by: Peng Tao <peng_tao@emc.com>
---
fs/nfs/blocklayout/blocklayout.c | 70 +++++++++++++++++++++++++++++---------
fs/nfs/blocklayout/blocklayout.h | 3 +-
fs/nfs/blocklayout/extents.c | 13 ++-----
3 files changed, 60 insertions(+), 26 deletions(-)
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index cb4ff0f..53cd332 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -90,8 +90,9 @@ static int is_writable(struct pnfs_block_extent *be, sector_t isect)
*/
struct parallel_io {
struct kref refcnt;
- void (*pnfs_callback) (void *data);
+ void (*pnfs_callback) (void *data, int num_se);
void *data;
+ int bse_count;
};
static inline struct parallel_io *alloc_parallel(void *data)
@@ -102,6 +103,7 @@ static inline struct parallel_io *alloc_parallel(void *data)
if (rv) {
rv->data = data;
kref_init(&rv->refcnt);
+ rv->bse_count = 0;
}
return rv;
}
@@ -116,7 +118,7 @@ static void destroy_parallel(struct kref *kref)
struct parallel_io *p = container_of(kref, struct parallel_io, refcnt);
dprintk("%s enter\n", __func__);
- p->pnfs_callback(p->data);
+ p->pnfs_callback(p->data, p->bse_count);
kfree(p);
}
@@ -211,7 +213,7 @@ static void bl_read_cleanup(struct work_struct *work)
}
static void
-bl_end_par_io_read(void *data)
+bl_end_par_io_read(void *data, int unused)
{
struct nfs_read_data *rdata = data;
@@ -312,6 +314,7 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
{
sector_t isect, end;
struct pnfs_block_extent *be;
+ struct pnfs_block_short_extent *se;
dprintk("%s(%llu, %u)\n", __func__, offset, count);
if (count == 0)
@@ -324,8 +327,11 @@ static void mark_extents_written(struct pnfs_block_layout *bl,
be = bl_find_get_extent(bl, isect, NULL);
BUG_ON(!be); /* FIXME */
len = min(end, be->be_f_offset + be->be_length) - isect;
- if (be->be_state == PNFS_BLOCK_INVALID_DATA)
- bl_mark_for_commit(be, isect, len); /* What if fails? */
+ if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+ se = bl_pop_short_extent(be->be_inval, 1);
+ BUG_ON(!se);
+ bl_mark_for_commit(be, isect, len, se);
+ }
isect += len;
bl_put_extent(be);
}
@@ -347,7 +353,8 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
end_page_writeback(page);
page_cache_release(page);
} while (bvec >= bio->bi_io_vec);
- if (!uptodate) {
+
+ if (unlikely(!uptodate)) {
if (!wdata->pnfs_error)
wdata->pnfs_error = -EIO;
pnfs_set_lo_fail(wdata->lseg);
@@ -356,7 +363,6 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
put_parallel(par);
}
-/* This is basically copied from mpage_end_io_read */
static void bl_end_io_write(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
@@ -382,7 +388,7 @@ static void bl_write_cleanup(struct work_struct *work)
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
wdata = container_of(task, struct nfs_write_data, task);
- if (!wdata->pnfs_error) {
+ if (likely(!wdata->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */
mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
wdata->args.offset, wdata->args.count);
@@ -391,9 +397,16 @@ static void bl_write_cleanup(struct work_struct *work)
}
/* Called when last of bios associated with a bl_write_pagelist call finishes */
-static void bl_end_par_io_write(void *data)
+static void bl_end_par_io_write(void *data, int num_se)
{
struct nfs_write_data *wdata = data;
+ struct pnfs_block_short_extent *se;
+
+ if (unlikely(wdata->pnfs_error)) {
+ se = bl_pop_short_extent(&BLK_LSEG2EXT(wdata->lseg)->bl_inval,
+ num_se);
+ kfree(se);
+ }
wdata->task.tk_status = wdata->pnfs_error;
wdata->verf.committed = NFS_FILE_SYNC;
@@ -548,7 +561,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
*/
par = alloc_parallel(wdata);
if (!par)
- return PNFS_NOT_ATTEMPTED;
+ goto out_mds;
par->pnfs_callback = bl_end_par_io_write;
/* At this point, have to be more careful with error handling */
@@ -556,12 +569,15 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
be = bl_find_get_extent(BLK_LSEG2EXT(wdata->lseg), isect, &cow_read);
if (!be || !is_writable(be, isect)) {
dprintk("%s no matching extents!\n", __func__);
- wdata->pnfs_error = -EINVAL;
- goto out;
+ goto out_mds;
}
/* First page inside INVALID extent */
if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+ if (likely(!bl_push_one_short_extent(be->be_inval)))
+ par->bse_count++;
+ else
+ goto out_mds;
temp = offset >> PAGE_CACHE_SHIFT;
npg_zero = do_div(temp, npg_per_block);
isect = (sector_t) (((offset - npg_zero * PAGE_CACHE_SIZE) &
@@ -598,6 +614,19 @@ fill_invalid_ext:
wdata->pnfs_error = ret;
goto out;
}
+ if (likely(!bl_push_one_short_extent(be->be_inval)))
+ par->bse_count++;
+ else {
+ end_page_writeback(page);
+ page_cache_release(page);
+ wdata->pnfs_error = -ENOMEM;
+ goto out;
+ }
+ /* FIXME: This should be done in bi_end_io */
+ mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
+ page->index << PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE);
+
bio = bl_add_page_to_bio(bio, npg_zero, WRITE,
isect, page, be,
bl_end_io_write_zero, par);
@@ -606,10 +635,6 @@ fill_invalid_ext:
bio = NULL;
goto out;
}
- /* FIXME: This should be done in bi_end_io */
- mark_extents_written(BLK_LSEG2EXT(wdata->lseg),
- page->index << PAGE_CACHE_SHIFT,
- PAGE_CACHE_SIZE);
next_page:
isect += PAGE_CACHE_SECTORS;
extent_length -= PAGE_CACHE_SECTORS;
@@ -633,6 +658,14 @@ next_page:
wdata->pnfs_error = -EINVAL;
goto out;
}
+ if (be->be_state == PNFS_BLOCK_INVALID_DATA) {
+ if (likely(!bl_push_one_short_extent(be->be_inval)))
+ par->bse_count++;
+ else {
+ wdata->pnfs_error = -ENOMEM;
+ goto out;
+ }
+ }
extent_length = be->be_length -
(isect - be->be_f_offset);
}
@@ -680,6 +713,11 @@ out:
bl_submit_bio(WRITE, bio);
put_parallel(par);
return PNFS_ATTEMPTED;
+out_mds:
+ bl_put_extent(be);
+ if (par)
+ kfree(par);
+ return PNFS_NOT_ATTEMPTED;
}
/* FIXME - range ignored */
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index df0e0fb..4986c23 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -201,7 +201,8 @@ void clean_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
int bl_add_merge_extent(struct pnfs_block_layout *bl,
struct pnfs_block_extent *new);
int bl_mark_for_commit(struct pnfs_block_extent *be,
- sector_t offset, sector_t length);
+ sector_t offset, sector_t length,
+ struct pnfs_block_short_extent *new);
int bl_push_one_short_extent(struct pnfs_inval_markings *marks);
struct pnfs_block_short_extent*
bl_pop_short_extent(struct pnfs_inval_markings *marks, int num_to_pop);
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 72c7fa1..21da3a3 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -370,20 +370,18 @@ static void add_to_commitlist(struct pnfs_block_layout *bl,
/* Note the range described by offset, length is guaranteed to be contained
* within be.
+ * new will be freed, either by this function or add_to_commitlist if they
+ * decide not to use it, or after LAYOUTCOMMIT uses it in the commitlist.
*/
int bl_mark_for_commit(struct pnfs_block_extent *be,
- sector_t offset, sector_t length)
+ sector_t offset, sector_t length,
+ struct pnfs_block_short_extent *new)
{
sector_t new_end, end = offset + length;
- struct pnfs_block_short_extent *new;
struct pnfs_block_layout *bl = container_of(be->be_inval,
struct pnfs_block_layout,
bl_inval);
- new = kmalloc(sizeof(*new), GFP_NOFS);
- if (!new)
- return -ENOMEM;
-
mark_written_sectors(be->be_inval, offset, length);
/* We want to add the range to commit list, but it must be
* block-normalized, and verified that the normalized range has
@@ -413,9 +411,6 @@ int bl_mark_for_commit(struct pnfs_block_extent *be,
new->bse_mdev = be->be_mdev;
spin_lock(&bl->bl_ext_lock);
- /* new will be freed, either by add_to_commitlist if it decides not
- * to use it, or after LAYOUTCOMMIT uses it in the commitlist.
- */
add_to_commitlist(bl, new);
spin_unlock(&bl->bl_ext_lock);
return 0;
--
1.7.1.262.g5ef3d
next prev parent reply other threads:[~2011-11-09 15:18 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-11-09 15:15 [PATCH 0/8] pnfsblock cleanup and fixes Peng Tao
2011-11-09 15:15 ` [PATCH 1/8] pnfsblock: cleanup bl_mark_sectors_init Peng Tao
2011-11-09 15:16 ` [PATCH 3/8] pnfsblock: move find lock page logic out of bl_write_pagelist Peng Tao
2011-11-10 8:32 ` Benny Halevy
2011-11-10 8:49 ` tao.peng
2011-11-10 9:31 ` Benny Halevy
2011-11-10 9:40 ` tao.peng
2011-11-09 15:16 ` [PATCH 4/8] pnfsblock: set read/write tk_status to pnfs_error Peng Tao
2011-11-09 15:16 ` [PATCH 5/8] pnfsblock: remove rpc_call_ops from struct parallel_io Peng Tao
2011-11-09 15:16 ` [PATCH 6/8] pnfsblock: clean up _add_entry Peng Tao
2011-11-10 8:44 ` Benny Halevy
2011-11-10 8:56 ` tao.peng
2011-11-10 9:11 ` tao.peng
2011-11-09 15:16 ` [PATCH 7/8] pnfsblock: add im_extents to pnfs_inval_markings Peng Tao
2011-11-10 8:54 ` Benny Halevy
2011-11-10 9:08 ` tao.peng
2011-11-10 9:37 ` Benny Halevy
2011-11-10 9:20 ` Benny Halevy
2011-11-10 9:37 ` tao.peng
2011-11-10 9:39 ` Benny Halevy
2011-11-09 15:16 ` Peng Tao [this message]
2011-11-10 9:22 ` [PATCH 8/8] pnfsblock: alloc short extent before submit bio Benny Halevy
2011-11-10 2:09 ` [PATCH 0/8] pnfsblock cleanup and fixes tao.peng
2011-11-10 7:06 ` Benny Halevy
2011-11-10 8:37 ` Benny Halevy
2011-11-10 8:51 ` tao.peng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1320851766-1834-9-git-send-email-bergwolf@gmail.com \
--to=bergwolf@gmail.com \
--cc=Trond.Myklebust@netapp.com \
--cc=bhalevy@tonian.com \
--cc=linux-nfs@vger.kernel.org \
--cc=peng_tao@emc.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox