* [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order
@ 2010-05-10 3:36 Zhang Jingwang
[not found] ` <20100510033610.GA5443-nK6E9TRyOkVSq9BJjBFyUp/QNRX+jHPU@public.gmane.org>
0 siblings, 1 reply; 14+ messages in thread
From: Zhang Jingwang @ 2010-05-10 3:36 UTC (permalink / raw)
To: linux-nfs; +Cc: bhalevy, iisaman
Optimize for sequencial write. Layout infos and tags are organized by
file offset. When appending data to a file whole list will be examined,
which introduce notable performance decrease.
Signed-off-by: Zhang Jingwang <zhangjingwang@nrchpc.ac.cn>
---
fs/nfs/blocklayout/extents.c | 126 +++++++++++++++++++++---------------------
1 files changed, 64 insertions(+), 62 deletions(-)
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 3c311f2..514f2cc 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -66,8 +66,8 @@ static int32_t _find_entry(struct my_tree_t *tree, u64 s)
struct pnfs_inval_tracking *pos;
dprintk("%s(%llu) enter\n", __func__, s);
- list_for_each_entry(pos, &tree->mtt_stub, it_link) {
- if (pos->it_sector < s)
+ list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
+ if (pos->it_sector > s)
continue;
else if (pos->it_sector == s)
return pos->it_tags & INTERNAL_MASK;
@@ -102,8 +102,8 @@ static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag,
struct pnfs_inval_tracking *pos;
dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage);
- list_for_each_entry(pos, &tree->mtt_stub, it_link) {
- if (pos->it_sector < s)
+ list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
+ if (pos->it_sector > s)
continue;
else if (pos->it_sector == s) {
found = 1;
@@ -125,7 +125,7 @@ static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag,
}
new->it_sector = s;
new->it_tags = (1 << tag);
- list_add_tail(&new->it_link, &pos->it_link);
+ list_add(&new->it_link, &pos->it_link);
return 1;
}
}
@@ -230,14 +230,14 @@ _range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag)
u64 expect = 0;
dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag);
- list_for_each_entry(pos, &tree->mtt_stub, it_link) {
- if (pos->it_sector < start)
+ list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
+ if (pos->it_sector >= end)
continue;
if (!expect) {
- if ((pos->it_sector == start) &&
+ if ((pos->it_sector == end - tree->mtt_step_size) &&
(pos->it_tags & (1 << tag))) {
- expect = start + tree->mtt_step_size;
- if (expect == end)
+ expect = pos->it_sector - tree->mtt_step_size;
+ if (expect < start)
return 1;
continue;
} else {
@@ -246,8 +246,8 @@ _range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag)
}
if (pos->it_sector != expect || !(pos->it_tags & (1 << tag)))
return 0;
- expect += tree->mtt_step_size;
- if (expect == end)
+ expect -= tree->mtt_step_size;
+ if (expect < start)
return 1;
}
return 0;
@@ -594,65 +594,67 @@ add_and_merge_extent(struct pnfs_block_layout *bl,
/* Scan for proper place to insert, extending new to the left
* as much as possible.
*/
- list_for_each_entry_safe(be, tmp, list, be_node) {
- if (new->be_f_offset < be->be_f_offset)
+ list_for_each_entry_safe_reverse(be, tmp, list, be_node) {
+ if (new->be_f_offset >= be->be_f_offset + be->be_length)
break;
- if (end <= be->be_f_offset + be->be_length) {
- /* new is a subset of existing be*/
+ if (new->be_f_offset >= be->be_f_offset) {
+ if (end <= be->be_f_offset + be->be_length) {
+ /* new is a subset of existing be*/
+ if (extents_consistent(be, new)) {
+ dprintk("%s: new is subset, ignoring\n",
+ __func__);
+ put_extent(new);
+ return 0;
+ } else {
+ goto out_err;
+ }
+ } else {
+ /* |<-- be -->|
+ * |<-- new -->| */
+ if (extents_consistent(be, new)) {
+ /* extend new to fully replace be */
+ new->be_length += new->be_f_offset -
+ be->be_f_offset;
+ new->be_f_offset = be->be_f_offset;
+ new->be_v_offset = be->be_v_offset;
+ dprintk("%s: removing %p\n", __func__, be);
+ list_del(&be->be_node);
+ put_extent(be);
+ } else {
+ goto out_err;
+ }
+ }
+ } else if (end >= be->be_f_offset + be->be_length) {
+ /* new extent overlap existing be */
if (extents_consistent(be, new)) {
- dprintk("%s: new is subset, ignoring\n",
- __func__);
- put_extent(new);
- return 0;
- } else
+ /* extend new to fully replace be */
+ dprintk("%s: removing %p\n", __func__, be);
+ list_del(&be->be_node);
+ put_extent(be);
+ } else {
goto out_err;
- } else if (new->be_f_offset <=
- be->be_f_offset + be->be_length) {
- /* new overlaps or abuts existing be */
- if (extents_consistent(be, new)) {
+ }
+ } else if (end > be->be_f_offset) {
+ /* |<-- be -->|
+ *|<-- new -->| */
+ if (extents_consistent(new, be)) {
/* extend new to fully replace be */
- new->be_length += new->be_f_offset -
- be->be_f_offset;
- new->be_f_offset = be->be_f_offset;
- new->be_v_offset = be->be_v_offset;
+ new->be_length += be->be_f_offset + be->be_length -
+ new->be_f_offset - new->be_length;
dprintk("%s: removing %p\n", __func__, be);
list_del(&be->be_node);
put_extent(be);
- } else if (new->be_f_offset !=
- be->be_f_offset + be->be_length)
+ } else {
goto out_err;
+ }
}
}
/* Note that if we never hit the above break, be will not point to a
* valid extent. However, in that case &be->be_node==list.
*/
- list_add_tail(&new->be_node, &be->be_node);
+ list_add(&new->be_node, &be->be_node);
dprintk("%s: inserting new\n", __func__);
print_elist(list);
- /* Scan forward for overlaps. If we find any, extend new and
- * remove the overlapped extent.
- */
- be = list_prepare_entry(new, list, be_node);
- list_for_each_entry_safe_continue(be, tmp, list, be_node) {
- if (end < be->be_f_offset)
- break;
- /* new overlaps or abuts existing be */
- if (extents_consistent(be, new)) {
- if (end < be->be_f_offset + be->be_length) {
- /* extend new to fully cover be */
- end = be->be_f_offset + be->be_length;
- new->be_length = end - new->be_f_offset;
- }
- dprintk("%s: removing %p\n", __func__, be);
- list_del(&be->be_node);
- put_extent(be);
- } else if (end != be->be_f_offset) {
- list_del(&new->be_node);
- goto out_err;
- }
- }
- dprintk("%s: after merging\n", __func__);
- print_elist(list);
/* STUB - The per-list consistency checks have all been done,
* should now check cross-list consistency.
*/
@@ -685,10 +687,10 @@ find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
if (ret &&
(!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA))
break;
- list_for_each_entry(be, &bl->bl_extents[i], be_node) {
- if (isect < be->be_f_offset)
+ list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
+ if (isect >= be->be_f_offset + be->be_length)
break;
- if (isect < be->be_f_offset + be->be_length) {
+ if (isect >= be->be_f_offset) {
/* We have found an extent */
dprintk("%s Get %p (%i)\n", __func__, be,
atomic_read(&be->be_refcnt.refcount));
@@ -721,10 +723,10 @@ find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect)
for (i = 0; i < EXTENT_LISTS; i++) {
if (ret)
break;
- list_for_each_entry(be, &bl->bl_extents[i], be_node) {
- if (isect < be->be_f_offset)
+ list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) {
+ if (isect >= be->be_f_offset + be->be_length)
break;
- if (isect < be->be_f_offset + be->be_length) {
+ if (isect >= be->be_f_offset) {
/* We have found an extent */
dprintk("%s Get %p (%i)\n", __func__, be,
atomic_read(&be->be_refcnt.refcount));
--
1.6.2.5
^ permalink raw reply related [flat|nested] 14+ messages in thread[parent not found: <20100510033610.GA5443-nK6E9TRyOkVSq9BJjBFyUp/QNRX+jHPU@public.gmane.org>]
* Re: [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order [not found] ` <20100510033610.GA5443-nK6E9TRyOkVSq9BJjBFyUp/QNRX+jHPU@public.gmane.org> @ 2010-05-12 6:46 ` Benny Halevy 2010-05-12 20:28 ` J. Bruce Fields 0 siblings, 1 reply; 14+ messages in thread From: Benny Halevy @ 2010-05-12 6:46 UTC (permalink / raw) To: Zhang Jingwang; +Cc: linux-nfs, iisaman On May. 10, 2010, 6:36 +0300, Zhang Jingwang <zhangjingwang-U4AKAne5IzAR5TUyvShJeg@public.gmane.org> wrote: > Optimize for sequencial write. Layout infos and tags are organized by > file offset. When appending data to a file whole list will be examined, > which introduce notable performance decrease. Looks good to me. Fred, can you please double check? Benny P.S.: Zhang, please note Fred's new email address > > Signed-off-by: Zhang Jingwang <zhangjingwang-U4AKAne5IzAR5TUyvShJeg@public.gmane.org> > --- > fs/nfs/blocklayout/extents.c | 126 +++++++++++++++++++++--------------------- > 1 files changed, 64 insertions(+), 62 deletions(-) > > diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c > index 3c311f2..514f2cc 100644 > --- a/fs/nfs/blocklayout/extents.c > +++ b/fs/nfs/blocklayout/extents.c > @@ -66,8 +66,8 @@ static int32_t _find_entry(struct my_tree_t *tree, u64 s) > struct pnfs_inval_tracking *pos; > > dprintk("%s(%llu) enter\n", __func__, s); > - list_for_each_entry(pos, &tree->mtt_stub, it_link) { > - if (pos->it_sector < s) > + list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { > + if (pos->it_sector > s) > continue; > else if (pos->it_sector == s) > return pos->it_tags & INTERNAL_MASK; > @@ -102,8 +102,8 @@ static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag, > struct pnfs_inval_tracking *pos; > > dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage); > - list_for_each_entry(pos, &tree->mtt_stub, it_link) { > - if (pos->it_sector < s) > + list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { > + if (pos->it_sector > s) > continue; > else if (pos->it_sector == s) { > found = 1; > @@ -125,7 +125,7 @@ static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag, > } > new->it_sector = s; > new->it_tags = (1 << tag); > - list_add_tail(&new->it_link, &pos->it_link); > + list_add(&new->it_link, &pos->it_link); > return 1; > } > } > @@ -230,14 +230,14 @@ _range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag) > u64 expect = 0; > > dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag); > - list_for_each_entry(pos, &tree->mtt_stub, it_link) { > - if (pos->it_sector < start) > + list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { > + if (pos->it_sector >= end) > continue; > if (!expect) { > - if ((pos->it_sector == start) && > + if ((pos->it_sector == end - tree->mtt_step_size) && > (pos->it_tags & (1 << tag))) { > - expect = start + tree->mtt_step_size; > - if (expect == end) > + expect = pos->it_sector - tree->mtt_step_size; > + if (expect < start) > return 1; > continue; > } else { > @@ -246,8 +246,8 @@ _range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag) > } > if (pos->it_sector != expect || !(pos->it_tags & (1 << tag))) > return 0; > - expect += tree->mtt_step_size; > - if (expect == end) > + expect -= tree->mtt_step_size; > + if (expect < start) > return 1; > } > return 0; > @@ -594,65 +594,67 @@ add_and_merge_extent(struct pnfs_block_layout *bl, > /* Scan for proper place to insert, extending new to the left > * as much as possible. > */ > - list_for_each_entry_safe(be, tmp, list, be_node) { > - if (new->be_f_offset < be->be_f_offset) > + list_for_each_entry_safe_reverse(be, tmp, list, be_node) { > + if (new->be_f_offset >= be->be_f_offset + be->be_length) > break; > - if (end <= be->be_f_offset + be->be_length) { > - /* new is a subset of existing be*/ > + if (new->be_f_offset >= be->be_f_offset) { > + if (end <= be->be_f_offset + be->be_length) { > + /* new is a subset of existing be*/ > + if (extents_consistent(be, new)) { > + dprintk("%s: new is subset, ignoring\n", > + __func__); > + put_extent(new); > + return 0; > + } else { > + goto out_err; > + } > + } else { > + /* |<-- be -->| > + * |<-- new -->| */ > + if (extents_consistent(be, new)) { > + /* extend new to fully replace be */ > + new->be_length += new->be_f_offset - > + be->be_f_offset; > + new->be_f_offset = be->be_f_offset; > + new->be_v_offset = be->be_v_offset; > + dprintk("%s: removing %p\n", __func__, be); > + list_del(&be->be_node); > + put_extent(be); > + } else { > + goto out_err; > + } > + } > + } else if (end >= be->be_f_offset + be->be_length) { > + /* new extent overlap existing be */ > if (extents_consistent(be, new)) { > - dprintk("%s: new is subset, ignoring\n", > - __func__); > - put_extent(new); > - return 0; > - } else > + /* extend new to fully replace be */ > + dprintk("%s: removing %p\n", __func__, be); > + list_del(&be->be_node); > + put_extent(be); > + } else { > goto out_err; > - } else if (new->be_f_offset <= > - be->be_f_offset + be->be_length) { > - /* new overlaps or abuts existing be */ > - if (extents_consistent(be, new)) { > + } > + } else if (end > be->be_f_offset) { > + /* |<-- be -->| > + *|<-- new -->| */ > + if (extents_consistent(new, be)) { > /* extend new to fully replace be */ > - new->be_length += new->be_f_offset - > - be->be_f_offset; > - new->be_f_offset = be->be_f_offset; > - new->be_v_offset = be->be_v_offset; > + new->be_length += be->be_f_offset + be->be_length - > + new->be_f_offset - new->be_length; > dprintk("%s: removing %p\n", __func__, be); > list_del(&be->be_node); > put_extent(be); > - } else if (new->be_f_offset != > - be->be_f_offset + be->be_length) > + } else { > goto out_err; > + } > } > } > /* Note that if we never hit the above break, be will not point to a > * valid extent. However, in that case &be->be_node==list. > */ > - list_add_tail(&new->be_node, &be->be_node); > + list_add(&new->be_node, &be->be_node); > dprintk("%s: inserting new\n", __func__); > print_elist(list); > - /* Scan forward for overlaps. If we find any, extend new and > - * remove the overlapped extent. > - */ > - be = list_prepare_entry(new, list, be_node); > - list_for_each_entry_safe_continue(be, tmp, list, be_node) { > - if (end < be->be_f_offset) > - break; > - /* new overlaps or abuts existing be */ > - if (extents_consistent(be, new)) { > - if (end < be->be_f_offset + be->be_length) { > - /* extend new to fully cover be */ > - end = be->be_f_offset + be->be_length; > - new->be_length = end - new->be_f_offset; > - } > - dprintk("%s: removing %p\n", __func__, be); > - list_del(&be->be_node); > - put_extent(be); > - } else if (end != be->be_f_offset) { > - list_del(&new->be_node); > - goto out_err; > - } > - } > - dprintk("%s: after merging\n", __func__); > - print_elist(list); > /* STUB - The per-list consistency checks have all been done, > * should now check cross-list consistency. > */ > @@ -685,10 +687,10 @@ find_get_extent(struct pnfs_block_layout *bl, sector_t isect, > if (ret && > (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA)) > break; > - list_for_each_entry(be, &bl->bl_extents[i], be_node) { > - if (isect < be->be_f_offset) > + list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) { > + if (isect >= be->be_f_offset + be->be_length) > break; > - if (isect < be->be_f_offset + be->be_length) { > + if (isect >= be->be_f_offset) { > /* We have found an extent */ > dprintk("%s Get %p (%i)\n", __func__, be, > atomic_read(&be->be_refcnt.refcount)); > @@ -721,10 +723,10 @@ find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect) > for (i = 0; i < EXTENT_LISTS; i++) { > if (ret) > break; > - list_for_each_entry(be, &bl->bl_extents[i], be_node) { > - if (isect < be->be_f_offset) > + list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) { > + if (isect >= be->be_f_offset + be->be_length) > break; > - if (isect < be->be_f_offset + be->be_length) { > + if (isect >= be->be_f_offset) { > /* We have found an extent */ > dprintk("%s Get %p (%i)\n", __func__, be, > atomic_read(&be->be_refcnt.refcount)); ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order 2010-05-12 6:46 ` Benny Halevy @ 2010-05-12 20:28 ` J. Bruce Fields 2010-05-17 13:53 ` J. Bruce Fields 0 siblings, 1 reply; 14+ messages in thread From: J. Bruce Fields @ 2010-05-12 20:28 UTC (permalink / raw) To: Benny Halevy; +Cc: Zhang Jingwang, linux-nfs, iisaman On Wed, May 12, 2010 at 09:46:43AM +0300, Benny Halevy wrote: > On May. 10, 2010, 6:36 +0300, Zhang Jingwang <zhangjingwang-U4AKAne5IzAR5TUyvShJeg@public.gmane.org> wrote: > > Optimize for sequencial write. Layout infos and tags are organized by > > file offset. When appending data to a file whole list will be examined, > > which introduce notable performance decrease. > > Looks good to me. > > Fred, can you please double check? I don't know if Fred's still up for reviewing block stuff? I've been trying to keep up with at least some minimal testing, but not as well as I'd like. The one thing I've noticed is that the connectathon general test has started failing right at the start with an IO error. The last good version I tested was b5c09c21, which was based on 33-rc6. The earliest bad version I tested was 419312ada, based on 34-rc2. A quick look at network traces from the two traces didn't turn up anything obvious. I haven't had the chance yet to look closer. --b. > > Benny > > P.S.: Zhang, please note Fred's new email address > > > > > Signed-off-by: Zhang Jingwang <zhangjingwang-U4AKAne5IzAR5TUyvShJeg@public.gmane.org> > > --- > > fs/nfs/blocklayout/extents.c | 126 +++++++++++++++++++++--------------------- > > 1 files changed, 64 insertions(+), 62 deletions(-) > > > > diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c > > index 3c311f2..514f2cc 100644 > > --- a/fs/nfs/blocklayout/extents.c > > +++ b/fs/nfs/blocklayout/extents.c > > @@ -66,8 +66,8 @@ static int32_t _find_entry(struct my_tree_t *tree, u64 s) > > struct pnfs_inval_tracking *pos; > > > > dprintk("%s(%llu) enter\n", __func__, s); > > - list_for_each_entry(pos, &tree->mtt_stub, it_link) { > > - if (pos->it_sector < s) > > + list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { > > + if (pos->it_sector > s) > > continue; > > else if (pos->it_sector == s) > > return pos->it_tags & INTERNAL_MASK; > > @@ -102,8 +102,8 @@ static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag, > > struct pnfs_inval_tracking *pos; > > > > dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage); > > - list_for_each_entry(pos, &tree->mtt_stub, it_link) { > > - if (pos->it_sector < s) > > + list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { > > + if (pos->it_sector > s) > > continue; > > else if (pos->it_sector == s) { > > found = 1; > > @@ -125,7 +125,7 @@ static int _add_entry(struct my_tree_t *tree, u64 s, int32_t tag, > > } > > new->it_sector = s; > > new->it_tags = (1 << tag); > > - list_add_tail(&new->it_link, &pos->it_link); > > + list_add(&new->it_link, &pos->it_link); > > return 1; > > } > > } > > @@ -230,14 +230,14 @@ _range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag) > > u64 expect = 0; > > > > dprintk("%s(%llu, %llu, %i) enter\n", __func__, start, end, tag); > > - list_for_each_entry(pos, &tree->mtt_stub, it_link) { > > - if (pos->it_sector < start) > > + list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) { > > + if (pos->it_sector >= end) > > continue; > > if (!expect) { > > - if ((pos->it_sector == start) && > > + if ((pos->it_sector == end - tree->mtt_step_size) && > > (pos->it_tags & (1 << tag))) { > > - expect = start + tree->mtt_step_size; > > - if (expect == end) > > + expect = pos->it_sector - tree->mtt_step_size; > > + if (expect < start) > > return 1; > > continue; > > } else { > > @@ -246,8 +246,8 @@ _range_has_tag(struct my_tree_t *tree, u64 start, u64 end, int32_t tag) > > } > > if (pos->it_sector != expect || !(pos->it_tags & (1 << tag))) > > return 0; > > - expect += tree->mtt_step_size; > > - if (expect == end) > > + expect -= tree->mtt_step_size; > > + if (expect < start) > > return 1; > > } > > return 0; > > @@ -594,65 +594,67 @@ add_and_merge_extent(struct pnfs_block_layout *bl, > > /* Scan for proper place to insert, extending new to the left > > * as much as possible. > > */ > > - list_for_each_entry_safe(be, tmp, list, be_node) { > > - if (new->be_f_offset < be->be_f_offset) > > + list_for_each_entry_safe_reverse(be, tmp, list, be_node) { > > + if (new->be_f_offset >= be->be_f_offset + be->be_length) > > break; > > - if (end <= be->be_f_offset + be->be_length) { > > - /* new is a subset of existing be*/ > > + if (new->be_f_offset >= be->be_f_offset) { > > + if (end <= be->be_f_offset + be->be_length) { > > + /* new is a subset of existing be*/ > > + if (extents_consistent(be, new)) { > > + dprintk("%s: new is subset, ignoring\n", > > + __func__); > > + put_extent(new); > > + return 0; > > + } else { > > + goto out_err; > > + } > > + } else { > > + /* |<-- be -->| > > + * |<-- new -->| */ > > + if (extents_consistent(be, new)) { > > + /* extend new to fully replace be */ > > + new->be_length += new->be_f_offset - > > + be->be_f_offset; > > + new->be_f_offset = be->be_f_offset; > > + new->be_v_offset = be->be_v_offset; > > + dprintk("%s: removing %p\n", __func__, be); > > + list_del(&be->be_node); > > + put_extent(be); > > + } else { > > + goto out_err; > > + } > > + } > > + } else if (end >= be->be_f_offset + be->be_length) { > > + /* new extent overlap existing be */ > > if (extents_consistent(be, new)) { > > - dprintk("%s: new is subset, ignoring\n", > > - __func__); > > - put_extent(new); > > - return 0; > > - } else > > + /* extend new to fully replace be */ > > + dprintk("%s: removing %p\n", __func__, be); > > + list_del(&be->be_node); > > + put_extent(be); > > + } else { > > goto out_err; > > - } else if (new->be_f_offset <= > > - be->be_f_offset + be->be_length) { > > - /* new overlaps or abuts existing be */ > > - if (extents_consistent(be, new)) { > > + } > > + } else if (end > be->be_f_offset) { > > + /* |<-- be -->| > > + *|<-- new -->| */ > > + if (extents_consistent(new, be)) { > > /* extend new to fully replace be */ > > - new->be_length += new->be_f_offset - > > - be->be_f_offset; > > - new->be_f_offset = be->be_f_offset; > > - new->be_v_offset = be->be_v_offset; > > + new->be_length += be->be_f_offset + be->be_length - > > + new->be_f_offset - new->be_length; > > dprintk("%s: removing %p\n", __func__, be); > > list_del(&be->be_node); > > put_extent(be); > > - } else if (new->be_f_offset != > > - be->be_f_offset + be->be_length) > > + } else { > > goto out_err; > > + } > > } > > } > > /* Note that if we never hit the above break, be will not point to a > > * valid extent. However, in that case &be->be_node==list. > > */ > > - list_add_tail(&new->be_node, &be->be_node); > > + list_add(&new->be_node, &be->be_node); > > dprintk("%s: inserting new\n", __func__); > > print_elist(list); > > - /* Scan forward for overlaps. If we find any, extend new and > > - * remove the overlapped extent. > > - */ > > - be = list_prepare_entry(new, list, be_node); > > - list_for_each_entry_safe_continue(be, tmp, list, be_node) { > > - if (end < be->be_f_offset) > > - break; > > - /* new overlaps or abuts existing be */ > > - if (extents_consistent(be, new)) { > > - if (end < be->be_f_offset + be->be_length) { > > - /* extend new to fully cover be */ > > - end = be->be_f_offset + be->be_length; > > - new->be_length = end - new->be_f_offset; > > - } > > - dprintk("%s: removing %p\n", __func__, be); > > - list_del(&be->be_node); > > - put_extent(be); > > - } else if (end != be->be_f_offset) { > > - list_del(&new->be_node); > > - goto out_err; > > - } > > - } > > - dprintk("%s: after merging\n", __func__); > > - print_elist(list); > > /* STUB - The per-list consistency checks have all been done, > > * should now check cross-list consistency. > > */ > > @@ -685,10 +687,10 @@ find_get_extent(struct pnfs_block_layout *bl, sector_t isect, > > if (ret && > > (!cow_read || ret->be_state != PNFS_BLOCK_INVALID_DATA)) > > break; > > - list_for_each_entry(be, &bl->bl_extents[i], be_node) { > > - if (isect < be->be_f_offset) > > + list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) { > > + if (isect >= be->be_f_offset + be->be_length) > > break; > > - if (isect < be->be_f_offset + be->be_length) { > > + if (isect >= be->be_f_offset) { > > /* We have found an extent */ > > dprintk("%s Get %p (%i)\n", __func__, be, > > atomic_read(&be->be_refcnt.refcount)); > > @@ -721,10 +723,10 @@ find_get_extent_locked(struct pnfs_block_layout *bl, sector_t isect) > > for (i = 0; i < EXTENT_LISTS; i++) { > > if (ret) > > break; > > - list_for_each_entry(be, &bl->bl_extents[i], be_node) { > > - if (isect < be->be_f_offset) > > + list_for_each_entry_reverse(be, &bl->bl_extents[i], be_node) { > > + if (isect >= be->be_f_offset + be->be_length) > > break; > > - if (isect < be->be_f_offset + be->be_length) { > > + if (isect >= be->be_f_offset) { > > /* We have found an extent */ > > dprintk("%s Get %p (%i)\n", __func__, be, > > atomic_read(&be->be_refcnt.refcount)); > > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order 2010-05-12 20:28 ` J. Bruce Fields @ 2010-05-17 13:53 ` J. Bruce Fields 2010-05-17 14:24 ` Boaz Harrosh 0 siblings, 1 reply; 14+ messages in thread From: J. Bruce Fields @ 2010-05-17 13:53 UTC (permalink / raw) To: Benny Halevy; +Cc: Zhang Jingwang, linux-nfs, iisaman On Wed, May 12, 2010 at 04:28:12PM -0400, bfields wrote: > On Wed, May 12, 2010 at 09:46:43AM +0300, Benny Halevy wrote: > > On May. 10, 2010, 6:36 +0300, Zhang Jingwang <zhangjingwang-U4AKAne5IzAR5TUyvShJeg@public.gmane.org> wrote: > > > Optimize for sequencial write. Layout infos and tags are organized by > > > file offset. When appending data to a file whole list will be examined, > > > which introduce notable performance decrease. > > > > Looks good to me. > > > > Fred, can you please double check? > > I don't know if Fred's still up for reviewing block stuff? > > I've been trying to keep up with at least some minimal testing, but not > as well as I'd like. > > The one thing I've noticed is that the connectathon general test has > started failing right at the start with an IO error. The last good > version I tested was b5c09c21, which was based on 33-rc6. The earliest > bad version I tested was 419312ada, based on 34-rc2. A quick look at > network traces from the two traces didn't turn up anything obvious. I > haven't had the chance yet to look closer. As of the latest (6666f47d), in my tests the client is falling back on IO to the MDS and doing no block IO at all. b5c09c21 still works, so the problem isn't due to a change in the server I'm testing against. I haven't investigated any more closely. --b. ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order 2010-05-17 13:53 ` J. Bruce Fields @ 2010-05-17 14:24 ` Boaz Harrosh 2010-05-17 14:53 ` J. Bruce Fields 0 siblings, 1 reply; 14+ messages in thread From: Boaz Harrosh @ 2010-05-17 14:24 UTC (permalink / raw) To: J. Bruce Fields; +Cc: Benny Halevy, Zhang Jingwang, linux-nfs, iisaman On 05/17/2010 04:53 PM, J. Bruce Fields wrote: > On Wed, May 12, 2010 at 04:28:12PM -0400, bfields wrote: >> On Wed, May 12, 2010 at 09:46:43AM +0300, Benny Halevy wrote: >>> On May. 10, 2010, 6:36 +0300, Zhang Jingwang <zhangjingwang-U4AKAne5IzAR5TUyvShJeg@public.gmane.org> wrote: >>>> Optimize for sequencial write. Layout infos and tags are organized by >>>> file offset. When appending data to a file whole list will be examined, >>>> which introduce notable performance decrease. >>> >>> Looks good to me. >>> >>> Fred, can you please double check? >> >> I don't know if Fred's still up for reviewing block stuff? >> >> I've been trying to keep up with at least some minimal testing, but not >> as well as I'd like. >> >> The one thing I've noticed is that the connectathon general test has >> started failing right at the start with an IO error. The last good >> version I tested was b5c09c21, which was based on 33-rc6. The earliest >> bad version I tested was 419312ada, based on 34-rc2. A quick look at >> network traces from the two traces didn't turn up anything obvious. I >> haven't had the chance yet to look closer. > > As of the latest (6666f47d), in my tests the client is falling back on > IO to the MDS and doing no block IO at all. b5c09c21 still works, so > the problem isn't due to a change in the server I'm testing against. I > haven't investigated any more closely. > You might be hitting the .commit bug, no? Still no fix. I'm using a work around for objects. I'm not sure how it affects blocks. I think you should see that the very first IO goes through layout driver then the IO is redone through MDS, for each node. Even though write/read returned success because commit returns NOT_ATTEMPTED. But I might be totally off. Boaz > --b. > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order 2010-05-17 14:24 ` Boaz Harrosh @ 2010-05-17 14:53 ` J. Bruce Fields 2010-05-17 16:53 ` J. Bruce Fields 0 siblings, 1 reply; 14+ messages in thread From: J. Bruce Fields @ 2010-05-17 14:53 UTC (permalink / raw) To: Boaz Harrosh; +Cc: Benny Halevy, Zhang Jingwang, linux-nfs, iisaman On Mon, May 17, 2010 at 05:24:39PM +0300, Boaz Harrosh wrote: > On 05/17/2010 04:53 PM, J. Bruce Fields wrote: > > On Wed, May 12, 2010 at 04:28:12PM -0400, bfields wrote: > >> On Wed, May 12, 2010 at 09:46:43AM +0300, Benny Halevy wrote: > >>> On May. 10, 2010, 6:36 +0300, Zhang Jingwang <zhangjingwang-U4AKAne5IzAR5TUyvShJeg@public.gmane.org> wrote: > >>>> Optimize for sequencial write. Layout infos and tags are organized by > >>>> file offset. When appending data to a file whole list will be examined, > >>>> which introduce notable performance decrease. > >>> > >>> Looks good to me. > >>> > >>> Fred, can you please double check? > >> > >> I don't know if Fred's still up for reviewing block stuff? > >> > >> I've been trying to keep up with at least some minimal testing, but not > >> as well as I'd like. > >> > >> The one thing I've noticed is that the connectathon general test has > >> started failing right at the start with an IO error. The last good > >> version I tested was b5c09c21, which was based on 33-rc6. The earliest > >> bad version I tested was 419312ada, based on 34-rc2. A quick look at > >> network traces from the two traces didn't turn up anything obvious. I > >> haven't had the chance yet to look closer. > > > > As of the latest (6666f47d), in my tests the client is falling back on > > IO to the MDS and doing no block IO at all. b5c09c21 still works, so > > the problem isn't due to a change in the server I'm testing against. I > > haven't investigated any more closely. > > > > You might be hitting the .commit bug, no? Still no fix. I'm using a work > around for objects. I'm not sure how it affects blocks. I think you should > see that the very first IO goes through layout driver then the IO is redone > through MDS, for each node. Even though write/read returned success because > commit returns NOT_ATTEMPTED. But I might be totally off. I don't believe it's even attempting a GETLAYOUT. I'll take a look at the network....--b. ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order 2010-05-17 14:53 ` J. Bruce Fields @ 2010-05-17 16:53 ` J. Bruce Fields 2010-05-17 17:22 ` Zhang Jingwang 0 siblings, 1 reply; 14+ messages in thread From: J. Bruce Fields @ 2010-05-17 16:53 UTC (permalink / raw) To: Boaz Harrosh; +Cc: Benny Halevy, Zhang Jingwang, linux-nfs, iisaman On Mon, May 17, 2010 at 10:53:11AM -0400, J. Bruce Fields wrote: > On Mon, May 17, 2010 at 05:24:39PM +0300, Boaz Harrosh wrote: > > On 05/17/2010 04:53 PM, J. Bruce Fields wrote: > > > On Wed, May 12, 2010 at 04:28:12PM -0400, bfields wrote: > > >> On Wed, May 12, 2010 at 09:46:43AM +0300, Benny Halevy wrote: > > >>> On May. 10, 2010, 6:36 +0300, Zhang Jingwang <zhangjingwang-U4AKAne5IzAR5TUyvShJeg@public.gmane.org> wrote: > > >>>> Optimize for sequencial write. Layout infos and tags are organized by > > >>>> file offset. When appending data to a file whole list will be examined, > > >>>> which introduce notable performance decrease. > > >>> > > >>> Looks good to me. > > >>> > > >>> Fred, can you please double check? > > >> > > >> I don't know if Fred's still up for reviewing block stuff? > > >> > > >> I've been trying to keep up with at least some minimal testing, but not > > >> as well as I'd like. > > >> > > >> The one thing I've noticed is that the connectathon general test has > > >> started failing right at the start with an IO error. The last good > > >> version I tested was b5c09c21, which was based on 33-rc6. The earliest > > >> bad version I tested was 419312ada, based on 34-rc2. A quick look at > > >> network traces from the two traces didn't turn up anything obvious. I > > >> haven't had the chance yet to look closer. > > > > > > As of the latest (6666f47d), in my tests the client is falling back on > > > IO to the MDS and doing no block IO at all. b5c09c21 still works, so > > > the problem isn't due to a change in the server I'm testing against. I > > > haven't investigated any more closely. > > > > > > > You might be hitting the .commit bug, no? Still no fix. I'm using a work > > around for objects. I'm not sure how it affects blocks. I think you should > > see that the very first IO goes through layout driver then the IO is redone > > through MDS, for each node. Even though write/read returned success because > > commit returns NOT_ATTEMPTED. But I might be totally off. > > I don't believe it's even attempting a GETLAYOUT. > > I'll take a look at the network....--b. Everything on the network looks fine, the server's doing the right stuff, the client just never asks for a layout. In fact, blk_initialize_mountpont is failing on the very first check: if (server->pnfs_blksize == 0) { dprintk("%s Server did not return blksize\n", __func__); ... After rearranging the caller: @@ -880,9 +880,9 @@ static void nfs4_init_pnfs(struct nfs_server *server, struct nfs_fh *mntfh, stru if (nfs4_has_session(clp) && (clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS)) { - set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype); pnfs_set_ds_iosize(server); server->pnfs_blksize = fsinfo->blksize; + set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype); } #endif /* CONFIG_NFS_V4_1 */ } it just fails a little later (see below). I haven't tried to go any farther yet. (But: why are the layout drivers using this odd pnfs_client_operations indirection to call back to the common pnfs code? As far as I can tell there's only one definition of the pnfs_client_operations, so we should just remove that structure and call pnfs_getdevicelist, etc., by name.) --b. May 17 16:36:14 pearlet4 kernel: BUG: unable to handle kernel NULL pointer dereference at (null) May 17 16:36:14 pearlet4 kernel: IP: [<ffffffff8122bc36>] _nfs4_pnfs_getdevicelist+0x26/0x110 May 17 16:36:14 pearlet4 kernel: PGD 6e11067 PUD 6e12067 PMD 0 May 17 16:36:14 pearlet4 kernel: Oops: 0000 [#1] PREEMPT May 17 16:36:14 pearlet4 kernel: last sysfs file: /sys/kernel/uevent_seqnum May 17 16:36:14 pearlet4 kernel: CPU 0 May 17 16:36:14 pearlet4 kernel: Modules linked in: iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi May 17 16:36:14 pearlet4 kernel: May 17 16:36:14 pearlet4 kernel: Pid: 2794, comm: mount.nfs4 Not tainted 2.6.34-rc6-pnfs-00314-ga35e9c3 #136 / May 17 16:36:14 pearlet4 kernel: RIP: 0010:[<ffffffff8122bc36>] [<ffffffff8122bc36>] _nfs4_pnfs_getdevicelist+0x26/0x110 May 17 16:36:14 pearlet4 kernel: RSP: 0018:ffff880004e99538 EFLAGS: 00010246 May 17 16:36:14 pearlet4 kernel: RAX: 0000000000000000 RBX: ffff880005fff378 RCX: ffff880004e99548 May 17 16:36:14 pearlet4 kernel: RDX: ffff880004ca24c8 RSI: ffff880004e99a28 RDI: ffff880005fff378 May 17 16:36:14 pearlet4 kernel: RBP: ffff880004e995c8 R08: 0000000000000000 R09: ffff880004ca24c8 May 17 16:36:14 pearlet4 kernel: R10: 0000000000000000 R11: 0000000000000001 R12: ffff880004ca24c8 May 17 16:36:14 pearlet4 kernel: R13: ffff880004ca24c8 R14: ffff880004e995d8 R15: ffff880004e99a28 May 17 16:36:14 pearlet4 kernel: FS: 00007fed29c476f0(0000) GS:ffffffff81e1c000(0000) knlGS:0000000000000000 May 17 16:36:14 pearlet4 kernel: CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b May 17 16:36:14 pearlet4 kernel: CR2: 0000000000000000 CR3: 0000000004e77000 CR4: 00000000000006f0 May 17 16:36:14 pearlet4 kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 May 17 16:36:14 pearlet4 kernel: DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 May 17 16:36:14 pearlet4 kernel: Process mount.nfs4 (pid: 2794, threadinfo ffff880004e98000, task ffff880004e78040) May 17 16:36:14 pearlet4 kernel: Stack: May 17 16:36:14 pearlet4 kernel: ffff880004e995c8 ffff880004e995c8 ffff880004e99588 ffffffff8190e5dc May 17 16:36:14 pearlet4 kernel: <0> ffff880004e98000 ffff880004e995c8 ffff880004ca24c0 ffff880007800a80 May 17 16:36:14 pearlet4 kernel: <0> 0000000000000000 ffff880007800a80 ffff880004ca24c0 ffffffff810d46c6 May 17 16:36:14 pearlet4 kernel: Call Trace: May 17 16:36:14 pearlet4 kernel: [<ffffffff8190e5dc>] ? klist_next+0x8c/0xf0 May 17 16:36:14 pearlet4 kernel: [<ffffffff810d46c6>] ? poison_obj+0x36/0x50 May 17 16:36:14 pearlet4 kernel: [<ffffffff810d4a18>] ? cache_alloc_debugcheck_after+0xe8/0x1f0 May 17 16:36:14 pearlet4 kernel: [<ffffffff8122c21e>] nfs4_pnfs_getdevicelist+0x4e/0xa0 May 17 16:36:14 pearlet4 kernel: [<ffffffff810d677d>] ? kmem_cache_alloc_notrace+0xfd/0x1a0 May 17 16:36:14 pearlet4 kernel: [<ffffffff81250e81>] bl_initialize_mountpoint+0x161/0x6a0 May 17 16:36:14 pearlet4 kernel: [<ffffffff812497c9>] set_pnfs_layoutdriver+0x89/0x120 May 17 16:36:14 pearlet4 kernel: [<ffffffff8120c71f>] nfs_probe_fsinfo+0x54f/0x5f0 May 17 16:36:14 pearlet4 kernel: [<ffffffff8120d789>] nfs_clone_server+0x129/0x270 May 17 16:36:14 pearlet4 kernel: [<ffffffff810d46c6>] ? poison_obj+0x36/0x50 May 17 16:36:14 pearlet4 kernel: [<ffffffff810d4a18>] ? cache_alloc_debugcheck_after+0xe8/0x1f0 May 17 16:36:14 pearlet4 kernel: [<ffffffff810f6db1>] ? alloc_vfsmnt+0xa1/0x180 May 17 16:36:14 pearlet4 kernel: [<ffffffff810d627d>] ? __kmalloc_track_caller+0x16d/0x2b0 May 17 16:36:14 pearlet4 kernel: [<ffffffff810f6db1>] ? alloc_vfsmnt+0xa1/0x180 May 17 16:36:14 pearlet4 kernel: [<ffffffff81219fa1>] nfs4_xdev_get_sb+0x61/0x340 May 17 16:36:14 pearlet4 kernel: [<ffffffff810dd15a>] vfs_kern_mount+0x8a/0x1e0 May 17 16:36:14 pearlet4 kernel: [<ffffffff81224f23>] nfs_follow_mountpoint+0x3b3/0x4b0 May 17 16:36:14 pearlet4 kernel: [<ffffffff810e73b7>] link_path_walk+0xb67/0xd20 May 17 16:36:14 pearlet4 kernel: [<ffffffff810e76b0>] path_walk+0x60/0xd0 May 17 16:36:14 pearlet4 kernel: [<ffffffff810e778d>] vfs_path_lookup+0x6d/0x90 May 17 16:36:14 pearlet4 kernel: [<ffffffff8121988d>] nfs_follow_remote_path+0x6d/0x170 May 17 16:36:14 pearlet4 kernel: [<ffffffff810637fd>] ? trace_hardirqs_on_caller+0x14d/0x190 May 17 16:36:14 pearlet4 kernel: [<ffffffff812197fb>] ? nfs_do_root_mount+0x8b/0xb0 May 17 16:36:14 pearlet4 kernel: [<ffffffff81219abf>] nfs4_try_mount+0x6f/0xd0 May 17 16:36:14 pearlet4 kernel: [<ffffffff81219bc2>] nfs4_get_sb+0xa2/0x360 May 17 16:36:14 pearlet4 kernel: [<ffffffff810dd15a>] vfs_kern_mount+0x8a/0x1e0 May 17 16:36:14 pearlet4 kernel: [<ffffffff810dd322>] do_kern_mount+0x52/0x130 May 17 16:36:14 pearlet4 kernel: [<ffffffff81926cda>] ? _lock_kernel+0x6a/0x16a May 17 16:36:14 pearlet4 kernel: [<ffffffff810f788e>] do_mount+0x2de/0x850 May 17 16:36:14 pearlet4 kernel: [<ffffffff810f585a>] ? copy_mount_options+0xea/0x190 May 17 16:36:14 pearlet4 kernel: [<ffffffff810f7e98>] sys_mount+0x98/0xf0 May 17 16:36:14 pearlet4 kernel: [<ffffffff81002518>] system_call_fastpath+0x16/0x1b May 17 16:36:14 pearlet4 kernel: Code: 00 00 00 00 00 55 48 89 e5 53 48 81 ec 88 00 00 00 0f 1f 44 00 00 48 8b 87 70 02 00 00 f6 05 75 38 7e 01 10 48 8d 4d 80 48 89 fb <8b> 00 48 89 55 80 48 8d 55 d0 48 c7 45 d8 00 00 00 00 48 c7 45 May 17 16:36:14 pearlet4 kernel: RIP [<ffffffff8122bc36>] _nfs4_pnfs_getdevicelist+0x26/0x110 May 17 16:36:14 pearlet4 kernel: RSP <ffff880004e99538> May 17 16:36:14 pearlet4 kernel: CR2: 0000000000000000 May 17 16:36:14 pearlet4 kernel: ---[ end trace 3956532521eb7ba1 ]--- May 17 16:36:14 pearlet4 kernel: mount.nfs4 used greatest stack depth: 2104 bytes left May 17 16:36:21 pearlet4 kernel: eth0: no IPv6 routers present May 17 16:40:32 pearlet4 ntpd[2255]: synchronized to 91.189.94.4, stratum 2 May 17 16:40:32 pearlet4 ntpd[2255]: kernel time sync status change 2001 ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order 2010-05-17 16:53 ` J. Bruce Fields @ 2010-05-17 17:22 ` Zhang Jingwang [not found] ` <AANLkTilUpAHrtHH8pauvYrAuD3rWgj7aDmrTOzrmU-h5-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 0 siblings, 1 reply; 14+ messages in thread From: Zhang Jingwang @ 2010-05-17 17:22 UTC (permalink / raw) To: J. Bruce Fields Cc: Boaz Harrosh, Benny Halevy, Zhang Jingwang, linux-nfs, iisaman I've sent two patches to solve this problem, you can try them. [PATCH] pnfs: set pnfs_curr_ld before calling initialize_mountpoint [PATCH] pnfs: set pnfs_blksize before calling set_pnfs_layoutdriver 2010/5/18 J. Bruce Fields <bfields@fieldses.org>: > On Mon, May 17, 2010 at 10:53:11AM -0400, J. Bruce Fields wrote: >> On Mon, May 17, 2010 at 05:24:39PM +0300, Boaz Harrosh wrote: >> > On 05/17/2010 04:53 PM, J. Bruce Fields wrote: >> > > On Wed, May 12, 2010 at 04:28:12PM -0400, bfields wrote: >> > >> On Wed, May 12, 2010 at 09:46:43AM +0300, Benny Halevy wrote: >> > >>> On May. 10, 2010, 6:36 +0300, Zhang Jingwang <zhangjingwang@nr= chpc.ac.cn> wrote: >> > >>>> Optimize for sequencial write. Layout infos and tags are orga= nized by >> > >>>> file offset. When appending data to a file whole list will be= examined, >> > >>>> which introduce notable performance decrease. >> > >>> >> > >>> Looks good to me. >> > >>> >> > >>> Fred, can you please double check? >> > >> >> > >> I don't know if Fred's still up for reviewing block stuff? >> > >> >> > >> I've been trying to keep up with at least some minimal testing,= but not >> > >> as well as I'd like. >> > >> >> > >> The one thing I've noticed is that the connectathon general tes= t has >> > >> started failing right at the start with an IO error. =A0The las= t good >> > >> version I tested was b5c09c21, which was based on 33-rc6. =A0Th= e earliest >> > >> bad version I tested was 419312ada, based on 34-rc2. =A0A quick= look at >> > >> network traces from the two traces didn't turn up anything obvi= ous. =A0I >> > >> haven't had the chance yet to look closer. >> > > >> > > As of the latest (6666f47d), in my tests the client is falling b= ack on >> > > IO to the MDS and doing no block IO at all. =A0b5c09c21 still wo= rks, so >> > > the problem isn't due to a change in the server I'm testing agai= nst. =A0I >> > > haven't investigated any more closely. >> > > >> > >> > You might be hitting the .commit bug, no? Still no fix. I'm using = a work >> > around for objects. I'm not sure how it affects blocks. I think yo= u should >> > see that the very first IO goes through layout driver then the IO = is redone >> > through MDS, for each node. Even though write/read returned succes= s because >> > commit returns NOT_ATTEMPTED. But I might be totally off. >> >> I don't believe it's even attempting a GETLAYOUT. >> >> I'll take a look at the network....--b. > > Everything on the network looks fine, the server's doing the right > stuff, the client just never asks for a layout. > > In fact, blk_initialize_mountpont is failing on the very first check: > > =A0 =A0 =A0 =A0if (server->pnfs_blksize =3D=3D 0) { > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0dprintk("%s Server did not return blks= ize\n", __func__); > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0... > > After rearranging the caller: > > @@ -880,9 +880,9 @@ static void nfs4_init_pnfs(struct nfs_server *ser= ver, struct nfs_fh *mntfh, stru > > =A0 =A0 =A0 =A0if (nfs4_has_session(clp) && > =A0 =A0 =A0 =A0 =A0 =A0(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PN= =46S_MDS)) { > - =A0 =A0 =A0 =A0 =A0 =A0 =A0 set_pnfs_layoutdriver(server, mntfh, fs= info->layouttype); > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0pnfs_set_ds_iosize(server); > =A0 =A0 =A0 =A0 =A0 =A0 =A0 =A0server->pnfs_blksize =3D fsinfo->blksi= ze; > + =A0 =A0 =A0 =A0 =A0 =A0 =A0 set_pnfs_layoutdriver(server, mntfh, fs= info->layouttype); > =A0 =A0 =A0 =A0} > =A0#endif /* CONFIG_NFS_V4_1 */ > =A0} > > it just fails a little later (see below). =A0I haven't tried to go an= y > farther yet. > > (But: why are the layout drivers using this odd pnfs_client_operation= s > indirection to call back to the common pnfs code? =A0As far as I can = tell > there's only one definition of the pnfs_client_operations, so we shou= ld > just remove that structure and call pnfs_getdevicelist, etc., by name= =2E) > > --b. > > May 17 16:36:14 pearlet4 kernel: BUG: unable to handle kernel NULL po= inter dereference at (null) > May 17 16:36:14 pearlet4 kernel: IP: [<ffffffff8122bc36>] _nfs4_pnfs_= getdevicelist+0x26/0x110 > May 17 16:36:14 pearlet4 kernel: PGD 6e11067 PUD 6e12067 PMD 0 > May 17 16:36:14 pearlet4 kernel: Oops: 0000 [#1] PREEMPT > May 17 16:36:14 pearlet4 kernel: last sysfs file: /sys/kernel/uevent_= seqnum > May 17 16:36:14 pearlet4 kernel: CPU 0 > May 17 16:36:14 pearlet4 kernel: Modules linked in: iscsi_tcp libiscs= i_tcp libiscsi scsi_transport_iscsi > May 17 16:36:14 pearlet4 kernel: > May 17 16:36:14 pearlet4 kernel: Pid: 2794, comm: mount.nfs4 Not tain= ted 2.6.34-rc6-pnfs-00314-ga35e9c3 #136 / > May 17 16:36:14 pearlet4 kernel: RIP: 0010:[<ffffffff8122bc36>] =A0[<= ffffffff8122bc36>] _nfs4_pnfs_getdevicelist+0x26/0x110 > May 17 16:36:14 pearlet4 kernel: RSP: 0018:ffff880004e99538 =A0EFLAGS= : 00010246 > May 17 16:36:14 pearlet4 kernel: RAX: 0000000000000000 RBX: ffff88000= 5fff378 RCX: ffff880004e99548 > May 17 16:36:14 pearlet4 kernel: RDX: ffff880004ca24c8 RSI: ffff88000= 4e99a28 RDI: ffff880005fff378 > May 17 16:36:14 pearlet4 kernel: RBP: ffff880004e995c8 R08: 000000000= 0000000 R09: ffff880004ca24c8 > May 17 16:36:14 pearlet4 kernel: R10: 0000000000000000 R11: 000000000= 0000001 R12: ffff880004ca24c8 > May 17 16:36:14 pearlet4 kernel: R13: ffff880004ca24c8 R14: ffff88000= 4e995d8 R15: ffff880004e99a28 > May 17 16:36:14 pearlet4 kernel: FS: =A000007fed29c476f0(0000) GS:fff= fffff81e1c000(0000) knlGS:0000000000000000 > May 17 16:36:14 pearlet4 kernel: CS: =A00010 DS: 0000 ES: 0000 CR0: 0= 00000008005003b > May 17 16:36:14 pearlet4 kernel: CR2: 0000000000000000 CR3: 000000000= 4e77000 CR4: 00000000000006f0 > May 17 16:36:14 pearlet4 kernel: DR0: 0000000000000000 DR1: 000000000= 0000000 DR2: 0000000000000000 > May 17 16:36:14 pearlet4 kernel: DR3: 0000000000000000 DR6: 00000000f= fff0ff0 DR7: 0000000000000400 > May 17 16:36:14 pearlet4 kernel: Process mount.nfs4 (pid: 2794, threa= dinfo ffff880004e98000, task ffff880004e78040) > May 17 16:36:14 pearlet4 kernel: Stack: > May 17 16:36:14 pearlet4 kernel: ffff880004e995c8 ffff880004e995c8 ff= ff880004e99588 ffffffff8190e5dc > May 17 16:36:14 pearlet4 kernel: <0> ffff880004e98000 ffff880004e995c= 8 ffff880004ca24c0 ffff880007800a80 > May 17 16:36:14 pearlet4 kernel: <0> 0000000000000000 ffff880007800a8= 0 ffff880004ca24c0 ffffffff810d46c6 > May 17 16:36:14 pearlet4 kernel: Call Trace: > May 17 16:36:14 pearlet4 kernel: [<ffffffff8190e5dc>] ? klist_next+0x= 8c/0xf0 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810d46c6>] ? poison_obj+0x= 36/0x50 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810d4a18>] ? cache_alloc_d= ebugcheck_after+0xe8/0x1f0 > May 17 16:36:14 pearlet4 kernel: [<ffffffff8122c21e>] nfs4_pnfs_getde= vicelist+0x4e/0xa0 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810d677d>] ? kmem_cache_al= loc_notrace+0xfd/0x1a0 > May 17 16:36:14 pearlet4 kernel: [<ffffffff81250e81>] bl_initialize_m= ountpoint+0x161/0x6a0 > May 17 16:36:14 pearlet4 kernel: [<ffffffff812497c9>] set_pnfs_layout= driver+0x89/0x120 > May 17 16:36:14 pearlet4 kernel: [<ffffffff8120c71f>] nfs_probe_fsinf= o+0x54f/0x5f0 > May 17 16:36:14 pearlet4 kernel: [<ffffffff8120d789>] nfs_clone_serve= r+0x129/0x270 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810d46c6>] ? poison_obj+0x= 36/0x50 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810d4a18>] ? cache_alloc_d= ebugcheck_after+0xe8/0x1f0 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810f6db1>] ? alloc_vfsmnt+= 0xa1/0x180 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810d627d>] ? __kmalloc_tra= ck_caller+0x16d/0x2b0 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810f6db1>] ? alloc_vfsmnt+= 0xa1/0x180 > May 17 16:36:14 pearlet4 kernel: [<ffffffff81219fa1>] nfs4_xdev_get_s= b+0x61/0x340 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810dd15a>] vfs_kern_mount+= 0x8a/0x1e0 > May 17 16:36:14 pearlet4 kernel: [<ffffffff81224f23>] nfs_follow_moun= tpoint+0x3b3/0x4b0 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810e73b7>] link_path_walk+= 0xb67/0xd20 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810e76b0>] path_walk+0x60/= 0xd0 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810e778d>] vfs_path_lookup= +0x6d/0x90 > May 17 16:36:14 pearlet4 kernel: [<ffffffff8121988d>] nfs_follow_remo= te_path+0x6d/0x170 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810637fd>] ? trace_hardirq= s_on_caller+0x14d/0x190 > May 17 16:36:14 pearlet4 kernel: [<ffffffff812197fb>] ? nfs_do_root_m= ount+0x8b/0xb0 > May 17 16:36:14 pearlet4 kernel: [<ffffffff81219abf>] nfs4_try_mount+= 0x6f/0xd0 > May 17 16:36:14 pearlet4 kernel: [<ffffffff81219bc2>] nfs4_get_sb+0xa= 2/0x360 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810dd15a>] vfs_kern_mount+= 0x8a/0x1e0 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810dd322>] do_kern_mount+0= x52/0x130 > May 17 16:36:14 pearlet4 kernel: [<ffffffff81926cda>] ? _lock_kernel+= 0x6a/0x16a > May 17 16:36:14 pearlet4 kernel: [<ffffffff810f788e>] do_mount+0x2de/= 0x850 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810f585a>] ? copy_mount_op= tions+0xea/0x190 > May 17 16:36:14 pearlet4 kernel: [<ffffffff810f7e98>] sys_mount+0x98/= 0xf0 > May 17 16:36:14 pearlet4 kernel: [<ffffffff81002518>] system_call_fas= tpath+0x16/0x1b > May 17 16:36:14 pearlet4 kernel: Code: 00 00 00 00 00 55 48 89 e5 53 = 48 81 ec 88 00 00 00 0f 1f 44 00 00 48 8b 87 70 02 00 00 f6 05 75 38 7e= 01 10 48 8d 4d 80 48 89 fb <8b> 00 48 89 55 80 48 8d 55 d0 48 c7 45 d8= 00 00 00 00 48 c7 45 > May 17 16:36:14 pearlet4 kernel: RIP =A0[<ffffffff8122bc36>] _nfs4_pn= fs_getdevicelist+0x26/0x110 > May 17 16:36:14 pearlet4 kernel: RSP <ffff880004e99538> > May 17 16:36:14 pearlet4 kernel: CR2: 0000000000000000 > May 17 16:36:14 pearlet4 kernel: ---[ end trace 3956532521eb7ba1 ]--- > May 17 16:36:14 pearlet4 kernel: mount.nfs4 used greatest stack depth= : 2104 bytes left > May 17 16:36:21 pearlet4 kernel: eth0: no IPv6 routers present > May 17 16:40:32 pearlet4 ntpd[2255]: synchronized to 91.189.94.4, str= atum 2 > May 17 16:40:32 pearlet4 ntpd[2255]: kernel time sync status change 2= 001 > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" = in > the body of a message to majordomo@vger.kernel.org > More majordomo info at =A0http://vger.kernel.org/majordomo-info.html > --=20 Zhang Jingwang National Research Centre for High Performance Computers Institute of Computing Technology, Chinese Academy of Sciences No. 6, South Kexueyuan Road, Haidian District Beijing, China ^ permalink raw reply [flat|nested] 14+ messages in thread
[parent not found: <AANLkTilUpAHrtHH8pauvYrAuD3rWgj7aDmrTOzrmU-h5-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order [not found] ` <AANLkTilUpAHrtHH8pauvYrAuD3rWgj7aDmrTOzrmU-h5-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2010-05-18 16:20 ` J. Bruce Fields 2010-05-19 4:56 ` Tao Guo 0 siblings, 1 reply; 14+ messages in thread From: J. Bruce Fields @ 2010-05-18 16:20 UTC (permalink / raw) To: Zhang Jingwang Cc: Boaz Harrosh, Benny Halevy, Zhang Jingwang, linux-nfs, iisaman On Tue, May 18, 2010 at 01:22:52AM +0800, Zhang Jingwang wrote: > I've sent two patches to solve this problem, you can try them. >=20 > [PATCH] pnfs: set pnfs_curr_ld before calling initialize_mountpoint > [PATCH] pnfs: set pnfs_blksize before calling set_pnfs_layoutdriver Thanks. With Benny's latest block all (97602fc6, which includes the tw= o patches above), I'm back to the previous behavior: >=20 > 2010/5/18 J. Bruce Fields <bfields@fieldses.org>: > > On Mon, May 17, 2010 at 10:53:11AM -0400, J. Bruce Fields wrote: > >> On Mon, May 17, 2010 at 05:24:39PM +0300, Boaz Harrosh wrote: > >> > On 05/17/2010 04:53 PM, J. Bruce Fields wrote: > >> > > On Wed, May 12, 2010 at 04:28:12PM -0400, bfields wrote: > >> > >> The one thing I've noticed is that the connectathon general t= est has > >> > >> started failing right at the start with an IO error. =C2=A0Th= e last good > >> > >> version I tested was b5c09c21, which was based on 33-rc6. =C2= =A0The earliest > >> > >> bad version I tested was 419312ada, based on 34-rc2. =C2=A0A = quick look at > >> > >> network traces from the two traces didn't turn up anything ob= vious. =C2=A0I > >> > >> haven't had the chance yet to look closer. So I still see the IO error at the start of the connectathon general tests. Also, I get the following warning--I don't know if it's new or not. --b. =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D [ INFO: possible circular locking dependency detected ] 2.6.34-pnfs-00322-g97602fc #141 ------------------------------------------------------- cp/2789 is trying to acquire lock: (&(&nfsi->lo_lock)->rlock){+.+...}, at: [<ffffffff8124dbee>] T.947+0x4= e/0x210 but task is already holding lock: (&sb->s_type->i_lock_key#11){+.+...}, at: [<ffffffff81223689>] nfs_upd= atepage+0x139/0x5a0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (&sb->s_type->i_lock_key#11){+.+...}: [<ffffffff81065913>] __lock_acquire+0x1293/0x1d30 [<ffffffff81066442>] lock_acquire+0x92/0x170 [<ffffffff81925d5b>] _raw_spin_lock+0x3b/0x50 [<ffffffff81244173>] nfs_inode_set_delegation+0x203/0x2c0 [<ffffffff81231b7a>] nfs4_opendata_to_nfs4_state+0x31a/0x3d0 [<ffffffff81231fb2>] nfs4_do_open+0x242/0x460 [<ffffffff81232a05>] nfs4_proc_create+0x85/0x220 [<ffffffff8120ec64>] nfs_create+0x74/0x120 [<ffffffff810e5d63>] vfs_create+0xb3/0x100 [<ffffffff810e656b>] do_last+0x59b/0x6c0 [<ffffffff810e88e2>] do_filp_open+0x212/0x690 [<ffffffff810d8059>] do_sys_open+0x69/0x140 [<ffffffff810d8170>] sys_open+0x20/0x30 [<ffffffff81002518>] system_call_fastpath+0x16/0x1b -> #1 (&(&clp->cl_lock)->rlock){+.+...}: [<ffffffff81065913>] __lock_acquire+0x1293/0x1d30 [<ffffffff81066442>] lock_acquire+0x92/0x170 [<ffffffff81925d5b>] _raw_spin_lock+0x3b/0x50 [<ffffffff8124b378>] pnfs_update_layout+0x2f8/0xaf0 [<ffffffff8124c7e4>] pnfs_file_write+0x64/0xc0 [<ffffffff810daab7>] vfs_write+0xb7/0x180 [<ffffffff810dac71>] sys_write+0x51/0x90 [<ffffffff81002518>] system_call_fastpath+0x16/0x1b -> #0 (&(&nfsi->lo_lock)->rlock){+.+...}: [<ffffffff81065dd2>] __lock_acquire+0x1752/0x1d30 [<ffffffff81066442>] lock_acquire+0x92/0x170 [<ffffffff81925d5b>] _raw_spin_lock+0x3b/0x50 [<ffffffff8124dbee>] T.947+0x4e/0x210 [<ffffffff8124ddfb>] _pnfs_do_flush+0x4b/0xf0 [<ffffffff8122364d>] nfs_updatepage+0xfd/0x5a0 [<ffffffff812126b5>] nfs_write_end+0x265/0x3e0 [<ffffffff810a3397>] generic_file_buffered_write+0x187/0x2a0 [<ffffffff810a5890>] __generic_file_aio_write+0x240/0x460 [<ffffffff810a5b17>] generic_file_aio_write+0x67/0xd0 [<ffffffff81213661>] nfs_file_write+0xb1/0x1f0 [<ffffffff810d9fca>] do_sync_write+0xda/0x120 [<ffffffff8124c802>] pnfs_file_write+0x82/0xc0 [<ffffffff810daab7>] vfs_write+0xb7/0x180 [<ffffffff810dac71>] sys_write+0x51/0x90 [<ffffffff81002518>] system_call_fastpath+0x16/0x1b other info that might help us debug this: 2 locks held by cp/2789: #0: (&sb->s_type->i_mutex_key#13){+.+.+.}, at: [<ffffffff810a5b04>] g= eneric_file_aio_write+0x54/0xd0 #1: (&sb->s_type->i_lock_key#11){+.+...}, at: [<ffffffff81223689>] nf= s_updatepage+0x139/0x5a0 stack backtrace: Pid: 2789, comm: cp Not tainted 2.6.34-pnfs-00322-g97602fc #141 Call Trace: [<ffffffff81064033>] print_circular_bug+0xf3/0x100 [<ffffffff81065dd2>] __lock_acquire+0x1752/0x1d30 [<ffffffff81066442>] lock_acquire+0x92/0x170 [<ffffffff8124dbee>] ? T.947+0x4e/0x210 [<ffffffff81929d59>] ? sub_preempt_count+0x9/0xa0 [<ffffffff81925d5b>] _raw_spin_lock+0x3b/0x50 [<ffffffff8124dbee>] ? T.947+0x4e/0x210 [<ffffffff8124dbee>] T.947+0x4e/0x210 [<ffffffff8124ddfb>] _pnfs_do_flush+0x4b/0xf0 [<ffffffff8122364d>] nfs_updatepage+0xfd/0x5a0 [<ffffffff812126b5>] nfs_write_end+0x265/0x3e0 [<ffffffff810a3397>] generic_file_buffered_write+0x187/0x2a0 [<ffffffff810a5890>] __generic_file_aio_write+0x240/0x460 [<ffffffff81929d59>] ? sub_preempt_count+0x9/0xa0 [<ffffffff810a5b17>] generic_file_aio_write+0x67/0xd0 [<ffffffff81213661>] nfs_file_write+0xb1/0x1f0 [<ffffffff810d9fca>] do_sync_write+0xda/0x120 [<ffffffff810528a0>] ? autoremove_wake_function+0x0/0x40 [<ffffffff8124c802>] pnfs_file_write+0x82/0xc0 [<ffffffff810daab7>] vfs_write+0xb7/0x180 [<ffffffff810dac71>] sys_write+0x51/0x90 [<ffffffff81002518>] system_call_fastpath+0x16/0x1b eth0: no IPv6 routers present ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order 2010-05-18 16:20 ` J. Bruce Fields @ 2010-05-19 4:56 ` Tao Guo [not found] ` <AANLkTik9L15tqpSboBpb9cSTy3hVPLEK487w94pEbLrS-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 0 siblings, 1 reply; 14+ messages in thread From: Tao Guo @ 2010-05-19 4:56 UTC (permalink / raw) To: J. Bruce Fields Cc: Zhang Jingwang, Boaz Harrosh, Benny Halevy, Zhang Jingwang, linux-nfs, iisaman I think the warning just indicate a possible bug: nfs_inode_set_delegation(): clp->cl_lock --> inode->i_lock get_lock_alloc_layout(): nfsi->lo_lock --> clp->cl_lock nfs_try_to_update_request()->pnfs_do_flush()->_pnfs_do_flush()-> pnfs_find_get_lseg()->get_lock_current_layout(): inode->i_lock -->nfsi->lo_lock In nfs_inode_set_delegation(), maybe we should unlock clp->cl_lock befo= re taking inode->i_lock spinlock? PS: I just use the latest pnfsblock code(pnfs-all-2.6.34-2010-05-17) do= ing some basic r/w tests and it works fine. Can you find out which code path lead to IO errror? On Wed, May 19, 2010 at 12:20 AM, J. Bruce Fields <bfields@fieldses.org= > wrote: > On Tue, May 18, 2010 at 01:22:52AM +0800, Zhang Jingwang wrote: >> I've sent two patches to solve this problem, you can try them. >> >> [PATCH] pnfs: set pnfs_curr_ld before calling initialize_mountpoint >> [PATCH] pnfs: set pnfs_blksize before calling set_pnfs_layoutdriver > > Thanks. =C2=A0With Benny's latest block all (97602fc6, which includes= the two > patches above), I'm back to the previous behavior: > >> >> 2010/5/18 J. Bruce Fields <bfields@fieldses.org>: >> > On Mon, May 17, 2010 at 10:53:11AM -0400, J. Bruce Fields wrote: >> >> On Mon, May 17, 2010 at 05:24:39PM +0300, Boaz Harrosh wrote: >> >> > On 05/17/2010 04:53 PM, J. Bruce Fields wrote: >> >> > > On Wed, May 12, 2010 at 04:28:12PM -0400, bfields wrote: >> >> > >> The one thing I've noticed is that the connectathon general = test has >> >> > >> started failing right at the start with an IO error. =C2=A0T= he last good >> >> > >> version I tested was b5c09c21, which was based on 33-rc6. =C2= =A0The earliest >> >> > >> bad version I tested was 419312ada, based on 34-rc2. =C2=A0A= quick look at >> >> > >> network traces from the two traces didn't turn up anything o= bvious. =C2=A0I >> >> > >> haven't had the chance yet to look closer. > > So I still see the IO error at the start of the connectathon general > tests. > > Also, I get the following warning--I don't know if it's new or not. > > --b. > > =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D > [ INFO: possible circular locking dependency detected ] > 2.6.34-pnfs-00322-g97602fc #141 > ------------------------------------------------------- > cp/2789 is trying to acquire lock: > =C2=A0(&(&nfsi->lo_lock)->rlock){+.+...}, at: [<ffffffff8124dbee>] T.= 947+0x4e/0x210 > > but task is already holding lock: > =C2=A0(&sb->s_type->i_lock_key#11){+.+...}, at: [<ffffffff81223689>] = nfs_updatepage+0x139/0x5a0 > > which lock already depends on the new lock. > > > the existing dependency chain (in reverse order) is: > > -> #2 (&sb->s_type->i_lock_key#11){+.+...}: > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81065913>] __lock_acquire+0x1293/0x1d3= 0 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81066442>] lock_acquire+0x92/0x170 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81925d5b>] _raw_spin_lock+0x3b/0x50 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81244173>] nfs_inode_set_delegation+0x= 203/0x2c0 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81231b7a>] nfs4_opendata_to_nfs4_state= +0x31a/0x3d0 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81231fb2>] nfs4_do_open+0x242/0x460 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81232a05>] nfs4_proc_create+0x85/0x220 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff8120ec64>] nfs_create+0x74/0x120 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810e5d63>] vfs_create+0xb3/0x100 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810e656b>] do_last+0x59b/0x6c0 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810e88e2>] do_filp_open+0x212/0x690 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810d8059>] do_sys_open+0x69/0x140 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810d8170>] sys_open+0x20/0x30 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81002518>] system_call_fastpath+0x16/0= x1b > > -> #1 (&(&clp->cl_lock)->rlock){+.+...}: > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81065913>] __lock_acquire+0x1293/0x1d3= 0 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81066442>] lock_acquire+0x92/0x170 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81925d5b>] _raw_spin_lock+0x3b/0x50 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff8124b378>] pnfs_update_layout+0x2f8/0x= af0 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff8124c7e4>] pnfs_file_write+0x64/0xc0 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810daab7>] vfs_write+0xb7/0x180 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810dac71>] sys_write+0x51/0x90 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81002518>] system_call_fastpath+0x16/0= x1b > > -> #0 (&(&nfsi->lo_lock)->rlock){+.+...}: > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81065dd2>] __lock_acquire+0x1752/0x1d3= 0 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81066442>] lock_acquire+0x92/0x170 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81925d5b>] _raw_spin_lock+0x3b/0x50 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff8124dbee>] T.947+0x4e/0x210 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff8124ddfb>] _pnfs_do_flush+0x4b/0xf0 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff8122364d>] nfs_updatepage+0xfd/0x5a0 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff812126b5>] nfs_write_end+0x265/0x3e0 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810a3397>] generic_file_buffered_write= +0x187/0x2a0 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810a5890>] __generic_file_aio_write+0x= 240/0x460 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810a5b17>] generic_file_aio_write+0x67= /0xd0 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81213661>] nfs_file_write+0xb1/0x1f0 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810d9fca>] do_sync_write+0xda/0x120 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff8124c802>] pnfs_file_write+0x82/0xc0 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810daab7>] vfs_write+0xb7/0x180 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810dac71>] sys_write+0x51/0x90 > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81002518>] system_call_fastpath+0x16/0= x1b > > other info that might help us debug this: > > 2 locks held by cp/2789: > =C2=A0#0: =C2=A0(&sb->s_type->i_mutex_key#13){+.+.+.}, at: [<ffffffff= 810a5b04>] generic_file_aio_write+0x54/0xd0 > =C2=A0#1: =C2=A0(&sb->s_type->i_lock_key#11){+.+...}, at: [<ffffffff8= 1223689>] nfs_updatepage+0x139/0x5a0 > > stack backtrace: > Pid: 2789, comm: cp Not tainted 2.6.34-pnfs-00322-g97602fc #141 > Call Trace: > =C2=A0[<ffffffff81064033>] print_circular_bug+0xf3/0x100 > =C2=A0[<ffffffff81065dd2>] __lock_acquire+0x1752/0x1d30 > =C2=A0[<ffffffff81066442>] lock_acquire+0x92/0x170 > =C2=A0[<ffffffff8124dbee>] ? T.947+0x4e/0x210 > =C2=A0[<ffffffff81929d59>] ? sub_preempt_count+0x9/0xa0 > =C2=A0[<ffffffff81925d5b>] _raw_spin_lock+0x3b/0x50 > =C2=A0[<ffffffff8124dbee>] ? T.947+0x4e/0x210 > =C2=A0[<ffffffff8124dbee>] T.947+0x4e/0x210 > =C2=A0[<ffffffff8124ddfb>] _pnfs_do_flush+0x4b/0xf0 > =C2=A0[<ffffffff8122364d>] nfs_updatepage+0xfd/0x5a0 > =C2=A0[<ffffffff812126b5>] nfs_write_end+0x265/0x3e0 > =C2=A0[<ffffffff810a3397>] generic_file_buffered_write+0x187/0x2a0 > =C2=A0[<ffffffff810a5890>] __generic_file_aio_write+0x240/0x460 > =C2=A0[<ffffffff81929d59>] ? sub_preempt_count+0x9/0xa0 > =C2=A0[<ffffffff810a5b17>] generic_file_aio_write+0x67/0xd0 > =C2=A0[<ffffffff81213661>] nfs_file_write+0xb1/0x1f0 > =C2=A0[<ffffffff810d9fca>] do_sync_write+0xda/0x120 > =C2=A0[<ffffffff810528a0>] ? autoremove_wake_function+0x0/0x40 > =C2=A0[<ffffffff8124c802>] pnfs_file_write+0x82/0xc0 > =C2=A0[<ffffffff810daab7>] vfs_write+0xb7/0x180 > =C2=A0[<ffffffff810dac71>] sys_write+0x51/0x90 > =C2=A0[<ffffffff81002518>] system_call_fastpath+0x16/0x1b > eth0: no IPv6 routers present > -- > To unsubscribe from this list: send the line "unsubscribe linux-nfs" = in > the body of a message to majordomo@vger.kernel.org > More majordomo info at =C2=A0http://vger.kernel.org/majordomo-info.ht= ml > --=20 tao. ^ permalink raw reply [flat|nested] 14+ messages in thread
[parent not found: <AANLkTik9L15tqpSboBpb9cSTy3hVPLEK487w94pEbLrS-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order [not found] ` <AANLkTik9L15tqpSboBpb9cSTy3hVPLEK487w94pEbLrS-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2010-05-19 16:36 ` J. Bruce Fields 2010-05-19 21:38 ` J. Bruce Fields 0 siblings, 1 reply; 14+ messages in thread From: J. Bruce Fields @ 2010-05-19 16:36 UTC (permalink / raw) To: Tao Guo Cc: Zhang Jingwang, Boaz Harrosh, Benny Halevy, Zhang Jingwang, linux-nfs, iisaman On Wed, May 19, 2010 at 12:56:42PM +0800, Tao Guo wrote: > I think the warning just indicate a possible bug: > nfs_inode_set_delegation(): > clp->cl_lock > --> inode->i_lock > get_lock_alloc_layout(): > nfsi->lo_lock > --> clp->cl_lock > nfs_try_to_update_request()->pnfs_do_flush()->_pnfs_do_flush()-> > pnfs_find_get_lseg()->get_lock_current_layout(): >=20 > inode->i_lock >=20 > -->nfsi->lo_lock > In nfs_inode_set_delegation(), maybe we should unlock clp->cl_lock be= fore > taking inode->i_lock spinlock? >=20 > PS: I just use the latest pnfsblock code(pnfs-all-2.6.34-2010-05-17) = doing some > basic r/w tests and it works fine. Could you try running the connectathon general test? > Can you find out which code path > lead to IO errror? I'll try to narrow down the test case. --b. >=20 > On Wed, May 19, 2010 at 12:20 AM, J. Bruce Fields <bfields@fieldses.o= rg> wrote: > > On Tue, May 18, 2010 at 01:22:52AM +0800, Zhang Jingwang wrote: > >> I've sent two patches to solve this problem, you can try them. > >> > >> [PATCH] pnfs: set pnfs_curr_ld before calling initialize_mountpoin= t > >> [PATCH] pnfs: set pnfs_blksize before calling set_pnfs_layoutdrive= r > > > > Thanks. =C2=A0With Benny's latest block all (97602fc6, which includ= es the two > > patches above), I'm back to the previous behavior: > > > >> > >> 2010/5/18 J. Bruce Fields <bfields@fieldses.org>: > >> > On Mon, May 17, 2010 at 10:53:11AM -0400, J. Bruce Fields wrote: > >> >> On Mon, May 17, 2010 at 05:24:39PM +0300, Boaz Harrosh wrote: > >> >> > On 05/17/2010 04:53 PM, J. Bruce Fields wrote: > >> >> > > On Wed, May 12, 2010 at 04:28:12PM -0400, bfields wrote: > >> >> > >> The one thing I've noticed is that the connectathon genera= l test has > >> >> > >> started failing right at the start with an IO error. =C2=A0= The last good > >> >> > >> version I tested was b5c09c21, which was based on 33-rc6. = =C2=A0The earliest > >> >> > >> bad version I tested was 419312ada, based on 34-rc2. =C2=A0= A quick look at > >> >> > >> network traces from the two traces didn't turn up anything= obvious. =C2=A0I > >> >> > >> haven't had the chance yet to look closer. > > > > So I still see the IO error at the start of the connectathon genera= l > > tests. > > > > Also, I get the following warning--I don't know if it's new or not. > > > > --b. > > > > =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D > > [ INFO: possible circular locking dependency detected ] > > 2.6.34-pnfs-00322-g97602fc #141 > > ------------------------------------------------------- > > cp/2789 is trying to acquire lock: > > =C2=A0(&(&nfsi->lo_lock)->rlock){+.+...}, at: [<ffffffff8124dbee>] = T.947+0x4e/0x210 > > > > but task is already holding lock: > > =C2=A0(&sb->s_type->i_lock_key#11){+.+...}, at: [<ffffffff81223689>= ] nfs_updatepage+0x139/0x5a0 > > > > which lock already depends on the new lock. > > > > > > the existing dependency chain (in reverse order) is: > > > > -> #2 (&sb->s_type->i_lock_key#11){+.+...}: > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81065913>] __lock_acquire+0x1293/0x1= d30 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81066442>] lock_acquire+0x92/0x170 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81925d5b>] _raw_spin_lock+0x3b/0x50 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81244173>] nfs_inode_set_delegation+= 0x203/0x2c0 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81231b7a>] nfs4_opendata_to_nfs4_sta= te+0x31a/0x3d0 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81231fb2>] nfs4_do_open+0x242/0x460 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81232a05>] nfs4_proc_create+0x85/0x2= 20 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff8120ec64>] nfs_create+0x74/0x120 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810e5d63>] vfs_create+0xb3/0x100 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810e656b>] do_last+0x59b/0x6c0 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810e88e2>] do_filp_open+0x212/0x690 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810d8059>] do_sys_open+0x69/0x140 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810d8170>] sys_open+0x20/0x30 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81002518>] system_call_fastpath+0x16= /0x1b > > > > -> #1 (&(&clp->cl_lock)->rlock){+.+...}: > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81065913>] __lock_acquire+0x1293/0x1= d30 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81066442>] lock_acquire+0x92/0x170 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81925d5b>] _raw_spin_lock+0x3b/0x50 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff8124b378>] pnfs_update_layout+0x2f8/= 0xaf0 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff8124c7e4>] pnfs_file_write+0x64/0xc0 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810daab7>] vfs_write+0xb7/0x180 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810dac71>] sys_write+0x51/0x90 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81002518>] system_call_fastpath+0x16= /0x1b > > > > -> #0 (&(&nfsi->lo_lock)->rlock){+.+...}: > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81065dd2>] __lock_acquire+0x1752/0x1= d30 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81066442>] lock_acquire+0x92/0x170 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81925d5b>] _raw_spin_lock+0x3b/0x50 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff8124dbee>] T.947+0x4e/0x210 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff8124ddfb>] _pnfs_do_flush+0x4b/0xf0 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff8122364d>] nfs_updatepage+0xfd/0x5a0 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff812126b5>] nfs_write_end+0x265/0x3e0 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810a3397>] generic_file_buffered_wri= te+0x187/0x2a0 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810a5890>] __generic_file_aio_write+= 0x240/0x460 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810a5b17>] generic_file_aio_write+0x= 67/0xd0 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81213661>] nfs_file_write+0xb1/0x1f0 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810d9fca>] do_sync_write+0xda/0x120 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff8124c802>] pnfs_file_write+0x82/0xc0 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810daab7>] vfs_write+0xb7/0x180 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff810dac71>] sys_write+0x51/0x90 > > =C2=A0 =C2=A0 =C2=A0 [<ffffffff81002518>] system_call_fastpath+0x16= /0x1b > > > > other info that might help us debug this: > > > > 2 locks held by cp/2789: > > =C2=A0#0: =C2=A0(&sb->s_type->i_mutex_key#13){+.+.+.}, at: [<ffffff= ff810a5b04>] generic_file_aio_write+0x54/0xd0 > > =C2=A0#1: =C2=A0(&sb->s_type->i_lock_key#11){+.+...}, at: [<fffffff= f81223689>] nfs_updatepage+0x139/0x5a0 > > > > stack backtrace: > > Pid: 2789, comm: cp Not tainted 2.6.34-pnfs-00322-g97602fc #141 > > Call Trace: > > =C2=A0[<ffffffff81064033>] print_circular_bug+0xf3/0x100 > > =C2=A0[<ffffffff81065dd2>] __lock_acquire+0x1752/0x1d30 > > =C2=A0[<ffffffff81066442>] lock_acquire+0x92/0x170 > > =C2=A0[<ffffffff8124dbee>] ? T.947+0x4e/0x210 > > =C2=A0[<ffffffff81929d59>] ? sub_preempt_count+0x9/0xa0 > > =C2=A0[<ffffffff81925d5b>] _raw_spin_lock+0x3b/0x50 > > =C2=A0[<ffffffff8124dbee>] ? T.947+0x4e/0x210 > > =C2=A0[<ffffffff8124dbee>] T.947+0x4e/0x210 > > =C2=A0[<ffffffff8124ddfb>] _pnfs_do_flush+0x4b/0xf0 > > =C2=A0[<ffffffff8122364d>] nfs_updatepage+0xfd/0x5a0 > > =C2=A0[<ffffffff812126b5>] nfs_write_end+0x265/0x3e0 > > =C2=A0[<ffffffff810a3397>] generic_file_buffered_write+0x187/0x2a0 > > =C2=A0[<ffffffff810a5890>] __generic_file_aio_write+0x240/0x460 > > =C2=A0[<ffffffff81929d59>] ? sub_preempt_count+0x9/0xa0 > > =C2=A0[<ffffffff810a5b17>] generic_file_aio_write+0x67/0xd0 > > =C2=A0[<ffffffff81213661>] nfs_file_write+0xb1/0x1f0 > > =C2=A0[<ffffffff810d9fca>] do_sync_write+0xda/0x120 > > =C2=A0[<ffffffff810528a0>] ? autoremove_wake_function+0x0/0x40 > > =C2=A0[<ffffffff8124c802>] pnfs_file_write+0x82/0xc0 > > =C2=A0[<ffffffff810daab7>] vfs_write+0xb7/0x180 > > =C2=A0[<ffffffff810dac71>] sys_write+0x51/0x90 > > =C2=A0[<ffffffff81002518>] system_call_fastpath+0x16/0x1b > > eth0: no IPv6 routers present > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-nfs= " in > > the body of a message to majordomo@vger.kernel.org > > More majordomo info at =C2=A0http://vger.kernel.org/majordomo-info.= html > > >=20 >=20 >=20 > --=20 > tao. ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order 2010-05-19 16:36 ` J. Bruce Fields @ 2010-05-19 21:38 ` J. Bruce Fields 2010-05-20 5:44 ` Tao Guo 0 siblings, 1 reply; 14+ messages in thread From: J. Bruce Fields @ 2010-05-19 21:38 UTC (permalink / raw) To: Tao Guo Cc: Zhang Jingwang, Boaz Harrosh, Benny Halevy, Zhang Jingwang, linux-nfs, iisaman On Wed, May 19, 2010 at 12:36:32PM -0400, J. Bruce Fields wrote: > On Wed, May 19, 2010 at 12:56:42PM +0800, Tao Guo wrote: > > I think the warning just indicate a possible bug: > > nfs_inode_set_delegation(): > > clp->cl_lock > > --> inode->i_lock > > get_lock_alloc_layout(): > > nfsi->lo_lock > > --> clp->cl_lock > > nfs_try_to_update_request()->pnfs_do_flush()->_pnfs_do_flush()-> > > pnfs_find_get_lseg()->get_lock_current_layout(): > > > > inode->i_lock > > > > -->nfsi->lo_lock > > In nfs_inode_set_delegation(), maybe we should unlock clp->cl_lock before > > taking inode->i_lock spinlock? > > > > PS: I just use the latest pnfsblock code(pnfs-all-2.6.34-2010-05-17) doing some > > basic r/w tests and it works fine. > > Could you try running the connectathon general test? > > > Can you find out which code path > > lead to IO errror? > > I'll try to narrow down the test case. I haven't gotten the chance to narrow it down any more. So all I can say is that: ~/cthon04# NFSTESTDIR=/mnt/TMP ./runtests -g GENERAL TESTS: directory /mnt/TMP if test ! -x runtests; then chmod a+x runtests; fi cd /mnt/TMP; rm -f Makefile runtests runtests.wrk *.sh *.c mkdummy rmdummy nroff.in makefile.tst cp Makefile runtests runtests.wrk *.sh *.c mkdummy rmdummy nroff.in makefile.tst /mnt/TMP Small Compile runtests.wrk: 63: ./stat: Input/output error general tests failed is a consistent result for me. (Using the tests from http://www.connectathon.org/nfstests.html.) --b. ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order 2010-05-19 21:38 ` J. Bruce Fields @ 2010-05-20 5:44 ` Tao Guo 2010-05-21 23:00 ` J. Bruce Fields 0 siblings, 1 reply; 14+ messages in thread From: Tao Guo @ 2010-05-20 5:44 UTC (permalink / raw) To: J. Bruce Fields; +Cc: linux-nfs Hi, Fields I used the connectathon general test with my latest code, and it passed the tests. Can you just try the two patches I have sent in the mailing list to see if it helps? On Thu, May 20, 2010 at 5:38 AM, J. Bruce Fields <bfields@fieldses.org>= wrote: > On Wed, May 19, 2010 at 12:36:32PM -0400, J. Bruce Fields wrote: >> On Wed, May 19, 2010 at 12:56:42PM +0800, Tao Guo wrote: >> > I think the warning just indicate a possible bug: >> > nfs_inode_set_delegation(): >> > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= clp->cl_lock >> > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0= =C2=A0 =C2=A0 --> inode->i_lock >> > get_lock_alloc_layout(): >> > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0nfsi->lo_loc= k >> > =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2= =A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 =C2=A0 --> = clp->cl_lock >> > nfs_try_to_update_request()->pnfs_do_flush()->_pnfs_do_flush()-> >> > pnfs_find_get_lseg()->get_lock_current_layout(): >> > >> > inode->i_lock >> > >> > -->nfsi->lo_lock >> > In nfs_inode_set_delegation(), maybe we should unlock clp->cl_lock= before >> > taking inode->i_lock spinlock? >> > >> > PS: I just use the latest pnfsblock code(pnfs-all-2.6.34-2010-05-1= 7) doing some >> > basic r/w tests and it works fine. >> >> Could you try running the connectathon general test? >> >> > Can you find out which code path >> > lead to IO errror? >> >> I'll try to narrow down the test case. > > I haven't gotten the chance to narrow it down any more. =C2=A0So all = I can > say is that: > > =C2=A0 =C2=A0 =C2=A0 =C2=A0~/cthon04# NFSTESTDIR=3D/mnt/TMP ./runtest= s -g > > =C2=A0 =C2=A0 =C2=A0 =C2=A0GENERAL TESTS: directory /mnt/TMP > =C2=A0 =C2=A0 =C2=A0 =C2=A0if test ! -x runtests; then chmod a+x runt= ests; fi > =C2=A0 =C2=A0 =C2=A0 =C2=A0cd /mnt/TMP; rm -f Makefile runtests runte= sts.wrk *.sh *.c > =C2=A0 =C2=A0 =C2=A0 =C2=A0mkdummy rmdummy nroff.in makefile.tst > =C2=A0 =C2=A0 =C2=A0 =C2=A0cp Makefile runtests runtests.wrk *.sh *.c= mkdummy rmdummy > =C2=A0 =C2=A0 =C2=A0 =C2=A0nroff.in makefile.tst /mnt/TMP > > =C2=A0 =C2=A0 =C2=A0 =C2=A0Small Compile > =C2=A0 =C2=A0 =C2=A0 =C2=A0runtests.wrk: 63: ./stat: Input/output err= or > =C2=A0 =C2=A0 =C2=A0 =C2=A0general tests failed > > is a consistent result for me. =C2=A0(Using the tests from > http://www.connectathon.org/nfstests.html.) > > --b. > --=20 tao. ^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order 2010-05-20 5:44 ` Tao Guo @ 2010-05-21 23:00 ` J. Bruce Fields 0 siblings, 0 replies; 14+ messages in thread From: J. Bruce Fields @ 2010-05-21 23:00 UTC (permalink / raw) To: Tao Guo; +Cc: linux-nfs On Thu, May 20, 2010 at 01:44:13PM +0800, Tao Guo wrote: > Hi, Fields > I used the connectathon general test with my latest code, and it > passed the tests. > Can you just try the two patches I have sent in the mailing list to > see if it helps? GENERAL TESTS: directory /mnt/TMP if test ! -x runtests; then chmod a+x runtests; fi cd /mnt/TMP; rm -f Makefile runtests runtests.wrk *.sh *.c mkdummy rmdummy nroff.in makefile.tst cp Makefile runtests runtests.wrk *.sh *.c mkdummy rmdummy nroff.in makefile.tst /mnt/TMP Small Compile /usr/lib/gcc/x86_64-linux-gnu/4.4.1/../../../../lib/crt1.o: In function `_start': /build/buildd/eglibc-2.10.1/csu/../sysdeps/x86_64/elf/start.S:109: undefined reference to `main' collect2: ld returned 1 exit status Command exited with non-zero status 1 0.02user 0.05system 0:00.12elapsed 66%CPU (0avgtext+0avgdata 22464maxresident)k 0inputs+0outputs (0major+4946minor)pagefaults 0swaps general tests failed I don't know what's going on there; sounds like data corruption? --b. ^ permalink raw reply [flat|nested] 14+ messages in thread
end of thread, other threads:[~2010-05-21 23:00 UTC | newest]
Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-10 3:36 [PATCH] pnfsblock: Lookup list entry of layouts and tags in reverse order Zhang Jingwang
[not found] ` <20100510033610.GA5443-nK6E9TRyOkVSq9BJjBFyUp/QNRX+jHPU@public.gmane.org>
2010-05-12 6:46 ` Benny Halevy
2010-05-12 20:28 ` J. Bruce Fields
2010-05-17 13:53 ` J. Bruce Fields
2010-05-17 14:24 ` Boaz Harrosh
2010-05-17 14:53 ` J. Bruce Fields
2010-05-17 16:53 ` J. Bruce Fields
2010-05-17 17:22 ` Zhang Jingwang
[not found] ` <AANLkTilUpAHrtHH8pauvYrAuD3rWgj7aDmrTOzrmU-h5-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-05-18 16:20 ` J. Bruce Fields
2010-05-19 4:56 ` Tao Guo
[not found] ` <AANLkTik9L15tqpSboBpb9cSTy3hVPLEK487w94pEbLrS-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-05-19 16:36 ` J. Bruce Fields
2010-05-19 21:38 ` J. Bruce Fields
2010-05-20 5:44 ` Tao Guo
2010-05-21 23:00 ` J. Bruce Fields
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).