From: Benny Halevy <bhalevy@panasas.com>
To: Fred Isaman <iisaman@netapp.com>
Cc: linux-nfs@vger.kernel.org
Subject: Re: [PATCH 03/18] pnfs-submit: remove _pnfs_can_return_lseg call from pnfs_clear_lseg_list
Date: Thu, 11 Nov 2010 16:39:41 +0200 [thread overview]
Message-ID: <4CDC002D.7050303@panasas.com> (raw)
In-Reply-To: <AANLkTik69Y1+qmxF2QPKXM_4-VQsseRQ672S+u23h4nX@mail.gmail.com>
On 2010-11-11 15:52, Fred Isaman wrote:
> On Thu, Nov 11, 2010 at 2:00 AM, Benny Halevy <bhalevy@panasas.com> wrote:
>> On 2010-11-10 16:46, Fred Isaman wrote:
>>> On Wed, Nov 10, 2010 at 9:35 AM, Benny Halevy <bhalevy@panasas.com> wrote:
>>>> On 2010-11-04 17:22, Fred Isaman wrote:
>>>>> Instead, have mark_invalid function that marks lseg invalid and
>>>>> removes the reference that holds it in the list. Now when io is finished,
>>>>> the lseg will automatically be removed from the list. This is
>>>>> at the heart of many of the upcoming cb_layoutrecall changes.
>>>>>
>>>>> Signed-off-by: Fred Isaman <iisaman@netapp.com>
>>>>> ---
>>>>> fs/nfs/nfs4xdr.c | 3 +-
>>>>> fs/nfs/pnfs.c | 145 ++++++++++++++++++++++++++++++++++-------------------
>>>>> fs/nfs/pnfs.h | 1 +
>>>>> 3 files changed, 95 insertions(+), 54 deletions(-)
>>>>>
>>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>>>>> index 238eeb2..6d9ef2b 100644
>>>>> --- a/fs/nfs/nfs4xdr.c
>>>>> +++ b/fs/nfs/nfs4xdr.c
>>>>> @@ -1915,8 +1915,7 @@ encode_layoutreturn(struct xdr_stream *xdr,
>>>>> p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
>>>>> p = xdr_encode_hyper(p, args->range.offset);
>>>>> p = xdr_encode_hyper(p, args->range.length);
>>>>> - pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
>>>>> - NULL);
>>>>> + pnfs_copy_layout_stateid(&stateid, NFS_I(args->inode)->layout);
>>>>> p = xdr_encode_opaque_fixed(p, &stateid.data,
>>>>> NFS4_STATEID_SIZE);
>>>>> p = reserve_space(xdr, 4);
>>>>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>>>>> index 3bbe3be..4e5c68b 100644
>>>>> --- a/fs/nfs/pnfs.c
>>>>> +++ b/fs/nfs/pnfs.c
>>>>> @@ -272,10 +272,42 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
>>>>> lseg->layout = lo;
>>>>> }
>>>>>
>>>>> +static void
>>>>> +_put_lseg_common(struct pnfs_layout_segment *lseg)
>>>>> +{
>>>>> + BUG_ON(lseg->valid == true);
>>>>> + list_del(&lseg->fi_list);
>>>>> + if (list_empty(&lseg->layout->segs)) {
>>>>> + struct nfs_client *clp;
>>>>> +
>>>>> + clp = NFS_SERVER(lseg->layout->inode)->nfs_client;
>>>>> + spin_lock(&clp->cl_lock);
>>>>> + /* List does not take a reference, so no need for put here */
>>>>> + list_del_init(&lseg->layout->layouts);
>>>>> + spin_unlock(&clp->cl_lock);
>>>>> + pnfs_invalidate_layout_stateid(lseg->layout);
>>>>> + }
>>>>> + rpc_wake_up(&NFS_I(lseg->layout->inode)->lo_rpcwaitq);
>>>>> +}
>>>>> +
>>>>> +/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
>>>>> + * could sleep, so must be called outside of the lock.
>>>>> + */
>>>>> +static void
>>>>> +put_lseg_locked(struct pnfs_layout_segment *lseg,
>>>>> + struct list_head *tmp_list)
>>>>> +{
>>>>> + dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
>>>>> + atomic_read(&lseg->pls_refcount), lseg->valid);
>>>>> + if (atomic_dec_and_test(&lseg->pls_refcount)) {
>>>>> + _put_lseg_common(lseg);
>>>>> + list_add(&lseg->fi_list, tmp_list);
>>>>> + }
>>>>> +}
>>>>> +
>>>>> void
>>>>> put_lseg(struct pnfs_layout_segment *lseg)
>>>>> {
>>>>> - bool do_wake_up;
>>>>> struct inode *ino;
>>>>>
>>>>> if (!lseg)
>>>>> @@ -283,15 +315,14 @@ put_lseg(struct pnfs_layout_segment *lseg)
>>>>>
>>>>> dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
>>>>> atomic_read(&lseg->pls_refcount), lseg->valid);
>>>>> - do_wake_up = !lseg->valid;
>>>>> ino = lseg->layout->inode;
>>>>> - if (atomic_dec_and_test(&lseg->pls_refcount)) {
>>>>> + if (atomic_dec_and_lock(&lseg->pls_refcount, &ino->i_lock)) {
>>>>> + _put_lseg_common(lseg);
>>>>> + spin_unlock(&ino->i_lock);
>>>>> NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
>>>>> /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
>>>>> put_layout_hdr(ino);
>>>>> }
>>>>> - if (do_wake_up)
>>>>> - rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq);
>>>>> }
>>>>> EXPORT_SYMBOL_GPL(put_lseg);
>>>>>
>>>>> @@ -314,10 +345,18 @@ should_free_lseg(struct pnfs_layout_segment *lseg,
>>>>> lseg->range.iomode == range->iomode);
>>>>> }
>>>>>
>>>>> -static bool
>>>>> -_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
>>>>> +static void mark_lseg_invalid(struct pnfs_layout_segment *lseg,
>>>>> + struct list_head *tmp_list)
>>>>> {
>>>>> - return atomic_read(&lseg->pls_refcount) == 1;
>>>>> + assert_spin_locked(&lseg->layout->inode->i_lock);
>>>>> + if (lseg->valid) {
>>>>> + lseg->valid = false;
>>>>> + /* Remove the reference keeping the lseg in the
>>>>> + * list. It will now be removed when all
>>>>> + * outstanding io is finished.
>>>>> + */
>>>>> + put_lseg_locked(lseg, tmp_list);
>>>>> + }
>>>>> }
>>>>>
>>>>> static void
>>>>> @@ -330,42 +369,31 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list,
>>>>> __func__, lo, range->offset, range->length, range->iomode);
>>>>>
>>>>> assert_spin_locked(&lo->inode->i_lock);
>>>>> - list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
>>>>> - if (!should_free_lseg(lseg, range) ||
>>>>> - !_pnfs_can_return_lseg(lseg))
>>>>> - continue;
>>>>> - dprintk("%s: freeing lseg %p iomode %d "
>>>>> - "offset %llu length %llu\n", __func__,
>>>>> - lseg, lseg->range.iomode, lseg->range.offset,
>>>>> - lseg->range.length);
>>>>> - list_move(&lseg->fi_list, tmp_list);
>>>>> - }
>>>>> - if (list_empty(&lo->segs)) {
>>>>> - struct nfs_client *clp;
>>>>> -
>>>>> - clp = NFS_SERVER(lo->inode)->nfs_client;
>>>>> - spin_lock(&clp->cl_lock);
>>>>> - /* List does not take a reference, so no need for put here */
>>>>> - list_del_init(&lo->layouts);
>>>>> - spin_unlock(&clp->cl_lock);
>>>>> - pnfs_invalidate_layout_stateid(lo);
>>>>> - }
>>>>> -
>>>>> + list_for_each_entry_safe(lseg, next, &lo->segs, fi_list)
>>>>> + if (should_free_lseg(lseg, range)) {
>>>>> + dprintk("%s: freeing lseg %p iomode %d "
>>>>> + "offset %llu length %llu\n", __func__,
>>>>> + lseg, lseg->range.iomode, lseg->range.offset,
>>>>> + lseg->range.length);
>>>>> + mark_lseg_invalid(lseg, tmp_list);
>>>>> + }
>>>>> dprintk("%s:Return\n", __func__);
>>>>> }
>>>>>
>>>>> static void
>>>>> -pnfs_free_lseg_list(struct list_head *tmp_list)
>>>>> +pnfs_free_lseg_list(struct list_head *free_me)
>>>>> {
>>>>> - struct pnfs_layout_segment *lseg;
>>>>> + struct pnfs_layout_segment *lseg, *tmp;
>>>>> + struct inode *ino;
>>>>>
>>>>> - while (!list_empty(tmp_list)) {
>>>>> - lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
>>>>> - fi_list);
>>>>> - dprintk("%s calling put_lseg on %p\n", __func__, lseg);
>>>>> - list_del(&lseg->fi_list);
>>>>> - put_lseg(lseg);
>>>>> + list_for_each_entry_safe(lseg, tmp, free_me, fi_list) {
>>>>> + BUG_ON(atomic_read(&lseg->pls_refcount) != 0);
>>>>> + ino = lseg->layout->inode;
>>>>> + NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
>>>>> + /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
>>>>> + put_layout_hdr(ino);
>>>>> }
>>>>> + INIT_LIST_HEAD(free_me);
>>>>> }
>>>>>
>>>>> void
>>>>> @@ -463,6 +491,17 @@ pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
>>>>> dprintk("<-- %s\n", __func__);
>>>>> }
>>>>>
>>>>> +/* Layoutreturn may use an invalid stateid, just copy what is there */
>>>>> +void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
>>>>> +{
>>>>> + int seq;
>>>>> +
>>>>> + do {
>>>>> + seq = read_seqbegin(&lo->seqlock);
>>>>> + memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
>>>>> + } while (read_seqretry(&lo->seqlock, seq));
>>>>> +}
>>>>> +
>>>>> void
>>>>> pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
>>>>> struct nfs4_state *open_state)
>>>>> @@ -546,25 +585,23 @@ has_layout_to_return(struct pnfs_layout_hdr *lo,
>>>>> return out;
>>>>> }
>>>>>
>>>>> +/* Return true if there is layout based io in progress in the given range.
>>>>> + * Assumes range has already been marked invalid, and layout marked to
>>>>> + * prevent any new lseg from being inserted.
>>>>> + */
>>>>> bool
>>>>> pnfs_return_layout_barrier(struct nfs_inode *nfsi,
>>>>> struct pnfs_layout_range *range)
>>>>> {
>>>>> - struct pnfs_layout_segment *lseg;
>>>>> + struct pnfs_layout_segment *lseg, *tmp;
>>>>> bool ret = false;
>>>>>
>>>>> spin_lock(&nfsi->vfs_inode.i_lock);
>>>>> - list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) {
>>>>> - if (!should_free_lseg(lseg, range))
>>>>> - continue;
>>>>> - lseg->valid = false;
>>>>> - if (!_pnfs_can_return_lseg(lseg)) {
>>>>> - dprintk("%s: wait on lseg %p refcount %d\n",
>>>>> - __func__, lseg,
>>>>> - atomic_read(&lseg->pls_refcount));
>>>>> + list_for_each_entry_safe(lseg, tmp, &nfsi->layout->segs, fi_list)
>>>>
>>>> Why do you need the safe version here while the inode is locked?
>>>>
>>>
>>> We don't.
>>
>> OK. I'll fix that then :)
>>
>>>
>>>
>>>>> + if (should_free_lseg(lseg, range)) {
>>>>> ret = true;
>>>>
>>>> But this will always return "true" if there's any lseg to return,
>>>> not only if (!_pnfs_can_return_lseg(lseg)).
>>>>
>>>> What am I missing? :)
>>>>
>>>
>>> A return of "true" means the caller should wait. So if there is any
>>> lseg still left to return, we should return true. The refcounting has
>>> changed so that once the pending IO is finished, the lseg will
>>> automatically be removed from the list. I suspect that what you are
>>> missing is that...the refcount in the invalid case is one less than
>>> what it used to be.
>>
>> Thanks. I see what you mean now.
>>
>> What's missing is plh_block_lgets which is introduced only
>> in [PATCH 13/18] pnfs-submit: rewrite of layout state handling and cb_layoutrecall
>> Otherwise, new lsegs can be inserted into the list in between.
>>
>> Benny
>>
>
> Hmmm, you're right. Let me see if I can tease out enough of the
> blocking code from patch 13 to make it work.
Thanks!
> The other option is to
> just merge the two patches together.
This can work too if the former is too complicated. :)
benny
>
> Fred
>
next prev parent reply other threads:[~2010-11-11 14:39 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-11-04 15:22 [PATCH 00/18] rewrite of CB_LAYOUTRECALL and layoutstate code Fred Isaman
2010-11-04 15:22 ` [PATCH 01/18] NFSv4.1: Callback share session between ops Fred Isaman
2010-11-10 13:37 ` Benny Halevy
2010-11-10 13:41 ` [PATCH] SQUASHME: pnfs-submit: fixups for nfsv4.1 callbacks Benny Halevy
2010-11-10 14:53 ` Fred Isaman
2010-11-04 15:22 ` [PATCH 02/18] pnfs-submit: change pnfs_layout_segment refcounting from kref to atomic_t Fred Isaman
2010-11-04 15:22 ` [PATCH 03/18] pnfs-submit: remove _pnfs_can_return_lseg call from pnfs_clear_lseg_list Fred Isaman
2010-11-10 14:35 ` Benny Halevy
2010-11-10 14:46 ` Fred Isaman
2010-11-11 7:00 ` Benny Halevy
2010-11-11 13:52 ` Fred Isaman
2010-11-11 14:39 ` Benny Halevy [this message]
2010-11-04 15:22 ` [PATCH 04/18] pnfs-submit: change layout state seqlock to a spinlock Fred Isaman
2010-11-11 15:00 ` Benny Halevy
2010-11-11 15:09 ` Fred Isaman
2010-11-04 15:22 ` [PATCH 05/18] pnfs-submit: layoutreturn' rpc_call_op functions need to handle bulk returns Fred Isaman
2010-11-11 15:01 ` Benny Halevy
2010-11-04 15:22 ` [PATCH 06/18] pnfs_submit: nfs4_layoutreturn_release should not reference results Fred Isaman
2010-11-11 15:16 ` Benny Halevy
2010-11-04 15:22 ` [PATCH 07/18] pnfs-submit: reorganize struct cb_layoutrecallargs Fred Isaman
2010-11-04 15:22 ` [PATCH 08/18] pnfs-submit: rename lo->state to lo->plh_flags Fred Isaman
2010-11-04 15:22 ` [PATCH 09/18] pnfs-submit: change pnfs_layout_hdr refcount to atomic_t Fred Isaman
2010-11-04 15:22 ` [PATCH 10/18] pnfs-submit: argument to should_free_lseg changed from lseg to range Fred Isaman
2010-11-04 15:22 ` [PATCH 11/18] pnfs-submit: remove unnecessary field lgp->status Fred Isaman
2010-11-04 15:22 ` [PATCH 12/18] pnfs-submit: remove RPC_ASSASSINATED(task) checks Fred Isaman
2010-11-04 15:22 ` [PATCH 13/18] pnfs-submit: rewrite of layout state handling and cb_layoutrecall Fred Isaman
2010-11-04 15:22 ` [PATCH 14/18] pnfs-submit: increase number of outstanding CB_LAYOUTRECALLS we can handle Fred Isaman
2010-11-04 15:22 ` [PATCH 15/18] pnfs-submit: roc add layoutreturn op to close compound Fred Isaman
2010-11-04 15:22 ` [PATCH 16/18] pnfs-submit refactor layoutcommit xdr structures Fred Isaman
2010-11-04 15:22 ` [PATCH 17/18] pnfs-submit refactor pnfs_layoutcommit_setup Fred Isaman
2010-11-04 15:22 ` [PATCH 18/18] pnfs_submit: roc add layoutcommit op to close compound Fred Isaman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4CDC002D.7050303@panasas.com \
--to=bhalevy@panasas.com \
--cc=iisaman@netapp.com \
--cc=linux-nfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).