linux-nfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Fred Isaman <iisaman@netapp.com>
To: Benny Halevy <bhalevy@panasas.com>
Cc: linux-nfs@vger.kernel.org
Subject: Re: [PATCH 03/18] pnfs-submit: remove _pnfs_can_return_lseg call from pnfs_clear_lseg_list
Date: Thu, 11 Nov 2010 08:52:23 -0500	[thread overview]
Message-ID: <AANLkTik69Y1+qmxF2QPKXM_4-VQsseRQ672S+u23h4nX@mail.gmail.com> (raw)
In-Reply-To: <4CDB94AA.3020508@panasas.com>

On Thu, Nov 11, 2010 at 2:00 AM, Benny Halevy <bhalevy@panasas.com> wrote:
> On 2010-11-10 16:46, Fred Isaman wrote:
>> On Wed, Nov 10, 2010 at 9:35 AM, Benny Halevy <bhalevy@panasas.com> wrote:
>>> On 2010-11-04 17:22, Fred Isaman wrote:
>>>> Instead, have mark_invalid function that marks lseg invalid and
>>>> removes the reference that holds it in the list.  Now when io is finished,
>>>> the lseg will automatically be removed from the list.  This is
>>>> at the heart of many of the upcoming cb_layoutrecall changes.
>>>>
>>>> Signed-off-by: Fred Isaman <iisaman@netapp.com>
>>>> ---
>>>>  fs/nfs/nfs4xdr.c |    3 +-
>>>>  fs/nfs/pnfs.c    |  145 ++++++++++++++++++++++++++++++++++-------------------
>>>>  fs/nfs/pnfs.h    |    1 +
>>>>  3 files changed, 95 insertions(+), 54 deletions(-)
>>>>
>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>>>> index 238eeb2..6d9ef2b 100644
>>>> --- a/fs/nfs/nfs4xdr.c
>>>> +++ b/fs/nfs/nfs4xdr.c
>>>> @@ -1915,8 +1915,7 @@ encode_layoutreturn(struct xdr_stream *xdr,
>>>>               p = reserve_space(xdr, 16 + NFS4_STATEID_SIZE);
>>>>               p = xdr_encode_hyper(p, args->range.offset);
>>>>               p = xdr_encode_hyper(p, args->range.length);
>>>> -             pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout,
>>>> -                                     NULL);
>>>> +             pnfs_copy_layout_stateid(&stateid, NFS_I(args->inode)->layout);
>>>>               p = xdr_encode_opaque_fixed(p, &stateid.data,
>>>>                                           NFS4_STATEID_SIZE);
>>>>               p = reserve_space(xdr, 4);
>>>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>>>> index 3bbe3be..4e5c68b 100644
>>>> --- a/fs/nfs/pnfs.c
>>>> +++ b/fs/nfs/pnfs.c
>>>> @@ -272,10 +272,42 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
>>>>       lseg->layout = lo;
>>>>  }
>>>>
>>>> +static void
>>>> +_put_lseg_common(struct pnfs_layout_segment *lseg)
>>>> +{
>>>> +     BUG_ON(lseg->valid == true);
>>>> +     list_del(&lseg->fi_list);
>>>> +     if (list_empty(&lseg->layout->segs)) {
>>>> +             struct nfs_client *clp;
>>>> +
>>>> +             clp = NFS_SERVER(lseg->layout->inode)->nfs_client;
>>>> +             spin_lock(&clp->cl_lock);
>>>> +             /* List does not take a reference, so no need for put here */
>>>> +             list_del_init(&lseg->layout->layouts);
>>>> +             spin_unlock(&clp->cl_lock);
>>>> +             pnfs_invalidate_layout_stateid(lseg->layout);
>>>> +     }
>>>> +     rpc_wake_up(&NFS_I(lseg->layout->inode)->lo_rpcwaitq);
>>>> +}
>>>> +
>>>> +/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg
>>>> + * could sleep, so must be called outside of the lock.
>>>> + */
>>>> +static void
>>>> +put_lseg_locked(struct pnfs_layout_segment *lseg,
>>>> +             struct list_head *tmp_list)
>>>> +{
>>>> +     dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
>>>> +             atomic_read(&lseg->pls_refcount), lseg->valid);
>>>> +     if (atomic_dec_and_test(&lseg->pls_refcount)) {
>>>> +             _put_lseg_common(lseg);
>>>> +             list_add(&lseg->fi_list, tmp_list);
>>>> +     }
>>>> +}
>>>> +
>>>>  void
>>>>  put_lseg(struct pnfs_layout_segment *lseg)
>>>>  {
>>>> -     bool do_wake_up;
>>>>       struct inode *ino;
>>>>
>>>>       if (!lseg)
>>>> @@ -283,15 +315,14 @@ put_lseg(struct pnfs_layout_segment *lseg)
>>>>
>>>>       dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
>>>>               atomic_read(&lseg->pls_refcount), lseg->valid);
>>>> -     do_wake_up = !lseg->valid;
>>>>       ino = lseg->layout->inode;
>>>> -     if (atomic_dec_and_test(&lseg->pls_refcount)) {
>>>> +     if (atomic_dec_and_lock(&lseg->pls_refcount, &ino->i_lock)) {
>>>> +             _put_lseg_common(lseg);
>>>> +             spin_unlock(&ino->i_lock);
>>>>               NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
>>>>               /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
>>>>               put_layout_hdr(ino);
>>>>       }
>>>> -     if (do_wake_up)
>>>> -             rpc_wake_up(&NFS_I(ino)->lo_rpcwaitq);
>>>>  }
>>>>  EXPORT_SYMBOL_GPL(put_lseg);
>>>>
>>>> @@ -314,10 +345,18 @@ should_free_lseg(struct pnfs_layout_segment *lseg,
>>>>               lseg->range.iomode == range->iomode);
>>>>  }
>>>>
>>>> -static bool
>>>> -_pnfs_can_return_lseg(struct pnfs_layout_segment *lseg)
>>>> +static void mark_lseg_invalid(struct pnfs_layout_segment *lseg,
>>>> +                           struct list_head *tmp_list)
>>>>  {
>>>> -     return atomic_read(&lseg->pls_refcount) == 1;
>>>> +     assert_spin_locked(&lseg->layout->inode->i_lock);
>>>> +     if (lseg->valid) {
>>>> +             lseg->valid = false;
>>>> +             /* Remove the reference keeping the lseg in the
>>>> +              * list.  It will now be removed when all
>>>> +              * outstanding io is finished.
>>>> +              */
>>>> +             put_lseg_locked(lseg, tmp_list);
>>>> +     }
>>>>  }
>>>>
>>>>  static void
>>>> @@ -330,42 +369,31 @@ pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list,
>>>>               __func__, lo, range->offset, range->length, range->iomode);
>>>>
>>>>       assert_spin_locked(&lo->inode->i_lock);
>>>> -     list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
>>>> -             if (!should_free_lseg(lseg, range) ||
>>>> -                 !_pnfs_can_return_lseg(lseg))
>>>> -                     continue;
>>>> -             dprintk("%s: freeing lseg %p iomode %d "
>>>> -                     "offset %llu length %llu\n", __func__,
>>>> -                     lseg, lseg->range.iomode, lseg->range.offset,
>>>> -                     lseg->range.length);
>>>> -             list_move(&lseg->fi_list, tmp_list);
>>>> -     }
>>>> -     if (list_empty(&lo->segs)) {
>>>> -             struct nfs_client *clp;
>>>> -
>>>> -             clp = NFS_SERVER(lo->inode)->nfs_client;
>>>> -             spin_lock(&clp->cl_lock);
>>>> -             /* List does not take a reference, so no need for put here */
>>>> -             list_del_init(&lo->layouts);
>>>> -             spin_unlock(&clp->cl_lock);
>>>> -             pnfs_invalidate_layout_stateid(lo);
>>>> -     }
>>>> -
>>>> +     list_for_each_entry_safe(lseg, next, &lo->segs, fi_list)
>>>> +             if (should_free_lseg(lseg, range)) {
>>>> +                     dprintk("%s: freeing lseg %p iomode %d "
>>>> +                             "offset %llu length %llu\n", __func__,
>>>> +                             lseg, lseg->range.iomode, lseg->range.offset,
>>>> +                             lseg->range.length);
>>>> +                     mark_lseg_invalid(lseg, tmp_list);
>>>> +             }
>>>>       dprintk("%s:Return\n", __func__);
>>>>  }
>>>>
>>>>  static void
>>>> -pnfs_free_lseg_list(struct list_head *tmp_list)
>>>> +pnfs_free_lseg_list(struct list_head *free_me)
>>>>  {
>>>> -     struct pnfs_layout_segment *lseg;
>>>> +     struct pnfs_layout_segment *lseg, *tmp;
>>>> +     struct inode *ino;
>>>>
>>>> -     while (!list_empty(tmp_list)) {
>>>> -             lseg = list_entry(tmp_list->next, struct pnfs_layout_segment,
>>>> -                             fi_list);
>>>> -             dprintk("%s calling put_lseg on %p\n", __func__, lseg);
>>>> -             list_del(&lseg->fi_list);
>>>> -             put_lseg(lseg);
>>>> +     list_for_each_entry_safe(lseg, tmp, free_me, fi_list) {
>>>> +             BUG_ON(atomic_read(&lseg->pls_refcount) != 0);
>>>> +             ino = lseg->layout->inode;
>>>> +             NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
>>>> +             /* Matched by get_layout_hdr_locked in pnfs_insert_layout */
>>>> +             put_layout_hdr(ino);
>>>>       }
>>>> +     INIT_LIST_HEAD(free_me);
>>>>  }
>>>>
>>>>  void
>>>> @@ -463,6 +491,17 @@ pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
>>>>       dprintk("<-- %s\n", __func__);
>>>>  }
>>>>
>>>> +/* Layoutreturn may use an invalid stateid, just copy what is there */
>>>> +void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo)
>>>> +{
>>>> +     int seq;
>>>> +
>>>> +     do {
>>>> +             seq = read_seqbegin(&lo->seqlock);
>>>> +             memcpy(dst->data, lo->stateid.data, sizeof(lo->stateid.data));
>>>> +     } while (read_seqretry(&lo->seqlock, seq));
>>>> +}
>>>> +
>>>>  void
>>>>  pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
>>>>                       struct nfs4_state *open_state)
>>>> @@ -546,25 +585,23 @@ has_layout_to_return(struct pnfs_layout_hdr *lo,
>>>>       return out;
>>>>  }
>>>>
>>>> +/* Return true if there is layout based io in progress in the given range.
>>>> + * Assumes range has already been marked invalid, and layout marked to
>>>> + * prevent any new lseg from being inserted.
>>>> + */
>>>>  bool
>>>>  pnfs_return_layout_barrier(struct nfs_inode *nfsi,
>>>>                          struct pnfs_layout_range *range)
>>>>  {
>>>> -     struct pnfs_layout_segment *lseg;
>>>> +     struct pnfs_layout_segment *lseg, *tmp;
>>>>       bool ret = false;
>>>>
>>>>       spin_lock(&nfsi->vfs_inode.i_lock);
>>>> -     list_for_each_entry(lseg, &nfsi->layout->segs, fi_list) {
>>>> -             if (!should_free_lseg(lseg, range))
>>>> -                     continue;
>>>> -             lseg->valid = false;
>>>> -             if (!_pnfs_can_return_lseg(lseg)) {
>>>> -                     dprintk("%s: wait on lseg %p refcount %d\n",
>>>> -                             __func__, lseg,
>>>> -                             atomic_read(&lseg->pls_refcount));
>>>> +     list_for_each_entry_safe(lseg, tmp, &nfsi->layout->segs, fi_list)
>>>
>>> Why do you need the safe version here while the inode is locked?
>>>
>>
>> We don't.
>
> OK. I'll fix that then :)
>
>>
>>
>>>> +             if (should_free_lseg(lseg, range)) {
>>>>                       ret = true;
>>>
>>> But this will always return "true" if there's any lseg to return,
>>> not only if (!_pnfs_can_return_lseg(lseg)).
>>>
>>> What am I missing? :)
>>>
>>
>> A return of "true" means the caller should wait.  So if there is any
>> lseg still left to return, we should return true.  The refcounting has
>> changed so that once the pending IO is finished, the lseg will
>> automatically be removed from the list.  I suspect that what you are
>> missing is that...the refcount in the invalid case is one less than
>> what it used to be.
>
> Thanks. I see what you mean now.
>
> What's missing is plh_block_lgets which is introduced only
> in [PATCH 13/18] pnfs-submit: rewrite of layout state handling and cb_layoutrecall
> Otherwise, new lsegs can be inserted into the list in between.
>
> Benny
>

Hmmm, you're right.  Let me see if I can tease out enough of the
blocking code from patch 13 to make it work.  The other option is to
just merge the two patches together.

Fred

>>
>> Fred
>>
>>> Benny
>>>
>>>> +                     break;
>>>>               }
>>>> -     }
>>>>       spin_unlock(&nfsi->vfs_inode.i_lock);
>>>>       dprintk("%s:Return %d\n", __func__, ret);
>>>>       return ret;
>>>> @@ -574,12 +611,10 @@ void
>>>>  pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
>>>>  {
>>>>       struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
>>>> -     LIST_HEAD(tmp_list);
>>>>
>>>>       if (lrp->args.return_type != RETURN_FILE)
>>>>               return;
>>>>       spin_lock(&lrp->args.inode->i_lock);
>>>> -     pnfs_clear_lseg_list(lo, &tmp_list, &lrp->args.range);
>>>>       if (!lrp->res.valid)
>>>>               ;       /* forgetful model internal release */
>>>>       else if (!lrp->res.lrs_present)
>>>> @@ -588,7 +623,6 @@ pnfs_layoutreturn_release(struct nfs4_layoutreturn *lrp)
>>>>               pnfs_set_layout_stateid(lo, &lrp->res.stateid);
>>>>       put_layout_hdr_locked(lo); /* Matched in _pnfs_return_layout */
>>>>       spin_unlock(&lrp->args.inode->i_lock);
>>>> -     pnfs_free_lseg_list(&tmp_list);
>>>>  }
>>>>
>>>>  static int
>>>> @@ -641,7 +675,11 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
>>>>       arg.offset = 0;
>>>>       arg.length = NFS4_MAX_UINT64;
>>>>
>>>> +     /* probably should BUGON if type != RETURN_FILE */
>>>>       if (type == RETURN_FILE) {
>>>> +             LIST_HEAD(tmp_list);
>>>> +             struct pnfs_layout_segment *lseg, *tmp;
>>>> +
>>>>               spin_lock(&ino->i_lock);
>>>>               lo = nfsi->layout;
>>>>               if (lo && !has_layout_to_return(lo, &arg))
>>>> @@ -652,10 +690,13 @@ _pnfs_return_layout(struct inode *ino, struct pnfs_layout_range *range,
>>>>                       goto out;
>>>>               }
>>>>
>>>> +             list_for_each_entry_safe(lseg, tmp, &lo->segs, fi_list)
>>>> +                     if (should_free_lseg(lseg, &arg))
>>>> +                             mark_lseg_invalid(lseg, &tmp_list);
>>>>               /* Reference matched in pnfs_layoutreturn_release */
>>>>               get_layout_hdr_locked(lo);
>>>> -
>>>>               spin_unlock(&ino->i_lock);
>>>> +             pnfs_free_lseg_list(&tmp_list);
>>>>
>>>>               if (layoutcommit_needed(nfsi)) {
>>>>                       if (stateid && !wait) { /* callback */
>>>> @@ -1171,7 +1212,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
>>>>       nfsi->layout->write_end_pos = 0;
>>>>       nfsi->layout->cred = NULL;
>>>>       __clear_bit(NFS_LAYOUT_NEED_LCOMMIT, &nfsi->layout->state);
>>>> -     pnfs_get_layout_stateid(&data->args.stateid, nfsi->layout, NULL);
>>>> +     pnfs_copy_layout_stateid(&data->args.stateid, nfsi->layout);
>>>>
>>>>       /* Reference for layoutcommit matched in pnfs_layoutcommit_release */
>>>>       get_layout_hdr_locked(NFS_I(inode)->layout);
>>>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>>>> index 05dd5e0..000acf0 100644
>>>> --- a/fs/nfs/pnfs.h
>>>> +++ b/fs/nfs/pnfs.h
>>>> @@ -206,6 +206,7 @@ void pnfs_layoutreturn_release(struct nfs4_layoutreturn *lpr);
>>>>  void pnfs_destroy_layout(struct nfs_inode *);
>>>>  void pnfs_destroy_all_layouts(struct nfs_client *);
>>>>  void put_layout_hdr(struct inode *inode);
>>>> +void pnfs_copy_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo);
>>>>  void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
>>>>                            struct nfs4_state *open_state);
>>>>
>>> --
>>> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
>>> the body of a message to majordomo@vger.kernel.org
>>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

  reply	other threads:[~2010-11-11 13:52 UTC|newest]

Thread overview: 31+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-11-04 15:22 [PATCH 00/18] rewrite of CB_LAYOUTRECALL and layoutstate code Fred Isaman
2010-11-04 15:22 ` [PATCH 01/18] NFSv4.1: Callback share session between ops Fred Isaman
2010-11-10 13:37   ` Benny Halevy
2010-11-10 13:41     ` [PATCH] SQUASHME: pnfs-submit: fixups for nfsv4.1 callbacks Benny Halevy
2010-11-10 14:53       ` Fred Isaman
2010-11-04 15:22 ` [PATCH 02/18] pnfs-submit: change pnfs_layout_segment refcounting from kref to atomic_t Fred Isaman
2010-11-04 15:22 ` [PATCH 03/18] pnfs-submit: remove _pnfs_can_return_lseg call from pnfs_clear_lseg_list Fred Isaman
2010-11-10 14:35   ` Benny Halevy
2010-11-10 14:46     ` Fred Isaman
2010-11-11  7:00       ` Benny Halevy
2010-11-11 13:52         ` Fred Isaman [this message]
2010-11-11 14:39           ` Benny Halevy
2010-11-04 15:22 ` [PATCH 04/18] pnfs-submit: change layout state seqlock to a spinlock Fred Isaman
2010-11-11 15:00   ` Benny Halevy
2010-11-11 15:09     ` Fred Isaman
2010-11-04 15:22 ` [PATCH 05/18] pnfs-submit: layoutreturn' rpc_call_op functions need to handle bulk returns Fred Isaman
2010-11-11 15:01   ` Benny Halevy
2010-11-04 15:22 ` [PATCH 06/18] pnfs_submit: nfs4_layoutreturn_release should not reference results Fred Isaman
2010-11-11 15:16   ` Benny Halevy
2010-11-04 15:22 ` [PATCH 07/18] pnfs-submit: reorganize struct cb_layoutrecallargs Fred Isaman
2010-11-04 15:22 ` [PATCH 08/18] pnfs-submit: rename lo->state to lo->plh_flags Fred Isaman
2010-11-04 15:22 ` [PATCH 09/18] pnfs-submit: change pnfs_layout_hdr refcount to atomic_t Fred Isaman
2010-11-04 15:22 ` [PATCH 10/18] pnfs-submit: argument to should_free_lseg changed from lseg to range Fred Isaman
2010-11-04 15:22 ` [PATCH 11/18] pnfs-submit: remove unnecessary field lgp->status Fred Isaman
2010-11-04 15:22 ` [PATCH 12/18] pnfs-submit: remove RPC_ASSASSINATED(task) checks Fred Isaman
2010-11-04 15:22 ` [PATCH 13/18] pnfs-submit: rewrite of layout state handling and cb_layoutrecall Fred Isaman
2010-11-04 15:22 ` [PATCH 14/18] pnfs-submit: increase number of outstanding CB_LAYOUTRECALLS we can handle Fred Isaman
2010-11-04 15:22 ` [PATCH 15/18] pnfs-submit: roc add layoutreturn op to close compound Fred Isaman
2010-11-04 15:22 ` [PATCH 16/18] pnfs-submit refactor layoutcommit xdr structures Fred Isaman
2010-11-04 15:22 ` [PATCH 17/18] pnfs-submit refactor pnfs_layoutcommit_setup Fred Isaman
2010-11-04 15:22 ` [PATCH 18/18] pnfs_submit: roc add layoutcommit op to close compound Fred Isaman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=AANLkTik69Y1+qmxF2QPKXM_4-VQsseRQ672S+u23h4nX@mail.gmail.com \
    --to=iisaman@netapp.com \
    --cc=bhalevy@panasas.com \
    --cc=linux-nfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).