From mboxrd@z Thu Jan 1 00:00:00 1970 From: Xiubo Li Subject: Re: [PATCH v2 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock Date: Tue, 26 May 2020 11:52:26 +0800 Message-ID: References: <1590405385-27406-1-git-send-email-xiubli@redhat.com> <1590405385-27406-2-git-send-email-xiubli@redhat.com> <23e444d7-49f7-8c5e-f179-00354d4b9b68@redhat.com> Mime-Version: 1.0 Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 8bit Return-path: Received: from us-smtp-2.mimecast.com ([207.211.31.81]:47666 "EHLO us-smtp-delivery-1.mimecast.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1725263AbgEZDwl (ORCPT ); Mon, 25 May 2020 23:52:41 -0400 In-Reply-To: <23e444d7-49f7-8c5e-f179-00354d4b9b68@redhat.com> Content-Language: en-US Sender: ceph-devel-owner@vger.kernel.org List-ID: To: "Yan, Zheng" , jlayton@kernel.org, idryomov@gmail.com Cc: pdonnell@redhat.com, ceph-devel@vger.kernel.org On 2020/5/26 11:11, Yan, Zheng wrote: > On 5/25/20 7:16 PM, xiubli@redhat.com wrote: >> From: Xiubo Li >> >> In the ceph_check_caps() it may call the session lock/unlock stuff. >> >> There have some deadlock cases, like: >> handle_forward() >> ... >> mutex_lock(&mdsc->mutex) >> ... >> ceph_mdsc_put_request() >>    --> ceph_mdsc_release_request() >>      --> ceph_put_cap_request() >>        --> ceph_put_cap_refs() >>          --> ceph_check_caps() >> ... >> mutex_unlock(&mdsc->mutex) >> >> And also there maybe has some double session lock cases, like: >> >> send_mds_reconnect() >> ... >> mutex_lock(&session->s_mutex); >> ... >>    --> replay_unsafe_requests() >>      --> ceph_mdsc_release_dir_caps() >>        --> ceph_put_cap_refs() >>          --> ceph_check_caps() >> ... >> mutex_unlock(&session->s_mutex); >> >> URL: https://tracker.ceph.com/issues/45635 >> Signed-off-by: Xiubo Li >> --- >>   fs/ceph/caps.c       | 29 +++++++++++++++++++++++++++++ >>   fs/ceph/inode.c      |  3 +++ >>   fs/ceph/mds_client.c | 12 +++++++----- >>   fs/ceph/super.h      |  5 +++++ >>   4 files changed, 44 insertions(+), 5 deletions(-) >> >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c >> index 27c2e60..aea66c1 100644 >> --- a/fs/ceph/caps.c >> +++ b/fs/ceph/caps.c >> @@ -3082,6 +3082,35 @@ void ceph_put_cap_refs(struct ceph_inode_info >> *ci, int had) >>           iput(inode); >>   } >>   +void ceph_async_put_cap_refs_work(struct work_struct *work) >> +{ >> +    struct ceph_inode_info *ci = container_of(work, struct >> ceph_inode_info, >> +                          put_cap_refs_work); >> +    int caps; >> + >> +    spin_lock(&ci->i_ceph_lock); >> +    caps = xchg(&ci->pending_put_caps, 0); >> +    spin_unlock(&ci->i_ceph_lock); >> + >> +    ceph_put_cap_refs(ci, caps); >> +} >> + >> +void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had) >> +{ >> +    struct inode *inode = &ci->vfs_inode; >> + >> +    spin_lock(&ci->i_ceph_lock); >> +    if (ci->pending_put_caps & had) { >> +            spin_unlock(&ci->i_ceph_lock); >> +        return; >> +    } > > this will cause cap ref leak. > > I thought about this again. all the trouble is caused by calling > ceph_mdsc_release_dir_caps() inside ceph_mdsc_release_request(). > We already call ceph_mdsc_release_dir_caps() in ceph_async_foo_cb() > for normal circumdtance. We just need to call > ceph_mdsc_release_dir_caps() in 'session closed' case (we never abort > async request). In the 'session closed' case, we can use > ceph_put_cap_refs_no_check_caps() > IMO, this ceph_async_put_cap_refs helper still needed, such as there have many other cases like: cleanup_session_requests() { ...    --> mutex_lock(&mdsc->mutex) ...    --> __unregister_request()        --> ceph_mdsc_put_request()            --> ceph_mdsc_release_request()                --> ceph_put_cap_request()                    --> ceph_put_cap_refs()                        --> ceph_check_caps()  ===> will do the session->s_mutex lock/unlock ...    --> mutex_unlock(&mdsc->mutex) } > Regards > Yan, Zheng > >> + >> +    ci->pending_put_caps |= had; >> +    spin_unlock(&ci->i_ceph_lock); >> + >> +    queue_work(ceph_inode_to_client(inode)->inode_wq, >> +           &ci->put_cap_refs_work); >> +} >>   /* >>    * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap >>    * context.  Adjust per-snap dirty page accounting as appropriate. >> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c >> index 357c937..303276a 100644 >> --- a/fs/ceph/inode.c >> +++ b/fs/ceph/inode.c >> @@ -517,6 +517,9 @@ struct inode *ceph_alloc_inode(struct super_block >> *sb) >>       INIT_LIST_HEAD(&ci->i_snap_realm_item); >>       INIT_LIST_HEAD(&ci->i_snap_flush_item); >>   +    INIT_WORK(&ci->put_cap_refs_work, ceph_async_put_cap_refs_work); >> +    ci->pending_put_caps = 0; >> + >>       INIT_WORK(&ci->i_work, ceph_inode_work); >>       ci->i_work_mask = 0; >>       memset(&ci->i_btime, '\0', sizeof(ci->i_btime)); >> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c >> index 0e0ab01..40b31da 100644 >> --- a/fs/ceph/mds_client.c >> +++ b/fs/ceph/mds_client.c >> @@ -811,12 +811,14 @@ void ceph_mdsc_release_request(struct kref *kref) >>       if (req->r_reply) >>           ceph_msg_put(req->r_reply); >>       if (req->r_inode) { >> -        ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); >> +        ceph_async_put_cap_refs(ceph_inode(req->r_inode), >> +                    CEPH_CAP_PIN); >>           /* avoid calling iput_final() in mds dispatch threads */ >>           ceph_async_iput(req->r_inode); >>       } >>       if (req->r_parent) { >> -        ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); >> +        ceph_async_put_cap_refs(ceph_inode(req->r_parent), >> +                    CEPH_CAP_PIN); >>           ceph_async_iput(req->r_parent); >>       } >>       ceph_async_iput(req->r_target_inode); >> @@ -831,8 +833,8 @@ void ceph_mdsc_release_request(struct kref *kref) >>            * changed between the dir mutex being dropped and >>            * this request being freed. >>            */ >> -        ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), >> -                  CEPH_CAP_PIN); >> + ceph_async_put_cap_refs(ceph_inode(req->r_old_dentry_dir), >> +                    CEPH_CAP_PIN); >>           ceph_async_iput(req->r_old_dentry_dir); >>       } >>       kfree(req->r_path1); >> @@ -3398,7 +3400,7 @@ void ceph_mdsc_release_dir_caps(struct >> ceph_mds_request *req) >>       dcaps = xchg(&req->r_dir_caps, 0); >>       if (dcaps) { >>           dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps)); >> -        ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps); >> +        ceph_async_put_cap_refs(ceph_inode(req->r_parent), dcaps); >>       } >>   } >>   diff --git a/fs/ceph/super.h b/fs/ceph/super.h >> index 226f19c..01d206f 100644 >> --- a/fs/ceph/super.h >> +++ b/fs/ceph/super.h >> @@ -421,6 +421,9 @@ struct ceph_inode_info { >>       struct timespec64 i_btime; >>       struct timespec64 i_snap_btime; >>   +    struct work_struct put_cap_refs_work; >> +    int pending_put_caps; >> + >>       struct work_struct i_work; >>       unsigned long  i_work_mask; >>   @@ -1095,6 +1098,8 @@ extern void ceph_take_cap_refs(struct >> ceph_inode_info *ci, int caps, >>                   bool snap_rwsem_locked); >>   extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps); >>   extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); >> +extern void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int >> had); >> +extern void ceph_async_put_cap_refs_work(struct work_struct *work); >>   extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, >> int nr, >>                          struct ceph_snap_context *snapc); >>   extern void ceph_flush_snaps(struct ceph_inode_info *ci, >> >