* [PATCH v2 0/2] ceph: fix dead lock and double lock
@ 2020-05-25 11:16 xiubli
2020-05-25 11:16 ` [PATCH v2 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock xiubli
2020-05-25 11:16 ` [PATCH v2 2/2] ceph: do not check the caps when reconnecting to mds xiubli
0 siblings, 2 replies; 8+ messages in thread
From: xiubli @ 2020-05-25 11:16 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
Changed in V2:
- do not check the caps when reconnecting to mds
- switch ceph_async_check_caps() to ceph_async_put_cap_refs()
Xiubo Li (2):
ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock
ceph: do not check the caps when reconnecting to mds
fs/ceph/caps.c | 45 +++++++++++++++++++++++++++++++++++++++++++--
fs/ceph/dir.c | 2 +-
fs/ceph/file.c | 2 +-
fs/ceph/inode.c | 3 +++
fs/ceph/mds_client.c | 24 ++++++++++++++++--------
fs/ceph/mds_client.h | 3 ++-
fs/ceph/super.h | 7 +++++++
7 files changed, 73 insertions(+), 13 deletions(-)
--
1.8.3.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock
2020-05-25 11:16 [PATCH v2 0/2] ceph: fix dead lock and double lock xiubli
@ 2020-05-25 11:16 ` xiubli
2020-05-26 3:11 ` Yan, Zheng
2020-05-25 11:16 ` [PATCH v2 2/2] ceph: do not check the caps when reconnecting to mds xiubli
1 sibling, 1 reply; 8+ messages in thread
From: xiubli @ 2020-05-25 11:16 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
In the ceph_check_caps() it may call the session lock/unlock stuff.
There have some deadlock cases, like:
handle_forward()
...
mutex_lock(&mdsc->mutex)
...
ceph_mdsc_put_request()
--> ceph_mdsc_release_request()
--> ceph_put_cap_request()
--> ceph_put_cap_refs()
--> ceph_check_caps()
...
mutex_unlock(&mdsc->mutex)
And also there maybe has some double session lock cases, like:
send_mds_reconnect()
...
mutex_lock(&session->s_mutex);
...
--> replay_unsafe_requests()
--> ceph_mdsc_release_dir_caps()
--> ceph_put_cap_refs()
--> ceph_check_caps()
...
mutex_unlock(&session->s_mutex);
URL: https://tracker.ceph.com/issues/45635
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/caps.c | 29 +++++++++++++++++++++++++++++
fs/ceph/inode.c | 3 +++
fs/ceph/mds_client.c | 12 +++++++-----
fs/ceph/super.h | 5 +++++
4 files changed, 44 insertions(+), 5 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 27c2e60..aea66c1 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3082,6 +3082,35 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
iput(inode);
}
+void ceph_async_put_cap_refs_work(struct work_struct *work)
+{
+ struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
+ put_cap_refs_work);
+ int caps;
+
+ spin_lock(&ci->i_ceph_lock);
+ caps = xchg(&ci->pending_put_caps, 0);
+ spin_unlock(&ci->i_ceph_lock);
+
+ ceph_put_cap_refs(ci, caps);
+}
+
+void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had)
+{
+ struct inode *inode = &ci->vfs_inode;
+
+ spin_lock(&ci->i_ceph_lock);
+ if (ci->pending_put_caps & had) {
+ spin_unlock(&ci->i_ceph_lock);
+ return;
+ }
+
+ ci->pending_put_caps |= had;
+ spin_unlock(&ci->i_ceph_lock);
+
+ queue_work(ceph_inode_to_client(inode)->inode_wq,
+ &ci->put_cap_refs_work);
+}
/*
* Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
* context. Adjust per-snap dirty page accounting as appropriate.
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 357c937..303276a 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -517,6 +517,9 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
INIT_LIST_HEAD(&ci->i_snap_realm_item);
INIT_LIST_HEAD(&ci->i_snap_flush_item);
+ INIT_WORK(&ci->put_cap_refs_work, ceph_async_put_cap_refs_work);
+ ci->pending_put_caps = 0;
+
INIT_WORK(&ci->i_work, ceph_inode_work);
ci->i_work_mask = 0;
memset(&ci->i_btime, '\0', sizeof(ci->i_btime));
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0e0ab01..40b31da 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -811,12 +811,14 @@ void ceph_mdsc_release_request(struct kref *kref)
if (req->r_reply)
ceph_msg_put(req->r_reply);
if (req->r_inode) {
- ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
+ ceph_async_put_cap_refs(ceph_inode(req->r_inode),
+ CEPH_CAP_PIN);
/* avoid calling iput_final() in mds dispatch threads */
ceph_async_iput(req->r_inode);
}
if (req->r_parent) {
- ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
+ ceph_async_put_cap_refs(ceph_inode(req->r_parent),
+ CEPH_CAP_PIN);
ceph_async_iput(req->r_parent);
}
ceph_async_iput(req->r_target_inode);
@@ -831,8 +833,8 @@ void ceph_mdsc_release_request(struct kref *kref)
* changed between the dir mutex being dropped and
* this request being freed.
*/
- ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
- CEPH_CAP_PIN);
+ ceph_async_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
+ CEPH_CAP_PIN);
ceph_async_iput(req->r_old_dentry_dir);
}
kfree(req->r_path1);
@@ -3398,7 +3400,7 @@ void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req)
dcaps = xchg(&req->r_dir_caps, 0);
if (dcaps) {
dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
- ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps);
+ ceph_async_put_cap_refs(ceph_inode(req->r_parent), dcaps);
}
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 226f19c..01d206f 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -421,6 +421,9 @@ struct ceph_inode_info {
struct timespec64 i_btime;
struct timespec64 i_snap_btime;
+ struct work_struct put_cap_refs_work;
+ int pending_put_caps;
+
struct work_struct i_work;
unsigned long i_work_mask;
@@ -1095,6 +1098,8 @@ extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
bool snap_rwsem_locked);
extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
+extern void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had);
+extern void ceph_async_put_cap_refs_work(struct work_struct *work);
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
struct ceph_snap_context *snapc);
extern void ceph_flush_snaps(struct ceph_inode_info *ci,
--
1.8.3.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 2/2] ceph: do not check the caps when reconnecting to mds
2020-05-25 11:16 [PATCH v2 0/2] ceph: fix dead lock and double lock xiubli
2020-05-25 11:16 ` [PATCH v2 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock xiubli
@ 2020-05-25 11:16 ` xiubli
1 sibling, 0 replies; 8+ messages in thread
From: xiubli @ 2020-05-25 11:16 UTC (permalink / raw)
To: jlayton, idryomov, zyan; +Cc: pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
It make no sense to check the caps when reconnecting to mds.
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
fs/ceph/caps.c | 16 ++++++++++++++--
fs/ceph/dir.c | 2 +-
fs/ceph/file.c | 2 +-
fs/ceph/mds_client.c | 14 ++++++++++----
fs/ceph/mds_client.h | 3 ++-
fs/ceph/super.h | 2 ++
6 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index aea66c1..1fcf0a9 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3016,7 +3016,8 @@ static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci,
* If we are releasing a WR cap (from a sync write), finalize any affected
* cap_snap, and wake up any waiters.
*/
-void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
+void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
+ bool skip_checking_caps)
{
struct inode *inode = &ci->vfs_inode;
int last = 0, put = 0, flushsnaps = 0, wake = 0;
@@ -3072,7 +3073,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
last ? " last" : "", put ? " put" : "");
- if (last)
+ if (last && !skip_checking_caps)
ceph_check_caps(ci, 0, NULL);
else if (flushsnaps)
ceph_flush_snaps(ci, NULL);
@@ -3082,6 +3083,16 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
iput(inode);
}
+void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
+{
+ __ceph_put_cap_refs(ci, had, false);
+}
+
+void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci, int had)
+{
+ __ceph_put_cap_refs(ci, had, true);
+}
+
void ceph_async_put_cap_refs_work(struct work_struct *work)
{
struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
@@ -3111,6 +3122,7 @@ void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had)
queue_work(ceph_inode_to_client(inode)->inode_wq,
&ci->put_cap_refs_work);
}
+
/*
* Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
* context. Adjust per-snap dirty page accounting as appropriate.
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 39f5311..9f66ea5 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1079,7 +1079,7 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc,
}
out:
iput(req->r_old_inode);
- ceph_mdsc_release_dir_caps(req);
+ ceph_mdsc_release_dir_caps(req, false);
}
static int get_caps_for_async_unlink(struct inode *dir, struct dentry *dentry)
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 160644d..812da94 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -565,7 +565,7 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc,
req->r_deleg_ino);
}
out:
- ceph_mdsc_release_dir_caps(req);
+ ceph_mdsc_release_dir_caps(req, false);
}
static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 40b31da..a784f0e 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -804,7 +804,7 @@ void ceph_mdsc_release_request(struct kref *kref)
struct ceph_mds_request *req = container_of(kref,
struct ceph_mds_request,
r_kref);
- ceph_mdsc_release_dir_caps(req);
+ ceph_mdsc_release_dir_caps(req, false);
destroy_reply_info(&req->r_reply_info);
if (req->r_request)
ceph_msg_put(req->r_request);
@@ -3393,14 +3393,20 @@ static void handle_session(struct ceph_mds_session *session,
return;
}
-void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req)
+void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req,
+ bool skip_checking_caps)
{
int dcaps;
dcaps = xchg(&req->r_dir_caps, 0);
if (dcaps) {
dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
- ceph_async_put_cap_refs(ceph_inode(req->r_parent), dcaps);
+ if (skip_checking_caps)
+ ceph_put_cap_refs_no_check_caps(ceph_inode(req->r_parent),
+ dcaps);
+ else
+ ceph_async_put_cap_refs(ceph_inode(req->r_parent),
+ dcaps);
}
}
@@ -3436,7 +3442,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
if (req->r_session->s_mds != session->s_mds)
continue;
- ceph_mdsc_release_dir_caps(req);
+ ceph_mdsc_release_dir_caps(req, true);
__send_request(mdsc, session, req, true);
}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 43111e4..73ee022 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -506,7 +506,8 @@ extern int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc,
extern int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
struct inode *dir,
struct ceph_mds_request *req);
-extern void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req);
+extern void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req,
+ bool skip_checking_caps);
static inline void ceph_mdsc_get_request(struct ceph_mds_request *req)
{
kref_get(&req->r_kref);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 01d206f..4e0b18a 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1098,6 +1098,8 @@ extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
bool snap_rwsem_locked);
extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
+extern void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci,
+ int had);
extern void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had);
extern void ceph_async_put_cap_refs_work(struct work_struct *work);
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
--
1.8.3.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH v2 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock
2020-05-25 11:16 ` [PATCH v2 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock xiubli
@ 2020-05-26 3:11 ` Yan, Zheng
2020-05-26 3:38 ` Xiubo Li
2020-05-26 3:52 ` Xiubo Li
0 siblings, 2 replies; 8+ messages in thread
From: Yan, Zheng @ 2020-05-26 3:11 UTC (permalink / raw)
To: xiubli, jlayton, idryomov; +Cc: pdonnell, ceph-devel
On 5/25/20 7:16 PM, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> In the ceph_check_caps() it may call the session lock/unlock stuff.
>
> There have some deadlock cases, like:
> handle_forward()
> ...
> mutex_lock(&mdsc->mutex)
> ...
> ceph_mdsc_put_request()
> --> ceph_mdsc_release_request()
> --> ceph_put_cap_request()
> --> ceph_put_cap_refs()
> --> ceph_check_caps()
> ...
> mutex_unlock(&mdsc->mutex)
>
> And also there maybe has some double session lock cases, like:
>
> send_mds_reconnect()
> ...
> mutex_lock(&session->s_mutex);
> ...
> --> replay_unsafe_requests()
> --> ceph_mdsc_release_dir_caps()
> --> ceph_put_cap_refs()
> --> ceph_check_caps()
> ...
> mutex_unlock(&session->s_mutex);
>
> URL: https://tracker.ceph.com/issues/45635
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
> fs/ceph/caps.c | 29 +++++++++++++++++++++++++++++
> fs/ceph/inode.c | 3 +++
> fs/ceph/mds_client.c | 12 +++++++-----
> fs/ceph/super.h | 5 +++++
> 4 files changed, 44 insertions(+), 5 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index 27c2e60..aea66c1 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -3082,6 +3082,35 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
> iput(inode);
> }
>
> +void ceph_async_put_cap_refs_work(struct work_struct *work)
> +{
> + struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
> + put_cap_refs_work);
> + int caps;
> +
> + spin_lock(&ci->i_ceph_lock);
> + caps = xchg(&ci->pending_put_caps, 0);
> + spin_unlock(&ci->i_ceph_lock);
> +
> + ceph_put_cap_refs(ci, caps);
> +}
> +
> +void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had)
> +{
> + struct inode *inode = &ci->vfs_inode;
> +
> + spin_lock(&ci->i_ceph_lock);
> + if (ci->pending_put_caps & had) {
> + spin_unlock(&ci->i_ceph_lock);
> + return;
> + }
this will cause cap ref leak.
I thought about this again. all the trouble is caused by calling
ceph_mdsc_release_dir_caps() inside ceph_mdsc_release_request().
We already call ceph_mdsc_release_dir_caps() in ceph_async_foo_cb() for
normal circumdtance. We just need to call ceph_mdsc_release_dir_caps()
in 'session closed' case (we never abort async request). In the 'session
closed' case, we can use ceph_put_cap_refs_no_check_caps()
Regards
Yan, Zheng
> +
> + ci->pending_put_caps |= had;
> + spin_unlock(&ci->i_ceph_lock);
> +
> + queue_work(ceph_inode_to_client(inode)->inode_wq,
> + &ci->put_cap_refs_work);
> +}
> /*
> * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
> * context. Adjust per-snap dirty page accounting as appropriate.
> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> index 357c937..303276a 100644
> --- a/fs/ceph/inode.c
> +++ b/fs/ceph/inode.c
> @@ -517,6 +517,9 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
> INIT_LIST_HEAD(&ci->i_snap_realm_item);
> INIT_LIST_HEAD(&ci->i_snap_flush_item);
>
> + INIT_WORK(&ci->put_cap_refs_work, ceph_async_put_cap_refs_work);
> + ci->pending_put_caps = 0;
> +
> INIT_WORK(&ci->i_work, ceph_inode_work);
> ci->i_work_mask = 0;
> memset(&ci->i_btime, '\0', sizeof(ci->i_btime));
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 0e0ab01..40b31da 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -811,12 +811,14 @@ void ceph_mdsc_release_request(struct kref *kref)
> if (req->r_reply)
> ceph_msg_put(req->r_reply);
> if (req->r_inode) {
> - ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
> + ceph_async_put_cap_refs(ceph_inode(req->r_inode),
> + CEPH_CAP_PIN);
> /* avoid calling iput_final() in mds dispatch threads */
> ceph_async_iput(req->r_inode);
> }
> if (req->r_parent) {
> - ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
> + ceph_async_put_cap_refs(ceph_inode(req->r_parent),
> + CEPH_CAP_PIN);
> ceph_async_iput(req->r_parent);
> }
> ceph_async_iput(req->r_target_inode);
> @@ -831,8 +833,8 @@ void ceph_mdsc_release_request(struct kref *kref)
> * changed between the dir mutex being dropped and
> * this request being freed.
> */
> - ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
> - CEPH_CAP_PIN);
> + ceph_async_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
> + CEPH_CAP_PIN);
> ceph_async_iput(req->r_old_dentry_dir);
> }
> kfree(req->r_path1);
> @@ -3398,7 +3400,7 @@ void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req)
> dcaps = xchg(&req->r_dir_caps, 0);
> if (dcaps) {
> dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
> - ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps);
> + ceph_async_put_cap_refs(ceph_inode(req->r_parent), dcaps);
> }
> }
>
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 226f19c..01d206f 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -421,6 +421,9 @@ struct ceph_inode_info {
> struct timespec64 i_btime;
> struct timespec64 i_snap_btime;
>
> + struct work_struct put_cap_refs_work;
> + int pending_put_caps;
> +
> struct work_struct i_work;
> unsigned long i_work_mask;
>
> @@ -1095,6 +1098,8 @@ extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int caps,
> bool snap_rwsem_locked);
> extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
> extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
> +extern void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had);
> +extern void ceph_async_put_cap_refs_work(struct work_struct *work);
> extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
> struct ceph_snap_context *snapc);
> extern void ceph_flush_snaps(struct ceph_inode_info *ci,
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock
2020-05-26 3:11 ` Yan, Zheng
@ 2020-05-26 3:38 ` Xiubo Li
2020-05-26 6:29 ` Yan, Zheng
2020-05-26 3:52 ` Xiubo Li
1 sibling, 1 reply; 8+ messages in thread
From: Xiubo Li @ 2020-05-26 3:38 UTC (permalink / raw)
To: Yan, Zheng, jlayton, idryomov; +Cc: pdonnell, ceph-devel
On 2020/5/26 11:11, Yan, Zheng wrote:
> On 5/25/20 7:16 PM, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> In the ceph_check_caps() it may call the session lock/unlock stuff.
>>
>> There have some deadlock cases, like:
>> handle_forward()
>> ...
>> mutex_lock(&mdsc->mutex)
>> ...
>> ceph_mdsc_put_request()
>> --> ceph_mdsc_release_request()
>> --> ceph_put_cap_request()
>> --> ceph_put_cap_refs()
>> --> ceph_check_caps()
>> ...
>> mutex_unlock(&mdsc->mutex)
>>
>> And also there maybe has some double session lock cases, like:
>>
>> send_mds_reconnect()
>> ...
>> mutex_lock(&session->s_mutex);
>> ...
>> --> replay_unsafe_requests()
>> --> ceph_mdsc_release_dir_caps()
>> --> ceph_put_cap_refs()
>> --> ceph_check_caps()
>> ...
>> mutex_unlock(&session->s_mutex);
>>
>> URL: https://tracker.ceph.com/issues/45635
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>> fs/ceph/caps.c | 29 +++++++++++++++++++++++++++++
>> fs/ceph/inode.c | 3 +++
>> fs/ceph/mds_client.c | 12 +++++++-----
>> fs/ceph/super.h | 5 +++++
>> 4 files changed, 44 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index 27c2e60..aea66c1 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -3082,6 +3082,35 @@ void ceph_put_cap_refs(struct ceph_inode_info
>> *ci, int had)
>> iput(inode);
>> }
>> +void ceph_async_put_cap_refs_work(struct work_struct *work)
>> +{
>> + struct ceph_inode_info *ci = container_of(work, struct
>> ceph_inode_info,
>> + put_cap_refs_work);
>> + int caps;
>> +
>> + spin_lock(&ci->i_ceph_lock);
>> + caps = xchg(&ci->pending_put_caps, 0);
>> + spin_unlock(&ci->i_ceph_lock);
>> +
>> + ceph_put_cap_refs(ci, caps);
>> +}
>> +
>> +void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had)
>> +{
>> + struct inode *inode = &ci->vfs_inode;
>> +
>> + spin_lock(&ci->i_ceph_lock);
>> + if (ci->pending_put_caps & had) {
>> + spin_unlock(&ci->i_ceph_lock);
>> + return;
>> + }
>
> this will cause cap ref leak.
Ah, yeah, right.
>
> I thought about this again. all the trouble is caused by calling
> ceph_mdsc_release_dir_caps() inside ceph_mdsc_release_request().
And also in ceph_mdsc_release_request() it is calling
ceph_put_cap_refs() directly in other 3 places.
BRs
Xiubo
> We already call ceph_mdsc_release_dir_caps() in ceph_async_foo_cb()
> for normal circumdtance. We just need to call
> ceph_mdsc_release_dir_caps() in 'session closed' case (we never abort
> async request). In the 'session closed' case, we can use
> ceph_put_cap_refs_no_check_caps()
>
> Regards
> Yan, Zheng
>
>> +
>> + ci->pending_put_caps |= had;
>> + spin_unlock(&ci->i_ceph_lock);
>> +
>> + queue_work(ceph_inode_to_client(inode)->inode_wq,
>> + &ci->put_cap_refs_work);
>> +}
>> /*
>> * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
>> * context. Adjust per-snap dirty page accounting as appropriate.
>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>> index 357c937..303276a 100644
>> --- a/fs/ceph/inode.c
>> +++ b/fs/ceph/inode.c
>> @@ -517,6 +517,9 @@ struct inode *ceph_alloc_inode(struct super_block
>> *sb)
>> INIT_LIST_HEAD(&ci->i_snap_realm_item);
>> INIT_LIST_HEAD(&ci->i_snap_flush_item);
>> + INIT_WORK(&ci->put_cap_refs_work, ceph_async_put_cap_refs_work);
>> + ci->pending_put_caps = 0;
>> +
>> INIT_WORK(&ci->i_work, ceph_inode_work);
>> ci->i_work_mask = 0;
>> memset(&ci->i_btime, '\0', sizeof(ci->i_btime));
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 0e0ab01..40b31da 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -811,12 +811,14 @@ void ceph_mdsc_release_request(struct kref *kref)
>> if (req->r_reply)
>> ceph_msg_put(req->r_reply);
>> if (req->r_inode) {
>> - ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
>> + ceph_async_put_cap_refs(ceph_inode(req->r_inode),
>> + CEPH_CAP_PIN);
>> /* avoid calling iput_final() in mds dispatch threads */
>> ceph_async_iput(req->r_inode);
>> }
>> if (req->r_parent) {
>> - ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
>> + ceph_async_put_cap_refs(ceph_inode(req->r_parent),
>> + CEPH_CAP_PIN);
>> ceph_async_iput(req->r_parent);
>> }
>> ceph_async_iput(req->r_target_inode);
>> @@ -831,8 +833,8 @@ void ceph_mdsc_release_request(struct kref *kref)
>> * changed between the dir mutex being dropped and
>> * this request being freed.
>> */
>> - ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
>> - CEPH_CAP_PIN);
>> + ceph_async_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
>> + CEPH_CAP_PIN);
>> ceph_async_iput(req->r_old_dentry_dir);
>> }
>> kfree(req->r_path1);
>> @@ -3398,7 +3400,7 @@ void ceph_mdsc_release_dir_caps(struct
>> ceph_mds_request *req)
>> dcaps = xchg(&req->r_dir_caps, 0);
>> if (dcaps) {
>> dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
>> - ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps);
>> + ceph_async_put_cap_refs(ceph_inode(req->r_parent), dcaps);
>> }
>> }
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index 226f19c..01d206f 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -421,6 +421,9 @@ struct ceph_inode_info {
>> struct timespec64 i_btime;
>> struct timespec64 i_snap_btime;
>> + struct work_struct put_cap_refs_work;
>> + int pending_put_caps;
>> +
>> struct work_struct i_work;
>> unsigned long i_work_mask;
>> @@ -1095,6 +1098,8 @@ extern void ceph_take_cap_refs(struct
>> ceph_inode_info *ci, int caps,
>> bool snap_rwsem_locked);
>> extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
>> extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
>> +extern void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int
>> had);
>> +extern void ceph_async_put_cap_refs_work(struct work_struct *work);
>> extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci,
>> int nr,
>> struct ceph_snap_context *snapc);
>> extern void ceph_flush_snaps(struct ceph_inode_info *ci,
>>
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock
2020-05-26 3:11 ` Yan, Zheng
2020-05-26 3:38 ` Xiubo Li
@ 2020-05-26 3:52 ` Xiubo Li
1 sibling, 0 replies; 8+ messages in thread
From: Xiubo Li @ 2020-05-26 3:52 UTC (permalink / raw)
To: Yan, Zheng, jlayton, idryomov; +Cc: pdonnell, ceph-devel
On 2020/5/26 11:11, Yan, Zheng wrote:
> On 5/25/20 7:16 PM, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> In the ceph_check_caps() it may call the session lock/unlock stuff.
>>
>> There have some deadlock cases, like:
>> handle_forward()
>> ...
>> mutex_lock(&mdsc->mutex)
>> ...
>> ceph_mdsc_put_request()
>> --> ceph_mdsc_release_request()
>> --> ceph_put_cap_request()
>> --> ceph_put_cap_refs()
>> --> ceph_check_caps()
>> ...
>> mutex_unlock(&mdsc->mutex)
>>
>> And also there maybe has some double session lock cases, like:
>>
>> send_mds_reconnect()
>> ...
>> mutex_lock(&session->s_mutex);
>> ...
>> --> replay_unsafe_requests()
>> --> ceph_mdsc_release_dir_caps()
>> --> ceph_put_cap_refs()
>> --> ceph_check_caps()
>> ...
>> mutex_unlock(&session->s_mutex);
>>
>> URL: https://tracker.ceph.com/issues/45635
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>> fs/ceph/caps.c | 29 +++++++++++++++++++++++++++++
>> fs/ceph/inode.c | 3 +++
>> fs/ceph/mds_client.c | 12 +++++++-----
>> fs/ceph/super.h | 5 +++++
>> 4 files changed, 44 insertions(+), 5 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index 27c2e60..aea66c1 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -3082,6 +3082,35 @@ void ceph_put_cap_refs(struct ceph_inode_info
>> *ci, int had)
>> iput(inode);
>> }
>> +void ceph_async_put_cap_refs_work(struct work_struct *work)
>> +{
>> + struct ceph_inode_info *ci = container_of(work, struct
>> ceph_inode_info,
>> + put_cap_refs_work);
>> + int caps;
>> +
>> + spin_lock(&ci->i_ceph_lock);
>> + caps = xchg(&ci->pending_put_caps, 0);
>> + spin_unlock(&ci->i_ceph_lock);
>> +
>> + ceph_put_cap_refs(ci, caps);
>> +}
>> +
>> +void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had)
>> +{
>> + struct inode *inode = &ci->vfs_inode;
>> +
>> + spin_lock(&ci->i_ceph_lock);
>> + if (ci->pending_put_caps & had) {
>> + spin_unlock(&ci->i_ceph_lock);
>> + return;
>> + }
>
> this will cause cap ref leak.
>
> I thought about this again. all the trouble is caused by calling
> ceph_mdsc_release_dir_caps() inside ceph_mdsc_release_request().
> We already call ceph_mdsc_release_dir_caps() in ceph_async_foo_cb()
> for normal circumdtance. We just need to call
> ceph_mdsc_release_dir_caps() in 'session closed' case (we never abort
> async request). In the 'session closed' case, we can use
> ceph_put_cap_refs_no_check_caps()
>
IMO, this ceph_async_put_cap_refs helper still needed, such as there
have many other cases like:
cleanup_session_requests()
{
...
--> mutex_lock(&mdsc->mutex)
...
--> __unregister_request()
--> ceph_mdsc_put_request()
--> ceph_mdsc_release_request()
--> ceph_put_cap_request()
--> ceph_put_cap_refs()
--> ceph_check_caps() ===> will do the
session->s_mutex lock/unlock
...
--> mutex_unlock(&mdsc->mutex)
}
> Regards
> Yan, Zheng
>
>> +
>> + ci->pending_put_caps |= had;
>> + spin_unlock(&ci->i_ceph_lock);
>> +
>> + queue_work(ceph_inode_to_client(inode)->inode_wq,
>> + &ci->put_cap_refs_work);
>> +}
>> /*
>> * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
>> * context. Adjust per-snap dirty page accounting as appropriate.
>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>> index 357c937..303276a 100644
>> --- a/fs/ceph/inode.c
>> +++ b/fs/ceph/inode.c
>> @@ -517,6 +517,9 @@ struct inode *ceph_alloc_inode(struct super_block
>> *sb)
>> INIT_LIST_HEAD(&ci->i_snap_realm_item);
>> INIT_LIST_HEAD(&ci->i_snap_flush_item);
>> + INIT_WORK(&ci->put_cap_refs_work, ceph_async_put_cap_refs_work);
>> + ci->pending_put_caps = 0;
>> +
>> INIT_WORK(&ci->i_work, ceph_inode_work);
>> ci->i_work_mask = 0;
>> memset(&ci->i_btime, '\0', sizeof(ci->i_btime));
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 0e0ab01..40b31da 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -811,12 +811,14 @@ void ceph_mdsc_release_request(struct kref *kref)
>> if (req->r_reply)
>> ceph_msg_put(req->r_reply);
>> if (req->r_inode) {
>> - ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
>> + ceph_async_put_cap_refs(ceph_inode(req->r_inode),
>> + CEPH_CAP_PIN);
>> /* avoid calling iput_final() in mds dispatch threads */
>> ceph_async_iput(req->r_inode);
>> }
>> if (req->r_parent) {
>> - ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
>> + ceph_async_put_cap_refs(ceph_inode(req->r_parent),
>> + CEPH_CAP_PIN);
>> ceph_async_iput(req->r_parent);
>> }
>> ceph_async_iput(req->r_target_inode);
>> @@ -831,8 +833,8 @@ void ceph_mdsc_release_request(struct kref *kref)
>> * changed between the dir mutex being dropped and
>> * this request being freed.
>> */
>> - ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
>> - CEPH_CAP_PIN);
>> + ceph_async_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
>> + CEPH_CAP_PIN);
>> ceph_async_iput(req->r_old_dentry_dir);
>> }
>> kfree(req->r_path1);
>> @@ -3398,7 +3400,7 @@ void ceph_mdsc_release_dir_caps(struct
>> ceph_mds_request *req)
>> dcaps = xchg(&req->r_dir_caps, 0);
>> if (dcaps) {
>> dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
>> - ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps);
>> + ceph_async_put_cap_refs(ceph_inode(req->r_parent), dcaps);
>> }
>> }
>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>> index 226f19c..01d206f 100644
>> --- a/fs/ceph/super.h
>> +++ b/fs/ceph/super.h
>> @@ -421,6 +421,9 @@ struct ceph_inode_info {
>> struct timespec64 i_btime;
>> struct timespec64 i_snap_btime;
>> + struct work_struct put_cap_refs_work;
>> + int pending_put_caps;
>> +
>> struct work_struct i_work;
>> unsigned long i_work_mask;
>> @@ -1095,6 +1098,8 @@ extern void ceph_take_cap_refs(struct
>> ceph_inode_info *ci, int caps,
>> bool snap_rwsem_locked);
>> extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
>> extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
>> +extern void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int
>> had);
>> +extern void ceph_async_put_cap_refs_work(struct work_struct *work);
>> extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci,
>> int nr,
>> struct ceph_snap_context *snapc);
>> extern void ceph_flush_snaps(struct ceph_inode_info *ci,
>>
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock
2020-05-26 3:38 ` Xiubo Li
@ 2020-05-26 6:29 ` Yan, Zheng
2020-05-26 7:30 ` Xiubo Li
0 siblings, 1 reply; 8+ messages in thread
From: Yan, Zheng @ 2020-05-26 6:29 UTC (permalink / raw)
To: Xiubo Li
Cc: Yan, Zheng, Jeff Layton, Ilya Dryomov, Patrick Donnelly,
ceph-devel
On Tue, May 26, 2020 at 11:42 AM Xiubo Li <xiubli@redhat.com> wrote:
>
> On 2020/5/26 11:11, Yan, Zheng wrote:
> > On 5/25/20 7:16 PM, xiubli@redhat.com wrote:
> >> From: Xiubo Li <xiubli@redhat.com>
> >>
> >> In the ceph_check_caps() it may call the session lock/unlock stuff.
> >>
> >> There have some deadlock cases, like:
> >> handle_forward()
> >> ...
> >> mutex_lock(&mdsc->mutex)
> >> ...
> >> ceph_mdsc_put_request()
> >> --> ceph_mdsc_release_request()
> >> --> ceph_put_cap_request()
> >> --> ceph_put_cap_refs()
> >> --> ceph_check_caps()
> >> ...
> >> mutex_unlock(&mdsc->mutex)
> >>
> >> And also there maybe has some double session lock cases, like:
> >>
> >> send_mds_reconnect()
> >> ...
> >> mutex_lock(&session->s_mutex);
> >> ...
> >> --> replay_unsafe_requests()
> >> --> ceph_mdsc_release_dir_caps()
> >> --> ceph_put_cap_refs()
> >> --> ceph_check_caps()
> >> ...
> >> mutex_unlock(&session->s_mutex);
> >>
> >> URL: https://tracker.ceph.com/issues/45635
> >> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> >> ---
> >> fs/ceph/caps.c | 29 +++++++++++++++++++++++++++++
> >> fs/ceph/inode.c | 3 +++
> >> fs/ceph/mds_client.c | 12 +++++++-----
> >> fs/ceph/super.h | 5 +++++
> >> 4 files changed, 44 insertions(+), 5 deletions(-)
> >>
> >> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> >> index 27c2e60..aea66c1 100644
> >> --- a/fs/ceph/caps.c
> >> +++ b/fs/ceph/caps.c
> >> @@ -3082,6 +3082,35 @@ void ceph_put_cap_refs(struct ceph_inode_info
> >> *ci, int had)
> >> iput(inode);
> >> }
> >> +void ceph_async_put_cap_refs_work(struct work_struct *work)
> >> +{
> >> + struct ceph_inode_info *ci = container_of(work, struct
> >> ceph_inode_info,
> >> + put_cap_refs_work);
> >> + int caps;
> >> +
> >> + spin_lock(&ci->i_ceph_lock);
> >> + caps = xchg(&ci->pending_put_caps, 0);
> >> + spin_unlock(&ci->i_ceph_lock);
> >> +
> >> + ceph_put_cap_refs(ci, caps);
> >> +}
> >> +
> >> +void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had)
> >> +{
> >> + struct inode *inode = &ci->vfs_inode;
> >> +
> >> + spin_lock(&ci->i_ceph_lock);
> >> + if (ci->pending_put_caps & had) {
> >> + spin_unlock(&ci->i_ceph_lock);
> >> + return;
> >> + }
> >
> > this will cause cap ref leak.
>
> Ah, yeah, right.
>
>
> >
> > I thought about this again. all the trouble is caused by calling
> > ceph_mdsc_release_dir_caps() inside ceph_mdsc_release_request().
>
> And also in ceph_mdsc_release_request() it is calling
> ceph_put_cap_refs() directly in other 3 places.
>
putting CEPH_CAP_PIN does not trigger check_caps(). So only
ceph_mdsc_release_dir_caps() matters.
> BRs
>
> Xiubo
>
> > We already call ceph_mdsc_release_dir_caps() in ceph_async_foo_cb()
> > for normal circumdtance. We just need to call
> > ceph_mdsc_release_dir_caps() in 'session closed' case (we never abort
> > async request). In the 'session closed' case, we can use
> > ceph_put_cap_refs_no_check_caps()
> >
> > Regards
> > Yan, Zheng
> >
> >> +
> >> + ci->pending_put_caps |= had;
> >> + spin_unlock(&ci->i_ceph_lock);
> >> +
> >> + queue_work(ceph_inode_to_client(inode)->inode_wq,
> >> + &ci->put_cap_refs_work);
> >> +}
> >> /*
> >> * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
> >> * context. Adjust per-snap dirty page accounting as appropriate.
> >> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
> >> index 357c937..303276a 100644
> >> --- a/fs/ceph/inode.c
> >> +++ b/fs/ceph/inode.c
> >> @@ -517,6 +517,9 @@ struct inode *ceph_alloc_inode(struct super_block
> >> *sb)
> >> INIT_LIST_HEAD(&ci->i_snap_realm_item);
> >> INIT_LIST_HEAD(&ci->i_snap_flush_item);
> >> + INIT_WORK(&ci->put_cap_refs_work, ceph_async_put_cap_refs_work);
> >> + ci->pending_put_caps = 0;
> >> +
> >> INIT_WORK(&ci->i_work, ceph_inode_work);
> >> ci->i_work_mask = 0;
> >> memset(&ci->i_btime, '\0', sizeof(ci->i_btime));
> >> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> >> index 0e0ab01..40b31da 100644
> >> --- a/fs/ceph/mds_client.c
> >> +++ b/fs/ceph/mds_client.c
> >> @@ -811,12 +811,14 @@ void ceph_mdsc_release_request(struct kref *kref)
> >> if (req->r_reply)
> >> ceph_msg_put(req->r_reply);
> >> if (req->r_inode) {
> >> - ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
> >> + ceph_async_put_cap_refs(ceph_inode(req->r_inode),
> >> + CEPH_CAP_PIN);
> >> /* avoid calling iput_final() in mds dispatch threads */
> >> ceph_async_iput(req->r_inode);
> >> }
> >> if (req->r_parent) {
> >> - ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
> >> + ceph_async_put_cap_refs(ceph_inode(req->r_parent),
> >> + CEPH_CAP_PIN);
> >> ceph_async_iput(req->r_parent);
> >> }
> >> ceph_async_iput(req->r_target_inode);
> >> @@ -831,8 +833,8 @@ void ceph_mdsc_release_request(struct kref *kref)
> >> * changed between the dir mutex being dropped and
> >> * this request being freed.
> >> */
> >> - ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
> >> - CEPH_CAP_PIN);
> >> + ceph_async_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
> >> + CEPH_CAP_PIN);
> >> ceph_async_iput(req->r_old_dentry_dir);
> >> }
> >> kfree(req->r_path1);
> >> @@ -3398,7 +3400,7 @@ void ceph_mdsc_release_dir_caps(struct
> >> ceph_mds_request *req)
> >> dcaps = xchg(&req->r_dir_caps, 0);
> >> if (dcaps) {
> >> dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
> >> - ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps);
> >> + ceph_async_put_cap_refs(ceph_inode(req->r_parent), dcaps);
> >> }
> >> }
> >> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> >> index 226f19c..01d206f 100644
> >> --- a/fs/ceph/super.h
> >> +++ b/fs/ceph/super.h
> >> @@ -421,6 +421,9 @@ struct ceph_inode_info {
> >> struct timespec64 i_btime;
> >> struct timespec64 i_snap_btime;
> >> + struct work_struct put_cap_refs_work;
> >> + int pending_put_caps;
> >> +
> >> struct work_struct i_work;
> >> unsigned long i_work_mask;
> >> @@ -1095,6 +1098,8 @@ extern void ceph_take_cap_refs(struct
> >> ceph_inode_info *ci, int caps,
> >> bool snap_rwsem_locked);
> >> extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
> >> extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
> >> +extern void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int
> >> had);
> >> +extern void ceph_async_put_cap_refs_work(struct work_struct *work);
> >> extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci,
> >> int nr,
> >> struct ceph_snap_context *snapc);
> >> extern void ceph_flush_snaps(struct ceph_inode_info *ci,
> >>
> >
>
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock
2020-05-26 6:29 ` Yan, Zheng
@ 2020-05-26 7:30 ` Xiubo Li
0 siblings, 0 replies; 8+ messages in thread
From: Xiubo Li @ 2020-05-26 7:30 UTC (permalink / raw)
To: Yan, Zheng
Cc: Yan, Zheng, Jeff Layton, Ilya Dryomov, Patrick Donnelly,
ceph-devel
On 2020/5/26 14:29, Yan, Zheng wrote:
> On Tue, May 26, 2020 at 11:42 AM Xiubo Li <xiubli@redhat.com> wrote:
>> On 2020/5/26 11:11, Yan, Zheng wrote:
>>> On 5/25/20 7:16 PM, xiubli@redhat.com wrote:
>>>> From: Xiubo Li <xiubli@redhat.com>
>>>>
>>>> In the ceph_check_caps() it may call the session lock/unlock stuff.
>>>>
>>>> There have some deadlock cases, like:
>>>> handle_forward()
>>>> ...
>>>> mutex_lock(&mdsc->mutex)
>>>> ...
>>>> ceph_mdsc_put_request()
>>>> --> ceph_mdsc_release_request()
>>>> --> ceph_put_cap_request()
>>>> --> ceph_put_cap_refs()
>>>> --> ceph_check_caps()
>>>> ...
>>>> mutex_unlock(&mdsc->mutex)
>>>>
>>>> And also there maybe has some double session lock cases, like:
>>>>
>>>> send_mds_reconnect()
>>>> ...
>>>> mutex_lock(&session->s_mutex);
>>>> ...
>>>> --> replay_unsafe_requests()
>>>> --> ceph_mdsc_release_dir_caps()
>>>> --> ceph_put_cap_refs()
>>>> --> ceph_check_caps()
>>>> ...
>>>> mutex_unlock(&session->s_mutex);
>>>>
>>>> URL: https://tracker.ceph.com/issues/45635
>>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>>> ---
>>>> fs/ceph/caps.c | 29 +++++++++++++++++++++++++++++
>>>> fs/ceph/inode.c | 3 +++
>>>> fs/ceph/mds_client.c | 12 +++++++-----
>>>> fs/ceph/super.h | 5 +++++
>>>> 4 files changed, 44 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>>>> index 27c2e60..aea66c1 100644
>>>> --- a/fs/ceph/caps.c
>>>> +++ b/fs/ceph/caps.c
>>>> @@ -3082,6 +3082,35 @@ void ceph_put_cap_refs(struct ceph_inode_info
>>>> *ci, int had)
>>>> iput(inode);
>>>> }
>>>> +void ceph_async_put_cap_refs_work(struct work_struct *work)
>>>> +{
>>>> + struct ceph_inode_info *ci = container_of(work, struct
>>>> ceph_inode_info,
>>>> + put_cap_refs_work);
>>>> + int caps;
>>>> +
>>>> + spin_lock(&ci->i_ceph_lock);
>>>> + caps = xchg(&ci->pending_put_caps, 0);
>>>> + spin_unlock(&ci->i_ceph_lock);
>>>> +
>>>> + ceph_put_cap_refs(ci, caps);
>>>> +}
>>>> +
>>>> +void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int had)
>>>> +{
>>>> + struct inode *inode = &ci->vfs_inode;
>>>> +
>>>> + spin_lock(&ci->i_ceph_lock);
>>>> + if (ci->pending_put_caps & had) {
>>>> + spin_unlock(&ci->i_ceph_lock);
>>>> + return;
>>>> + }
>>> this will cause cap ref leak.
>> Ah, yeah, right.
>>
>>
>>> I thought about this again. all the trouble is caused by calling
>>> ceph_mdsc_release_dir_caps() inside ceph_mdsc_release_request().
>> And also in ceph_mdsc_release_request() it is calling
>> ceph_put_cap_refs() directly in other 3 places.
>>
> putting CEPH_CAP_PIN does not trigger check_caps(). So only
> ceph_mdsc_release_dir_caps() matters.
Yeah. Right. I will fix this.
Thanks
BRs
>> BRs
>>
>> Xiubo
>>
>>> We already call ceph_mdsc_release_dir_caps() in ceph_async_foo_cb()
>>> for normal circumdtance. We just need to call
>>> ceph_mdsc_release_dir_caps() in 'session closed' case (we never abort
>>> async request). In the 'session closed' case, we can use
>>> ceph_put_cap_refs_no_check_caps()
>>>
>>> Regards
>>> Yan, Zheng
>>>
>>>> +
>>>> + ci->pending_put_caps |= had;
>>>> + spin_unlock(&ci->i_ceph_lock);
>>>> +
>>>> + queue_work(ceph_inode_to_client(inode)->inode_wq,
>>>> + &ci->put_cap_refs_work);
>>>> +}
>>>> /*
>>>> * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
>>>> * context. Adjust per-snap dirty page accounting as appropriate.
>>>> diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
>>>> index 357c937..303276a 100644
>>>> --- a/fs/ceph/inode.c
>>>> +++ b/fs/ceph/inode.c
>>>> @@ -517,6 +517,9 @@ struct inode *ceph_alloc_inode(struct super_block
>>>> *sb)
>>>> INIT_LIST_HEAD(&ci->i_snap_realm_item);
>>>> INIT_LIST_HEAD(&ci->i_snap_flush_item);
>>>> + INIT_WORK(&ci->put_cap_refs_work, ceph_async_put_cap_refs_work);
>>>> + ci->pending_put_caps = 0;
>>>> +
>>>> INIT_WORK(&ci->i_work, ceph_inode_work);
>>>> ci->i_work_mask = 0;
>>>> memset(&ci->i_btime, '\0', sizeof(ci->i_btime));
>>>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>>>> index 0e0ab01..40b31da 100644
>>>> --- a/fs/ceph/mds_client.c
>>>> +++ b/fs/ceph/mds_client.c
>>>> @@ -811,12 +811,14 @@ void ceph_mdsc_release_request(struct kref *kref)
>>>> if (req->r_reply)
>>>> ceph_msg_put(req->r_reply);
>>>> if (req->r_inode) {
>>>> - ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
>>>> + ceph_async_put_cap_refs(ceph_inode(req->r_inode),
>>>> + CEPH_CAP_PIN);
>>>> /* avoid calling iput_final() in mds dispatch threads */
>>>> ceph_async_iput(req->r_inode);
>>>> }
>>>> if (req->r_parent) {
>>>> - ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
>>>> + ceph_async_put_cap_refs(ceph_inode(req->r_parent),
>>>> + CEPH_CAP_PIN);
>>>> ceph_async_iput(req->r_parent);
>>>> }
>>>> ceph_async_iput(req->r_target_inode);
>>>> @@ -831,8 +833,8 @@ void ceph_mdsc_release_request(struct kref *kref)
>>>> * changed between the dir mutex being dropped and
>>>> * this request being freed.
>>>> */
>>>> - ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
>>>> - CEPH_CAP_PIN);
>>>> + ceph_async_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
>>>> + CEPH_CAP_PIN);
>>>> ceph_async_iput(req->r_old_dentry_dir);
>>>> }
>>>> kfree(req->r_path1);
>>>> @@ -3398,7 +3400,7 @@ void ceph_mdsc_release_dir_caps(struct
>>>> ceph_mds_request *req)
>>>> dcaps = xchg(&req->r_dir_caps, 0);
>>>> if (dcaps) {
>>>> dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
>>>> - ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps);
>>>> + ceph_async_put_cap_refs(ceph_inode(req->r_parent), dcaps);
>>>> }
>>>> }
>>>> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
>>>> index 226f19c..01d206f 100644
>>>> --- a/fs/ceph/super.h
>>>> +++ b/fs/ceph/super.h
>>>> @@ -421,6 +421,9 @@ struct ceph_inode_info {
>>>> struct timespec64 i_btime;
>>>> struct timespec64 i_snap_btime;
>>>> + struct work_struct put_cap_refs_work;
>>>> + int pending_put_caps;
>>>> +
>>>> struct work_struct i_work;
>>>> unsigned long i_work_mask;
>>>> @@ -1095,6 +1098,8 @@ extern void ceph_take_cap_refs(struct
>>>> ceph_inode_info *ci, int caps,
>>>> bool snap_rwsem_locked);
>>>> extern void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps);
>>>> extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
>>>> +extern void ceph_async_put_cap_refs(struct ceph_inode_info *ci, int
>>>> had);
>>>> +extern void ceph_async_put_cap_refs_work(struct work_struct *work);
>>>> extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci,
>>>> int nr,
>>>> struct ceph_snap_context *snapc);
>>>> extern void ceph_flush_snaps(struct ceph_inode_info *ci,
>>>>
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2020-05-26 7:31 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2020-05-25 11:16 [PATCH v2 0/2] ceph: fix dead lock and double lock xiubli
2020-05-25 11:16 ` [PATCH v2 1/2] ceph: add ceph_async_put_cap_refs to avoid double lock and deadlock xiubli
2020-05-26 3:11 ` Yan, Zheng
2020-05-26 3:38 ` Xiubo Li
2020-05-26 6:29 ` Yan, Zheng
2020-05-26 7:30 ` Xiubo Li
2020-05-26 3:52 ` Xiubo Li
2020-05-25 11:16 ` [PATCH v2 2/2] ceph: do not check the caps when reconnecting to mds xiubli
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).