* [PATCH v2] ceph: defer flushing the capsnap if the Fb is used
@ 2021-01-07 2:30 xiubli
2021-01-08 18:24 ` Jeff Layton
0 siblings, 1 reply; 3+ messages in thread
From: xiubli @ 2021-01-07 2:30 UTC (permalink / raw)
To: jlayton; +Cc: idryomov, pdonnell, ceph-devel, Xiubo Li
From: Xiubo Li <xiubli@redhat.com>
If the Fb cap is used it means the client is flushing the dirty
data to OSD, just defer flushing the capsnap.
URL: https://tracker.ceph.com/issues/48679
URL: https://tracker.ceph.com/issues/48640
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
V2:
- Fix inode reference leak bug
fs/ceph/caps.c | 32 +++++++++++++++++++-------------
fs/ceph/snap.c | 6 +++---
2 files changed, 22 insertions(+), 16 deletions(-)
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index abbf48fc6230..2f2451d563bd 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -3047,6 +3047,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
{
struct inode *inode = &ci->vfs_inode;
int last = 0, put = 0, flushsnaps = 0, wake = 0;
+ bool check_flushsnaps = false;
spin_lock(&ci->i_ceph_lock);
if (had & CEPH_CAP_PIN)
@@ -3064,25 +3065,15 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
if (--ci->i_wb_ref == 0) {
last++;
put++;
+ check_flushsnaps = true;
}
dout("put_cap_refs %p wb %d -> %d (?)\n",
inode, ci->i_wb_ref+1, ci->i_wb_ref);
}
- if (had & CEPH_CAP_FILE_WR)
+ if (had & CEPH_CAP_FILE_WR) {
if (--ci->i_wr_ref == 0) {
last++;
- if (__ceph_have_pending_cap_snap(ci)) {
- struct ceph_cap_snap *capsnap =
- list_last_entry(&ci->i_cap_snaps,
- struct ceph_cap_snap,
- ci_item);
- capsnap->writing = 0;
- if (ceph_try_drop_cap_snap(ci, capsnap))
- put++;
- else if (__ceph_finish_cap_snap(ci, capsnap))
- flushsnaps = 1;
- wake = 1;
- }
+ check_flushsnaps = true;
if (ci->i_wrbuffer_ref_head == 0 &&
ci->i_dirty_caps == 0 &&
ci->i_flushing_caps == 0) {
@@ -3094,6 +3085,21 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
drop_inode_snap_realm(ci);
}
+ }
+ if (check_flushsnaps) {
+ if (__ceph_have_pending_cap_snap(ci)) {
+ struct ceph_cap_snap *capsnap =
+ list_last_entry(&ci->i_cap_snaps,
+ struct ceph_cap_snap,
+ ci_item);
+ capsnap->writing = 0;
+ if (ceph_try_drop_cap_snap(ci, capsnap))
+ put++;
+ else if (__ceph_finish_cap_snap(ci, capsnap))
+ flushsnaps = 1;
+ wake = 1;
+ }
+ }
spin_unlock(&ci->i_ceph_lock);
dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index b611f829cb61..639fb91cc9db 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -561,10 +561,10 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
capsnap->context = old_snapc;
list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
- if (used & CEPH_CAP_FILE_WR) {
+ if (used & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER)) {
dout("queue_cap_snap %p cap_snap %p snapc %p"
- " seq %llu used WR, now pending\n", inode,
- capsnap, old_snapc, old_snapc->seq);
+ " seq %llu used WR | BUFFER, now pending\n",
+ inode, capsnap, old_snapc, old_snapc->seq);
capsnap->writing = 1;
} else {
/* note mtime, size NOW. */
--
2.27.0
^ permalink raw reply related [flat|nested] 3+ messages in thread* Re: [PATCH v2] ceph: defer flushing the capsnap if the Fb is used
2021-01-07 2:30 [PATCH v2] ceph: defer flushing the capsnap if the Fb is used xiubli
@ 2021-01-08 18:24 ` Jeff Layton
2021-01-09 2:08 ` Xiubo Li
0 siblings, 1 reply; 3+ messages in thread
From: Jeff Layton @ 2021-01-08 18:24 UTC (permalink / raw)
To: xiubli; +Cc: idryomov, pdonnell, ceph-devel
On Thu, 2021-01-07 at 10:30 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
>
> If the Fb cap is used it means the client is flushing the dirty
> data to OSD, just defer flushing the capsnap.
>
> URL: https://tracker.ceph.com/issues/48679
> URL: https://tracker.ceph.com/issues/48640
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>
> V2:
> - Fix inode reference leak bug
>
> fs/ceph/caps.c | 32 +++++++++++++++++++-------------
> fs/ceph/snap.c | 6 +++---
> 2 files changed, 22 insertions(+), 16 deletions(-)
>
> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
> index abbf48fc6230..2f2451d563bd 100644
> --- a/fs/ceph/caps.c
> +++ b/fs/ceph/caps.c
> @@ -3047,6 +3047,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
> {
> struct inode *inode = &ci->vfs_inode;
> int last = 0, put = 0, flushsnaps = 0, wake = 0;
> + bool check_flushsnaps = false;
>
>
>
>
> spin_lock(&ci->i_ceph_lock);
> if (had & CEPH_CAP_PIN)
> @@ -3064,25 +3065,15 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
> if (--ci->i_wb_ref == 0) {
> last++;
> put++;
> + check_flushsnaps = true;
> }
> dout("put_cap_refs %p wb %d -> %d (?)\n",
> inode, ci->i_wb_ref+1, ci->i_wb_ref);
> }
> - if (had & CEPH_CAP_FILE_WR)
> + if (had & CEPH_CAP_FILE_WR) {
> if (--ci->i_wr_ref == 0) {
> last++;
> - if (__ceph_have_pending_cap_snap(ci)) {
> - struct ceph_cap_snap *capsnap =
> - list_last_entry(&ci->i_cap_snaps,
> - struct ceph_cap_snap,
> - ci_item);
> - capsnap->writing = 0;
> - if (ceph_try_drop_cap_snap(ci, capsnap))
> - put++;
> - else if (__ceph_finish_cap_snap(ci, capsnap))
> - flushsnaps = 1;
> - wake = 1;
> - }
> + check_flushsnaps = true;
> if (ci->i_wrbuffer_ref_head == 0 &&
> ci->i_dirty_caps == 0 &&
> ci->i_flushing_caps == 0) {
> @@ -3094,6 +3085,21 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
> if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
> drop_inode_snap_realm(ci);
> }
> + }
> + if (check_flushsnaps) {
> + if (__ceph_have_pending_cap_snap(ci)) {
> + struct ceph_cap_snap *capsnap =
> + list_last_entry(&ci->i_cap_snaps,
> + struct ceph_cap_snap,
> + ci_item);
> + capsnap->writing = 0;
> + if (ceph_try_drop_cap_snap(ci, capsnap))
> + put++;
> + else if (__ceph_finish_cap_snap(ci, capsnap))
> + flushsnaps = 1;
> + wake = 1;
> + }
> + }
Ok, so let's assume you're putting Fb. You increment put and set
check_flushsnaps to true. Later, you get down to here and call
ceph_try_drop_cap_snap and it returns true and now you've incremented
"put" twice.
Is that right? Do Fb caps hold two inode references?
Either way, I think this function needs some better
documentation/comments, particularly since you're making a significant
change to how it works.
> spin_unlock(&ci->i_ceph_lock);
>
>
>
>
> dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
> index b611f829cb61..639fb91cc9db 100644
> --- a/fs/ceph/snap.c
> +++ b/fs/ceph/snap.c
> @@ -561,10 +561,10 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
> capsnap->context = old_snapc;
> list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
>
>
>
>
> - if (used & CEPH_CAP_FILE_WR) {
> + if (used & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER)) {
> dout("queue_cap_snap %p cap_snap %p snapc %p"
> - " seq %llu used WR, now pending\n", inode,
> - capsnap, old_snapc, old_snapc->seq);
> + " seq %llu used WR | BUFFER, now pending\n",
> + inode, capsnap, old_snapc, old_snapc->seq);
> capsnap->writing = 1;
> } else {
> /* note mtime, size NOW. */
--
Jeff Layton <jlayton@kernel.org>
^ permalink raw reply [flat|nested] 3+ messages in thread* Re: [PATCH v2] ceph: defer flushing the capsnap if the Fb is used
2021-01-08 18:24 ` Jeff Layton
@ 2021-01-09 2:08 ` Xiubo Li
0 siblings, 0 replies; 3+ messages in thread
From: Xiubo Li @ 2021-01-09 2:08 UTC (permalink / raw)
To: Jeff Layton; +Cc: idryomov, pdonnell, ceph-devel
On 2021/1/9 2:24, Jeff Layton wrote:
> On Thu, 2021-01-07 at 10:30 +0800, xiubli@redhat.com wrote:
>> From: Xiubo Li <xiubli@redhat.com>
>>
>> If the Fb cap is used it means the client is flushing the dirty
>> data to OSD, just defer flushing the capsnap.
>>
>> URL: https://tracker.ceph.com/issues/48679
>> URL: https://tracker.ceph.com/issues/48640
>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>> ---
>>
>> V2:
>> - Fix inode reference leak bug
>>
>> fs/ceph/caps.c | 32 +++++++++++++++++++-------------
>> fs/ceph/snap.c | 6 +++---
>> 2 files changed, 22 insertions(+), 16 deletions(-)
>>
>> diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
>> index abbf48fc6230..2f2451d563bd 100644
>> --- a/fs/ceph/caps.c
>> +++ b/fs/ceph/caps.c
>> @@ -3047,6 +3047,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>> {
>> struct inode *inode = &ci->vfs_inode;
>> int last = 0, put = 0, flushsnaps = 0, wake = 0;
>> + bool check_flushsnaps = false;
>>
>>
>>
>>
>> spin_lock(&ci->i_ceph_lock);
>> if (had & CEPH_CAP_PIN)
>> @@ -3064,25 +3065,15 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>> if (--ci->i_wb_ref == 0) {
>> last++;
>> put++;
>> + check_flushsnaps = true;
>> }
>> dout("put_cap_refs %p wb %d -> %d (?)\n",
>> inode, ci->i_wb_ref+1, ci->i_wb_ref);
>> }
>> - if (had & CEPH_CAP_FILE_WR)
>> + if (had & CEPH_CAP_FILE_WR) {
>> if (--ci->i_wr_ref == 0) {
>> last++;
>> - if (__ceph_have_pending_cap_snap(ci)) {
>> - struct ceph_cap_snap *capsnap =
>> - list_last_entry(&ci->i_cap_snaps,
>> - struct ceph_cap_snap,
>> - ci_item);
>> - capsnap->writing = 0;
>> - if (ceph_try_drop_cap_snap(ci, capsnap))
>> - put++;
>> - else if (__ceph_finish_cap_snap(ci, capsnap))
>> - flushsnaps = 1;
>> - wake = 1;
>> - }
>> + check_flushsnaps = true;
>> if (ci->i_wrbuffer_ref_head == 0 &&
>> ci->i_dirty_caps == 0 &&
>> ci->i_flushing_caps == 0) {
>> @@ -3094,6 +3085,21 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
>> if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
>> drop_inode_snap_realm(ci);
>> }
>> + }
>> + if (check_flushsnaps) {
>> + if (__ceph_have_pending_cap_snap(ci)) {
>> + struct ceph_cap_snap *capsnap =
>> + list_last_entry(&ci->i_cap_snaps,
>> + struct ceph_cap_snap,
>> + ci_item);
>> + capsnap->writing = 0;
>> + if (ceph_try_drop_cap_snap(ci, capsnap))
>> + put++;
>> + else if (__ceph_finish_cap_snap(ci, capsnap))
>> + flushsnaps = 1;
>> + wake = 1;
>> + }
>> + }
>
> Ok, so let's assume you're putting Fb. You increment put and set
> check_flushsnaps to true. Later, you get down to here and call
> ceph_try_drop_cap_snap and it returns true and now you've incremented
> "put" twice.
>
> Is that right? Do Fb caps hold two inode references?
Yeah, one is taken in ceph_take_cap_refs().
The other is taken in ceph_queue_cap_snap(): when `used & (Fb | Fw)` is
true, flushing the capsnap is deferred while an inode reference is held,
so we need to put that inode ref here or in __ceph_finish_cap_snap().
> Either way, I think this function needs some better
> documentation/comments, particularly since you're making a significant
> change to how it works.
Okay, I will post the V3 after I get back, and will add more comments
about this.
Thanks
>
>> spin_unlock(&ci->i_ceph_lock);
>>
>>
>>
>>
>> dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
>> diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
>> index b611f829cb61..639fb91cc9db 100644
>> --- a/fs/ceph/snap.c
>> +++ b/fs/ceph/snap.c
>> @@ -561,10 +561,10 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
>> capsnap->context = old_snapc;
>> list_add_tail(&capsnap->ci_item, &ci->i_cap_snaps);
>>
>>
>>
>>
>> - if (used & CEPH_CAP_FILE_WR) {
>> + if (used & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER)) {
>> dout("queue_cap_snap %p cap_snap %p snapc %p"
>> - " seq %llu used WR, now pending\n", inode,
>> - capsnap, old_snapc, old_snapc->seq);
>> + " seq %llu used WR | BUFFER, now pending\n",
>> + inode, capsnap, old_snapc, old_snapc->seq);
>> capsnap->writing = 1;
>> } else {
>> /* note mtime, size NOW. */
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2021-01-09 2:10 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-01-07 2:30 [PATCH v2] ceph: defer flushing the capsnap if the Fb is used xiubli
2021-01-08 18:24 ` Jeff Layton
2021-01-09 2:08 ` Xiubo Li
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox