* [PATCH 0/3] ceph: fix caps revocation stuck
From: xiubli @ 2023-09-25  5:28 UTC
  To: ceph-devel; +Cc: idryomov, jlayton, vshankar, mchangir, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

Issue a caps check immediately when unlinking a file; otherwise the MDS
may be stuck for a long time waiting for caps, such as 'Fx' and 'Fb',
to be revoked.
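
The gist of the series, as a minimal sketch distilled from patch 3/3
below (lock acquisition around the list manipulation is elided, and
list_move_tail stands in for the open-coded del/add in the patch):

	/* On unlink, instead of requeueing the inode on the ordinary
	 * delayed list (serviced seconds later), mark it for flushing
	 * and kick a dedicated work item right away. */
	ci->i_ceph_flags |= CEPH_I_FLUSH;
	list_move_tail(&ci->i_cap_delay_list, &mdsc->cap_unlink_delay_list);
	schedule_work(&mdsc->cap_unlink_work);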


Xiubo Li (3):
  ceph: do not break the loop if CEPH_I_FLUSH is set
  ceph: always queue a writeback when revoking the Fb caps
  ceph: add ceph_cap_unlink_work to fire check caps immediately

 fs/ceph/caps.c       | 84 +++++++++++++++++++++++++++-----------------
 fs/ceph/mds_client.c | 34 ++++++++++++++++++
 fs/ceph/mds_client.h |  4 +++
 3 files changed, 89 insertions(+), 33 deletions(-)

-- 
2.39.1



* [PATCH 1/3] ceph: do not break the loop if CEPH_I_FLUSH is set
From: xiubli @ 2023-09-25  5:28 UTC
  To: ceph-devel; +Cc: idryomov, jlayton, vshankar, mchangir, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

For the unlink case we do not want the caps check to be delayed;
otherwise it will only be triggered 5 seconds later, while the MDS
may be stuck waiting for the cap revocation.
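
For reference, a tiny self-contained example of the time_before()
idiom the loop below relies on; jiffies-based deadlines are
wraparound-safe only when compared this way (the 5-second value is
purely illustrative):

	#include <linux/jiffies.h>

	unsigned long deadline = jiffies + 5 * HZ;  /* hypothetical 5s window */

	if (time_before(jiffies, deadline)) {
		/* still inside the delay window: a plain "jiffies <
		 * deadline" comparison would misbehave once the
		 * counter wraps around */
	}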

URL: https://tracker.ceph.com/issues/50223
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index dc0402258384..efa036e7619f 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4624,7 +4624,7 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
 	struct ceph_inode_info *ci;
 	struct ceph_mount_options *opt = mdsc->fsc->mount_options;
 	unsigned long delay_max = opt->caps_wanted_delay_max * HZ;
-	unsigned long loop_start = jiffies;
+	unsigned long loop_start = jiffies, end;
 	unsigned long delay = 0;
 
 	doutc(cl, "begin\n");
@@ -4633,14 +4633,17 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
 		ci = list_first_entry(&mdsc->cap_delay_list,
 				      struct ceph_inode_info,
 				      i_cap_delay_list);
-		if (time_before(loop_start, ci->i_hold_caps_max - delay_max)) {
-			doutc(cl, "caps added recently.  Exiting loop");
-			delay = ci->i_hold_caps_max;
-			break;
+		/* Do not break the loop if CEPH_I_FLUSH is set. */
+		if (!(ci->i_ceph_flags & CEPH_I_FLUSH)) {
+			end = ci->i_hold_caps_max - delay_max;
+			if (time_before(loop_start, end)) {
+				doutc(cl, "caps added recently.  Exiting loop");
+				delay = ci->i_hold_caps_max;
+				break;
+			}
+			if (time_before(jiffies, ci->i_hold_caps_max))
+				break;
 		}
-		if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 &&
-		    time_before(jiffies, ci->i_hold_caps_max))
-			break;
 		list_del_init(&ci->i_cap_delay_list);
 
 		inode = igrab(&ci->netfs.inode);
-- 
2.39.1



* [PATCH 2/3] ceph: always queue a writeback when revoking the Fb caps
From: xiubli @ 2023-09-25  5:28 UTC
  To: ceph-devel; +Cc: idryomov, jlayton, vshankar, mchangir, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

When there are dirty 'Fw' caps and 'CHECK_CAPS_FLUSH' is set, we
always skip queuing a writeback. Queuing the writeback is important
because without it the kclient is blocked from flushing the snapcaps
to the MDS, which in turn leaves the MDS stuck waiting for the 'Fb'
caps to be revoked.
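
The queue_writeback flag set in the hunk below is consumed after the
per-cap loop, outside the context shown here; roughly (a hedged
sketch from the surrounding function, not part of this diff):

	/* Once i_ceph_lock is dropped, an immediate writeback is
	 * queued for the inode so the dirty pages holding
	 * "i_wrbuffer_ref" get flushed without waiting for the BDI's
	 * delayed work. */
	if (queue_writeback)
		ceph_queue_writeback(inode);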

URL: https://tracker.ceph.com/issues/50223
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c | 48 ++++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index efa036e7619f..7ce275838007 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2156,6 +2156,30 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
 		      ceph_cap_string(cap->implemented),
 		      ceph_cap_string(revoking));
 
+		/* completed revocation? going down and there are no caps? */
+		if (revoking) {
+			if ((revoking & cap_used) == 0) {
+				doutc(cl, "completed revocation of %s\n",
+				      ceph_cap_string(cap->implemented & ~cap->issued));
+				goto ack;
+			}
+
+			/*
+			 * If the "i_wrbuffer_ref" was increased by mmap or generic
+			 * cache write just before the ceph_check_caps() is called,
+			 * the Fb capability revoking will fail this time. Then we
+			 * must wait for the BDI's delayed work to flush the dirty
+			 * pages and to release the "i_wrbuffer_ref", which will cost
+			 * at most 5 seconds. That means the MDS needs to wait at
+			 * most 5 seconds to finished the Fb capability's revocation.
+			 *
+			 * Let's queue a writeback for it.
+			 */
+			if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
+			    (revoking & CEPH_CAP_FILE_BUFFER))
+				queue_writeback = true;
+		}
+
 		if (cap == ci->i_auth_cap &&
 		    (cap->issued & CEPH_CAP_FILE_WR)) {
 			/* request larger max_size from MDS? */
@@ -2183,30 +2207,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
 			}
 		}
 
-		/* completed revocation? going down and there are no caps? */
-		if (revoking) {
-			if ((revoking & cap_used) == 0) {
-				doutc(cl, "completed revocation of %s\n",
-				      ceph_cap_string(cap->implemented & ~cap->issued));
-				goto ack;
-			}
-
-			/*
-			 * If the "i_wrbuffer_ref" was increased by mmap or generic
-			 * cache write just before the ceph_check_caps() is called,
-			 * the Fb capability revoking will fail this time. Then we
-			 * must wait for the BDI's delayed work to flush the dirty
-			 * pages and to release the "i_wrbuffer_ref", which will cost
-			 * at most 5 seconds. That means the MDS needs to wait at
-			 * most 5 seconds to finished the Fb capability's revocation.
-			 *
-			 * Let's queue a writeback for it.
-			 */
-			if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
-			    (revoking & CEPH_CAP_FILE_BUFFER))
-				queue_writeback = true;
-		}
-
 		/* want more caps from mds? */
 		if (want & ~cap->mds_wanted) {
 			if (want & ~(cap->mds_wanted | cap->issued))
-- 
2.39.1



* [PATCH 3/3] ceph: add ceph_cap_unlink_work to fire check caps immediately
From: xiubli @ 2023-09-25  5:28 UTC
  To: ceph-devel; +Cc: idryomov, jlayton, vshankar, mchangir, Xiubo Li

From: Xiubo Li <xiubli@redhat.com>

When unlinking a file the caps check could be delayed for more than
5 seconds, while on the MDS side the server may be stuck waiting for
the client to release its caps.

Add a dedicated work item and list to trigger the caps check and
dirty buffer flushing immediately, as sketched below.
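
The mechanism is the stock kernel workqueue API; a minimal
self-contained example of the pattern (generic, not ceph-specific;
my_unlink_worker/my_unlink_work are placeholder names):

	#include <linux/workqueue.h>

	static void my_unlink_worker(struct work_struct *work)
	{
		/* runs soon after schedule_work(), in process
		 * context, so it may take sleeping locks and do I/O */
	}
	static DECLARE_WORK(my_unlink_work, my_unlink_worker);

	/* in a path that must not block, such as the unlink path: */
	schedule_work(&my_unlink_work);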

URL: https://tracker.ceph.com/issues/50223
Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/caps.c       | 17 ++++++++++++++++-
 fs/ceph/mds_client.c | 34 ++++++++++++++++++++++++++++++++++
 fs/ceph/mds_client.h |  4 ++++
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 7ce275838007..a36366df7773 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -4793,7 +4793,22 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
 		if (__ceph_caps_dirty(ci)) {
 			struct ceph_mds_client *mdsc =
 				ceph_inode_to_fs_client(inode)->mdsc;
-			__cap_delay_requeue_front(mdsc, ci);
+
+			doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
+			      ceph_vinop(inode));
+			spin_lock(&mdsc->cap_unlink_delay_lock);
+			ci->i_ceph_flags |= CEPH_I_FLUSH;
+			if (!list_empty(&ci->i_cap_delay_list))
+				list_del_init(&ci->i_cap_delay_list);
+			list_add_tail(&ci->i_cap_delay_list,
+				      &mdsc->cap_unlink_delay_list);
+			spin_unlock(&mdsc->cap_unlink_delay_lock);
+
+			/*
+			 * Fire the work immediately, because the MDS maybe
+			 * waiting for caps release.
+			 */
+			schedule_work(&mdsc->cap_unlink_work);
 		}
 	}
 	spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 6fa7134beec8..a7bffb030036 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2500,6 +2500,37 @@ void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr)
 	}
 }
 
+static void ceph_cap_unlink_work(struct work_struct *work)
+{
+	struct ceph_mds_client *mdsc =
+		container_of(work, struct ceph_mds_client, cap_unlink_work);
+	struct ceph_client *cl = mdsc->fsc->client;
+
+	doutc(cl, "begin\n");
+	spin_lock(&mdsc->cap_unlink_delay_lock);
+	while (!list_empty(&mdsc->cap_unlink_delay_list)) {
+		struct ceph_inode_info *ci;
+		struct inode *inode;
+
+		ci = list_first_entry(&mdsc->cap_unlink_delay_list,
+				      struct ceph_inode_info,
+				      i_cap_delay_list);
+		list_del_init(&ci->i_cap_delay_list);
+
+		inode = igrab(&ci->netfs.inode);
+		if (inode) {
+			spin_unlock(&mdsc->cap_unlink_delay_lock);
+			doutc(cl, "on %p %llx.%llx\n", inode,
+			      ceph_vinop(inode));
+			ceph_check_caps(ci, CHECK_CAPS_FLUSH);
+			iput(inode);
+			spin_lock(&mdsc->cap_unlink_delay_lock);
+		}
+	}
+	spin_unlock(&mdsc->cap_unlink_delay_lock);
+	doutc(cl, "done\n");
+}
+
 /*
  * requests
  */
@@ -5372,6 +5403,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	INIT_LIST_HEAD(&mdsc->cap_delay_list);
 	INIT_LIST_HEAD(&mdsc->cap_wait_list);
 	spin_lock_init(&mdsc->cap_delay_lock);
+	INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
+	spin_lock_init(&mdsc->cap_unlink_delay_lock);
 	INIT_LIST_HEAD(&mdsc->snap_flush_list);
 	spin_lock_init(&mdsc->snap_flush_lock);
 	mdsc->last_cap_flush_tid = 1;
@@ -5380,6 +5413,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	spin_lock_init(&mdsc->cap_dirty_lock);
 	init_waitqueue_head(&mdsc->cap_flushing_wq);
 	INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
+	INIT_WORK(&mdsc->cap_unlink_work, ceph_cap_unlink_work);
 	err = ceph_metric_init(&mdsc->metric);
 	if (err)
 		goto err_mdsmap;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 2e6ddaa13d72..f25117fa910f 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -462,6 +462,8 @@ struct ceph_mds_client {
 	unsigned long    last_renew_caps;  /* last time we renewed our caps */
 	struct list_head cap_delay_list;   /* caps with delayed release */
 	spinlock_t       cap_delay_lock;   /* protects cap_delay_list */
+	struct list_head cap_unlink_delay_list;  /* caps with delayed release for unlink */
+	spinlock_t       cap_unlink_delay_lock;  /* protects cap_unlink_delay_list */
 	struct list_head snap_flush_list;  /* cap_snaps ready to flush */
 	spinlock_t       snap_flush_lock;
 
@@ -475,6 +477,8 @@ struct ceph_mds_client {
 	struct work_struct cap_reclaim_work;
 	atomic_t	   cap_reclaim_pending;
 
+	struct work_struct cap_unlink_work;
+
 	/*
 	 * Cap reservations
 	 *
-- 
2.39.1

