public inbox for linux-xfs@vger.kernel.org
 help / color / mirror / Atom feed
From: Brian Foster <bfoster@redhat.com>
To: linux-xfs@vger.kernel.org
Subject: [RFC v4 1/2] xfs: automatic log item relog mechanism
Date: Thu,  5 Dec 2019 12:50:36 -0500	[thread overview]
Message-ID: <20191205175037.52529-2-bfoster@redhat.com> (raw)
In-Reply-To: <20191205175037.52529-1-bfoster@redhat.com>

This is an AIL-based mechanism to enable automatic relogging of
selected log items. The use case is for particular operations that
commit an item known to pin the tail of the log for a potentially
long period of time and that otherwise cannot use a rolling transaction.
While this does not provide the deadlock avoidance guarantees of a
rolling transaction, it ties the relog transaction into AIL pushing
pressure such that we should expect the transaction to reserve the
necessary log space long before deadlock becomes a problem.

To enable relogging, a bit is set on the log item before it is first
committed to the log subsystem. Once the item commits to the on-disk
log and is inserted into the AIL, AIL pushing dictates when the item is
ready for a relog. When that occurs, the item relogs in an
independent transaction to ensure the log tail keeps moving without
intervention from the original committer.  To disable relogging, the
original committer clears the log item bit and optionally waits on
relogging activity to cease if it needs to reuse the item before the
operation completes.

While the current use case for automatic relogging is limited, the
mechanism is AIL based because it 1.) provides existing callbacks
into all possible log item types for future support and 2.) has
applicable context to determine when to relog particular items (such
as when an item pins the log tail). This provides enough flexibility
to support various log item types and future workloads without
introducing complexity up front for currently unknown use cases.
Further complexity, such as preallocated or regranted relog
transaction reservation or custom relog handlers, can be considered
as the need arises.

Signed-off-by: Brian Foster <bfoster@redhat.com>
---
 fs/xfs/xfs_trace.h      |  1 +
 fs/xfs/xfs_trans.c      | 30 ++++++++++++++++++++++
 fs/xfs/xfs_trans.h      |  7 +++++-
 fs/xfs/xfs_trans_ail.c  | 56 +++++++++++++++++++++++++++++++++++++++--
 fs/xfs/xfs_trans_priv.h |  5 ++++
 5 files changed, 96 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index c13bb3655e48..6c2a9cdadd03 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1063,6 +1063,7 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_push);
 DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned);
 DEFINE_LOG_ITEM_EVENT(xfs_ail_locked);
 DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing);
+DEFINE_LOG_ITEM_EVENT(xfs_ail_relog);
 
 DECLARE_EVENT_CLASS(xfs_ail_class,
 	TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 3b208f9a865c..f2c06cdd1074 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -763,6 +763,35 @@ xfs_trans_del_item(
 	list_del_init(&lip->li_trans);
 }
 
+void
+xfs_trans_enable_relog(
+	struct xfs_log_item	*lip)
+{
+	set_bit(XFS_LI_RELOG, &lip->li_flags);
+}
+
+void
+xfs_trans_disable_relog(
+	struct xfs_log_item	*lip,
+	bool			drain) /* wait for relogging to cease */
+{
+	struct xfs_mount	*mp = lip->li_mountp;
+
+	clear_bit(XFS_LI_RELOG, &lip->li_flags);
+
+	if (!drain)
+		return;
+
+	/*
+	 * Some operations might require relog activity to cease before they can
+	 * proceed. For example, an operation must wait before including a
+	 * non-lockable log item (i.e. intent) in another transaction.
+	 */
+	while (wait_on_bit_timeout(&lip->li_flags, XFS_LI_RELOGGED,
+				   TASK_UNINTERRUPTIBLE, HZ))
+		xfs_log_force(mp, XFS_LOG_SYNC);
+}
+
 /* Detach and unlock all of the items in a transaction */
 static void
 xfs_trans_free_items(
@@ -848,6 +877,7 @@ xfs_trans_committed_bulk(
 
 		if (aborted)
 			set_bit(XFS_LI_ABORTED, &lip->li_flags);
+		clear_and_wake_up_bit(XFS_LI_RELOGGED, &lip->li_flags);
 
 		if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
 			lip->li_ops->iop_release(lip);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 64d7f171ebd3..6d4311d82c4c 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -59,12 +59,16 @@ struct xfs_log_item {
 #define	XFS_LI_ABORTED	1
 #define	XFS_LI_FAILED	2
 #define	XFS_LI_DIRTY	3	/* log item dirty in transaction */
+#define	XFS_LI_RELOG	4	/* automatic relogging */
+#define	XFS_LI_RELOGGED	5	/* relogged by xfsaild */
 
 #define XFS_LI_FLAGS \
 	{ (1 << XFS_LI_IN_AIL),		"IN_AIL" }, \
 	{ (1 << XFS_LI_ABORTED),	"ABORTED" }, \
 	{ (1 << XFS_LI_FAILED),		"FAILED" }, \
-	{ (1 << XFS_LI_DIRTY),		"DIRTY" }
+	{ (1 << XFS_LI_DIRTY),		"DIRTY" }, \
+	{ (1 << XFS_LI_RELOG),		"RELOG" }, \
+	{ (1 << XFS_LI_RELOGGED),	"RELOGGED" }
 
 struct xfs_item_ops {
 	unsigned flags;
@@ -95,6 +99,7 @@ void	xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
 #define XFS_ITEM_PINNED		1
 #define XFS_ITEM_LOCKED		2
 #define XFS_ITEM_FLUSHING	3
+#define XFS_ITEM_RELOG		4
 
 /*
  * Deferred operation item relogging limits.
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 00cc5b8734be..bb54d00ae095 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -143,6 +143,38 @@ xfs_ail_max_lsn(
 	return lsn;
 }
 
+/*
+ * Relog log items on the AIL relog queue.
+ */
+static void
+xfs_ail_relog(
+	struct work_struct	*work)
+{
+	struct xfs_ail		*ailp = container_of(work, struct xfs_ail,
+						     ail_relog_work);
+	struct xfs_mount	*mp = ailp->ail_mount;
+	struct xfs_trans	*tp;
+	struct xfs_log_item	*lip, *lipp;
+	int			error;
+
+	/* XXX: define a ->tr_relog reservation */
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp);
+	if (error)
+		return;
+
+	spin_lock(&ailp->ail_lock);
+	list_for_each_entry_safe(lip, lipp, &ailp->ail_relog_list, li_trans) {
+		list_del_init(&lip->li_trans);
+		xfs_trans_add_item(tp, lip);
+		set_bit(XFS_LI_DIRTY, &lip->li_flags);
+		tp->t_flags |= XFS_TRANS_DIRTY;
+	}
+	spin_unlock(&ailp->ail_lock);
+
+	error = xfs_trans_commit(tp);
+	ASSERT(!error);
+}
+
 /*
  * The cursor keeps track of where our current traversal is up to by tracking
  * the next item in the list for us. However, for this to be safe, removing an
@@ -363,7 +395,7 @@ static long
 xfsaild_push(
 	struct xfs_ail		*ailp)
 {
-	xfs_mount_t		*mp = ailp->ail_mount;
+	struct xfs_mount	*mp = ailp->ail_mount;
 	struct xfs_ail_cursor	cur;
 	struct xfs_log_item	*lip;
 	xfs_lsn_t		lsn;
@@ -425,6 +457,13 @@ xfsaild_push(
 			ailp->ail_last_pushed_lsn = lsn;
 			break;
 
+		case XFS_ITEM_RELOG:
+			trace_xfs_ail_relog(lip);
+			ASSERT(list_empty(&lip->li_trans));
+			list_add_tail(&lip->li_trans, &ailp->ail_relog_list);
+			set_bit(XFS_LI_RELOGGED, &lip->li_flags);
+			break;
+
 		case XFS_ITEM_FLUSHING:
 			/*
 			 * The item or its backing buffer is already being
@@ -491,6 +530,9 @@ xfsaild_push(
 	if (xfs_buf_delwri_submit_nowait(&ailp->ail_buf_list))
 		ailp->ail_log_flush++;
 
+	if (!list_empty(&ailp->ail_relog_list))
+		queue_work(ailp->ail_relog_wq, &ailp->ail_relog_work);
+
 	if (!count || XFS_LSN_CMP(lsn, target) >= 0) {
 out_done:
 		/*
@@ -834,15 +876,24 @@ xfs_trans_ail_init(
 	spin_lock_init(&ailp->ail_lock);
 	INIT_LIST_HEAD(&ailp->ail_buf_list);
 	init_waitqueue_head(&ailp->ail_empty);
+	INIT_LIST_HEAD(&ailp->ail_relog_list);
+	INIT_WORK(&ailp->ail_relog_work, xfs_ail_relog);
+
+	ailp->ail_relog_wq = alloc_workqueue("xfs-relog/%s", WQ_FREEZABLE, 0,
+					     mp->m_super->s_id);
+	if (!ailp->ail_relog_wq)
+		goto out_free_ailp;
 
 	ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
 			ailp->ail_mount->m_super->s_id);
 	if (IS_ERR(ailp->ail_task))
-		goto out_free_ailp;
+		goto out_destroy_wq;
 
 	mp->m_ail = ailp;
 	return 0;
 
+out_destroy_wq:
+	destroy_workqueue(ailp->ail_relog_wq);
 out_free_ailp:
 	kmem_free(ailp);
 	return -ENOMEM;
@@ -855,5 +906,6 @@ xfs_trans_ail_destroy(
 	struct xfs_ail	*ailp = mp->m_ail;
 
 	kthread_stop(ailp->ail_task);
+	destroy_workqueue(ailp->ail_relog_wq);
 	kmem_free(ailp);
 }
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 2e073c1c4614..3cefc821350e 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -16,6 +16,8 @@ struct xfs_log_vec;
 void	xfs_trans_init(struct xfs_mount *);
 void	xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
 void	xfs_trans_del_item(struct xfs_log_item *);
+void	xfs_trans_enable_relog(struct xfs_log_item *);
+void	xfs_trans_disable_relog(struct xfs_log_item *, bool);
 void	xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
 
 void	xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
@@ -61,6 +63,9 @@ struct xfs_ail {
 	int			ail_log_flush;
 	struct list_head	ail_buf_list;
 	wait_queue_head_t	ail_empty;
+	struct work_struct	ail_relog_work;
+	struct list_head	ail_relog_list;
+	struct workqueue_struct	*ail_relog_wq;
 };
 
 /*
-- 
2.20.1


  reply	other threads:[~2019-12-05 17:50 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-12-05 17:50 [RFC v4 0/2] xfs: automatic relogging experiment Brian Foster
2019-12-05 17:50 ` Brian Foster [this message]
2019-12-05 21:02   ` [RFC v4 1/2] xfs: automatic log item relog mechanism Dave Chinner
2019-12-06 14:56     ` Brian Foster
2019-12-05 17:50 ` [RFC v4 2/2] xfs: automatically relog the quotaoff start intent Brian Foster

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191205175037.52529-2-bfoster@redhat.com \
    --to=bfoster@redhat.com \
    --cc=linux-xfs@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox