From mboxrd@z Thu Jan  1 00:00:00 1970
From: Steven Whitehouse <swhiteho@redhat.com>
Date: Tue, 13 Jul 2021 19:41:13 +0100
Subject: [Cluster-devel] [GFS2 PATCH 08/10] gfs2: New log flush watchdog
In-Reply-To: <20210713180958.66995-9-rpeterso@redhat.com>
References: <20210713180958.66995-1-rpeterso@redhat.com>
	<20210713180958.66995-9-rpeterso@redhat.com>
Message-ID: <ac3ca605d90d29e8424abc56ac5f410c0cc5e0b0.camel@redhat.com>
List-Id: <cluster-devel.redhat.com>
To: cluster-devel.redhat.com
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit

Hi,

On Tue, 2021-07-13 at 13:09 -0500, Bob Peterson wrote:
> This patch adds a new watchdog whose sole purpose is to complain when
> gfs2_log_flush operations are taking too long.
> 
This one is a bit confusing. The description says that it is to check
whether the log flush is taking too long, but the patch appears to set
a timeout based on the amount of dirty data that will be written back.
So I think it isn't really the log flush alone that is being timed, but
the writeback plus the log flush?

It also looks like the timeout depends entirely upon the number of
dirty pages, and not on the log flush size. I also wonder about the
performance impact of traversing the list of dirty pages. If that can
be avoided, it should make the implementation rather more efficient.

Steve.

> Signed-off-by: Bob Peterson <rpeterso@redhat.com>
> ---
>  fs/gfs2/incore.h     |  6 ++++++
>  fs/gfs2/log.c        | 47
> ++++++++++++++++++++++++++++++++++++++++++++
>  fs/gfs2/log.h        |  1 +
>  fs/gfs2/main.c       |  8 ++++++++
>  fs/gfs2/ops_fstype.c |  2 ++
>  fs/gfs2/sys.c        |  6 ++++--
>  6 files changed, 68 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
> index 6f31a067a5f2..566c0053b7c5 100644
> --- a/fs/gfs2/incore.h
> +++ b/fs/gfs2/incore.h
> @@ -683,6 +683,8 @@ struct local_statfs_inode {
>  	unsigned int si_jid; /* journal id this statfs inode
> corresponds to */
>  };
>  
> +#define GFS2_LOG_FLUSH_TIMEOUT (HZ / 10) /* arbitrary: 1/10 second
> per page */
> +
>  struct gfs2_sbd {
>  	struct super_block *sd_vfs;
>  	struct gfs2_pcpu_lkstats __percpu *sd_lkstats;
> @@ -849,6 +851,10 @@ struct gfs2_sbd {
>  	unsigned long sd_last_warning;
>  	struct dentry *debugfs_dir;    /* debugfs directory */
>  	unsigned long sd_glock_dqs_held;
> +
> +	struct delayed_work sd_log_flush_watchdog;
> +	unsigned long sd_dirty_pages;
> +	unsigned long sd_log_flush_start;
>  };
>  
>  static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int
> which)
> diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
> index f0ee3ff6f9a8..bd2ff5ef4b91 100644
> --- a/fs/gfs2/log.c
> +++ b/fs/gfs2/log.c
> @@ -19,6 +19,7 @@
>  #include <linux/blkdev.h>
>  #include <linux/writeback.h>
>  #include <linux/list_sort.h>
> +#include <linux/sched/debug.h>
>  
>  #include "gfs2.h"
>  #include "incore.h"
> @@ -32,8 +33,22 @@
>  #include "trace_gfs2.h"
>  #include "trans.h"
>  
> +extern struct workqueue_struct *gfs2_log_flush_wq;
> +
>  static void gfs2_log_shutdown(struct gfs2_sbd *sdp);
>  
> +void gfs2_log_flush_watchdog_func(struct work_struct *work)
> +{
> +	struct delayed_work *dwork = to_delayed_work(work);
> +	struct gfs2_sbd *sdp = container_of(dwork, struct gfs2_sbd,
> +					    sd_log_flush_watchdog);
> +
> +	fs_err(sdp, "log flush pid %u took > %lu secs to write %lu
> pages.\n",
> +	       sdp->sd_logd_process ? pid_nr(task_pid(sdp-
> >sd_logd_process)) :
> +	       0, (jiffies - sdp->sd_log_flush_start) / HZ,
> +	       sdp->sd_dirty_pages);
> +}
> +
>  /**
>   * gfs2_struct2blk - compute stuff
>   * @sdp: the filesystem
> @@ -1016,6 +1031,26 @@ static void trans_drain(struct gfs2_trans *tr)
>  	}
>  }
>  
> +/**
> + * count_dirty_pages - rough count the dirty ordered writes pages
> + * @sdp: the filesystem
> + *
> + * This is not meant to be exact. It's simply a rough estimate of
> how many
> + * dirty pages are on the ordered writes list. The actual number of
> pages
> + * may change because we don't keep the lock held during the log
> flush.
> + */
> +static unsigned long count_dirty_pages(struct gfs2_sbd *sdp)
> +{
> +	struct gfs2_inode *ip;
> +	unsigned long dpages = 0;
> +
> +	spin_lock(&sdp->sd_ordered_lock);
> +	list_for_each_entry(ip, &sdp->sd_log_ordered, i_ordered)
> +		dpages += ip->i_inode.i_mapping->nrpages;
> +	spin_unlock(&sdp->sd_ordered_lock);
> +	return dpages;
> +}
> +
>  /**
>   * gfs2_log_flush - flush incore transaction(s)
>   * @sdp: The filesystem
> @@ -1031,8 +1066,19 @@ void gfs2_log_flush(struct gfs2_sbd *sdp,
> struct gfs2_glock *gl, u32 flags)
>  	enum gfs2_freeze_state state = atomic_read(&sdp-
> >sd_freeze_state);
>  	unsigned int first_log_head;
>  	unsigned int reserved_revokes = 0;
> +	unsigned long dpages;
> +
> +	dpages = count_dirty_pages(sdp);
>  
>  	down_write(&sdp->sd_log_flush_lock);
> +	if (dpages)
> +		if (queue_delayed_work(gfs2_log_flush_wq,
> +				       &sdp->sd_log_flush_watchdog,
> +				       round_up(dpages *
> +						GFS2_LOG_FLUSH_TIMEOUT,
> HZ))) {
> +			sdp->sd_dirty_pages = dpages;
> +			sdp->sd_log_flush_start = jiffies;
> +		}
>  	trace_gfs2_log_flush(sdp, 1, flags);
>  
>  repeat:
> @@ -1144,6 +1190,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp,
> struct gfs2_glock *gl, u32 flags)
>  		gfs2_assert_withdraw_delayed(sdp, used_blocks <
> reserved_blocks);
>  		gfs2_log_release(sdp, reserved_blocks - used_blocks);
>  	}
> +	cancel_delayed_work(&sdp->sd_log_flush_watchdog);
>  	up_write(&sdp->sd_log_flush_lock);
>  	gfs2_trans_free(sdp, tr);
>  	if (gfs2_withdrawing(sdp))
> diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
> index fc905c2af53c..962044fba53a 100644
> --- a/fs/gfs2/log.h
> +++ b/fs/gfs2/log.h
> @@ -94,5 +94,6 @@ extern void gfs2_add_revoke(struct gfs2_sbd *sdp,
> struct gfs2_bufdata *bd);
>  extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl);
>  extern void gfs2_flush_revokes(struct gfs2_sbd *sdp);
>  extern void gfs2_ail_drain(struct gfs2_sbd *sdp);
> +extern void gfs2_log_flush_watchdog_func(struct work_struct *work);
>  
>  #endif /* __LOG_DOT_H__ */
> diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
> index 28d0eb23e18e..55a7f29742b3 100644
> --- a/fs/gfs2/main.c
> +++ b/fs/gfs2/main.c
> @@ -30,6 +30,7 @@
>  #include "glops.h"
>  
>  struct workqueue_struct *gfs2_control_wq;
> +struct workqueue_struct *gfs2_log_flush_wq;
>  
>  static void gfs2_init_inode_once(void *foo)
>  {
> @@ -178,6 +179,10 @@ static int __init init_gfs2_fs(void)
>  	if (!gfs2_freeze_wq)
>  		goto fail_wq3;
>  
> +	gfs2_log_flush_wq = alloc_workqueue("gfs2_log_flush_wq", 0, 0);
> +	if (!gfs2_log_flush_wq)
> +		goto fail_wq4;
> +
>  	gfs2_page_pool = mempool_create_page_pool(64, 0);
>  	if (!gfs2_page_pool)
>  		goto fail_mempool;
> @@ -189,6 +194,8 @@ static int __init init_gfs2_fs(void)
>  	return 0;
>  
>  fail_mempool:
> +	destroy_workqueue(gfs2_log_flush_wq);
> +fail_wq4:
>  	destroy_workqueue(gfs2_freeze_wq);
>  fail_wq3:
>  	destroy_workqueue(gfs2_control_wq);
> @@ -240,6 +247,7 @@ static void __exit exit_gfs2_fs(void)
>  	destroy_workqueue(gfs_recovery_wq);
>  	destroy_workqueue(gfs2_control_wq);
>  	destroy_workqueue(gfs2_freeze_wq);
> +	destroy_workqueue(gfs2_log_flush_wq);
>  	list_lru_destroy(&gfs2_qd_lru);
>  
>  	rcu_barrier();
> diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
> index 6a950c4a61e9..b09e61457b23 100644
> --- a/fs/gfs2/ops_fstype.c
> +++ b/fs/gfs2/ops_fstype.c
> @@ -139,6 +139,8 @@ static struct gfs2_sbd *init_sbd(struct
> super_block *sb)
>  	init_waitqueue_head(&sdp->sd_log_flush_wait);
>  	atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
>  	mutex_init(&sdp->sd_freeze_mutex);
> +	INIT_DELAYED_WORK(&sdp->sd_log_flush_watchdog,
> +			  gfs2_log_flush_watchdog_func);
>  
>  	return sdp;
>  
> diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
> index c0a34d9ddee4..c90d9f48571a 100644
> --- a/fs/gfs2/sys.c
> +++ b/fs/gfs2/sys.c
> @@ -96,7 +96,8 @@ static ssize_t status_show(struct gfs2_sbd *sdp,
> char *buf)
>  		     "sd_log_flush_head:        %d\n"
>  		     "sd_log_flush_tail:        %d\n"
>  		     "sd_log_blks_reserved:     %d\n"
> -		     "sd_log_revokes_available: %d\n",
> +		     "sd_log_revokes_available: %d\n"
> +		     "sd_dirty_pages:           %lu\n",
>  		     test_bit(SDF_JOURNAL_CHECKED, &f),
>  		     test_bit(SDF_JOURNAL_LIVE, &f),
>  		     (sdp->sd_jdesc ? sdp->sd_jdesc->jd_jid : 0),
> @@ -124,7 +125,8 @@ static ssize_t status_show(struct gfs2_sbd *sdp,
> char *buf)
>  		     sdp->sd_log_flush_head,
>  		     sdp->sd_log_flush_tail,
>  		     sdp->sd_log_blks_reserved,
> -		     atomic_read(&sdp->sd_log_revokes_available));
> +		     atomic_read(&sdp->sd_log_revokes_available),
> +		     sdp->sd_dirty_pages);
>  	return s;
>  }
>