From mboxrd@z Thu Jan 1 00:00:00 1970 From: Steven Whitehouse Date: Tue, 13 Jul 2021 19:41:13 +0100 Subject: [Cluster-devel] [GFS2 PATCH 08/10] gfs2: New log flush watchdog In-Reply-To: <20210713180958.66995-9-rpeterso@redhat.com> References: <20210713180958.66995-1-rpeterso@redhat.com> <20210713180958.66995-9-rpeterso@redhat.com> Message-ID: List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Hi, On Tue, 2021-07-13 at 13:09 -0500, Bob Peterson wrote: > This patch adds a new watchdog whose sole purpose is to complain when > gfs2_log_flush operations are taking too long. > This one is a bit confusing. It says that its purpose is to check whether the log flush is taking too long, but it appears to set a timeout based on the amount of dirty data that will be written back, so I think it isn't really the log flush alone that is being timed, but the writeback plus the log flush. It also looks like the timeout depends entirely on the number of dirty pages, and not on the log flush size. I also wonder about the performance impact of traversing the list of dirty pages. If that can be avoided, it should make the implementation rather more efficient. Steve. 
> Signed-off-by: Bob Peterson > --- > fs/gfs2/incore.h | 6 ++++++ > fs/gfs2/log.c | 47 > ++++++++++++++++++++++++++++++++++++++++++++ > fs/gfs2/log.h | 1 + > fs/gfs2/main.c | 8 ++++++++ > fs/gfs2/ops_fstype.c | 2 ++ > fs/gfs2/sys.c | 6 ++++-- > 6 files changed, 68 insertions(+), 2 deletions(-) > > diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h > index 6f31a067a5f2..566c0053b7c5 100644 > --- a/fs/gfs2/incore.h > +++ b/fs/gfs2/incore.h > @@ -683,6 +683,8 @@ struct local_statfs_inode { > unsigned int si_jid; /* journal id this statfs inode > corresponds to */ > }; > > +#define GFS2_LOG_FLUSH_TIMEOUT (HZ / 10) /* arbitrary: 1/10 second > per page */ > + > struct gfs2_sbd { > struct super_block *sd_vfs; > struct gfs2_pcpu_lkstats __percpu *sd_lkstats; > @@ -849,6 +851,10 @@ struct gfs2_sbd { > unsigned long sd_last_warning; > struct dentry *debugfs_dir; /* debugfs directory */ > unsigned long sd_glock_dqs_held; > + > + struct delayed_work sd_log_flush_watchdog; > + unsigned long sd_dirty_pages; > + unsigned long sd_log_flush_start; > }; > > static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int > which) > diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c > index f0ee3ff6f9a8..bd2ff5ef4b91 100644 > --- a/fs/gfs2/log.c > +++ b/fs/gfs2/log.c > @@ -19,6 +19,7 @@ > #include > #include > #include > +#include > > #include "gfs2.h" > #include "incore.h" > @@ -32,8 +33,22 @@ > #include "trace_gfs2.h" > #include "trans.h" > > +extern struct workqueue_struct *gfs2_log_flush_wq; > + > static void gfs2_log_shutdown(struct gfs2_sbd *sdp); > > +void gfs2_log_flush_watchdog_func(struct work_struct *work) > +{ > + struct delayed_work *dwork = to_delayed_work(work); > + struct gfs2_sbd *sdp = container_of(dwork, struct gfs2_sbd, > + sd_log_flush_watchdog); > + > + fs_err(sdp, "log flush pid %u took > %lu secs to write %lu > pages.\n", > + sdp->sd_logd_process ? 
pid_nr(task_pid(sdp- > >sd_logd_process)) : > + 0, (jiffies - sdp->sd_log_flush_start) / HZ, > + sdp->sd_dirty_pages); > +} > + > /** > * gfs2_struct2blk - compute stuff > * @sdp: the filesystem > @@ -1016,6 +1031,26 @@ static void trans_drain(struct gfs2_trans *tr) > } > } > > +/** > + * count_dirty_pages - rough count the dirty ordered writes pages > + * @sdp: the filesystem > + * > + * This is not meant to be exact. It's simply a rough estimate of > how many > + * dirty pages are on the ordered writes list. The actual number of > pages > + * may change because we don't keep the lock held during the log > flush. > + */ > +static unsigned long count_dirty_pages(struct gfs2_sbd *sdp) > +{ > + struct gfs2_inode *ip; > + unsigned long dpages = 0; > + > + spin_lock(&sdp->sd_ordered_lock); > + list_for_each_entry(ip, &sdp->sd_log_ordered, i_ordered) > + dpages += ip->i_inode.i_mapping->nrpages; > + spin_unlock(&sdp->sd_ordered_lock); > + return dpages; > +} > + > /** > * gfs2_log_flush - flush incore transaction(s) > * @sdp: The filesystem > @@ -1031,8 +1066,19 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, > struct gfs2_glock *gl, u32 flags) > enum gfs2_freeze_state state = atomic_read(&sdp- > >sd_freeze_state); > unsigned int first_log_head; > unsigned int reserved_revokes = 0; > + unsigned long dpages; > + > + dpages = count_dirty_pages(sdp); > > down_write(&sdp->sd_log_flush_lock); > + if (dpages) > + if (queue_delayed_work(gfs2_log_flush_wq, > + &sdp->sd_log_flush_watchdog, > + round_up(dpages * > + GFS2_LOG_FLUSH_TIMEOUT, > HZ))) { > + sdp->sd_dirty_pages = dpages; > + sdp->sd_log_flush_start = jiffies; > + } > trace_gfs2_log_flush(sdp, 1, flags); > > repeat: > @@ -1144,6 +1190,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, > struct gfs2_glock *gl, u32 flags) > gfs2_assert_withdraw_delayed(sdp, used_blocks < > reserved_blocks); > gfs2_log_release(sdp, reserved_blocks - used_blocks); > } > + cancel_delayed_work(&sdp->sd_log_flush_watchdog); > 
up_write(&sdp->sd_log_flush_lock); > gfs2_trans_free(sdp, tr); > if (gfs2_withdrawing(sdp)) > diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h > index fc905c2af53c..962044fba53a 100644 > --- a/fs/gfs2/log.h > +++ b/fs/gfs2/log.h > @@ -94,5 +94,6 @@ extern void gfs2_add_revoke(struct gfs2_sbd *sdp, > struct gfs2_bufdata *bd); > extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl); > extern void gfs2_flush_revokes(struct gfs2_sbd *sdp); > extern void gfs2_ail_drain(struct gfs2_sbd *sdp); > +extern void gfs2_log_flush_watchdog_func(struct work_struct *work); > > #endif /* __LOG_DOT_H__ */ > diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c > index 28d0eb23e18e..55a7f29742b3 100644 > --- a/fs/gfs2/main.c > +++ b/fs/gfs2/main.c > @@ -30,6 +30,7 @@ > #include "glops.h" > > struct workqueue_struct *gfs2_control_wq; > +struct workqueue_struct *gfs2_log_flush_wq; > > static void gfs2_init_inode_once(void *foo) > { > @@ -178,6 +179,10 @@ static int __init init_gfs2_fs(void) > if (!gfs2_freeze_wq) > goto fail_wq3; > > + gfs2_log_flush_wq = alloc_workqueue("gfs2_log_flush_wq", 0, 0); > + if (!gfs2_log_flush_wq) > + goto fail_wq4; > + > gfs2_page_pool = mempool_create_page_pool(64, 0); > if (!gfs2_page_pool) > goto fail_mempool; > @@ -189,6 +194,8 @@ static int __init init_gfs2_fs(void) > return 0; > > fail_mempool: > + destroy_workqueue(gfs2_log_flush_wq); > +fail_wq4: > destroy_workqueue(gfs2_freeze_wq); > fail_wq3: > destroy_workqueue(gfs2_control_wq); > @@ -240,6 +247,7 @@ static void __exit exit_gfs2_fs(void) > destroy_workqueue(gfs_recovery_wq); > destroy_workqueue(gfs2_control_wq); > destroy_workqueue(gfs2_freeze_wq); > + destroy_workqueue(gfs2_log_flush_wq); > list_lru_destroy(&gfs2_qd_lru); > > rcu_barrier(); > diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c > index 6a950c4a61e9..b09e61457b23 100644 > --- a/fs/gfs2/ops_fstype.c > +++ b/fs/gfs2/ops_fstype.c > @@ -139,6 +139,8 @@ static struct gfs2_sbd *init_sbd(struct > super_block *sb) > 
init_waitqueue_head(&sdp->sd_log_flush_wait); > atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); > mutex_init(&sdp->sd_freeze_mutex); > + INIT_DELAYED_WORK(&sdp->sd_log_flush_watchdog, > + gfs2_log_flush_watchdog_func); > > return sdp; > > diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c > index c0a34d9ddee4..c90d9f48571a 100644 > --- a/fs/gfs2/sys.c > +++ b/fs/gfs2/sys.c > @@ -96,7 +96,8 @@ static ssize_t status_show(struct gfs2_sbd *sdp, > char *buf) > "sd_log_flush_head: %d\n" > "sd_log_flush_tail: %d\n" > "sd_log_blks_reserved: %d\n" > - "sd_log_revokes_available: %d\n", > + "sd_log_revokes_available: %d\n" > + "sd_dirty_pages: %lu\n", > test_bit(SDF_JOURNAL_CHECKED, &f), > test_bit(SDF_JOURNAL_LIVE, &f), > (sdp->sd_jdesc ? sdp->sd_jdesc->jd_jid : 0), > @@ -124,7 +125,8 @@ static ssize_t status_show(struct gfs2_sbd *sdp, > char *buf) > sdp->sd_log_flush_head, > sdp->sd_log_flush_tail, > sdp->sd_log_blks_reserved, > - atomic_read(&sdp->sd_log_revokes_available)); > + atomic_read(&sdp->sd_log_revokes_available), > + sdp->sd_dirty_pages); > return s; > } >