From mboxrd@z Thu Jan 1 00:00:00 1970 From: Bob Peterson Date: Thu, 13 Sep 2018 14:24:47 -0400 (EDT) Subject: [Cluster-devel] [GFS2 PATCH] gfs2: purge ail1 and ail2 lists on withdraw In-Reply-To: <292195193.12986730.1536862981666.JavaMail.zimbra@redhat.com> Message-ID: <2113925841.12987130.1536863087425.JavaMail.zimbra@redhat.com> List-Id: To: cluster-devel.redhat.com MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Hi, I don't know if this is the right approach, but it does seem to help prevent gfs2 file system corruption due to journal replay. I wanted to post it early to get people's thoughts. --- This patch purges the ail1 and ail2 lists forcibly whenever gfs2 does a file system withdraw. This should help mitigate file system damage caused by buffers that are in transit or in transactions. Signed-off-by: Bob Peterson --- fs/gfs2/log.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/gfs2/log.h | 2 +- fs/gfs2/util.c | 4 ++++ 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index ee20ea42e7b5..5c9a1057cb48 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -958,6 +958,70 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) sdp->sd_log_tail = sdp->sd_log_head; } +static void gfs2_ordered_purge(struct gfs2_sbd *sdp) +{ + struct gfs2_inode *ip; + + spin_lock(&sdp->sd_ordered_lock); + while (!list_empty(&sdp->sd_log_le_ordered)) { + ip = list_first_entry(&sdp->sd_log_le_ordered, + struct gfs2_inode, i_ordered); + list_del(&ip->i_ordered); + clear_bit(GIF_ORDERED, &ip->i_flags); + if (ip->i_inode.i_mapping->nrpages) { + spin_unlock(&sdp->sd_ordered_lock); + filemap_flush(ip->i_inode.i_mapping); + spin_lock(&sdp->sd_ordered_lock); + } + } + spin_unlock(&sdp->sd_ordered_lock); +} + +static void tr_list_purge(struct gfs2_sbd *sdp, struct list_head *head) +__releases(&sdp->sd_ail_lock) +__acquires(&sdp->sd_ail_lock) +{ + struct gfs2_bufdata *bd; + struct buffer_head *bh; 
+ + while (!list_empty(head)) { + bd = list_first_entry(head, struct gfs2_bufdata, + bd_ail_st_list); + bh = bd->bd_bh; + if (buffer_locked(bh)) { + spin_unlock(&sdp->sd_ail_lock); + wait_on_buffer(bh); + spin_lock(&sdp->sd_ail_lock); + } else { + clear_buffer_dirty(bh); + } + gfs2_remove_from_ail(bd); + } +} + +static void ail_list_purge(struct gfs2_sbd *sdp, struct list_head *head) +{ + struct gfs2_trans *tr; + + while (!list_empty(head)) { + tr = list_first_entry(head, struct gfs2_trans, tr_list); + list_del(&tr->tr_list); + tr_list_purge(sdp, &tr->tr_ail1_list); + tr_list_purge(sdp, &tr->tr_ail2_list); + kfree(tr); + } +} + +void gfs2_ail_purge(struct gfs2_sbd *sdp) +{ + gfs2_ordered_purge(sdp); + + spin_lock(&sdp->sd_ail_lock); + ail_list_purge(sdp, &sdp->sd_ail1_list); + ail_list_purge(sdp, &sdp->sd_ail2_list); + spin_unlock(&sdp->sd_ail_lock); +} + static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp) { return (atomic_read(&sdp->sd_log_pinned) + diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 20241436126d..28052dc8d282 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h @@ -76,7 +76,7 @@ extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 type); extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans); extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc); - +extern void gfs2_ail_purge(struct gfs2_sbd *sdp); extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); extern int gfs2_logd(void *data); extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index b5a6e958432d..550918039139 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -19,6 +19,7 @@ #include "gfs2.h" #include "incore.h" #include "glock.h" +#include "log.h" #include "rgrp.h" #include "util.h" @@ -62,6 +63,9 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...) 
fs_err(sdp, "about to withdraw this file system\n"); BUG_ON(sdp->sd_args.ar_debug); + /* Purge our ail1 and ail2 lists: we can't trust them anyway */ + gfs2_ail_purge(sdp); + kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE); if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))