cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed
From: Bob Peterson <rpeterso@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [GFS2 PATCH 24/28] gfs2: Do proper error checking for go_sync family of glops functions
Date: Thu, 20 Feb 2020 13:53:25 -0600	[thread overview]
Message-ID: <20200220195329.952027-25-rpeterso@redhat.com> (raw)
In-Reply-To: <20200220195329.952027-1-rpeterso@redhat.com>

Before this patch, function do_xmote would try to sync out the glock
dirty data by calling the appropriate glops function XXX_go_sync()
but it did not check for a good return code. If the sync was not
possible due to an io error or whatever, do_xmote would continue on
and call go_inval and release the glock to other cluster nodes.
When those nodes go to replay the journal, they may already be holding
glocks for the journal records that should have been synced, but were
not due to the ignored error.

This patch introduces proper error code checking to the go_sync
family of glops functions.

Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Reviewed-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c  | 16 ++++++++++++++--
 fs/gfs2/glops.c  | 30 +++++++++++++++++++-----------
 fs/gfs2/incore.h |  2 +-
 3 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 73cb5bcc37a7..0bfa58e5a64e 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -602,8 +602,20 @@ __acquires(&gl->gl_lockref.lock)
 	    (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
 		clear_bit(GLF_BLOCKING, &gl->gl_flags);
 	spin_unlock(&gl->gl_lockref.lock);
-	if (glops->go_sync)
-		glops->go_sync(gl);
+	if (glops->go_sync) {
+		ret = glops->go_sync(gl);
+		/* If we had a problem syncing (due to io errors or whatever,
+		 * we should not invalidate the metadata or tell dlm to
+		 * release the glock to other nodes.
+		 */
+		if (ret) {
+			if (cmpxchg(&sdp->sd_log_error, 0, ret)) {
+				fs_err(sdp, "Error %d syncing glock \n", ret);
+				gfs2_dump_glock(NULL, gl, true);
+			}
+			return;
+		}
+	}
 	if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) {
 		/*
 		 * The call to go_sync should have cleared out the ail list.
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index bbbcae8d853c..9e9c7a4b8c66 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -82,10 +82,11 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
 }
 
 
-static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
+static int gfs2_ail_empty_gl(struct gfs2_glock *gl)
 {
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 	struct gfs2_trans tr;
+	int ret;
 
 	memset(&tr, 0, sizeof(tr));
 	INIT_LIST_HEAD(&tr.tr_buf);
@@ -116,7 +117,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
 			goto flush;
 		if (log_in_flight)
 			log_flush_wait(sdp);
-		return;
+		return 0;
 	}
 
 	/* A shortened, inline version of gfs2_trans_begin()
@@ -124,8 +125,9 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
          * on the stack */
 	tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes);
 	tr.tr_ip = _RET_IP_;
-	if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0)
-		return;
+	ret = gfs2_log_reserve(sdp, tr.tr_reserved);
+	if (ret < 0)
+		return ret;
 	WARN_ON_ONCE(current->journal_info);
 	current->journal_info = &tr;
 
@@ -135,6 +137,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
 flush:
 	gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
 		       GFS2_LFC_AIL_EMPTY_GL);
+	return 0;
 }
 
 void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
@@ -168,7 +171,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
  * return to caller to demote/unlock the glock until I/O is complete.
  */
 
-static void rgrp_go_sync(struct gfs2_glock *gl)
+static int rgrp_go_sync(struct gfs2_glock *gl)
 {
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 	struct address_space *mapping = &sdp->sd_aspace;
@@ -176,21 +179,24 @@ static void rgrp_go_sync(struct gfs2_glock *gl)
 	int error;
 
 	if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
-		return;
+		return 0;
 	GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
 
 	gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
 		       GFS2_LFC_RGRP_GO_SYNC);
 	filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
 	error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
+	WARN_ON_ONCE(error);
 	mapping_set_error(mapping, error);
-	gfs2_ail_empty_gl(gl);
+	if (!error)
+		error = gfs2_ail_empty_gl(gl);
 
 	spin_lock(&gl->gl_lockref.lock);
 	rgd = gl->gl_object;
 	if (rgd)
 		gfs2_free_clones(rgd);
 	spin_unlock(&gl->gl_lockref.lock);
+	return error;
 }
 
 /**
@@ -257,12 +263,12 @@ static void gfs2_clear_glop_pending(struct gfs2_inode *ip)
  *
  */
 
-static void inode_go_sync(struct gfs2_glock *gl)
+static int inode_go_sync(struct gfs2_glock *gl)
 {
 	struct gfs2_inode *ip = gfs2_glock2inode(gl);
 	int isreg = ip && S_ISREG(ip->i_inode.i_mode);
 	struct address_space *metamapping = gfs2_glock2aspace(gl);
-	int error;
+	int error = 0;
 
 	if (isreg) {
 		if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
@@ -295,6 +301,7 @@ static void inode_go_sync(struct gfs2_glock *gl)
 
 out:
 	gfs2_clear_glop_pending(ip);
+	return error;
 }
 
 /**
@@ -515,7 +522,7 @@ static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
  *
  */
 
-static void freeze_go_sync(struct gfs2_glock *gl)
+static int freeze_go_sync(struct gfs2_glock *gl)
 {
 	int error = 0;
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
@@ -529,7 +536,7 @@ static void freeze_go_sync(struct gfs2_glock *gl)
 				error);
 			if (gfs2_withdrawn(sdp)) {
 				atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
-				return;
+				return 0;
 			}
 			gfs2_assert_withdraw(sdp, 0);
 		}
@@ -537,6 +544,7 @@ static void freeze_go_sync(struct gfs2_glock *gl)
 		gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
 			       GFS2_LFC_FREEZE_GO_SYNC);
 	}
+	return 0;
 }
 
 /**
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 8cd564bcf5e6..04549a8cae7e 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -234,7 +234,7 @@ struct lm_lockname {
 
 
 struct gfs2_glock_operations {
-	void (*go_sync) (struct gfs2_glock *gl);
+	int (*go_sync) (struct gfs2_glock *gl);
 	int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
 	void (*go_inval) (struct gfs2_glock *gl, int flags);
 	int (*go_demote_ok) (const struct gfs2_glock *gl);
-- 
2.24.1



  parent reply	other threads:[~2020-02-20 19:53 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-02-20 19:53 [Cluster-devel] [GFS2 PATCH 00/28] GFS2 recovery patches v10 Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 01/28] gfs2: Split gfs2_lm_withdraw into two functions Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 02/28] gfs2: Report errors before withdraw Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 03/28] gfs2: Remove usused cluster_wide arguments of gfs2_consist functions Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 04/28] gfs2: Turn gfs2_consist into void functions Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 05/28] gfs2: Return bool from gfs2_assert functions Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 06/28] gfs2: Introduce concept of a pending withdraw Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 07/28] gfs2: clear ail1 list when gfs2 withdraws Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 08/28] gfs2: Rework how rgrp buffer_heads are managed Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 09/28] gfs2: log error reform Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 10/28] gfs2: Only complain the first time an io error occurs in quota or log Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 11/28] gfs2: Ignore dlm recovery requests if gfs2 is withdrawn Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 12/28] gfs2: move check_journal_clean to util.c for future use Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 13/28] gfs2: Allow some glocks to be used during withdraw Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 14/28] gfs2: Force withdraw to replay journals and wait for it to finish Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 15/28] gfs2: fix infinite loop when checking ail item count before go_inval Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 16/28] gfs2: Add verbose option to check_journal_clean Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 17/28] gfs2: Issue revokes more intelligently Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 18/28] gfs2: Prepare to withdraw as soon as an IO error occurs in log write Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 19/28] gfs2: Check for log write errors before telling dlm to unlock Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 20/28] gfs2: Do log_flush in gfs2_ail_empty_gl even if ail list is empty Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 21/28] gfs2: Withdraw in gfs2_ail1_flush if write_cache_pages fails Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 22/28] gfs2: drain the ail2 list after io errors Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 23/28] gfs2: Don't demote a glock until its revokes are written Bob Peterson
2020-02-20 19:53 ` Bob Peterson [this message]
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 25/28] gfs2: flesh out delayed withdraw for gfs2_log_flush Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 26/28] fs: clean up __block_commit_write Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 27/28] gfs2: don't allow releasepage to free bd still used for revokes Bob Peterson
2020-02-20 19:53 ` [Cluster-devel] [GFS2 PATCH 28/28] gfs2: allow journal replay to hold sd_log_flush_lock Bob Peterson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200220195329.952027-25-rpeterso@redhat.com \
    --to=rpeterso@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).