[Cluster-devel] [GFS2 PATCH 4/4] gfs2: rework go_lock mechanism for node_scope race

cluster-devel.redhat.com archive mirror
 help / color / mirror / Atom feed

From: Bob Peterson <rpeterso@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [GFS2 PATCH 4/4] gfs2: rework go_lock mechanism for node_scope race
Date: Mon, 13 Sep 2021 14:30:28 -0500	[thread overview]
Message-ID: <20210913193028.75116-5-rpeterso@redhat.com> (raw)
In-Reply-To: <20210913193028.75116-1-rpeterso@redhat.com>

Before this patch, glocks only performed their go_lock glop function
when the first holder was queued. When we introduced the new "node
scope" mechanism, we allowed multiple holders to hold a glock in EX
at the same time, with local locking. But it introduced a new race:
If the first holder specified GL_SKIP (skip the go_lock function until
deemed necessary) any secondary holders WITHOUT GL_SKIP assumed the
go_lock mechanism had been performed. In the case of rgrps, that was
not always the case: for example, when the first process is searching
for free blocks for allocation (GL_SKIP) and a second holder (without
GL_SKIP) wants to free blocks, which just assumes the buffers exist.

To remedy this race, this patch reworks how the GL_SKIP mechanism is
managed. The first holder checks if GL_SKIP is specified, and if so,
a new glock flag, GLF_GO_LOCK_SKIPPED, is set. Subsequent holders
without GL_SKIP who need the lock check this flag to see if the
go_lock function was skipped, and call it if necessary.

This also "accidentally" fixes a minor bug with glock do-promote
tracing: glocks that had no "go_lock" glop would not properly log the
"first" flag to the kernel trace.

Fixes: 06e908cd9ead ("gfs2: Allow node-wide exclusive glock sharing")
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
 fs/gfs2/glock.c  | 81 ++++++++++++++++++++++++++++++++----------------
 fs/gfs2/glops.c  |  1 +
 fs/gfs2/incore.h |  1 +
 fs/gfs2/rgrp.c   |  1 +
 4 files changed, 58 insertions(+), 26 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b8248ceff3c3..faad2cb105bb 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -380,6 +380,46 @@ static void do_error(struct gfs2_glock *gl, const int ret)
 	}
 }
 
+static int glock_go_lock(struct gfs2_glock *gl, struct gfs2_holder *gh,
+			 struct gfs2_holder *gh1)
+__releases(&gl->gl_lockref.lock)
+__acquires(&gl->gl_lockref.lock)
+{
+	const struct gfs2_glock_operations *glops = gl->gl_ops;
+	int ret = 0;
+
+	/*
+	 * When the first holder on the list is granted, it can SKIP or not.
+	 * If it skips go_lock, it sets the GLF_GO_LOCK_SKIPPED flag.
+	 * All subsequent holders need to know if it was skipped. They may
+	 * also choose to skip go_lock, but only the first granted should
+	 * set GLF_GO_LOCK_SKIPPED. If not skipped, they need to know if
+	 * go_lock was previously skipped, call go_lock, and clear the flag.
+	 */
+	if (gh->gh_flags & GL_SKIP) {
+		if (gh == gh1)
+		    set_bit(GLF_GO_LOCK_SKIPPED, &gl->gl_flags);
+		goto out;
+	}
+	if ((gh == gh1) || test_bit(GLF_GO_LOCK_SKIPPED, &gl->gl_flags)) {
+		spin_unlock(&gl->gl_lockref.lock);
+		/* FIXME: eliminate this eventually */
+		ret = glops->go_lock(gh);
+		spin_lock(&gl->gl_lockref.lock);
+		clear_bit(GLF_GO_LOCK_SKIPPED, &gl->gl_flags);
+		if (ret) {
+			if (ret == 1)
+				return 2;
+			gh->gh_error = ret;
+			list_del_init(&gh->gh_list);
+			trace_gfs2_glock_queue(gh, 0);
+			gfs2_holder_wake(gh);
+		}
+	}
+out:
+	return ret;
+}
+
 /**
  * do_promote - promote as many requests as possible on the current queue
  * @gl: The glock
@@ -389,46 +429,33 @@ static void do_error(struct gfs2_glock *gl, const int ret)
  */
 
 static int do_promote(struct gfs2_glock *gl)
-__releases(&gl->gl_lockref.lock)
-__acquires(&gl->gl_lockref.lock)
 {
 	const struct gfs2_glock_operations *glops = gl->gl_ops;
-	struct gfs2_holder *gh, *tmp;
+	struct gfs2_holder *gh, *gh1, *tmp;
 	int ret;
 
 restart:
+	gh1 = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
+
 	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
 		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
 			continue;
 		if (may_grant(gl, gh)) {
-			if (gh->gh_list.prev == &gl->gl_holders &&
-			    glops->go_lock) {
-				if (!(gh->gh_flags & GL_SKIP)) {
-					spin_unlock(&gl->gl_lockref.lock);
-					/* FIXME: eliminate this eventually */
-					ret = glops->go_lock(gh);
-					spin_lock(&gl->gl_lockref.lock);
-					if (ret) {
-						if (ret == 1)
-							return 2;
-						gh->gh_error = ret;
-						list_del_init(&gh->gh_list);
-						trace_gfs2_glock_queue(gh, 0);
-						gfs2_holder_wake(gh);
-						goto restart;
-					}
-				}
-				set_bit(HIF_HOLDER, &gh->gh_iflags);
-				trace_gfs2_promote(gh, 1);
-				gfs2_holder_wake(gh);
-				goto restart;
+			ret = 0;
+			if (glops->go_lock) {
+				ret = glock_go_lock(gl, gh, gh1);
+				if (ret == 2)
+					return ret;
 			}
+
 			set_bit(HIF_HOLDER, &gh->gh_iflags);
-			trace_gfs2_promote(gh, 0);
+			trace_gfs2_promote(gh, gh == gh1 ? 1 : 0);
 			gfs2_holder_wake(gh);
+			if (ret)
+				goto restart;
 			continue;
 		}
-		if (gh->gh_list.prev == &gl->gl_holders)
+		if (gh == gh1)
 			return 1;
 		do_error(gl, 0);
 		break;
@@ -2148,6 +2175,8 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
 		*p++ = 'P';
 	if (test_bit(GLF_FREEING, gflags))
 		*p++ = 'x';
+	if (test_bit(GLF_GO_LOCK_SKIPPED, gflags))
+		*p++ = 's';
 	*p = 0;
 	return buf;
 }
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 4b19f513570f..923efcd71269 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -477,6 +477,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
 	error = gfs2_dinode_in(ip, dibh->b_data);
 	brelse(dibh);
 	clear_bit(GIF_INVALID, &ip->i_flags);
+	clear_bit(GLF_GO_LOCK_SKIPPED, &ip->i_gl->gl_flags);
 
 	return error;
 }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 0fe49770166e..4880818db83d 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -315,6 +315,7 @@ struct gfs2_alloc_parms {
 
 enum {
 	GLF_LOCK			= 1,
+	GLF_GO_LOCK_SKIPPED		= 2, /* go_lock was skipped */
 	GLF_DEMOTE			= 3,
 	GLF_PENDING_DEMOTE		= 4,
 	GLF_DEMOTE_IN_PROGRESS		= 5,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 1fb66f6e6a0c..83fd5c750041 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1248,6 +1248,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
 		if (rgd->rd_rgl->rl_unlinked == 0)
 			rgd->rd_flags &= ~GFS2_RDF_CHECK;
 	}
+	clear_bit(GLF_GO_LOCK_SKIPPED, &gl->gl_flags);
 	return 0;
 
 fail:
-- 
2.31.1

     prev parent reply	other threads:[~2021-09-13 19:30 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-13 19:30 [Cluster-devel] [GFS2 PATCH 0/4] gfs2: fix bugs related to node_scope and go_lock Bob Peterson
2021-09-13 19:30 ` [Cluster-devel] [GFS2 PATCH 1/4] gfs2: remove redundant check in gfs2_rgrp_go_lock Bob Peterson
2021-09-13 19:30 ` [Cluster-devel] [GFS2 PATCH 2/4] gfs2: Add GL_SKIP holder flag to dump_holder Bob Peterson
2021-09-13 19:30 ` [Cluster-devel] [GFS2 PATCH 3/4] gfs2: move GL_SKIP check from glops to do_promote Bob Peterson
2021-09-13 19:30 ` Bob Peterson [this message]

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:b8248ceff3c dfblob:faad2cb105b dfblob:4b19f513570
dfblob:923efcd7126 dfblob:0fe49770166 dfblob:4880818db83
dfblob:1fb66f6e6a0 dfblob:83fd5c75004 )
 OR (
bs:"[Cluster-devel] [GFS2 PATCH 4/4] gfs2: rework go_lock mechanism for node_scope race" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210913193028.75116-5-rpeterso@redhat.com \
    --to=rpeterso@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).