From: Bob Peterson <rpeterso@redhat.com>
To: cluster-devel.redhat.com
Subject: [Cluster-devel] [GFS2 PATCH 4/4] gfs2: rework go_lock mechanism for node_scope race
Date: Mon, 13 Sep 2021 14:30:28 -0500 [thread overview]
Message-ID: <20210913193028.75116-5-rpeterso@redhat.com> (raw)
In-Reply-To: <20210913193028.75116-1-rpeterso@redhat.com>
Before this patch, glocks only performed their go_lock glop function
when the first holder was queued. When we introduced the new "node
scope" mechanism, we allowed multiple holders to hold a glock in EX
at the same time, with local locking. But it introduced a new race:
If the first holder specified GL_SKIP (skip the go_lock function until
deemed necessary) any secondary holders WITHOUT GL_SKIP assumed the
go_lock mechanism had been performed. In the case of rgrps, that was
not always the case: for example, when the first process is searching
for free blocks for allocation (GL_SKIP) and a second holder (without
GL_SKIP) wants to free blocks, which just assumes the buffers exist.
To remedy this race, this patch reworks how the GL_SKIP mechanism is
managed. The first holder checks if GL_SKIP is specified, and if so,
a new glock flag, GLF_GO_LOCK_SKIPPED, is set. Subsequent holders
without GL_SKIP who need the lock check this flag to see if the
go_lock function was skipped, and call it if necessary.
This also "accidentally" fixes a minor bug with glock do-promote
tracing: glocks that had no "go_lock" glop would not properly log the
"first" flag to the kernel trace.
Fixes: 06e908cd9ead ("gfs2: Allow node-wide exclusive glock sharing")
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
fs/gfs2/glock.c | 81 ++++++++++++++++++++++++++++++++----------------
fs/gfs2/glops.c | 1 +
fs/gfs2/incore.h | 1 +
fs/gfs2/rgrp.c | 1 +
4 files changed, 58 insertions(+), 26 deletions(-)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b8248ceff3c3..faad2cb105bb 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -380,6 +380,46 @@ static void do_error(struct gfs2_glock *gl, const int ret)
}
}
+static int glock_go_lock(struct gfs2_glock *gl, struct gfs2_holder *gh,
+ struct gfs2_holder *gh1)
+__releases(&gl->gl_lockref.lock)
+__acquires(&gl->gl_lockref.lock)
+{
+ const struct gfs2_glock_operations *glops = gl->gl_ops;
+ int ret = 0;
+
+ /*
+ * When the first holder on the list is granted, it can SKIP or not.
+ * If it skips go_lock, it sets the GLF_GO_LOCK_SKIPPED flag.
+ * All subsequent holders need to know if it was skipped. They may
+ * also choose to skip go_lock, but only the first granted should
+ * set GLF_GO_LOCK_SKIPPED. If not skipped, they need to know if
+ * go_lock was previously skipped, call go_lock, and clear the flag.
+ */
+ if (gh->gh_flags & GL_SKIP) {
+ if (gh == gh1)
+ set_bit(GLF_GO_LOCK_SKIPPED, &gl->gl_flags);
+ goto out;
+ }
+ if ((gh == gh1) || test_bit(GLF_GO_LOCK_SKIPPED, &gl->gl_flags)) {
+ spin_unlock(&gl->gl_lockref.lock);
+ /* FIXME: eliminate this eventually */
+ ret = glops->go_lock(gh);
+ spin_lock(&gl->gl_lockref.lock);
+ clear_bit(GLF_GO_LOCK_SKIPPED, &gl->gl_flags);
+ if (ret) {
+ if (ret == 1)
+ return 2;
+ gh->gh_error = ret;
+ list_del_init(&gh->gh_list);
+ trace_gfs2_glock_queue(gh, 0);
+ gfs2_holder_wake(gh);
+ }
+ }
+out:
+ return ret;
+}
+
/**
* do_promote - promote as many requests as possible on the current queue
* @gl: The glock
@@ -389,46 +429,33 @@ static void do_error(struct gfs2_glock *gl, const int ret)
*/
static int do_promote(struct gfs2_glock *gl)
-__releases(&gl->gl_lockref.lock)
-__acquires(&gl->gl_lockref.lock)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
- struct gfs2_holder *gh, *tmp;
+ struct gfs2_holder *gh, *gh1, *tmp;
int ret;
restart:
+ gh1 = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
+
list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
if (test_bit(HIF_HOLDER, &gh->gh_iflags))
continue;
if (may_grant(gl, gh)) {
- if (gh->gh_list.prev == &gl->gl_holders &&
- glops->go_lock) {
- if (!(gh->gh_flags & GL_SKIP)) {
- spin_unlock(&gl->gl_lockref.lock);
- /* FIXME: eliminate this eventually */
- ret = glops->go_lock(gh);
- spin_lock(&gl->gl_lockref.lock);
- if (ret) {
- if (ret == 1)
- return 2;
- gh->gh_error = ret;
- list_del_init(&gh->gh_list);
- trace_gfs2_glock_queue(gh, 0);
- gfs2_holder_wake(gh);
- goto restart;
- }
- }
- set_bit(HIF_HOLDER, &gh->gh_iflags);
- trace_gfs2_promote(gh, 1);
- gfs2_holder_wake(gh);
- goto restart;
+ ret = 0;
+ if (glops->go_lock) {
+ ret = glock_go_lock(gl, gh, gh1);
+ if (ret == 2)
+ return ret;
}
+
set_bit(HIF_HOLDER, &gh->gh_iflags);
- trace_gfs2_promote(gh, 0);
+ trace_gfs2_promote(gh, gh == gh1 ? 1 : 0);
gfs2_holder_wake(gh);
+ if (ret)
+ goto restart;
continue;
}
- if (gh->gh_list.prev == &gl->gl_holders)
+ if (gh == gh1)
return 1;
do_error(gl, 0);
break;
@@ -2148,6 +2175,8 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
*p++ = 'P';
if (test_bit(GLF_FREEING, gflags))
*p++ = 'x';
+ if (test_bit(GLF_GO_LOCK_SKIPPED, gflags))
+ *p++ = 's';
*p = 0;
return buf;
}
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 4b19f513570f..923efcd71269 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -477,6 +477,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
error = gfs2_dinode_in(ip, dibh->b_data);
brelse(dibh);
clear_bit(GIF_INVALID, &ip->i_flags);
+ clear_bit(GLF_GO_LOCK_SKIPPED, &ip->i_gl->gl_flags);
return error;
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 0fe49770166e..4880818db83d 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -315,6 +315,7 @@ struct gfs2_alloc_parms {
enum {
GLF_LOCK = 1,
+ GLF_GO_LOCK_SKIPPED = 2, /* go_lock was skipped */
GLF_DEMOTE = 3,
GLF_PENDING_DEMOTE = 4,
GLF_DEMOTE_IN_PROGRESS = 5,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 1fb66f6e6a0c..83fd5c750041 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1248,6 +1248,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
if (rgd->rd_rgl->rl_unlinked == 0)
rgd->rd_flags &= ~GFS2_RDF_CHECK;
}
+ clear_bit(GLF_GO_LOCK_SKIPPED, &gl->gl_flags);
return 0;
fail:
--
2.31.1
prev parent reply other threads:[~2021-09-13 19:30 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-09-13 19:30 [Cluster-devel] [GFS2 PATCH 0/4] gfs2: fix bugs related to node_scope and go_lock Bob Peterson
2021-09-13 19:30 ` [Cluster-devel] [GFS2 PATCH 1/4] gfs2: remove redundant check in gfs2_rgrp_go_lock Bob Peterson
2021-09-13 19:30 ` [Cluster-devel] [GFS2 PATCH 2/4] gfs2: Add GL_SKIP holder flag to dump_holder Bob Peterson
2021-09-13 19:30 ` [Cluster-devel] [GFS2 PATCH 3/4] gfs2: move GL_SKIP check from glops to do_promote Bob Peterson
2021-09-13 19:30 ` Bob Peterson [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210913193028.75116-5-rpeterso@redhat.com \
--to=rpeterso@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).