* [Cluster-devel] [GFS2 PATCH 3/4] gfs2: move GL_SKIP check from glops to do_promote
2021-09-13 19:30 [Cluster-devel] [GFS2 PATCH 0/4] gfs2: fix bugs related to node_scope and go_lock Bob Peterson
2021-09-13 19:30 ` [Cluster-devel] [GFS2 PATCH 1/4] gfs2: remove redundant check in gfs2_rgrp_go_lock Bob Peterson
2021-09-13 19:30 ` [Cluster-devel] [GFS2 PATCH 2/4] gfs2: Add GL_SKIP holder flag to dump_holder Bob Peterson
@ 2021-09-13 19:30 ` Bob Peterson
2021-09-13 19:30 ` [Cluster-devel] [GFS2 PATCH 4/4] gfs2: rework go_lock mechanism for node_scope race Bob Peterson
3 siblings, 0 replies; 5+ messages in thread
From: Bob Peterson @ 2021-09-13 19:30 UTC (permalink / raw)
To: cluster-devel.redhat.com
Before this patch, each individual "go_lock" glock operation
(glop) would check the GL_SKIP flag, and if set, would skip further
processing. This patch changes the logic so the go_lock caller,
function go_promote, checks the GL_SKIP flag before calling the
go_lock op in the first place. There are two main reasons for doing
this:
1. For cases in the GL_SKIP is specified, it avoids having to
unnecessarily unlock gl_lockref.lock only to re-lock it again.
2. This makes it a little easier to follow and understand the next
patch which breaks up function do_promote.
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
fs/gfs2/glock.c | 26 ++++++++++++++------------
fs/gfs2/glops.c | 2 +-
fs/gfs2/rgrp.c | 2 --
3 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 6144d7fe28e6..b8248ceff3c3 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -403,18 +403,20 @@ __acquires(&gl->gl_lockref.lock)
if (may_grant(gl, gh)) {
if (gh->gh_list.prev == &gl->gl_holders &&
glops->go_lock) {
- spin_unlock(&gl->gl_lockref.lock);
- /* FIXME: eliminate this eventually */
- ret = glops->go_lock(gh);
- spin_lock(&gl->gl_lockref.lock);
- if (ret) {
- if (ret == 1)
- return 2;
- gh->gh_error = ret;
- list_del_init(&gh->gh_list);
- trace_gfs2_glock_queue(gh, 0);
- gfs2_holder_wake(gh);
- goto restart;
+ if (!(gh->gh_flags & GL_SKIP)) {
+ spin_unlock(&gl->gl_lockref.lock);
+ /* FIXME: eliminate this eventually */
+ ret = glops->go_lock(gh);
+ spin_lock(&gl->gl_lockref.lock);
+ if (ret) {
+ if (ret == 1)
+ return 2;
+ gh->gh_error = ret;
+ list_del_init(&gh->gh_list);
+ trace_gfs2_glock_queue(gh, 0);
+ gfs2_holder_wake(gh);
+ goto restart;
+ }
}
set_bit(HIF_HOLDER, &gh->gh_iflags);
trace_gfs2_promote(gh, 1);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 79c621c7863d..4b19f513570f 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -495,7 +495,7 @@ static int inode_go_lock(struct gfs2_holder *gh)
struct gfs2_inode *ip = gl->gl_object;
int error = 0;
- if (!ip || (gh->gh_flags & GL_SKIP))
+ if (!ip)
return 0;
if (test_bit(GIF_INVALID, &ip->i_flags)) {
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 7a13a687e4f2..1fb66f6e6a0c 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1292,8 +1292,6 @@ int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
{
struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
- if (gh->gh_flags & GL_SKIP)
- return 0;
return gfs2_rgrp_bh_get(rgd);
}
--
2.31.1
^ permalink raw reply related [flat|nested] 5+ messages in thread* [Cluster-devel] [GFS2 PATCH 4/4] gfs2: rework go_lock mechanism for node_scope race
2021-09-13 19:30 [Cluster-devel] [GFS2 PATCH 0/4] gfs2: fix bugs related to node_scope and go_lock Bob Peterson
` (2 preceding siblings ...)
2021-09-13 19:30 ` [Cluster-devel] [GFS2 PATCH 3/4] gfs2: move GL_SKIP check from glops to do_promote Bob Peterson
@ 2021-09-13 19:30 ` Bob Peterson
3 siblings, 0 replies; 5+ messages in thread
From: Bob Peterson @ 2021-09-13 19:30 UTC (permalink / raw)
To: cluster-devel.redhat.com
Before this patch, glocks only performed their go_lock glop function
when the first holder was queued. When we introduced the new "node
scope" mechanism, we allowed multiple holders to hold a glock in EX
at the same time, with local locking. But it introduced a new race:
If the first holder specified GL_SKIP (skip the go_lock function until
deemed necessary) any secondary holders WITHOUT GL_SKIP assumed the
go_lock mechanism had been performed. In the case of rgrps, that was
not always the case: for example, when the first process is searching
for free blocks for allocation (GL_SKIP) and a second holder (without
GL_SKIP) wants to free blocks, which just assumes the buffers exist.
To remedy this race, this patch reworks how the GL_SKIP mechanism is
managed. The first holder checks if GL_SKIP is specified, and if so,
a new glock flag, GLF_GO_LOCK_SKIPPED, is set. Subsequent holders
without GL_SKIP who need the lock check this flag to see if the
go_lock function was skipped, and call it if necessary.
This also "accidentally" fixes a minor bug with glock do-promote
tracing: glocks that had no "go_lock" glop would not properly log the
"first" flag to the kernel trace.
Fixes: 06e908cd9ead ("gfs2: Allow node-wide exclusive glock sharing")
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
---
fs/gfs2/glock.c | 81 ++++++++++++++++++++++++++++++++----------------
fs/gfs2/glops.c | 1 +
fs/gfs2/incore.h | 1 +
fs/gfs2/rgrp.c | 1 +
4 files changed, 58 insertions(+), 26 deletions(-)
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index b8248ceff3c3..faad2cb105bb 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -380,6 +380,46 @@ static void do_error(struct gfs2_glock *gl, const int ret)
}
}
+static int glock_go_lock(struct gfs2_glock *gl, struct gfs2_holder *gh,
+ struct gfs2_holder *gh1)
+__releases(&gl->gl_lockref.lock)
+__acquires(&gl->gl_lockref.lock)
+{
+ const struct gfs2_glock_operations *glops = gl->gl_ops;
+ int ret = 0;
+
+ /*
+ * When the first holder on the list is granted, it can SKIP or not.
+ * If it skips go_lock, it sets the GLF_GO_LOCK_SKIPPED flag.
+ * All subsequent holders need to know if it was skipped. They may
+ * also choose to skip go_lock, but only the first granted should
+ * set GLF_GO_LOCK_SKIPPED. If not skipped, they need to know if
+ * go_lock was previously skipped, call go_lock, and clear the flag.
+ */
+ if (gh->gh_flags & GL_SKIP) {
+ if (gh == gh1)
+ set_bit(GLF_GO_LOCK_SKIPPED, &gl->gl_flags);
+ goto out;
+ }
+ if ((gh == gh1) || test_bit(GLF_GO_LOCK_SKIPPED, &gl->gl_flags)) {
+ spin_unlock(&gl->gl_lockref.lock);
+ /* FIXME: eliminate this eventually */
+ ret = glops->go_lock(gh);
+ spin_lock(&gl->gl_lockref.lock);
+ clear_bit(GLF_GO_LOCK_SKIPPED, &gl->gl_flags);
+ if (ret) {
+ if (ret == 1)
+ return 2;
+ gh->gh_error = ret;
+ list_del_init(&gh->gh_list);
+ trace_gfs2_glock_queue(gh, 0);
+ gfs2_holder_wake(gh);
+ }
+ }
+out:
+ return ret;
+}
+
/**
* do_promote - promote as many requests as possible on the current queue
* @gl: The glock
@@ -389,46 +429,33 @@ static void do_error(struct gfs2_glock *gl, const int ret)
*/
static int do_promote(struct gfs2_glock *gl)
-__releases(&gl->gl_lockref.lock)
-__acquires(&gl->gl_lockref.lock)
{
const struct gfs2_glock_operations *glops = gl->gl_ops;
- struct gfs2_holder *gh, *tmp;
+ struct gfs2_holder *gh, *gh1, *tmp;
int ret;
restart:
+ gh1 = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
+
list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
if (test_bit(HIF_HOLDER, &gh->gh_iflags))
continue;
if (may_grant(gl, gh)) {
- if (gh->gh_list.prev == &gl->gl_holders &&
- glops->go_lock) {
- if (!(gh->gh_flags & GL_SKIP)) {
- spin_unlock(&gl->gl_lockref.lock);
- /* FIXME: eliminate this eventually */
- ret = glops->go_lock(gh);
- spin_lock(&gl->gl_lockref.lock);
- if (ret) {
- if (ret == 1)
- return 2;
- gh->gh_error = ret;
- list_del_init(&gh->gh_list);
- trace_gfs2_glock_queue(gh, 0);
- gfs2_holder_wake(gh);
- goto restart;
- }
- }
- set_bit(HIF_HOLDER, &gh->gh_iflags);
- trace_gfs2_promote(gh, 1);
- gfs2_holder_wake(gh);
- goto restart;
+ ret = 0;
+ if (glops->go_lock) {
+ ret = glock_go_lock(gl, gh, gh1);
+ if (ret == 2)
+ return ret;
}
+
set_bit(HIF_HOLDER, &gh->gh_iflags);
- trace_gfs2_promote(gh, 0);
+ trace_gfs2_promote(gh, gh == gh1 ? 1 : 0);
gfs2_holder_wake(gh);
+ if (ret)
+ goto restart;
continue;
}
- if (gh->gh_list.prev == &gl->gl_holders)
+ if (gh == gh1)
return 1;
do_error(gl, 0);
break;
@@ -2148,6 +2175,8 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
*p++ = 'P';
if (test_bit(GLF_FREEING, gflags))
*p++ = 'x';
+ if (test_bit(GLF_GO_LOCK_SKIPPED, gflags))
+ *p++ = 's';
*p = 0;
return buf;
}
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 4b19f513570f..923efcd71269 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -477,6 +477,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip)
error = gfs2_dinode_in(ip, dibh->b_data);
brelse(dibh);
clear_bit(GIF_INVALID, &ip->i_flags);
+ clear_bit(GLF_GO_LOCK_SKIPPED, &ip->i_gl->gl_flags);
return error;
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 0fe49770166e..4880818db83d 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -315,6 +315,7 @@ struct gfs2_alloc_parms {
enum {
GLF_LOCK = 1,
+ GLF_GO_LOCK_SKIPPED = 2, /* go_lock was skipped */
GLF_DEMOTE = 3,
GLF_PENDING_DEMOTE = 4,
GLF_DEMOTE_IN_PROGRESS = 5,
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 1fb66f6e6a0c..83fd5c750041 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1248,6 +1248,7 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
if (rgd->rd_rgl->rl_unlinked == 0)
rgd->rd_flags &= ~GFS2_RDF_CHECK;
}
+ clear_bit(GLF_GO_LOCK_SKIPPED, &gl->gl_flags);
return 0;
fail:
--
2.31.1
^ permalink raw reply related [flat|nested] 5+ messages in thread