* [Cluster-devel] [PATCH RFC gfs2/for-next] fs: gfs2: fix suspicious RCU usage
From: Alexander Aring @ 2021-09-25  0:23 UTC
  To: cluster-devel.redhat.com

This patch fixes a suspicious RCU usage by deferring the "thaw_glock()"
call outside of the atomic context of "glock_hash_walk()". See:

[  993.426039] =============================
[  993.426765] WARNING: suspicious RCU usage
[  993.427522] 5.14.0-rc2+ #265 Tainted: G        W
[  993.428492] -----------------------------
[  993.429237] include/linux/rcupdate.h:328 Illegal context switch in RCU read-side critical section!
[  993.430860]
               other info that might help us debug this:

[  993.432304]
               rcu_scheduler_active = 2, debug_locks = 1
[  993.433493] 3 locks held by kworker/u32:2/194:
[  993.434319]  #0: ffff888109c23148 ((wq_completion)gfs2_control){+.+.}-{0:0}, at: process_one_work+0x452/0xad0
[  993.436135]  #1: ffff888109507e10 ((work_completion)(&(&sdp->sd_control_work)->work)){+.+.}-{0:0}, at: process_one_work+0x452/0xad0
[  993.438081]  #2: ffffffff85ee05c0 (rcu_read_lock){....}-{1:2}, at: rhashtable_walk_start_check+0x0/0x520
[  993.439665]
               stack backtrace:
[  993.440402] CPU: 13 PID: 194 Comm: kworker/u32:2 Tainted: G        W         5.14.0-rc2+ #265
[  993.441786] Hardware name: Red Hat KVM/RHEL-AV, BIOS 1.14.0-1.module+el8.6.0+12648+6ede71a5 04/01/2014
[  993.443304] Workqueue: gfs2_control gfs2_control_func
[  993.444147] Call Trace:
[  993.444565]  dump_stack_lvl+0x56/0x6f
[  993.445186]  ___might_sleep+0x191/0x1e0
[  993.445838]  down_read+0x7b/0x460
[  993.446400]  ? down_write_killable+0x2b0/0x2b0
[  993.447141]  ? find_held_lock+0xb3/0xd0
[  993.447794]  ? do_raw_spin_unlock+0xa2/0x130
[  993.448521]  dlm_unlock+0x9e/0x1a0
[  993.449102]  ? dlm_lock+0x260/0x260
[  993.449695]  ? pvclock_clocksource_read+0xdc/0x180
[  993.450495]  ? kvm_clock_get_cycles+0x14/0x20
[  993.451210]  ? ktime_get_with_offset+0xc6/0x170
[  993.451971]  gdlm_put_lock+0x29e/0x2d0
[  993.452599]  ? gfs2_cancel_delete_work+0x40/0x40
[  993.453361]  glock_hash_walk+0x16c/0x180
[  993.454014]  ? gfs2_glock_seq_stop+0x30/0x30
[  993.454754]  process_one_work+0x55e/0xad0
[  993.455443]  ? pwq_dec_nr_in_flight+0x110/0x110
[  993.456219]  worker_thread+0x65/0x5e0
[  993.456839]  ? process_one_work+0xad0/0xad0
[  993.457524]  kthread+0x1ed/0x220
[  993.458067]  ? set_kthread_struct+0x80/0x80
[  993.458764]  ret_from_fork+0x22/0x30
[  993.459426] BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1352
[  993.460816] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 194, name: kworker/u32:2
[  993.462172] 3 locks held by kworker/u32:2/194:
[  993.462916]  #0: ffff888109c23148 ((wq_completion)gfs2_control){+.+.}-{0:0}, at: process_one_work+0x452/0xad0
[  993.464542]  #1: ffff888109507e10 ((work_completion)(&(&sdp->sd_control_work)->work)){+.+.}-{0:0}, at: process_one_work+0x452/0xad0
[  993.466467]  #2: ffffffff85ee05c0 (rcu_read_lock){....}-{1:2}, at: rhashtable_walk_start_check+0x0/0x520
[  993.468016] CPU: 13 PID: 194 Comm: kworker/u32:2 Tainted: G        W         5.14.0-rc2+ #265
[  993.469378] Hardware name: Red Hat KVM/RHEL-AV, BIOS 1.14.0-1.module+el8.6.0+12648+6ede71a5 04/01/2014

The problem is that "thaw_glock()" calls
"sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);", which in the dlm case
ends up in the callback gdlm_put_lock(), and this in some cases finally
calls "dlm_unlock()". However, "dlm_unlock()" cannot be called from
atomic context because it takes semaphores, mutexes, etc.
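
To make the call chain concrete, here is a minimal sketch of the
pattern (condensed from glock_hash_walk() and the backtrace above, not
verbatim kernel code):

	rhashtable_walk_start(&iter);	/* enters an RCU read-side section */
	while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
		examiner(gl);		/* examiner == thaw_glock() here */
	rhashtable_walk_stop(&iter);	/* leaves the RCU read-side section */

	/*
	 * thaw_glock() -> ls_ops->lm_put_lock() -> gdlm_put_lock() ->
	 * dlm_unlock(), and dlm_unlock() may block (see down_read() in
	 * the backtrace above), which is illegal under rcu_read_lock().
	 */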

This patch simply stores all glocks of the hash table in a linked list
and does the actual thawing afterwards, outside the atomic context.

Fixes: 88ffbf3e037e ("GFS2: Use resizable hash table for glocks")
Signed-off-by: Alexander Aring <aahringo@redhat.com>
---
I am not sure whether "thaw_glock()" can simply be deferred until after
glock_hash_walk(). Also note that I extended the gfs2_glock structure
with another list entry; I am not sure whether this structure is
optimized for cachelines, etc. Otherwise a different solution is
needed, but this patch should at least show the problem and a possible
fix.

It is sad that the dlm API requires sleepable context even though it
seems to offer an asynchronous API; that is not easy to change yet.
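
For reference, a sketch of the unlock call as gdlm_put_lock() issues it
(arguments approximated from fs/gfs2/lock_dlm.c; completion is
delivered asynchronously via the AST callback, but the call itself may
still sleep):

	/*
	 * The result arrives later through the AST callback, but
	 * dlm_unlock() itself can block on internal locks, so it must
	 * not be issued from atomic context.
	 */
	error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid,
			   DLM_LKF_VALBLK, NULL, gl);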

 fs/gfs2/glock.c  | 34 ++++++++++++++++++++++++----------
 fs/gfs2/incore.h |  1 +
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 6dfd33dc206b..5c53b478ce2d 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -55,7 +55,7 @@ struct gfs2_glock_iter {
 	loff_t last_pos;		/* last position               */
 };
 
-typedef void (*glock_examiner) (struct gfs2_glock * gl);
+typedef void (*glock_examiner) (struct gfs2_glock * gl, void *data);
 
 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
 
@@ -1890,7 +1890,8 @@ static struct shrinker glock_shrinker = {
  * that.
  */
 
-static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
+static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp,
+			    void *data)
 {
 	struct gfs2_glock *gl;
 	struct rhashtable_iter iter;
@@ -1903,7 +1904,7 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
 		while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
 			if (gl->gl_name.ln_sbd == sdp &&
 			    lockref_get_not_dead(&gl->gl_lockref))
-				examiner(gl);
+				examiner(gl, data);
 
 		rhashtable_walk_stop(&iter);
 	} while (cond_resched(), gl == ERR_PTR(-EAGAIN));
@@ -1937,7 +1938,7 @@ bool gfs2_delete_work_queued(const struct gfs2_glock *gl)
 	return test_bit(GLF_PENDING_DELETE, &gl->gl_flags);
 }
 
-static void flush_delete_work(struct gfs2_glock *gl)
+static void flush_delete_work(struct gfs2_glock *gl, void *data)
 {
 	if (gl->gl_name.ln_type == LM_TYPE_IOPEN) {
 		if (cancel_delayed_work(&gl->gl_delete)) {
@@ -1950,7 +1951,7 @@ static void flush_delete_work(struct gfs2_glock *gl)
 
 void gfs2_flush_delete_work(struct gfs2_sbd *sdp)
 {
-	glock_hash_walk(flush_delete_work, sdp);
+	glock_hash_walk(flush_delete_work, sdp, NULL);
 	flush_workqueue(gfs2_delete_workqueue);
 }
 
@@ -1976,7 +1977,7 @@ static void thaw_glock(struct gfs2_glock *gl)
  *
  */
 
-static void clear_glock(struct gfs2_glock *gl)
+static void clear_glock(struct gfs2_glock *gl, void *data)
 {
 	gfs2_glock_remove_from_lru(gl);
 
@@ -1987,6 +1988,13 @@ static void clear_glock(struct gfs2_glock *gl)
 	spin_unlock(&gl->gl_lockref.lock);
 }
 
+static void deferred_list_glock(struct gfs2_glock *gl, void *data)
+{
+	struct list_head *deferred_list = data;
+
+	list_add_tail(&gl->gl_deferred_list, deferred_list);
+}
+
 /**
  * gfs2_glock_thaw - Thaw any frozen glocks
  * @sdp: The super block
@@ -1995,7 +2003,13 @@ static void clear_glock(struct gfs2_glock *gl)
 
 void gfs2_glock_thaw(struct gfs2_sbd *sdp)
 {
-	glock_hash_walk(thaw_glock, sdp);
+	LIST_HEAD(deferred_list);
+	struct gfs2_glock *gl;
+
+	glock_hash_walk(deferred_list_glock, sdp, &deferred_list);
+
+	list_for_each_entry(gl, &deferred_list, gl_deferred_list)
+		thaw_glock(gl);
 }
 
 static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
@@ -2005,7 +2019,7 @@ static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
 	spin_unlock(&gl->gl_lockref.lock);
 }
 
-static void dump_glock_func(struct gfs2_glock *gl)
+static void dump_glock_func(struct gfs2_glock *gl, void *data)
 {
 	dump_glock(NULL, gl, true);
 }
@@ -2021,12 +2035,12 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
 {
 	set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
 	flush_workqueue(glock_workqueue);
-	glock_hash_walk(clear_glock, sdp);
+	glock_hash_walk(clear_glock, sdp, NULL);
 	flush_workqueue(glock_workqueue);
 	wait_event_timeout(sdp->sd_glock_wait,
 			   atomic_read(&sdp->sd_glock_disposal) == 0,
 			   HZ * 600);
-	glock_hash_walk(dump_glock_func, sdp);
+	glock_hash_walk(dump_glock_func, sdp, NULL);
 }
 
 void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index dc5c9dccb060..9d95084819c9 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -358,6 +358,7 @@ struct gfs2_glock {
 
 	struct list_head gl_lru;
 	struct list_head gl_ail_list;
+	struct list_head gl_deferred_list;
 	atomic_t gl_ail_count;
 	atomic_t gl_revokes;
 	struct delayed_work gl_work;
-- 
2.27.0


