From mboxrd@z Thu Jan 1 00:00:00 1970
From: Alexander Aring
Date: Fri, 24 Sep 2021 20:23:53 -0400
Subject: [Cluster-devel] [PATCH RFC gfs2/for-next] fs: gfs2: fix suspicious RCU usage
Message-ID: <20210925002353.1861426-1-aahringo@redhat.com>
List-Id:
To: cluster-devel.redhat.com
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit

This patch fixes a suspicious RCU usage by deferring the "thaw_glock()"
call outside of the atomic context of "glock_hash_walk()". See:

[ 993.426039] =============================
[ 993.426765] WARNING: suspicious RCU usage
[ 993.427522] 5.14.0-rc2+ #265 Tainted: G W
[ 993.428492] -----------------------------
[ 993.429237] include/linux/rcupdate.h:328 Illegal context switch in RCU read-side critical section!
[ 993.430860] other info that might help us debug this:
[ 993.432304] rcu_scheduler_active = 2, debug_locks = 1
[ 993.433493] 3 locks held by kworker/u32:2/194:
[ 993.434319] #0: ffff888109c23148 ((wq_completion)gfs2_control){+.+.}-{0:0}, at: process_one_work+0x452/0xad0
[ 993.436135] #1: ffff888109507e10 ((work_completion)(&(&sdp->sd_control_work)->work)){+.+.}-{0:0}, at: process_one_work+0x452/0xad0
[ 993.438081] #2: ffffffff85ee05c0 (rcu_read_lock){....}-{1:2}, at: rhashtable_walk_start_check+0x0/0x520
[ 993.439665] stack backtrace:
[ 993.440402] CPU: 13 PID: 194 Comm: kworker/u32:2 Tainted: G W 5.14.0-rc2+ #265
[ 993.441786] Hardware name: Red Hat KVM/RHEL-AV, BIOS 1.14.0-1.module+el8.6.0+12648+6ede71a5 04/01/2014
[ 993.443304] Workqueue: gfs2_control gfs2_control_func
[ 993.444147] Call Trace:
[ 993.444565] dump_stack_lvl+0x56/0x6f
[ 993.445186] ___might_sleep+0x191/0x1e0
[ 993.445838] down_read+0x7b/0x460
[ 993.446400] ? down_write_killable+0x2b0/0x2b0
[ 993.447141] ? find_held_lock+0xb3/0xd0
[ 993.447794] ? do_raw_spin_unlock+0xa2/0x130
[ 993.448521] dlm_unlock+0x9e/0x1a0
[ 993.449102] ? dlm_lock+0x260/0x260
[ 993.449695] ? pvclock_clocksource_read+0xdc/0x180
[ 993.450495] ? kvm_clock_get_cycles+0x14/0x20
[ 993.451210] ? ktime_get_with_offset+0xc6/0x170
[ 993.451971] gdlm_put_lock+0x29e/0x2d0
[ 993.452599] ? gfs2_cancel_delete_work+0x40/0x40
[ 993.453361] glock_hash_walk+0x16c/0x180
[ 993.454014] ? gfs2_glock_seq_stop+0x30/0x30
[ 993.454754] process_one_work+0x55e/0xad0
[ 993.455443] ? pwq_dec_nr_in_flight+0x110/0x110
[ 993.456219] worker_thread+0x65/0x5e0
[ 993.456839] ? process_one_work+0xad0/0xad0
[ 993.457524] kthread+0x1ed/0x220
[ 993.458067] ? set_kthread_struct+0x80/0x80
[ 993.458764] ret_from_fork+0x22/0x30
[ 993.459426] BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1352
[ 993.460816] in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 194, name: kworker/u32:2
[ 993.462172] 3 locks held by kworker/u32:2/194:
[ 993.462916] #0: ffff888109c23148 ((wq_completion)gfs2_control){+.+.}-{0:0}, at: process_one_work+0x452/0xad0
[ 993.464542] #1: ffff888109507e10 ((work_completion)(&(&sdp->sd_control_work)->work)){+.+.}-{0:0}, at: process_one_work+0x452/0xad0
[ 993.466467] #2: ffffffff85ee05c0 (rcu_read_lock){....}-{1:2}, at: rhashtable_walk_start_check+0x0/0x520
[ 993.468016] CPU: 13 PID: 194 Comm: kworker/u32:2 Tainted: G W 5.14.0-rc2+ #265
[ 993.469378] Hardware name: Red Hat KVM/RHEL-AV, BIOS 1.14.0-1.module+el8.6.0+12648+6ede71a5 04/01/2014

The problem is that "thaw_glock()" will call
"sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);", which in the dlm case ends
up in the callback gdlm_put_lock() and in some cases finally calls
"dlm_unlock()". However, "dlm_unlock()" cannot be called from atomic
context because it takes semaphores, mutexes, etc.

This patch simply stores all glocks of the hash table in a linked list
and does the work afterwards.

Fixes: 88ffbf3e037e ("GFS2: Use resizable hash table for glocks")
Signed-off-by: Alexander Aring
---
I am not sure if "thaw_glock()" can simply be deferred until after
glock_hash_walk(). Also note that I extended the gfs2_glock structure
with another list entry; I am not sure whether this structure is somehow
optimized for cachelines, etc. Otherwise there needs to be a different
solution; however, this patch should show the problem and maybe a
possible fix?

It is unfortunate that the dlm API requires sleepable context even though
it seems to offer an asynchronous API; that is not easy to change yet.
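For reference, the general pattern the change follows is roughly the
sketch below (illustration only, not part of the patch; "obj",
"obj_table", "obj->ref", "deferred_entry" and "do_sleeping_work()" are
made-up names, and kref_get_unless_zero() stands in for the
lockref_get_not_dead() used on glocks): take a reference and collect the
objects on a local list while inside the RCU read-side section of the
rhashtable walk, then do the sleepable work after rhashtable_walk_stop():

	LIST_HEAD(deferred);
	struct obj *o;
	struct rhashtable_iter iter;

	/* Phase 1: the rhashtable walk runs under rcu_read_lock(), so we
	 * must not sleep here; only grab a reference and link the object
	 * onto a local list.
	 */
	rhashtable_walk_enter(&obj_table, &iter);
	do {
		rhashtable_walk_start(&iter);

		while ((o = rhashtable_walk_next(&iter)) && !IS_ERR(o))
			if (kref_get_unless_zero(&o->ref))
				list_add_tail(&o->deferred_entry, &deferred);

		rhashtable_walk_stop(&iter);
	} while (cond_resched(), o == ERR_PTR(-EAGAIN));
	rhashtable_walk_exit(&iter);

	/* Phase 2: the RCU read lock is dropped, so it is now safe to call
	 * something that may sleep (in the patch: thaw_glock(), which can
	 * end up in dlm_unlock()).
	 */
	list_for_each_entry(o, &deferred, deferred_entry)
		do_sleeping_work(o);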
 fs/gfs2/glock.c  | 34 ++++++++++++++++++++++++----------
 fs/gfs2/incore.h |  1 +
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 6dfd33dc206b..5c53b478ce2d 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -55,7 +55,7 @@ struct gfs2_glock_iter {
 	loff_t last_pos;		/* last position */
 };
 
-typedef void (*glock_examiner) (struct gfs2_glock * gl);
+typedef void (*glock_examiner) (struct gfs2_glock * gl, void *data);
 
 static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);
 
@@ -1890,7 +1890,8 @@ static struct shrinker glock_shrinker = {
  * that.
  */
 
-static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
+static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp,
+			    void *data)
 {
 	struct gfs2_glock *gl;
 	struct rhashtable_iter iter;
@@ -1903,7 +1904,7 @@ static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
 		while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
 			if (gl->gl_name.ln_sbd == sdp &&
 			    lockref_get_not_dead(&gl->gl_lockref))
-				examiner(gl);
+				examiner(gl, data);
 
 		rhashtable_walk_stop(&iter);
 	} while (cond_resched(), gl == ERR_PTR(-EAGAIN));
@@ -1937,7 +1938,7 @@ bool gfs2_delete_work_queued(const struct gfs2_glock *gl)
 	return test_bit(GLF_PENDING_DELETE, &gl->gl_flags);
 }
 
-static void flush_delete_work(struct gfs2_glock *gl)
+static void flush_delete_work(struct gfs2_glock *gl, void *data)
 {
 	if (gl->gl_name.ln_type == LM_TYPE_IOPEN) {
 		if (cancel_delayed_work(&gl->gl_delete)) {
@@ -1950,7 +1951,7 @@ static void flush_delete_work(struct gfs2_glock *gl)
 
 void gfs2_flush_delete_work(struct gfs2_sbd *sdp)
 {
-	glock_hash_walk(flush_delete_work, sdp);
+	glock_hash_walk(flush_delete_work, sdp, NULL);
 	flush_workqueue(gfs2_delete_workqueue);
 }
 
@@ -1976,7 +1977,7 @@ static void thaw_glock(struct gfs2_glock *gl)
  *
  */
 
-static void clear_glock(struct gfs2_glock *gl)
+static void clear_glock(struct gfs2_glock *gl, void *data)
 {
 	gfs2_glock_remove_from_lru(gl);
 
@@ -1987,6 +1988,13 @@ static void clear_glock(struct gfs2_glock *gl)
 	spin_unlock(&gl->gl_lockref.lock);
 }
 
+static void deferred_list_glock(struct gfs2_glock *gl, void *data)
+{
+	struct list_head *deferred_list = data;
+
+	list_add_tail(&gl->gl_deferred_list, deferred_list);
+}
+
 /**
  * gfs2_glock_thaw - Thaw any frozen glocks
  * @sdp: The super block
@@ -1995,7 +2003,13 @@ static void clear_glock(struct gfs2_glock *gl)
 
 void gfs2_glock_thaw(struct gfs2_sbd *sdp)
 {
-	glock_hash_walk(thaw_glock, sdp);
+	LIST_HEAD(deferred_list);
+	struct gfs2_glock *gl;
+
+	glock_hash_walk(deferred_list_glock, sdp, &deferred_list);
+
+	list_for_each_entry(gl, &deferred_list, gl_deferred_list)
+		thaw_glock(gl);
 }
 
 static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
@@ -2005,7 +2019,7 @@ static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
 	spin_unlock(&gl->gl_lockref.lock);
 }
 
-static void dump_glock_func(struct gfs2_glock *gl)
+static void dump_glock_func(struct gfs2_glock *gl, void *data)
 {
 	dump_glock(NULL, gl, true);
 }
@@ -2021,12 +2035,12 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
 {
 	set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
 	flush_workqueue(glock_workqueue);
-	glock_hash_walk(clear_glock, sdp);
+	glock_hash_walk(clear_glock, sdp, NULL);
 	flush_workqueue(glock_workqueue);
 	wait_event_timeout(sdp->sd_glock_wait,
 			   atomic_read(&sdp->sd_glock_disposal) == 0,
 			   HZ * 600);
-	glock_hash_walk(dump_glock_func, sdp);
+	glock_hash_walk(dump_glock_func, sdp, NULL);
 }
 
 void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index dc5c9dccb060..9d95084819c9 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -358,6 +358,7 @@ struct gfs2_glock {
 
 	struct list_head gl_lru;
 	struct list_head gl_ail_list;
+	struct list_head gl_deferred_list;
 	atomic_t gl_ail_count;
 	atomic_t gl_revokes;
 	struct delayed_work gl_work;
-- 
2.27.0