From: Mike Galbraith <umgwanakikbuti@gmail.com>
To: Lai Jiangshan <laijs@cn.fujitsu.com>
Cc: RT <linux-rt-users@vger.kernel.org>,
LKML <linux-kernel@vger.kernel.org>,
Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
Steven Rostedt <rostedt@goodmis.org>,
Thomas Gleixner <tglx@linutronix.de>,
"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>,
Benjamin LaHaise <bcrl@kvack.org>
Subject: [RFC PATCH V2] rt/aio: fix rcu garbage collection might_sleep() splat
Date: Tue, 10 Jun 2014 05:47:28 +0200 [thread overview]
Message-ID: <1402372048.5124.20.camel@marge.simpson.net> (raw)
In-Reply-To: <5395172E.4010007@cn.fujitsu.com>
On Mon, 2014-06-09 at 10:08 +0800, Lai Jiangshan wrote:
> Hi, rt-people
>
> I don't think it is the correct direction.
> Softirq (including local_bh_disable()) in RT kernel should be preemptible.
How about the below then?
I was sorely tempted to post a tiny variant that dropped taking ctx_lock
in free_ioctx_users() entirely, as someone diddling with no reference
didn't make sense. Cc Ben, he would know.
[ 172.743098] BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:768
[ 172.743116] in_atomic(): 1, irqs_disabled(): 0, pid: 26, name: rcuos/2
[ 172.743117] 2 locks held by rcuos/2/26:
[ 172.743128] #0: (rcu_callback){.+.+..}, at: [<ffffffff810b1a12>] rcu_nocb_kthread+0x1e2/0x380
[ 172.743135] #1: (rcu_read_lock_sched){.+.+..}, at: [<ffffffff812acd26>] percpu_ref_kill_rcu+0xa6/0x1c0
[ 172.743138] Preemption disabled at:[<ffffffff810b1a93>] rcu_nocb_kthread+0x263/0x380
[ 172.743138]
[ 172.743142] CPU: 0 PID: 26 Comm: rcuos/2 Not tainted 3.14.4-rt5 #31
[ 172.743143] Hardware name: MEDIONPC MS-7502/MS-7502, BIOS 6.00 PG 12/26/2007
[ 172.743148] ffff8802231aa190 ffff8802231a5d08 ffffffff81582e9e 0000000000000000
[ 172.743151] ffff8802231a5d28 ffffffff81077aeb ffff880209f68140 ffff880209f681c0
[ 172.743154] ffff8802231a5d48 ffffffff81589304 ffff880209f68000 ffff880209f68000
[ 172.743155] Call Trace:
[ 172.743160] [<ffffffff81582e9e>] dump_stack+0x4e/0x9c
[ 172.743163] [<ffffffff81077aeb>] __might_sleep+0xfb/0x170
[ 172.743167] [<ffffffff81589304>] rt_spin_lock+0x24/0x70
[ 172.743171] [<ffffffff811c5790>] free_ioctx_users+0x30/0x130
[ 172.743174] [<ffffffff812ace34>] percpu_ref_kill_rcu+0x1b4/0x1c0
[ 172.743177] [<ffffffff812acd26>] ? percpu_ref_kill_rcu+0xa6/0x1c0
[ 172.743180] [<ffffffff812acc80>] ? percpu_ref_kill_and_confirm+0x70/0x70
[ 172.743183] [<ffffffff810b1a93>] rcu_nocb_kthread+0x263/0x380
[ 172.743185] [<ffffffff810b1a12>] ? rcu_nocb_kthread+0x1e2/0x380
[ 172.743189] [<ffffffff810b1830>] ? rcu_report_exp_rnp.isra.52+0xc0/0xc0
[ 172.743192] [<ffffffff8106e046>] kthread+0xd6/0xf0
[ 172.743194] [<ffffffff8158900c>] ? _raw_spin_unlock_irq+0x2c/0x70
[ 172.743197] [<ffffffff8106df70>] ? __kthread_parkme+0x70/0x70
[ 172.743200] [<ffffffff81591eec>] ret_from_fork+0x7c/0xb0
[ 172.743203] [<ffffffff8106df70>] ? __kthread_parkme+0x70/0x70
crash> gdb list *percpu_ref_kill_rcu+0x1b4
0xffffffff812ace34 is in percpu_ref_kill_rcu (include/linux/percpu-refcount.h:169).
164 pcpu_count = ACCESS_ONCE(ref->pcpu_count);
165
166 if (likely(REF_STATUS(pcpu_count) == PCPU_REF_PTR))
167 __this_cpu_dec(*pcpu_count);
168 else if (unlikely(atomic_dec_and_test(&ref->count)))
169 ref->release(ref);
170
171 rcu_read_unlock_sched();
172 }
173
ref->release() path can't take sleeping locks, but in an rt kernel it does.
Defer ->release() work via call_rcu() to move sleeping locks out from under
rcu_read_lock_sched().
Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
---
fs/aio.c | 38 +++++++++++++++++++++++++-------------
1 file changed, 25 insertions(+), 13 deletions(-)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -110,8 +110,6 @@ struct kioctx {
struct page **ring_pages;
long nr_pages;
- struct work_struct free_work;
-
struct {
/*
* This counts the number of available slots in the ringbuffer,
@@ -143,6 +141,7 @@ struct kioctx {
struct file *aio_ring_file;
unsigned id;
+ struct rcu_head rcu;
};
/*------ sysctl variables----*/
@@ -493,9 +492,9 @@ static int kiocb_cancel(struct kioctx *c
return cancel(kiocb);
}
-static void free_ioctx(struct work_struct *work)
+static void free_ioctx_rcu(struct rcu_head *rcu)
{
- struct kioctx *ctx = container_of(work, struct kioctx, free_work);
+ struct kioctx *ctx = container_of(rcu, struct kioctx, rcu);
pr_debug("freeing %p\n", ctx);
@@ -508,18 +507,12 @@ static void free_ioctx_reqs(struct percp
{
struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
- INIT_WORK(&ctx->free_work, free_ioctx);
- schedule_work(&ctx->free_work);
+ call_rcu(&ctx->rcu, free_ioctx_rcu);
}
-/*
- * When this function runs, the kioctx has been removed from the "hash table"
- * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
- * now it's safe to cancel any that need to be.
- */
-static void free_ioctx_users(struct percpu_ref *ref)
+static void free_ioctx_users_rcu(struct rcu_head *rcu)
{
- struct kioctx *ctx = container_of(ref, struct kioctx, users);
+ struct kioctx *ctx = container_of(rcu, struct kioctx, rcu);
struct kiocb *req;
spin_lock_irq(&ctx->ctx_lock);
@@ -538,6 +531,25 @@ static void free_ioctx_users(struct perc
percpu_ref_put(&ctx->reqs);
}
+/*
+ * When this function runs, the kioctx has been removed from the "hash table"
+ * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
+ * now it's safe to cancel any that need to be.
+ *
+ * NOTE: -rt must defer this to avoid taking sleeping ctx_lock while under
+ * rcu_real_lock_sched() during ->release().
+ */
+static void free_ioctx_users(struct percpu_ref *ref)
+{
+ struct kioctx *ctx = container_of(ref, struct kioctx, users);
+
+#ifdef CONFIG_PREEMPT_RT_BASE
+ call_rcu(&ctx->rcu, free_ioctx_users_rcu);
+#else
+ free_ioctx_users_rcu(&ctx->rcu);
+#endif
+}
+
static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
{
unsigned i, new_nr;
next prev parent reply other threads:[~2014-06-10 3:47 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-06-08 8:35 [RFC PATCH] rt/aio: fix rcu garbage collection might_sleep() splat Mike Galbraith
2014-06-09 2:08 ` Lai Jiangshan
2014-06-09 3:17 ` Mike Galbraith
2014-06-09 6:22 ` Mike Galbraith
2014-06-09 9:04 ` Pavel Vasilyev
2014-06-10 3:47 ` Mike Galbraith [this message]
2014-06-10 17:50 ` [RFC PATCH V2] " Benjamin LaHaise
2014-06-11 4:10 ` Mike Galbraith
2014-06-12 20:26 ` Kent Overstreet
2014-06-25 15:24 ` Benjamin LaHaise
2014-06-26 7:37 ` Mike Galbraith
2014-06-26 16:42 ` Benjamin LaHaise
2015-02-16 17:59 ` Sebastian Andrzej Siewior
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1402372048.5124.20.camel@marge.simpson.net \
--to=umgwanakikbuti@gmail.com \
--cc=bcrl@kvack.org \
--cc=bigeasy@linutronix.de \
--cc=laijs@cn.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rt-users@vger.kernel.org \
--cc=paulmck@linux.vnet.ibm.com \
--cc=rostedt@goodmis.org \
--cc=tglx@linutronix.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.