From: Austin Schuh <austin@peloton-tech.com>
To: Richard Weinberger <richard.weinberger@gmail.com>,
Thomas Gleixner <tglx@linutronix.de>,
Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: LKML <linux-kernel@vger.kernel.org>,
rt-users <linux-rt-users@vger.kernel.org>
Subject: Re: Filesystem lockup with CONFIG_PREEMPT_RT
Date: Thu, 26 Jun 2014 12:50:24 -0700 [thread overview]
Message-ID: <CANGgnMYVoP-Z0Bv-VDEkJnvfa7Fi4-zY2F4A0PhMewGvwo3VVw@mail.gmail.com> (raw)
In-Reply-To: <CAFLxGvxfBt7OvW=a2Kz08GLHSEiiOZsN-vB19CXnQiwqFxqMsA@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 1371 bytes --]
On Wed, May 21, 2014 at 12:33 AM, Richard Weinberger
<richard.weinberger@gmail.com> wrote:
> CC'ing RT folks
>
> On Wed, May 21, 2014 at 8:23 AM, Austin Schuh <austin@peloton-tech.com> wrote:
>> On Tue, May 13, 2014 at 7:29 PM, Austin Schuh <austin@peloton-tech.com> wrote:
>>> Hi,
>>>
>>> I am observing a filesystem lockup with XFS on a CONFIG_PREEMPT_RT
>>> patched kernel. I have currently only triggered it using dpkg. Dave
>>> Chinner on the XFS mailing list suggested that it was an rt-kernel
>>> workqueue issue as opposed to an XFS problem after looking at the
>>> kernel messages.
I've got a 100% reproducible test case that doesn't involve a
filesystem. I wrote a module that triggers the bug when the device is
written to, making it easy to enable tracing during the event and
capture everything.
It looks like wq_worker_sleeping does not get run when a work item
goes to sleep on a rw_semaphore. This only happens with the
RT patches, not with the mainline kernel. I'm foreseeing a second
deadlock/bug coming into play shortly. If a task holding the work
pool spinlock gets preempted, and we need to schedule more work from
another worker thread which was just blocked by a mutex, we'll then
end up trying to go to sleep on 2 locks at once.
That is getting a bit deep into the scheduler for me... Any
suggestions on how to fix it?
Austin
[-- Attachment #2: killer_module.c --]
[-- Type: text/x-csrc, Size: 4183 bytes --]
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <asm/uaccess.h>
#include <linux/semaphore.h>
// Forward declarations of the file_operations callbacks defined further down.
static int device_open(struct inode *inode, struct file *file);
static int device_release(struct inode *inode, struct file *file);
static ssize_t device_read(struct file *filp, char *buffer, size_t length,
                           loff_t *offset);
static ssize_t device_write(struct file *filp, const char *buff, size_t len,
                            loff_t *off);
// Name under which the device is listed in /proc/devices.
#define DEVICE_NAME "aschuh"
// The two workqueues the write handler queues its work items on.
static struct workqueue_struct *lockup_wq1;
static struct workqueue_struct *lockup_wq2;
// Major number handed back by register_chrdev() for this driver.
static int major;
// Callbacks registered for the character device: lifetime first, then I/O.
static struct file_operations fops = {
  .open = device_open,
  .release = device_release,
  .read = device_read,
  .write = device_write,
};
// Module entry point: allocates both workqueues, then registers the character
// device with a dynamically assigned major number.
//
// Returns 0 on success. Returns -ENOMEM if either workqueue cannot be
// allocated, or the negative value from register_chrdev() if registration
// fails; anything allocated before the failure is destroyed again.
static int __init init_killer_module(void) {
  lockup_wq1 = alloc_workqueue("lockup_wq1", WQ_MEM_RECLAIM, 0);
  if (!lockup_wq1) {
    return -ENOMEM;
  }

  lockup_wq2 = alloc_workqueue("lockup_wq2", WQ_MEM_RECLAIM, 0);
  if (!lockup_wq2) {
    destroy_workqueue(lockup_wq1);
    return -ENOMEM;
  }

  // Passing 0 asks for a dynamically allocated major number.
  major = register_chrdev(0, DEVICE_NAME, &fops);
  if (major >= 0) {
    // Tell the user how to create the device node for the assigned major.
    printk(KERN_INFO "'mknod /dev/%s c %d 0'.\n", DEVICE_NAME, major);
    // A non 0 return means init_module failed; module can't be loaded.
    return 0;
  }

  printk(KERN_ALERT "Registering char device failed with %d\n", major);
  destroy_workqueue(lockup_wq1);
  destroy_workqueue(lockup_wq2);
  return major;
}
// Called when a process tries to open the device file.
//
// Takes a reference on this module for as long as the file stays open; the
// reference is dropped again by module_put() in device_release().
//
// Returns 0 on success, or -ENODEV when the module reference cannot be taken,
// so that a failed get is never reported as a successful open and later
// paired with an unbalanced module_put().
static int device_open(struct inode *inode, struct file *file) {
  if (!try_module_get(THIS_MODULE)) {
    return -ENODEV;
  }
  return 0;
}
// Called when a process closes the device file. Always returns 0.
static int device_release(struct inode *inode, struct file *file) {
// Decrement the usage count taken in device_open(), or else once you opened
// the file, you'll never get rid of the module.
module_put(THIS_MODULE);
return 0;
}
// Read handler. Transfers no data and always returns 0 (zero bytes read);
// none of the arguments are used.
static ssize_t device_read(struct file *filp, char *buffer, size_t length,
loff_t *offset) {
return 0;
}
// Lock primitive used by the reproducer, selected at compile time.
// The `#if 0` branch (a struct semaphore initialised to a count of 1) is
// compiled out; the active `#else` branch uses an rw_semaphore that is taken
// and released for writing. Change `#if 0` to `#if 1` to switch variants.
#if 0
#define SEM_INIT(sem) sema_init(sem, 1)
#define SEM_TYPE struct semaphore
#define SEM_DOWN(sem) down(sem)
#define SEM_UP(sem) up(sem)
#else
#define SEM_INIT(sem) init_rwsem(sem)
#define SEM_TYPE struct rw_semaphore
#define SEM_DOWN(sem) down_write_nested(sem, 0)
#define SEM_UP(sem) up_write(sem)
#endif
// One queued work item together with the data its handler needs.
struct mywork {
// Embedded work item; handlers recover the enclosing mywork via container_of.
struct work_struct work;
// Value logged by the handlers (device_write stores the write length here).
int index;
// Lock that work2 acquires and releases; not touched by work1.
SEM_TYPE *sem;
};
// Work handler that only logs the index of the mywork item it was invoked for.
static void work1(struct work_struct *work) {
  struct mywork *item;

  // Recover the enclosing mywork from its embedded work_struct.
  item = container_of(work, struct mywork, work);
  trace_printk("work1 Called with index %d\n", item->index);
}
// Work handler that logs its index, acquires and immediately releases the
// item's lock, then logs again. It therefore cannot finish until whoever
// currently holds that lock releases it.
static void work2(struct work_struct *work) {
  struct mywork *item;

  // Recover the enclosing mywork from its embedded work_struct.
  item = container_of(work, struct mywork, work);
  trace_printk("work2 Called with index %d\n", item->index);

  // May sleep here while the lock is held elsewhere.
  SEM_DOWN(item->sem);
  SEM_UP(item->sem);

  trace_printk("work2 Finished with index %d\n", item->index);
}
// Write handler that runs the reproducer. The written data is never read
// (buff, filp and off are unused); only the length len is used.
//
// Sequence:
//   1. Initialise and take write_sem, a lock living on this stack frame.
//   2. Queue work2 on lockup_wq2, then work1 on lockup_wq1. work2 cannot get
//      past its SEM_DOWN until write_sem is released in step 4.
//   3. Wait for work1 to finish while still holding write_sem.
//   4. Release write_sem, then wait for work2 to finish.
//
// Returns len, i.e. reports the whole write as consumed.
//
// NOTE(review): presumably on an affected kernel step 3 never completes
// because work2 sleeping on the lock keeps work1 from being run -- confirm
// against the bug report this module was written for.
static ssize_t device_write(struct file *filp, const char *buff, size_t len,
loff_t *off) {
SEM_TYPE write_sem;
SEM_INIT(&write_sem);
// NOTE(review): these declarations follow a statement (SEM_INIT above).
struct mywork my_work1;
struct mywork my_work2;
trace_printk("lockup_wq1 %p lockup_wq2 %p\n", lockup_wq1, lockup_wq2);
trace_printk("Got a write\n");
// Hold the lock so that work2 blocks in SEM_DOWN until SEM_UP below.
SEM_DOWN(&write_sem);
// NOTE(review): len is a size_t stored into an int field here.
my_work1.index = len;
my_work1.sem = &write_sem;
INIT_WORK_ONSTACK(&my_work1.work, work1);
my_work2.index = len;
my_work2.sem = &write_sem;
INIT_WORK_ONSTACK(&my_work2.work, work2);
// The lock-taking item (work2) is queued first, then the plain one (work1).
queue_work(lockup_wq2, &my_work2.work);
queue_work(lockup_wq1, &my_work1.work);
// Wait for work1 while write_sem is still held.
flush_work(&my_work1.work);
destroy_work_on_stack(&my_work1.work);
// Let work2 proceed, then wait for it before its stack storage goes away.
SEM_UP(&write_sem);
flush_work(&my_work2.work);
destroy_work_on_stack(&my_work2.work);
trace_printk("Write done\n");
// Stop tracing; presumably so the trace of this write is not overwritten.
tracing_off();
return len;
}
// Module exit point: logs a message, unregisters the character device and
// then destroys both workqueues created in init_killer_module().
static void __exit cleanup_killer_module(void) {
printk(KERN_INFO "Goodbye world 1.\n");
// Unregister the device before the workqueues its write handler uses go away.
unregister_chrdev(major, DEVICE_NAME);
destroy_workqueue(lockup_wq1);
destroy_workqueue(lockup_wq2);
}
// Register the load/unload entry points and the module metadata.
module_init(init_killer_module);
module_exit(cleanup_killer_module);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Austin Schuh <austin@peloton-tech.com>");
MODULE_DESCRIPTION("Triggers a workqueue bug on write.");
next prev parent reply other threads:[~2014-06-26 19:50 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <CANGgnMbHckBQdKGN_N5Q6qEKc9n1CenxvMpeXog1NbSdL8UrTw@mail.gmail.com>
[not found] ` <CANGgnMYDXerOUDOO9-RHMJKadKACA2KBGskZwoP-1ZwAhDEfVA@mail.gmail.com>
2014-05-21 7:33 ` Filesystem lockup with CONFIG_PREEMPT_RT Richard Weinberger
2014-06-26 19:50 ` Austin Schuh [this message]
2014-06-26 22:35 ` Thomas Gleixner
2014-06-27 0:07 ` Austin Schuh
2014-06-27 3:22 ` Mike Galbraith
2014-06-27 12:57 ` Mike Galbraith
2014-06-27 14:01 ` Steven Rostedt
2014-06-27 17:34 ` Mike Galbraith
2014-06-27 17:54 ` Steven Rostedt
2014-06-27 18:07 ` Mike Galbraith
2014-06-27 18:19 ` Steven Rostedt
2014-06-27 19:11 ` Mike Galbraith
2014-06-28 1:18 ` Austin Schuh
2014-06-28 3:32 ` Mike Galbraith
2014-06-28 6:20 ` Austin Schuh
2014-06-28 7:11 ` Mike Galbraith
2014-06-27 14:24 ` Thomas Gleixner
2014-06-28 4:51 ` Mike Galbraith
2014-07-01 0:12 ` Austin Schuh
2014-07-01 0:53 ` Austin Schuh
2014-07-05 20:26 ` Thomas Gleixner
2014-07-06 4:55 ` Austin Schuh
2014-07-01 3:01 ` Austin Schuh
2014-07-01 19:32 ` Austin Schuh
2014-07-03 23:08 ` Austin Schuh
2014-07-04 4:42 ` Mike Galbraith
2014-05-21 19:30 John Blackwood
2014-05-21 21:59 ` Austin Schuh
2014-07-05 20:36 ` Thomas Gleixner
-- strict thread matches above, loose matches on Subject: below --
2014-07-05 19:30 Jan de Kruyf
2014-07-07 8:48 Jan de Kruyf
2014-07-07 13:00 ` Thomas Gleixner
2014-07-07 16:23 ` Austin Schuh
2014-07-08 8:03 ` Jan de Kruyf
2014-07-08 16:09 ` Austin Schuh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CANGgnMYVoP-Z0Bv-VDEkJnvfa7Fi4-zY2F4A0PhMewGvwo3VVw@mail.gmail.com \
--to=austin@peloton-tech.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-rt-users@vger.kernel.org \
--cc=richard.weinberger@gmail.com \
--cc=tglx@linutronix.de \
--cc=umgwanakikbuti@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).