linux-rt-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Austin Schuh <austin@peloton-tech.com>
To: Richard Weinberger <richard.weinberger@gmail.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Mike Galbraith <umgwanakikbuti@gmail.com>
Cc: LKML <linux-kernel@vger.kernel.org>,
	rt-users <linux-rt-users@vger.kernel.org>
Subject: Re: Filesystem lockup with CONFIG_PREEMPT_RT
Date: Thu, 26 Jun 2014 12:50:24 -0700	[thread overview]
Message-ID: <CANGgnMYVoP-Z0Bv-VDEkJnvfa7Fi4-zY2F4A0PhMewGvwo3VVw@mail.gmail.com> (raw)
In-Reply-To: <CAFLxGvxfBt7OvW=a2Kz08GLHSEiiOZsN-vB19CXnQiwqFxqMsA@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1371 bytes --]

On Wed, May 21, 2014 at 12:33 AM, Richard Weinberger
<richard.weinberger@gmail.com> wrote:
> CC'ing RT folks
>
> On Wed, May 21, 2014 at 8:23 AM, Austin Schuh <austin@peloton-tech.com> wrote:
>> On Tue, May 13, 2014 at 7:29 PM, Austin Schuh <austin@peloton-tech.com> wrote:
>>> Hi,
>>>
>>> I am observing a filesystem lockup with XFS on a CONFIG_PREEMPT_RT
>>> patched kernel.  I have currently only triggered it using dpkg.  Dave
>>> Chinner on the XFS mailing list suggested that it was an rt-kernel
>>> workqueue issue as opposed to an XFS problem after looking at the
>>> kernel messages.

I've got a 100% reproducible test case that doesn't involve a
filesystem.  I wrote a module that triggers the bug when the device is
written to, making it easy to enable tracing during the event and
capture everything.

It looks like rw_semaphores don't trigger wq_worker_sleeping to run
when a work item goes to sleep on an rw_semaphore.  This only happens with the
RT patches, not with the mainline kernel.  I'm foreseeing a second
deadlock/bug coming into play shortly.  If a task holding the work
pool spinlock gets preempted, and we need to schedule more work from
another worker thread which was just blocked by a mutex, we'll then
end up trying to go to sleep on 2 locks at once.

That is getting a bit deep into the scheduler for me...  Any
suggestions on how to fix it?

Austin

[-- Attachment #2: killer_module.c --]
[-- Type: text/x-csrc, Size: 4183 bytes --]

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <asm/uaccess.h>
#include <linux/semaphore.h>

// Forward declarations of the file operation handlers, so that the fops table
// can reference them before their definitions further down.
static int device_open(struct inode *, struct file *);
static int device_release(struct inode *, struct file *);
static ssize_t device_read(struct file *, char *, size_t, loff_t *);
static ssize_t device_write(struct file *, const char *, size_t, loff_t *);

// Dev name as it appears in /proc/devices
#define DEVICE_NAME "aschuh"

// Major number assigned to our device driver (the value returned by
// register_chrdev in init_killer_module).
static int major;
// The two workqueues used by the test.  Both are allocated in
// init_killer_module and destroyed in cleanup_killer_module; device_write
// queues work1 on lockup_wq1 and work2 on lockup_wq2.
static struct workqueue_struct *lockup_wq1;
static struct workqueue_struct *lockup_wq2;

static struct file_operations fops = {
  .read = device_read,
  .write = device_write,
  .open = device_open,
  .release = device_release
};

// Module entry point: allocates both test workqueues and registers the
// character device.
//
// Returns 0 on success, -ENOMEM if either workqueue cannot be allocated, or
// the negative value returned by register_chrdev if registration fails.  On
// every failure path the workqueues allocated so far are destroyed again.
static int __init init_killer_module(void) {
  lockup_wq1 = alloc_workqueue("lockup_wq1", WQ_MEM_RECLAIM, 0);
  if (lockup_wq1 == NULL) {
    return -ENOMEM;
  }

  lockup_wq2 = alloc_workqueue("lockup_wq2", WQ_MEM_RECLAIM, 0);
  if (lockup_wq2 == NULL) {
    destroy_workqueue(lockup_wq1);
    return -ENOMEM;
  }

  // The return value is kept in `major`: negative means failure, otherwise it
  // is the number printed in the mknod hint below.
  major = register_chrdev(0, DEVICE_NAME, &fops);
  if (major >= 0) {
    printk(KERN_INFO "'mknod /dev/%s c %d 0'.\n", DEVICE_NAME, major);
    // Returning 0 tells the module loader that initialisation succeeded.
    return 0;
  }

  printk(KERN_ALERT "Registering char device failed with %d\n", major);
  destroy_workqueue(lockup_wq1);
  destroy_workqueue(lockup_wq2);
  return major;
}

// Called when a process tries to open the device file.
static int device_open(struct inode *inode, struct file *file) {
  try_module_get(THIS_MODULE);
  return 0;
}

// Called when a process closes the device file.  Always returns 0.
static int device_release(struct inode *inode, struct file *file) {
  // Drop the module reference taken in device_open.  Without this, once the
  // file has been opened the usage count never falls back and the module can
  // never be removed.
  module_put(THIS_MODULE);

  return 0;
}

// Read handler for the device.  The test is driven entirely by writes, so
// this ignores all of its arguments and always returns 0 (no bytes read).
static ssize_t device_read(struct file *filp, char *buffer, size_t length,
                           loff_t *offset) {
  return 0;
}

// Compile-time choice of the lock type used by the test.  SEM_TYPE, SEM_INIT,
// SEM_DOWN and SEM_UP give both variants the same interface.  Change the
// condition below to "#if 1" to build with a plain semaphore instead of the
// rw_semaphore.
#if 0

// Variant 1: struct semaphore initialised with a count of 1.
#define SEM_INIT(sem) sema_init(sem, 1)
#define SEM_TYPE struct semaphore
#define SEM_DOWN(sem) down(sem)
#define SEM_UP(sem) up(sem)

#else

// Variant 2 (currently selected): rw_semaphore, always taken for writing.
#define SEM_INIT(sem) init_rwsem(sem)
#define SEM_TYPE struct rw_semaphore
#define SEM_DOWN(sem) down_write_nested(sem, 0)
#define SEM_UP(sem) up_write(sem)

#endif

// A work item plus the data its handler needs.  The handlers recover this
// structure from the embedded work_struct with container_of.
struct mywork {
  struct work_struct work;  // Embedded work item that is actually queued.
  int index;                // Value logged by the handlers (device_write stores the write length here).
  SEM_TYPE *sem;            // Lock that work2 acquires and releases; not used by work1.
};

// Handler for the work item that device_write queues on lockup_wq1.  It only
// records the item's index in the trace buffer and never touches the lock.
static void work1(struct work_struct *work) {
  struct mywork *item = container_of(work, struct mywork, work);

  trace_printk("work1 Called with index %d\n", item->index);
}

// Handler for the work item that device_write queues on lockup_wq2.  It logs
// entry, acquires and immediately releases the item's lock (sleeping if the
// lock is currently held), then logs completion.
static void work2(struct work_struct *work) {
  struct mywork *item = container_of(work, struct mywork, work);

  trace_printk("work2 Called with index %d\n", item->index);

  // Taking the lock is the whole point: this is where the worker goes to
  // sleep while device_write still holds it.
  SEM_DOWN(item->sem);
  SEM_UP(item->sem);

  trace_printk("work2 Finished with index %d\n", item->index);
}


// Write handler: runs the test sequence once per write.
//
// The data written is never read; only `len` is used, as the index logged by
// the work handlers and as the return value.
//
// Sequence (the order is what makes this a reproducer, so it must not be
// rearranged):
//   1. Take write_sem.
//   2. Queue work2 on lockup_wq2; its handler will block on write_sem.
//   3. Queue work1 on lockup_wq1 and wait for it while still holding
//      write_sem.
//   4. Release write_sem, wait for work2, then stop tracing.
//
// Always returns len.
static ssize_t device_write(struct file *filp, const char *buff, size_t len,
                            loff_t *off) {
  // All declarations come before the first statement; kernel builds warn
  // about mixed declarations and code.
  SEM_TYPE write_sem;
  struct mywork my_work1;
  struct mywork my_work2;

  SEM_INIT(&write_sem);

  trace_printk("lockup_wq1 %p lockup_wq2 %p\n", lockup_wq1, lockup_wq2);

  trace_printk("Got a write\n");

  SEM_DOWN(&write_sem);
  // index is only a label for the trace output, so narrowing to int is fine;
  // the cast makes that conversion explicit.
  my_work1.index = (int)len;
  my_work1.sem = &write_sem;
  INIT_WORK_ONSTACK(&my_work1.work, work1);

  my_work2.index = (int)len;
  my_work2.sem = &write_sem;
  INIT_WORK_ONSTACK(&my_work2.work, work2);

  // work2 goes first so that its worker is (or will be) asleep on write_sem
  // by the time work1 needs to run.
  queue_work(lockup_wq2, &my_work2.work);

  queue_work(lockup_wq1, &my_work1.work);
  // Wait for work1 with write_sem still held.  NOTE(review): per the
  // accompanying report this is where the hang shows up on PREEMPT_RT.
  flush_work(&my_work1.work);
  destroy_work_on_stack(&my_work1.work);

  SEM_UP(&write_sem);

  // Both work items live on this stack frame, so work2 must also have
  // finished before we return.
  flush_work(&my_work2.work);
  destroy_work_on_stack(&my_work2.work);
  trace_printk("Write done\n");
  // Stop tracing here, presumably so the trace buffer keeps the events above.
  tracing_off();

  return len;
}

// Module exit point: logs a farewell message, unregisters the character
// device, then destroys both workqueues created by init_killer_module.
static void __exit cleanup_killer_module(void) {
  printk(KERN_INFO "Goodbye world 1.\n");

  // Unregister the device first so no new write can queue work on the
  // workqueues while they are being destroyed.
  unregister_chrdev(major, DEVICE_NAME);

  destroy_workqueue(lockup_wq1);
  destroy_workqueue(lockup_wq2);
}

// Register the load/unload entry points defined above.
module_init(init_killer_module);
module_exit(cleanup_killer_module);

// Module metadata.
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Austin Schuh <austin@peloton-tech.com>");
MODULE_DESCRIPTION("Triggers a workqueue bug on write.");

  reply	other threads:[~2014-06-26 19:50 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CANGgnMbHckBQdKGN_N5Q6qEKc9n1CenxvMpeXog1NbSdL8UrTw@mail.gmail.com>
     [not found] ` <CANGgnMYDXerOUDOO9-RHMJKadKACA2KBGskZwoP-1ZwAhDEfVA@mail.gmail.com>
2014-05-21  7:33   ` Filesystem lockup with CONFIG_PREEMPT_RT Richard Weinberger
2014-06-26 19:50     ` Austin Schuh [this message]
2014-06-26 22:35       ` Thomas Gleixner
2014-06-27  0:07         ` Austin Schuh
2014-06-27  3:22           ` Mike Galbraith
2014-06-27 12:57           ` Mike Galbraith
2014-06-27 14:01             ` Steven Rostedt
2014-06-27 17:34               ` Mike Galbraith
2014-06-27 17:54                 ` Steven Rostedt
2014-06-27 18:07                   ` Mike Galbraith
2014-06-27 18:19                     ` Steven Rostedt
2014-06-27 19:11                       ` Mike Galbraith
2014-06-28  1:18                       ` Austin Schuh
2014-06-28  3:32                         ` Mike Galbraith
2014-06-28  6:20                           ` Austin Schuh
2014-06-28  7:11                             ` Mike Galbraith
2014-06-27 14:24           ` Thomas Gleixner
2014-06-28  4:51             ` Mike Galbraith
2014-07-01  0:12             ` Austin Schuh
2014-07-01  0:53               ` Austin Schuh
2014-07-05 20:26                 ` Thomas Gleixner
2014-07-06  4:55                   ` Austin Schuh
2014-07-01  3:01             ` Austin Schuh
2014-07-01 19:32               ` Austin Schuh
2014-07-03 23:08                 ` Austin Schuh
2014-07-04  4:42                   ` Mike Galbraith
2014-05-21 19:30 John Blackwood
2014-05-21 21:59 ` Austin Schuh
2014-07-05 20:36 ` Thomas Gleixner
  -- strict thread matches above, loose matches on Subject: below --
2014-07-05 19:30 Jan de Kruyf
2014-07-07  8:48 Jan de Kruyf
2014-07-07 13:00 ` Thomas Gleixner
2014-07-07 16:23 ` Austin Schuh
2014-07-08  8:03   ` Jan de Kruyf
2014-07-08 16:09     ` Austin Schuh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CANGgnMYVoP-Z0Bv-VDEkJnvfa7Fi4-zY2F4A0PhMewGvwo3VVw@mail.gmail.com \
    --to=austin@peloton-tech.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-rt-users@vger.kernel.org \
    --cc=richard.weinberger@gmail.com \
    --cc=tglx@linutronix.de \
    --cc=umgwanakikbuti@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).