From: Jeremy Fitzhardinge <jeremy@goop.org>
To: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Cc: "xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>,
Gerd Hoffmann <kraxel@redhat.com>
Subject: Re: [PATCH] [PVOPS] fix gntdev on PAE
Date: Tue, 01 Jun 2010 09:31:36 -0700 [thread overview]
Message-ID: <4C0535E8.3030201@goop.org> (raw)
In-Reply-To: <alpine.DEB.2.00.1006011036520.3028@kaball-desktop>
On 06/01/2010 02:38 AM, Stefano Stabellini wrote:
> On Fri, 28 May 2010, Jeremy Fitzhardinge wrote:
>
>> I managed to catch a lockdep problem in gntdev, which may be the same as
>> before:
>>
>> BUG: sleeping function called from invalid context at kernel/rwsem.c:21
>> in_atomic(): 1, irqs_disabled(): 0, pid: 4091, name: qemu-dm
>> 2 locks held by qemu-dm/4091:
>> #0: (&mm->mmap_sem){++++++}, at: [<ffffffff810bb50f>] sys_munmap+0x33/0x58
>> #1: (rcu_read_lock){.+.+..}, at: [<ffffffff810cd63a>] __mmu_notifier_invalidate_range_start+0x0/0xc7
>> Pid: 4091, comm: qemu-dm Not tainted 2.6.32.13 #23
>> Call Trace:
>> [<ffffffff8106705b>] ? __debug_show_held_locks+0x22/0x24
>> [<ffffffff81039522>] __might_sleep+0x123/0x127
>> [<ffffffff810a8536>] ? release_pages+0xd2/0x1e7
>> [<ffffffff81498849>] down_read+0x1f/0x57
>> [<ffffffff81010142>] ? check_events+0x12/0x20
>> [<ffffffff810a8536>] ? release_pages+0xd2/0x1e7
>> [<ffffffff810cd63a>] ? __mmu_notifier_invalidate_range_start+0x0/0xc7
>> [<ffffffff8123e069>] mn_invl_range_start+0x32/0x118
>> [<ffffffff810cd69c>] __mmu_notifier_invalidate_range_start+0x62/0xc7
>> [<ffffffff810cd63a>] ? __mmu_notifier_invalidate_range_start+0x0/0xc7
>> [<ffffffff810b54bc>] unmap_vmas+0x8c/0x91a
>> [<ffffffff810ba363>] unmap_region+0xda/0x178
>> [<ffffffff810bb472>] do_munmap+0x2ae/0x318
>> [<ffffffff810bb51d>] sys_munmap+0x41/0x58
>> [<ffffffff81013b82>] system_call_fastpath+0x16/0x1b
>>
>>
>> The problem is that mn_invl_range_start does a down_read(), but it is
>> called from __mmu_notifier_invalidate_range_start(), which does an
>> rcu_read_lock, which has the side-effect of disabling preemption.
>>
>> The mmu notifier code seems to have always used rcu_read_lock this way,
>> so I guess this bug has always been there. It's not immediately obvious
>> how to fix it.
>>
>> Thoughts?
>>
> What about turning the semaphore into a rwlock?
> Performances shouldn't matter in this case.
> Something like this:
>
The problem is that the rcu lock disables preemption, so anything inside
it must be non-scheduling. So it would need to be a spinlock type
thing, I think.
J
> ---
>
>
> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
> index ddc59cc..91846b9 100644
> --- a/drivers/xen/gntdev.c
> +++ b/drivers/xen/gntdev.c
> @@ -28,7 +28,7 @@
> #include <linux/types.h>
> #include <linux/uaccess.h>
> #include <linux/sched.h>
> -#include <linux/rwsem.h>
> +#include <linux/spinlock_types.h>
>
> #include <xen/xen.h>
> #include <xen/grant_table.h>
> @@ -51,7 +51,7 @@ struct gntdev_priv {
> struct list_head maps;
> uint32_t used;
> uint32_t limit;
> - struct rw_semaphore sem;
> + rwlock_t rwlock;
> struct mm_struct *mm;
> struct mmu_notifier mn;
> };
> @@ -277,7 +277,7 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
> unsigned long mstart, mend;
> int err;
>
> - down_read(&priv->sem);
> + read_lock(&priv->rwlock);
> list_for_each_entry(map, &priv->maps, next) {
> if (!map->vma)
> continue;
> @@ -299,7 +299,7 @@ static void mn_invl_range_start(struct mmu_notifier *mn,
> (mend - mstart) >> PAGE_SHIFT);
> WARN_ON(err);
> }
> - up_read(&priv->sem);
> + read_unlock(&priv->rwlock);
> }
>
> static void mn_invl_page(struct mmu_notifier *mn,
> @@ -316,7 +316,7 @@ static void mn_release(struct mmu_notifier *mn,
> struct grant_map *map;
> int err;
>
> - down_read(&priv->sem);
> + read_lock(&priv->rwlock);
> list_for_each_entry(map, &priv->maps, next) {
> if (!map->vma)
> continue;
> @@ -327,7 +327,7 @@ static void mn_release(struct mmu_notifier *mn,
> err = unmap_grant_pages(map, 0, map->count);
> WARN_ON(err);
> }
> - up_read(&priv->sem);
> + read_unlock(&priv->rwlock);
> }
>
> struct mmu_notifier_ops gntdev_mmu_ops = {
> @@ -347,7 +347,7 @@ static int gntdev_open(struct inode *inode, struct file *flip)
> return -ENOMEM;
>
> INIT_LIST_HEAD(&priv->maps);
> - init_rwsem(&priv->sem);
> + priv->rwlock = RW_LOCK_UNLOCKED;
> priv->limit = limit;
>
> priv->mm = get_task_mm(current);
> @@ -375,13 +375,13 @@ static int gntdev_release(struct inode *inode, struct file *flip)
> if (debug)
> printk("%s: priv %p\n", __FUNCTION__, priv);
>
> - down_write(&priv->sem);
> + write_lock(&priv->rwlock);
> while (!list_empty(&priv->maps)) {
> map = list_entry(priv->maps.next, struct grant_map, next);
> err = gntdev_del_map(map);
> WARN_ON(err);
> }
> - up_write(&priv->sem);
> + write_unlock(&priv->rwlock);
> mmu_notifier_unregister(&priv->mn, priv->mm);
> kfree(priv);
> return 0;
> @@ -404,7 +404,7 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
> if (unlikely(op.count > priv->limit))
> return -EINVAL;
>
> - down_write(&priv->sem);
> + write_lock(&priv->rwlock);
> err = -ENOMEM;
> map = gntdev_add_map(priv, op.count);
> if (!map)
> @@ -417,13 +417,13 @@ static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
> op.index = map->index << PAGE_SHIFT;
> if (copy_to_user(u, &op, sizeof(op)) != 0)
> goto err_free;
> - up_write(&priv->sem);
> + write_unlock(&priv->rwlock);
> return 0;
>
> err_free:
> gntdev_del_map(map);
> err_unlock:
> - up_write(&priv->sem);
> + write_unlock(&priv->rwlock);
> return err;
> }
>
> @@ -440,11 +440,11 @@ static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
> printk("%s: priv %p, del %d+%d\n", __FUNCTION__, priv,
> (int)op.index, (int)op.count);
>
> - down_write(&priv->sem);
> + write_lock(&priv->rwlock);
> map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
> if (map)
> err = gntdev_del_map(map);
> - up_write(&priv->sem);
> + write_unlock(&priv->rwlock);
> return err;
> }
>
> @@ -460,16 +460,16 @@ static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
> printk("%s: priv %p, offset for vaddr %lx\n", __FUNCTION__, priv,
> (unsigned long)op.vaddr);
>
> - down_read(&priv->sem);
> + read_lock(&priv->rwlock);
> map = gntdev_find_map_vaddr(priv, op.vaddr);
> if (map == NULL ||
> map->vma->vm_start != op.vaddr) {
> - up_read(&priv->sem);
> + read_unlock(&priv->rwlock);
> return -EINVAL;
> }
> op.offset = map->index << PAGE_SHIFT;
> op.count = map->count;
> - up_read(&priv->sem);
> + read_unlock(&priv->rwlock);
>
> if (copy_to_user(u, &op, sizeof(op)) != 0)
> return -EFAULT;
> @@ -488,9 +488,9 @@ static long gntdev_ioctl_set_max_grants(struct gntdev_priv *priv,
> if (op.count > limit)
> return -EINVAL;
>
> - down_write(&priv->sem);
> + write_lock(&priv->rwlock);
> priv->limit = op.count;
> - up_write(&priv->sem);
> + write_unlock(&priv->rwlock);
> return 0;
> }
>
> @@ -538,7 +538,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
> printk("%s: map %d+%d at %lx (pgoff %lx)\n", __FUNCTION__,
> index, count, vma->vm_start, vma->vm_pgoff);
>
> - down_read(&priv->sem);
> + read_lock(&priv->rwlock);
> map = gntdev_find_map_index(priv, index, count);
> if (!map)
> goto unlock_out;
> @@ -580,7 +580,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
> map->is_mapped = 1;
>
> unlock_out:
> - up_read(&priv->sem);
> + read_unlock(&priv->rwlock);
> return err;
> }
>
>
>
next prev parent reply other threads:[~2010-06-01 16:31 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-02-01 15:11 [PATCH] [PVOPS] fix gntdev on PAE Stefano Stabellini
2010-02-01 15:46 ` Stefano Stabellini
2010-02-09 21:57 ` Jeremy Fitzhardinge
2010-02-10 12:19 ` Stefano Stabellini
2010-02-10 23:12 ` Jeremy Fitzhardinge
2010-05-28 17:29 ` Jeremy Fitzhardinge
2010-06-01 9:38 ` Stefano Stabellini
2010-06-01 16:31 ` Jeremy Fitzhardinge [this message]
2010-06-01 16:36 ` Stefano Stabellini
2010-06-01 16:46 ` Jeremy Fitzhardinge
2010-06-02 14:11 ` Stefano Stabellini
2010-06-02 17:11 ` Jeremy Fitzhardinge
2010-06-03 9:32 ` Stefano Stabellini
2010-06-09 19:38 ` Jeremy Fitzhardinge
2010-02-09 22:24 ` Jeremy Fitzhardinge
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4C0535E8.3030201@goop.org \
--to=jeremy@goop.org \
--cc=kraxel@redhat.com \
--cc=stefano.stabellini@eu.citrix.com \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.