From: Oleg Nesterov <oleg@redhat.com>
To: Ingo Molnar <mingo@elte.hu>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>,
Anton Arapov <anton@redhat.com>,
Peter Zijlstra <peterz@infradead.org>,
Srikar Dronamraju <srikar@linux.vnet.ibm.com>,
linux-kernel@vger.kernel.org
Subject: [PATCH 05/15] uprobes: rework register_for_each_vma() to make it O(n)
Date: Fri, 15 Jun 2012 17:43:33 +0200 [thread overview]
Message-ID: <20120615154333.GA9581@redhat.com> (raw)
In-Reply-To: <20120615154241.GA9524@redhat.com>
Currently register_for_each_vma() is O(n ** 2) + O(n ** 3): each
call to find_next_vma_info() "restarts" the vma_prio_tree_foreach()
loop, and each iteration rechecks the whole try_list. This also
means that try_list can grow "indefinitely" if register/unregister
races with munmap/mmap activity, even if the number of mappings is
bounded at any time.
With this patch register_for_each_vma() builds the list of mm/vaddr
structures only once and does install_breakpoint() for each entry.
We do not care about the new mappings which can be created after
build_map_info() drops mapping->i_mmap_mutex; uprobe_mmap() should
do its work for those.
Note that we do not allocate map_info under i_mmap_mutex, because
this can deadlock with page reclaim (but see the next patch). So we
use 2 lists: "curr", which we are going to return, and "prev", which
holds the already allocated memory. The main loop dequeues an entry
from "prev" (initially it is empty), and if "prev" becomes empty
again it counts the number of entries we need to pre-allocate outside
of i_mmap_mutex.
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
---
kernel/events/uprobes.c | 199 ++++++++++++++++++++---------------------------
1 files changed, 86 insertions(+), 113 deletions(-)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ec78152..4e0db34 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -60,17 +60,6 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
*/
static atomic_t uprobe_events = ATOMIC_INIT(0);
-/*
- * Maintain a temporary per vma info that can be used to search if a vma
- * has already been handled. This structure is introduced since extending
- * vm_area_struct wasnt recommended.
- */
-struct vma_info {
- struct list_head probe_list;
- struct mm_struct *mm;
- loff_t vaddr;
-};
-
struct uprobe {
struct rb_node rb_node; /* node in the rb tree */
atomic_t ref;
@@ -742,139 +731,123 @@ static void delete_uprobe(struct uprobe *uprobe)
atomic_dec(&uprobe_events);
}
-static struct vma_info *
-__find_next_vma_info(struct address_space *mapping, struct list_head *head,
- struct vma_info *vi, loff_t offset, bool is_register)
+struct map_info {
+ struct map_info *next;
+ struct mm_struct *mm;
+ loff_t vaddr;
+};
+
+static inline struct map_info *free_map_info(struct map_info *info)
{
+ struct map_info *next = info->next;
+ kfree(info);
+ return next;
+}
+
+static struct map_info *
+build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
+{
+ unsigned long pgoff = offset >> PAGE_SHIFT;
struct prio_tree_iter iter;
struct vm_area_struct *vma;
- struct vma_info *tmpvi;
- unsigned long pgoff;
- int existing_vma;
- loff_t vaddr;
-
- pgoff = offset >> PAGE_SHIFT;
+ struct map_info *curr = NULL;
+ struct map_info *prev = NULL;
+ struct map_info *info;
+ int more = 0;
+ again:
+ mutex_lock(&mapping->i_mmap_mutex);
vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
if (!valid_vma(vma, is_register))
continue;
- existing_vma = 0;
- vaddr = vma_address(vma, offset);
-
- list_for_each_entry(tmpvi, head, probe_list) {
- if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) {
- existing_vma = 1;
- break;
- }
- }
-
- /*
- * Another vma needs a probe to be installed. However skip
- * installing the probe if the vma is about to be unlinked.
- */
- if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) {
- vi->mm = vma->vm_mm;
- vi->vaddr = vaddr;
- list_add(&vi->probe_list, head);
-
- return vi;
+ if (!prev) {
+ more++;
+ continue;
}
- }
-
- return NULL;
-}
-/*
- * Iterate in the rmap prio tree and find a vma where a probe has not
- * yet been inserted.
- */
-static struct vma_info *
-find_next_vma_info(struct address_space *mapping, struct list_head *head,
- loff_t offset, bool is_register)
-{
- struct vma_info *vi, *retvi;
+ if (!atomic_inc_not_zero(&vma->vm_mm->mm_users))
+ continue;
- vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL);
- if (!vi)
- return ERR_PTR(-ENOMEM);
+ info = prev;
+ prev = prev->next;
+ info->next = curr;
+ curr = info;
- mutex_lock(&mapping->i_mmap_mutex);
- retvi = __find_next_vma_info(mapping, head, vi, offset, is_register);
+ info->mm = vma->vm_mm;
+ info->vaddr = vma_address(vma, offset);
+ }
mutex_unlock(&mapping->i_mmap_mutex);
- if (!retvi)
- kfree(vi);
+ if (!more)
+ goto out;
+
+ prev = curr;
+ while (curr) {
+ mmput(curr->mm);
+ curr = curr->next;
+ }
- return retvi;
+ do {
+ info = kmalloc(sizeof(struct map_info), GFP_KERNEL);
+ if (!info) {
+ curr = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+ info->next = prev;
+ prev = info;
+ } while (--more);
+
+ goto again;
+ out:
+ while (prev)
+ prev = free_map_info(prev);
+ return curr;
}
static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
{
- struct list_head try_list;
- struct vm_area_struct *vma;
- struct address_space *mapping;
- struct vma_info *vi, *tmpvi;
- struct mm_struct *mm;
- loff_t vaddr;
- int ret;
+ struct map_info *info;
+ int err = 0;
- mapping = uprobe->inode->i_mapping;
- INIT_LIST_HEAD(&try_list);
-
- ret = 0;
+ info = build_map_info(uprobe->inode->i_mapping,
+ uprobe->offset, is_register);
+ if (IS_ERR(info))
+ return PTR_ERR(info);
- for (;;) {
- vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register);
- if (!vi)
- break;
+ while (info) {
+ struct mm_struct *mm = info->mm;
+ struct vm_area_struct *vma;
+ loff_t vaddr;
- if (IS_ERR(vi)) {
- ret = PTR_ERR(vi);
- break;
- }
+ if (err)
+ goto free;
- mm = vi->mm;
down_write(&mm->mmap_sem);
- vma = find_vma(mm, (unsigned long)vi->vaddr);
- if (!vma || !valid_vma(vma, is_register)) {
- list_del(&vi->probe_list);
- kfree(vi);
- up_write(&mm->mmap_sem);
- mmput(mm);
- continue;
- }
+ vma = find_vma(mm, (unsigned long)info->vaddr);
+ if (!vma || !valid_vma(vma, is_register))
+ goto unlock;
+
vaddr = vma_address(vma, uprobe->offset);
if (vma->vm_file->f_mapping->host != uprobe->inode ||
- vaddr != vi->vaddr) {
- list_del(&vi->probe_list);
- kfree(vi);
- up_write(&mm->mmap_sem);
- mmput(mm);
- continue;
- }
+ vaddr != info->vaddr)
+ goto unlock;
- if (is_register)
- ret = install_breakpoint(uprobe, mm, vma, vi->vaddr);
- else
- remove_breakpoint(uprobe, mm, vi->vaddr);
-
- up_write(&mm->mmap_sem);
- mmput(mm);
if (is_register) {
- if (ret && ret == -EEXIST)
- ret = 0;
- if (ret)
- break;
+ err = install_breakpoint(uprobe, mm, vma, info->vaddr);
+ if (err == -EEXIST)
+ err = 0;
+ } else {
+ remove_breakpoint(uprobe, mm, info->vaddr);
}
+ unlock:
+ up_write(&mm->mmap_sem);
+ free:
+ mmput(mm);
+ info = free_map_info(info);
}
- list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) {
- list_del(&vi->probe_list);
- kfree(vi);
- }
-
- return ret;
+ return err;
}
static int __uprobe_register(struct uprobe *uprobe)
--
1.5.5.1
next prev parent reply other threads:[~2012-06-15 15:45 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-06-15 15:42 [PATCH 0/15] uprobes: misc Oleg Nesterov
2012-06-15 15:43 ` [PATCH 01/15] uprobes: valid_vma() should reject VM_HUGETLB Oleg Nesterov
2012-06-18 8:50 ` [tip:perf/core] uprobes: Valid_vma() " tip-bot for Oleg Nesterov
2012-06-15 15:43 ` [PATCH 02/15] uprobes: __copy_insn() should ensure a_ops->readpage != NULL Oleg Nesterov
2012-06-18 8:50 ` [tip:perf/core] uprobes: __copy_insn() should ensure a_ops-> readpage " tip-bot for Oleg Nesterov
2012-06-15 15:43 ` [PATCH 03/15] uprobes: write_opcode()->__replace_page() can race with try_to_unmap() Oleg Nesterov
2012-06-18 8:51 ` [tip:perf/core] uprobes: Write_opcode()->__replace_page() " tip-bot for Oleg Nesterov
2012-06-15 15:43 ` [PATCH 04/15] uprobes: install_breakpoint() should fail if is_swbp_insn() == T Oleg Nesterov
2012-06-18 8:52 ` [tip:perf/core] uprobes: Install_breakpoint() " tip-bot for Oleg Nesterov
2012-06-15 15:43 ` Oleg Nesterov [this message]
2012-06-18 8:53 ` [tip:perf/core] uprobes: Rework register_for_each_vma() to make it O(n) tip-bot for Oleg Nesterov
2012-06-15 15:43 ` [PATCH 06/15] uprobes: change build_map_info() to try kmalloc(GFP_NOWAIT) first Oleg Nesterov
2012-06-18 8:54 ` [tip:perf/core] uprobes: Change build_map_info() to try kmalloc( GFP_NOWAIT) first tip-bot for Oleg Nesterov
2012-06-15 15:43 ` [PATCH 07/15] uprobes: document uprobe_register() vs uprobe_mmap() race Oleg Nesterov
2012-06-18 8:55 ` [tip:perf/core] uprobes: Document uprobe_register() vs uprobe_mmap () race tip-bot for Peter Zijlstra
2012-06-15 15:43 ` [PATCH 08/15] uprobes: copy_insn() shouldn't depend on mm/vma/vaddr Oleg Nesterov
2012-06-18 8:56 ` [tip:perf/core] uprobes: Copy_insn() shouldn't depend on mm/vma/ vaddr tip-bot for Oleg Nesterov
2012-06-15 15:43 ` [PATCH 09/15] uprobes: copy_insn() should not return -ENOMEM if __copy_insn() fails Oleg Nesterov
2012-06-18 8:57 ` [tip:perf/core] uprobes: Copy_insn() " tip-bot for Oleg Nesterov
2012-06-15 15:43 ` [PATCH 10/15] uprobes: no need to re-check vma_address() in write_opcode() Oleg Nesterov
2012-06-18 8:57 ` [tip:perf/core] uprobes: No " tip-bot for Oleg Nesterov
2012-06-15 15:43 ` [PATCH 11/15] uprobes: move BUG_ON(UPROBE_SWBP_INSN_SIZE) from write_opcode() to install_breakpoint() Oleg Nesterov
2012-06-15 16:36 ` Srikar Dronamraju
2012-06-15 17:52 ` Oleg Nesterov
2012-06-18 12:08 ` Srikar Dronamraju
2012-06-18 8:58 ` [tip:perf/core] uprobes: Move " tip-bot for Oleg Nesterov
2012-06-15 15:43 ` [PATCH 12/15] uprobes: simplify the usage of uprobe->pending_list Oleg Nesterov
2012-06-18 8:59 ` [tip:perf/core] uprobes: Simplify the usage of uprobe-> pending_list tip-bot for Oleg Nesterov
2012-06-15 15:43 ` [PATCH 13/15] uprobes: don't use loff_t for the valid virtual address Oleg Nesterov
2012-06-18 9:00 ` [tip:perf/core] uprobes: Don' t " tip-bot for Oleg Nesterov
2012-06-15 15:43 ` [PATCH 14/15] uprobes: __copy_insn() needs "loff_t offset" Oleg Nesterov
2012-06-18 9:01 ` [tip:perf/core] " tip-bot for Oleg Nesterov
2012-06-15 15:44 ` [PATCH 15/15] uprobes: remove the unnecessary initialization in add_utask() Oleg Nesterov
2012-06-18 9:02 ` [tip:perf/core] uprobes: Remove " tip-bot for Oleg Nesterov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120615154333.GA9581@redhat.com \
--to=oleg@redhat.com \
--cc=ananth@in.ibm.com \
--cc=anton@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=peterz@infradead.org \
--cc=srikar@linux.vnet.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox