From: sagig@mellanox.com
To: aarcange@redhat.com
Cc: ogerlitz@mellanox.com, gleb@redhat.com, oren@mellanox.com,
linux-mm@kvack.org
Subject: [PATCH RFC V1] mm: convert rcu_read_lock() to srcu_read_lock(), thus allowing to sleep in callbacks
Date: Sun, 5 Feb 2012 18:29:12 +0200 [thread overview]
Message-ID: <4f2eae5e.e951b40a.3aa3.5ddc@mx.google.com> (raw)
In-Reply-To: <y>
Now that the anon_vma lock and i_mmap_mutex are both sleepable mutexes, it is possible to schedule inside the invalidation callbacks
(such as invalidate_page, invalidate_range_start/end and change_pte).
This is essential for RDMA drivers that implement on-demand paging, because their synchronization with the HW may need to schedule.
Signed-off-by: sagi grimberg <sagig@mellanox.co.il>
---
changes from V0:
1. srcu_struct should be shared and not allocated in each callback - removed from callbacks
2. added srcu_struct under mmu_notifier_mm
3. init_srcu_struct when creating mmu_notifier_mm
4. cleanup_srcu_struct when destroying mmu_notifier_mm
include/linux/mmu_notifier.h | 3 +++
mm/mmu_notifier.c | 23 +++++++++++++++--------
2 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index 1d1b1e1..f3d6f30 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -4,6 +4,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>
+#include <linux/srcu.h>
struct mmu_notifier;
struct mmu_notifier_ops;
@@ -21,6 +22,8 @@ struct mmu_notifier_mm {
struct hlist_head list;
/* to serialize the list modifications and hlist_unhashed */
spinlock_t lock;
+ /* to enable sleeping in callbacks */
+ struct srcu_struct srcu;
};
struct mmu_notifier_ops {
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 9a611d3..3d4f007 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -123,10 +123,11 @@ int __mmu_notifier_test_young(struct mm_struct *mm,
void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
pte_t pte)
{
+ int idx;
struct mmu_notifier *mn;
struct hlist_node *n;
- rcu_read_lock();
+ idx = srcu_read_lock(&mm->mmu_notifier_mm->srcu);
hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
if (mn->ops->change_pte)
mn->ops->change_pte(mn, mm, address, pte);
@@ -137,49 +138,52 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address,
else if (mn->ops->invalidate_page)
mn->ops->invalidate_page(mn, mm, address);
}
- rcu_read_unlock();
+ srcu_read_unlock(&mm->mmu_notifier_mm->srcu, idx);
}
void __mmu_notifier_invalidate_page(struct mm_struct *mm,
unsigned long address)
{
+ int idx;
struct mmu_notifier *mn;
struct hlist_node *n;
- rcu_read_lock();
+ idx = srcu_read_lock(&mm->mmu_notifier_mm->srcu);
hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
if (mn->ops->invalidate_page)
mn->ops->invalidate_page(mn, mm, address);
}
- rcu_read_unlock();
+ srcu_read_unlock(&mm->mmu_notifier_mm->srcu, idx);
}
void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
+ int idx;
struct mmu_notifier *mn;
struct hlist_node *n;
- rcu_read_lock();
+ idx = srcu_read_lock(&mm->mmu_notifier_mm->srcu);
hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
if (mn->ops->invalidate_range_start)
mn->ops->invalidate_range_start(mn, mm, start, end);
}
- rcu_read_unlock();
+ srcu_read_unlock(&mm->mmu_notifier_mm->srcu, idx);
}
void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
+ int idx;
struct mmu_notifier *mn;
struct hlist_node *n;
- rcu_read_lock();
+ idx = srcu_read_lock(&mm->mmu_notifier_mm->srcu);
hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) {
if (mn->ops->invalidate_range_end)
mn->ops->invalidate_range_end(mn, mm, start, end);
}
- rcu_read_unlock();
+ srcu_read_unlock(&mm->mmu_notifier_mm->srcu, idx);
}
static int do_mmu_notifier_register(struct mmu_notifier *mn,
@@ -204,6 +208,8 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn,
if (!mm_has_notifiers(mm)) {
INIT_HLIST_HEAD(&mmu_notifier_mm->list);
+ if (init_srcu_struct(&mmu_notifier_mm->srcu))
+ goto out_cleanup;
spin_lock_init(&mmu_notifier_mm->lock);
mm->mmu_notifier_mm = mmu_notifier_mm;
mmu_notifier_mm = NULL;
@@ -266,6 +272,7 @@ EXPORT_SYMBOL_GPL(__mmu_notifier_register);
void __mmu_notifier_mm_destroy(struct mm_struct *mm)
{
BUG_ON(!hlist_empty(&mm->mmu_notifier_mm->list));
+ cleanup_srcu_struct(&mm->mmu_notifier_mm->srcu);
kfree(mm->mmu_notifier_mm);
mm->mmu_notifier_mm = LIST_POISON1; /* debug */
}
--
1.7.8.2
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next reply other threads:[~2012-02-05 16:29 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-02-05 16:29 sagig [this message]
2012-02-05 20:27 ` [PATCH RFC V1] mm: convert rcu_read_lock() to srcu_read_lock(), thus allowing to sleep in callbacks Konstantin Khlebnikov
2012-02-06 9:11 ` sagig
2012-02-06 11:29 ` Konstantin Khlebnikov
2012-02-06 9:11 ` sagig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4f2eae5e.e951b40a.3aa3.5ddc@mx.google.com \
--to=sagig@mellanox.com \
--cc=aarcange@redhat.com \
--cc=gleb@redhat.com \
--cc=linux-mm@kvack.org \
--cc=ogerlitz@mellanox.com \
--cc=oren@mellanox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.