From: Andrea Arcangeli <andrea@qumranet.com>
To: Christoph Lameter <clameter@sgi.com>
Cc: Jack Steiner <steiner@sgi.com>, Nick Piggin <npiggin@suse.de>,
akpm@linux-foundation.org, Robin Holt <holt@sgi.com>,
Avi Kivity <avi@qumranet.com>,
kvm-devel@lists.sourceforge.net,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
general@lists.openfabrics.org,
Steve Wise <swise@opengridcomputing.com>,
Roland Dreier <rdreier@cisco.com>,
Kanoj Sarcar <kanojsarcar@yahoo.com>,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
daniel.blueman@quadrics.com
Subject: Re: [PATCH] 3/4 combine RCU with seqlock to allow mmu notifier methods to sleep (#v9 was 1/4)
Date: Fri, 7 Mar 2008 16:23:28 +0100 [thread overview]
Message-ID: <20080307152328.GE24114@v2.random> (raw)
In-Reply-To: <20080307151722.GD24114@v2.random>
This combines the non-sleep-capable RCU locking of #v9 with a seqlock
so the mmu notifier fast path will require zero cacheline
writes/bouncing while still providing mmu_notifier_unregister and
allowing the mmu notifier methods to schedule. If we drop
mmu_notifier_unregister we can just as well drop all the seqlock and
rcu_read_lock() usage. But this locking scheme combination is sexy enough
and 100% scalable (the mmu_notifier_list cacheline will be preloaded
anyway and that will most certainly include the sequence number value
in L1 for free even on Christoph's NUMA systems), so IMHO it is worth
keeping mmu_notifier_unregister.
Signed-off-by: Andrea Arcangeli <andrea@qumranet.com>
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -10,6 +10,7 @@
#include <linux/rbtree.h>
#include <linux/rwsem.h>
#include <linux/completion.h>
+#include <linux/seqlock.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -230,6 +231,7 @@ struct mm_struct {
#endif
#ifdef CONFIG_MMU_NOTIFIER
struct hlist_head mmu_notifier_list;
+ seqlock_t mmu_notifier_lock;
#endif
};
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -130,6 +130,7 @@ static inline void mmu_notifier_mm_init(
static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
INIT_HLIST_HEAD(&mm->mmu_notifier_list);
+ seqlock_init(&mm->mmu_notifier_lock);
}
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -20,7 +20,9 @@ void __mmu_notifier_release(struct mm_st
void __mmu_notifier_release(struct mm_struct *mm)
{
struct mmu_notifier *mn;
+ unsigned seq;
+ seq = read_seqbegin(&mm->mmu_notifier_lock);
while (unlikely(!hlist_empty(&mm->mmu_notifier_list))) {
mn = hlist_entry(mm->mmu_notifier_list.first,
struct mmu_notifier,
@@ -28,6 +30,7 @@ void __mmu_notifier_release(struct mm_st
hlist_del(&mn->hlist);
if (mn->ops->release)
mn->ops->release(mn, mm);
+ BUG_ON(read_seqretry(&mm->mmu_notifier_lock, seq));
}
}
@@ -42,11 +45,19 @@ int __mmu_notifier_clear_flush_young(str
struct mmu_notifier *mn;
struct hlist_node *n;
int young = 0;
+ unsigned seq;
rcu_read_lock();
+restart:
+ seq = read_seqbegin(&mm->mmu_notifier_lock);
hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_list, hlist) {
- if (mn->ops->clear_flush_young)
+ if (mn->ops->clear_flush_young) {
+ rcu_read_unlock();
young |= mn->ops->clear_flush_young(mn, mm, address);
+ rcu_read_lock();
+ }
+ if (read_seqretry(&mm->mmu_notifier_lock, seq))
+ goto restart;
}
rcu_read_unlock();
@@ -58,11 +69,19 @@ void __mmu_notifier_invalidate_page(stru
{
struct mmu_notifier *mn;
struct hlist_node *n;
+ unsigned seq;
rcu_read_lock();
+restart:
+ seq = read_seqbegin(&mm->mmu_notifier_lock);
hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_list, hlist) {
- if (mn->ops->invalidate_page)
+ if (mn->ops->invalidate_page) {
+ rcu_read_unlock();
mn->ops->invalidate_page(mn, mm, address);
+ rcu_read_lock();
+ }
+ if (read_seqretry(&mm->mmu_notifier_lock, seq))
+ goto restart;
}
rcu_read_unlock();
}
@@ -72,11 +91,19 @@ void __mmu_notifier_invalidate_range_beg
{
struct mmu_notifier *mn;
struct hlist_node *n;
+ unsigned seq;
rcu_read_lock();
+restart:
+ seq = read_seqbegin(&mm->mmu_notifier_lock);
hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_list, hlist) {
- if (mn->ops->invalidate_range_begin)
+ if (mn->ops->invalidate_range_begin) {
+ rcu_read_unlock();
mn->ops->invalidate_range_begin(mn, mm, start, end);
+ rcu_read_lock();
+ }
+ if (read_seqretry(&mm->mmu_notifier_lock, seq))
+ goto restart;
}
rcu_read_unlock();
}
@@ -86,11 +113,19 @@ void __mmu_notifier_invalidate_range_end
{
struct mmu_notifier *mn;
struct hlist_node *n;
+ unsigned seq;
rcu_read_lock();
+restart:
+ seq = read_seqbegin(&mm->mmu_notifier_lock);
hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_list, hlist) {
- if (mn->ops->invalidate_range_end)
+ if (mn->ops->invalidate_range_end) {
+ rcu_read_unlock();
mn->ops->invalidate_range_end(mn, mm, start, end);
+ rcu_read_lock();
+ }
+ if (read_seqretry(&mm->mmu_notifier_lock, seq))
+ goto restart;
}
rcu_read_unlock();
}
@@ -103,12 +138,20 @@ void __mmu_notifier_invalidate_range_end
*/
void mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm)
{
+ /* no need of seqlock for hlist_add_head_rcu */
hlist_add_head_rcu(&mn->hlist, &mm->mmu_notifier_list);
}
EXPORT_SYMBOL_GPL(mmu_notifier_register);
void mmu_notifier_unregister(struct mmu_notifier *mn, struct mm_struct *mm)
{
+ /*
+ * The seqlock tracks if a hlist_del_rcu happens while a
+ * notifier method is scheduling and in such a case the "mn"
+ * memory may have been freed by the time the method returns.
+ */
+ write_seqlock(&mm->mmu_notifier_lock);
hlist_del_rcu(&mn->hlist);
+ write_sequnlock(&mm->mmu_notifier_lock);
}
EXPORT_SYMBOL_GPL(mmu_notifier_unregister);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2008-03-07 15:23 UTC|newest]
Thread overview: 115+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-02-19 8:43 [patch] my mmu notifiers Nick Piggin
2008-02-19 8:44 ` [patch] my mmu notifier sample driver Nick Piggin
2008-02-19 11:59 ` [patch] my mmu notifiers Robin Holt
2008-02-19 13:58 ` Andrea Arcangeli
2008-02-19 14:27 ` Jack Steiner
2008-02-19 23:04 ` Nick Piggin
2008-02-20 0:52 ` Andrea Arcangeli
2008-02-20 2:46 ` Robin Holt
2008-02-27 22:50 ` Christoph Lameter
2008-02-19 22:59 ` Nick Piggin
2008-02-20 0:46 ` Andrea Arcangeli
2008-02-27 22:55 ` Christoph Lameter
2008-02-19 23:11 ` Nick Piggin
2008-02-19 23:40 ` Jack Steiner
2008-02-21 4:42 ` Nick Piggin
2008-02-22 16:31 ` Jack Steiner
2008-02-20 1:09 ` Andrea Arcangeli
2008-02-20 10:39 ` [PATCH] mmu notifiers #v6 Andrea Arcangeli
2008-02-20 10:45 ` [PATCH] KVM swapping (+ seqlock fix) with " Andrea Arcangeli
2008-02-27 22:06 ` [PATCH] KVM swapping with mmu notifiers #v7 Andrea Arcangeli
2008-02-28 8:42 ` izik eidus
2008-02-20 11:33 ` [PATCH] mmu notifiers #v6 Robin Holt
2008-02-20 12:03 ` Andrea Arcangeli
2008-02-20 12:24 ` Robin Holt
2008-02-20 12:32 ` Andrea Arcangeli
2008-02-20 13:15 ` Robin Holt
2008-02-21 5:02 ` Nick Piggin
2008-02-20 14:41 ` Robin Holt
2008-02-20 15:34 ` Andrea Arcangeli
2008-02-20 21:03 ` Jack Steiner
2008-02-21 4:54 ` Nick Piggin
2008-02-21 14:40 ` Andrea Arcangeli
2008-02-21 16:10 ` Jack Steiner
2008-02-27 19:26 ` [PATCH] mmu notifiers #v7 Andrea Arcangeli
2008-02-27 20:04 ` Peter Zijlstra
2008-02-27 23:06 ` Christoph Lameter
2008-02-27 23:43 ` [kvm-devel] " Andrea Arcangeli
2008-02-28 0:08 ` Christoph Lameter
2008-02-28 0:21 ` Andrea Arcangeli
2008-02-28 0:24 ` Christoph Lameter
2008-02-28 19:48 ` Christoph Lameter
2008-02-28 21:52 ` Andrea Arcangeli
2008-02-28 22:00 ` Christoph Lameter
2008-02-28 23:17 ` Jack Steiner
2008-02-29 0:24 ` Andrea Arcangeli
2008-02-29 1:13 ` Christoph Lameter
2008-02-28 23:05 ` Christoph Lameter
2008-02-29 0:40 ` Andrea Arcangeli
2008-02-29 0:56 ` Andrew Morton
2008-02-29 1:03 ` Christoph Lameter
2008-02-29 13:09 ` Andrea Arcangeli
2008-02-29 19:46 ` Christoph Lameter
2008-03-02 15:54 ` [PATCH] mmu notifiers #v8 Andrea Arcangeli
2008-03-02 16:03 ` [PATCH] mmu notifiers #v8 + xpmem Andrea Arcangeli
2008-03-02 16:23 ` Peter Zijlstra
2008-03-03 3:29 ` [PATCH] mmu notifiers #v8 Nick Piggin
2008-03-03 12:51 ` Andrea Arcangeli
2008-03-03 13:10 ` Nick Piggin
2008-03-03 13:24 ` Andrea Arcangeli
2008-03-03 15:18 ` Jack Steiner
2008-03-03 16:59 ` Nick Piggin
2008-03-03 18:06 ` Jack Steiner
2008-03-03 18:09 ` Avi Kivity
2008-03-03 18:23 ` Jack Steiner
2008-03-03 18:45 ` Nick Piggin
2008-03-03 19:15 ` Jack Steiner
2008-03-04 10:35 ` Peter Zijlstra
2008-03-04 14:44 ` Jack Steiner
2008-03-03 19:02 ` Christoph Lameter
2008-03-03 19:01 ` Christoph Lameter
2008-03-03 21:15 ` Andrea Arcangeli
2008-03-05 0:37 ` Nick Piggin
2008-03-05 18:48 ` Christoph Lameter
2008-03-06 2:59 ` Nick Piggin
2008-03-03 3:33 ` Nick Piggin
2008-03-03 19:03 ` Christoph Lameter
2008-03-03 3:34 ` Nick Piggin
2008-03-03 19:04 ` Christoph Lameter
2008-03-03 3:39 ` Nick Piggin
2008-03-03 21:37 ` [PATCH] mmu notifiers #v9 Andrea Arcangeli
2008-03-03 22:05 ` [PATCH] KVM swapping with " Andrea Arcangeli
2008-03-04 0:44 ` izik eidus
2008-03-04 7:31 ` [RFC] Notifier for Externally Mapped Memory (EMM) Christoph Lameter
2008-03-04 7:34 ` [Early draft] Conversion of i_mmap_lock to semaphore Christoph Lameter
2008-03-04 13:30 ` [RFC] Notifier for Externally Mapped Memory (EMM) Andrea Arcangeli
2008-03-04 19:00 ` Christoph Lameter
2008-03-04 22:20 ` Andrea Arcangeli
2008-03-04 22:35 ` Christoph Lameter
2008-03-04 22:42 ` Peter Zijlstra
2008-03-04 23:14 ` Christoph Lameter
2008-03-04 23:25 ` Peter Zijlstra
2008-03-04 23:30 ` Peter Zijlstra
2008-03-05 5:09 ` Avi Kivity
2008-03-05 9:47 ` Robin Holt
2008-03-05 9:53 ` Avi Kivity
2008-03-05 10:02 ` [kvm-devel] " Dor Laor
2008-03-07 15:17 ` [PATCH] 2/4 move all invalidate_page outside of PT lock (#v9 was 1/4) Andrea Arcangeli
2008-03-07 15:23 ` Andrea Arcangeli [this message]
2008-03-07 15:52 ` [PATCH] 4/4 i_mmap_lock spinlock2rwsem " Andrea Arcangeli
2008-03-07 20:03 ` Christoph Lameter
2008-03-19 21:27 ` Christoph Lameter
2008-03-07 16:52 ` [PATCH] 3/4 combine RCU with seqlock to allow mmu notifier methods to sleep " Peter Zijlstra
2008-03-07 17:50 ` Andrea Arcangeli
2008-03-07 18:01 ` Peter Zijlstra
2008-03-07 18:45 ` Andrea Arcangeli
2008-03-07 19:47 ` Andrea Arcangeli
2008-03-07 20:15 ` Christoph Lameter
2008-03-07 20:12 ` Christoph Lameter
2008-03-07 20:10 ` Christoph Lameter
2008-03-07 20:00 ` Christoph Lameter
2008-03-07 19:54 ` [PATCH] 2/4 move all invalidate_page outside of PT lock " Christoph Lameter
2008-03-04 13:21 ` [PATCH] KVM swapping with mmu notifiers #v9 Andrea Arcangeli
2008-02-21 4:47 ` [patch] my mmu notifiers Nick Piggin
2008-02-20 2:49 ` Robin Holt
2008-02-27 22:56 ` Christoph Lameter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080307152328.GE24114@v2.random \
--to=andrea@qumranet.com \
--cc=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=avi@qumranet.com \
--cc=clameter@sgi.com \
--cc=daniel.blueman@quadrics.com \
--cc=general@lists.openfabrics.org \
--cc=holt@sgi.com \
--cc=kanojsarcar@yahoo.com \
--cc=kvm-devel@lists.sourceforge.net \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=npiggin@suse.de \
--cc=rdreier@cisco.com \
--cc=steiner@sgi.com \
--cc=swise@opengridcomputing.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).