* [PATCH 5/5] extend KSM refcounts to the anon_vma root
2010-05-12 17:38 [PATCH 0/5] always lock the root anon_vma Rik van Riel
2010-05-12 17:40 ` [PATCH 4/5] always lock the root (oldest) anon_vma Rik van Riel
@ 2010-05-12 17:41 ` Rik van Riel
2010-05-12 21:07 ` Mel Gorman
1 sibling, 1 reply; 30+ messages in thread
From: Rik van Riel @ 2010-05-12 17:41 UTC (permalink / raw)
To: Andrew Morton
Cc: Mel Gorman, Andrea Arcangeli, Minchan Kim, Linux-MM,
KAMEZAWA Hiroyuki, LKML, Linus Torvalds
Subject: extend KSM refcounts to the anon_vma root
KSM reference counts can cause an anon_vma to exist after the processe
it belongs to have already exited. Because the anon_vma lock now lives
in the root anon_vma, we need to ensure that the root anon_vma stays
around until after all the "child" anon_vmas have been freed.
The obvious way to do this is to have a "child" anon_vma take a
reference to the root in anon_vma_fork. When the anon_vma is freed
at munmap or process exit, we drop the refcount in anon_vma_unlink
and possibly free the root anon_vma.
The KSM anon_vma reference count function also needs to be modified
to deal with the possibility of freeing 2 levels of anon_vma. The
easiest way to do this is to break out the KSM magic and make it
generic.
When compiling without CONFIG_KSM, this code is compiled out.
Signed-off-by: Rik van Riel <riel@redhat.com>
---
include/linux/rmap.h | 12 ++++++++++++
mm/ksm.c | 17 ++++++-----------
mm/rmap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 62 insertions(+), 12 deletions(-)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 33ffe14..387d40c 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -126,6 +126,18 @@ int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
void __anon_vma_link(struct vm_area_struct *);
void anon_vma_free(struct anon_vma *);
+#ifdef CONFIG_KSM
+static inline void get_anon_vma(struct anon_vma *anon_vma)
+{
+ atomic_inc(&anon_vma->ksm_refcount);
+}
+
+void drop_anon_vma(struct anon_vma *);
+#else
+#define get_anon_vma(x) do {} while(0)
+#define drop_anon_vma(x) do {} while(0)
+#endif
+
static inline void anon_vma_merge(struct vm_area_struct *vma,
struct vm_area_struct *next)
{
diff --git a/mm/ksm.c b/mm/ksm.c
index 7ca0dd7..9f2acc9 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_item *rmap_item,
struct anon_vma *anon_vma)
{
rmap_item->anon_vma = anon_vma;
- atomic_inc(&anon_vma->ksm_refcount);
+ get_anon_vma(anon_vma);
}
-static void drop_anon_vma(struct rmap_item *rmap_item)
+static void ksm_drop_anon_vma(struct rmap_item *rmap_item)
{
struct anon_vma *anon_vma = rmap_item->anon_vma;
- if (atomic_dec_and_lock(&anon_vma->ksm_refcount, &anon_vma->root->lock)) {
- int empty = list_empty(&anon_vma->head);
- anon_vma_unlock(anon_vma);
- if (empty)
- anon_vma_free(anon_vma);
- }
+ drop_anon_vma(anon_vma);
}
/*
@@ -415,7 +410,7 @@ static void break_cow(struct rmap_item *rmap_item)
* It is not an accident that whenever we want to break COW
* to undo, we also need to drop a reference to the anon_vma.
*/
- drop_anon_vma(rmap_item);
+ ksm_drop_anon_vma(rmap_item);
down_read(&mm->mmap_sem);
if (ksm_test_exit(mm))
@@ -470,7 +465,7 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node)
ksm_pages_sharing--;
else
ksm_pages_shared--;
- drop_anon_vma(rmap_item);
+ ksm_drop_anon_vma(rmap_item);
rmap_item->address &= PAGE_MASK;
cond_resched();
}
@@ -558,7 +553,7 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
else
ksm_pages_shared--;
- drop_anon_vma(rmap_item);
+ ksm_drop_anon_vma(rmap_item);
rmap_item->address &= PAGE_MASK;
} else if (rmap_item->address & UNSTABLE_FLAG) {
diff --git a/mm/rmap.c b/mm/rmap.c
index f0ba648..d63cd91 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -238,6 +238,12 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
*/
root_avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma);
anon_vma->root = root_avc->anon_vma;
+ /*
+ * With KSM refcounts, an anon_vma can stay around longer than the
+ * process it belongs to. The root anon_vma needs to be pinned,
+ * because that is where the lock lives.
+ */
+ get_anon_vma(anon_vma->root);
/* Mark this anon_vma as the one where our new (COWed) pages go. */
vma->anon_vma = anon_vma;
anon_vma_chain_link(vma, avc, anon_vma);
@@ -267,8 +273,11 @@ static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
anon_vma_unlock(anon_vma);
- if (empty)
+ if (empty) {
+ /* We no longer need the root anon_vma */
+ drop_anon_vma(anon_vma->root);
anon_vma_free(anon_vma);
+ }
}
void unlink_anon_vmas(struct vm_area_struct *vma)
@@ -1355,6 +1364,40 @@ int try_to_munlock(struct page *page)
return try_to_unmap_file(page, TTU_MUNLOCK);
}
+#ifdef CONFIG_KSM
+/*
+ * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root
+ * if necessary. Be careful to do all the tests under the lock. Once
+ * we know we are the last user, nobody else can get a reference and we
+ * can do the freeing without the lock.
+ */
+void drop_anon_vma(struct anon_vma *anon_vma)
+{
+ if (atomic_dec_and_lock(&anon_vma->ksm_refcount, &anon_vma->root->lock)) {
+ struct anon_vma *root = anon_vma->root;
+ int empty list_empty(&anon_vma->head);
+ int last_root_user = 0;
+ int root_empty = 0;
+
+ /*
+ * The refcount on a non-root anon_vma got dropped. Drop
+ * the refcount on the root and check if we need to free it.
+ */
+ if (empty && anon_vma != root) {
+ last_root_user = atomic_dec_and_test(&root->ksm_refcount);
+ root_empty = list_empty(&root->head);
+ }
+ anon_vma_unlock(anon_vma);
+
+ if (empty) {
+ anon_vma_free(anon_vma);
+ if (root_empty && last_root_user)
+ anon_vma_free(root);
+ }
+ }
+}
+#endif
+
#ifdef CONFIG_MIGRATION
/*
* rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH 5/5] extend KSM refcounts to the anon_vma root
2010-05-12 17:41 ` Rik van Riel
@ 2010-05-12 21:07 ` Mel Gorman
2010-05-12 21:09 ` Rik van Riel
0 siblings, 1 reply; 30+ messages in thread
From: Mel Gorman @ 2010-05-12 21:07 UTC (permalink / raw)
To: Rik van Riel
Cc: Andrew Morton, Andrea Arcangeli, Minchan Kim, Linux-MM,
KAMEZAWA Hiroyuki, LKML, Linus Torvalds
On Wed, May 12, 2010 at 01:41:11PM -0400, Rik van Riel wrote:
> Subject: extend KSM refcounts to the anon_vma root
>
> KSM reference counts can cause an anon_vma to exist after the processe
> it belongs to have already exited. Because the anon_vma lock now lives
> in the root anon_vma, we need to ensure that the root anon_vma stays
> around until after all the "child" anon_vmas have been freed.
>
> The obvious way to do this is to have a "child" anon_vma take a
> reference to the root in anon_vma_fork. When the anon_vma is freed
> at munmap or process exit, we drop the refcount in anon_vma_unlink
> and possibly free the root anon_vma.
>
> The KSM anon_vma reference count function also needs to be modified
> to deal with the possibility of freeing 2 levels of anon_vma. The
> easiest way to do this is to break out the KSM magic and make it
> generic.
>
> When compiling without CONFIG_KSM, this code is compiled out.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
> ---
> include/linux/rmap.h | 12 ++++++++++++
> mm/ksm.c | 17 ++++++-----------
> mm/rmap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
> 3 files changed, 62 insertions(+), 12 deletions(-)
>
> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
> index 33ffe14..387d40c 100644
> --- a/include/linux/rmap.h
> +++ b/include/linux/rmap.h
> @@ -126,6 +126,18 @@ int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
> void __anon_vma_link(struct vm_area_struct *);
> void anon_vma_free(struct anon_vma *);
>
> +#ifdef CONFIG_KSM
> +static inline void get_anon_vma(struct anon_vma *anon_vma)
> +{
> + atomic_inc(&anon_vma->ksm_refcount);
> +}
> +
> +void drop_anon_vma(struct anon_vma *);
> +#else
> +#define get_anon_vma(x) do {} while(0)
> +#define drop_anon_vma(x) do {} while(0)
> +#endif
> +
> static inline void anon_vma_merge(struct vm_area_struct *vma,
> struct vm_area_struct *next)
> {
> diff --git a/mm/ksm.c b/mm/ksm.c
> index 7ca0dd7..9f2acc9 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_item *rmap_item,
> struct anon_vma *anon_vma)
> {
> rmap_item->anon_vma = anon_vma;
> - atomic_inc(&anon_vma->ksm_refcount);
> + get_anon_vma(anon_vma);
> }
I'm not quite getting this. Here, we get the local anon_vma so we
increment its reference count and later we drop it but without a
refcount taken on the root anon_vma, why is it guaranteed to stay
around?
>
> -static void drop_anon_vma(struct rmap_item *rmap_item)
> +static void ksm_drop_anon_vma(struct rmap_item *rmap_item)
> {
> struct anon_vma *anon_vma = rmap_item->anon_vma;
>
> - if (atomic_dec_and_lock(&anon_vma->ksm_refcount, &anon_vma->root->lock)) {
> - int empty = list_empty(&anon_vma->head);
> - anon_vma_unlock(anon_vma);
> - if (empty)
> - anon_vma_free(anon_vma);
> - }
> + drop_anon_vma(anon_vma);
> }
>
> /*
> @@ -415,7 +410,7 @@ static void break_cow(struct rmap_item *rmap_item)
> * It is not an accident that whenever we want to break COW
> * to undo, we also need to drop a reference to the anon_vma.
> */
> - drop_anon_vma(rmap_item);
> + ksm_drop_anon_vma(rmap_item);
>
> down_read(&mm->mmap_sem);
> if (ksm_test_exit(mm))
> @@ -470,7 +465,7 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node)
> ksm_pages_sharing--;
> else
> ksm_pages_shared--;
> - drop_anon_vma(rmap_item);
> + ksm_drop_anon_vma(rmap_item);
> rmap_item->address &= PAGE_MASK;
> cond_resched();
> }
> @@ -558,7 +553,7 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
> else
> ksm_pages_shared--;
>
> - drop_anon_vma(rmap_item);
> + ksm_drop_anon_vma(rmap_item);
> rmap_item->address &= PAGE_MASK;
>
> } else if (rmap_item->address & UNSTABLE_FLAG) {
> diff --git a/mm/rmap.c b/mm/rmap.c
> index f0ba648..d63cd91 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -238,6 +238,12 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
> */
> root_avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma);
> anon_vma->root = root_avc->anon_vma;
> + /*
> + * With KSM refcounts, an anon_vma can stay around longer than the
> + * process it belongs to. The root anon_vma needs to be pinned,
> + * because that is where the lock lives.
> + */
> + get_anon_vma(anon_vma->root);
> /* Mark this anon_vma as the one where our new (COWed) pages go. */
> vma->anon_vma = anon_vma;
> anon_vma_chain_link(vma, avc, anon_vma);
> @@ -267,8 +273,11 @@ static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
> empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
> anon_vma_unlock(anon_vma);
>
> - if (empty)
> + if (empty) {
> + /* We no longer need the root anon_vma */
> + drop_anon_vma(anon_vma->root);
> anon_vma_free(anon_vma);
> + }
> }
>
> void unlink_anon_vmas(struct vm_area_struct *vma)
> @@ -1355,6 +1364,40 @@ int try_to_munlock(struct page *page)
> return try_to_unmap_file(page, TTU_MUNLOCK);
> }
>
> +#ifdef CONFIG_KSM
> +/*
> + * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root
> + * if necessary. Be careful to do all the tests under the lock. Once
> + * we know we are the last user, nobody else can get a reference and we
> + * can do the freeing without the lock.
> + */
> +void drop_anon_vma(struct anon_vma *anon_vma)
> +{
> + if (atomic_dec_and_lock(&anon_vma->ksm_refcount, &anon_vma->root->lock)) {
> + struct anon_vma *root = anon_vma->root;
> + int empty list_empty(&anon_vma->head);
> + int last_root_user = 0;
> + int root_empty = 0;
> +
> + /*
> + * The refcount on a non-root anon_vma got dropped. Drop
> + * the refcount on the root and check if we need to free it.
> + */
> + if (empty && anon_vma != root) {
> + last_root_user = atomic_dec_and_test(&root->ksm_refcount);
> + root_empty = list_empty(&root->head);
> + }
> + anon_vma_unlock(anon_vma);
> +
> + if (empty) {
> + anon_vma_free(anon_vma);
> + if (root_empty && last_root_user)
> + anon_vma_free(root);
> + }
> + }
> +}
> +#endif
> +
> #ifdef CONFIG_MIGRATION
> /*
> * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
>
--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 5/5] extend KSM refcounts to the anon_vma root
2010-05-12 21:07 ` Mel Gorman
@ 2010-05-12 21:09 ` Rik van Riel
2010-05-13 11:26 ` Mel Gorman
0 siblings, 1 reply; 30+ messages in thread
From: Rik van Riel @ 2010-05-12 21:09 UTC (permalink / raw)
To: Mel Gorman
Cc: Andrew Morton, Andrea Arcangeli, Minchan Kim, Linux-MM,
KAMEZAWA Hiroyuki, LKML, Linus Torvalds
On 05/12/2010 05:07 PM, Mel Gorman wrote:
> On Wed, May 12, 2010 at 01:41:11PM -0400, Rik van Riel wrote:
>> Subject: extend KSM refcounts to the anon_vma root
>>
>> KSM reference counts can cause an anon_vma to exist after the processe
>> it belongs to have already exited. Because the anon_vma lock now lives
>> in the root anon_vma, we need to ensure that the root anon_vma stays
>> around until after all the "child" anon_vmas have been freed.
>>
>> The obvious way to do this is to have a "child" anon_vma take a
>> reference to the root in anon_vma_fork. When the anon_vma is freed
>> at munmap or process exit, we drop the refcount in anon_vma_unlink
>> and possibly free the root anon_vma.
>>
>> The KSM anon_vma reference count function also needs to be modified
>> to deal with the possibility of freeing 2 levels of anon_vma. The
>> easiest way to do this is to break out the KSM magic and make it
>> generic.
>>
>> When compiling without CONFIG_KSM, this code is compiled out.
>>
>> Signed-off-by: Rik van Riel<riel@redhat.com>
>> ---
>> include/linux/rmap.h | 12 ++++++++++++
>> mm/ksm.c | 17 ++++++-----------
>> mm/rmap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
>> 3 files changed, 62 insertions(+), 12 deletions(-)
>>
>> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
>> index 33ffe14..387d40c 100644
>> --- a/include/linux/rmap.h
>> +++ b/include/linux/rmap.h
>> @@ -126,6 +126,18 @@ int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
>> void __anon_vma_link(struct vm_area_struct *);
>> void anon_vma_free(struct anon_vma *);
>>
>> +#ifdef CONFIG_KSM
>> +static inline void get_anon_vma(struct anon_vma *anon_vma)
>> +{
>> + atomic_inc(&anon_vma->ksm_refcount);
>> +}
>> +
>> +void drop_anon_vma(struct anon_vma *);
>> +#else
>> +#define get_anon_vma(x) do {} while(0)
>> +#define drop_anon_vma(x) do {} while(0)
>> +#endif
>> +
>> static inline void anon_vma_merge(struct vm_area_struct *vma,
>> struct vm_area_struct *next)
>> {
>> diff --git a/mm/ksm.c b/mm/ksm.c
>> index 7ca0dd7..9f2acc9 100644
>> --- a/mm/ksm.c
>> +++ b/mm/ksm.c
>> @@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_item *rmap_item,
>> struct anon_vma *anon_vma)
>> {
>> rmap_item->anon_vma = anon_vma;
>> - atomic_inc(&anon_vma->ksm_refcount);
>> + get_anon_vma(anon_vma);
>> }
>
> I'm not quite getting this. Here, we get the local anon_vma so we
> increment its reference count and later we drop it but without a
> refcount taken on the root anon_vma, why is it guaranteed to stay
> around?
Because anon_vma_fork takes a reference count on the root anon_vma,
the VMA we take a refcount on will either have a refcount on the
root, or it is the root.
--
All rights reversed
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 5/5] extend KSM refcounts to the anon_vma root
2010-05-12 21:09 ` Rik van Riel
@ 2010-05-13 11:26 ` Mel Gorman
2010-05-13 13:11 ` Rik van Riel
0 siblings, 1 reply; 30+ messages in thread
From: Mel Gorman @ 2010-05-13 11:26 UTC (permalink / raw)
To: Rik van Riel
Cc: Andrew Morton, Andrea Arcangeli, Minchan Kim, Linux-MM,
KAMEZAWA Hiroyuki, LKML, Linus Torvalds
On Wed, May 12, 2010 at 05:09:18PM -0400, Rik van Riel wrote:
> On 05/12/2010 05:07 PM, Mel Gorman wrote:
>> On Wed, May 12, 2010 at 01:41:11PM -0400, Rik van Riel wrote:
>>> Subject: extend KSM refcounts to the anon_vma root
>>>
>>> KSM reference counts can cause an anon_vma to exist after the processe
>>> it belongs to have already exited. Because the anon_vma lock now lives
>>> in the root anon_vma, we need to ensure that the root anon_vma stays
>>> around until after all the "child" anon_vmas have been freed.
>>>
>>> The obvious way to do this is to have a "child" anon_vma take a
>>> reference to the root in anon_vma_fork. When the anon_vma is freed
>>> at munmap or process exit, we drop the refcount in anon_vma_unlink
>>> and possibly free the root anon_vma.
>>>
>>> The KSM anon_vma reference count function also needs to be modified
>>> to deal with the possibility of freeing 2 levels of anon_vma. The
>>> easiest way to do this is to break out the KSM magic and make it
>>> generic.
>>>
>>> When compiling without CONFIG_KSM, this code is compiled out.
>>>
>>> Signed-off-by: Rik van Riel<riel@redhat.com>
>>> ---
>>> include/linux/rmap.h | 12 ++++++++++++
>>> mm/ksm.c | 17 ++++++-----------
>>> mm/rmap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
>>> 3 files changed, 62 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
>>> index 33ffe14..387d40c 100644
>>> --- a/include/linux/rmap.h
>>> +++ b/include/linux/rmap.h
>>> @@ -126,6 +126,18 @@ int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
>>> void __anon_vma_link(struct vm_area_struct *);
>>> void anon_vma_free(struct anon_vma *);
>>>
>>> +#ifdef CONFIG_KSM
>>> +static inline void get_anon_vma(struct anon_vma *anon_vma)
>>> +{
>>> + atomic_inc(&anon_vma->ksm_refcount);
>>> +}
>>> +
>>> +void drop_anon_vma(struct anon_vma *);
>>> +#else
>>> +#define get_anon_vma(x) do {} while(0)
>>> +#define drop_anon_vma(x) do {} while(0)
>>> +#endif
>>> +
>>> static inline void anon_vma_merge(struct vm_area_struct *vma,
>>> struct vm_area_struct *next)
>>> {
>>> diff --git a/mm/ksm.c b/mm/ksm.c
>>> index 7ca0dd7..9f2acc9 100644
>>> --- a/mm/ksm.c
>>> +++ b/mm/ksm.c
>>> @@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_item *rmap_item,
>>> struct anon_vma *anon_vma)
>>> {
>>> rmap_item->anon_vma = anon_vma;
>>> - atomic_inc(&anon_vma->ksm_refcount);
>>> + get_anon_vma(anon_vma);
>>> }
>>
>> I'm not quite getting this. Here, we get the local anon_vma so we
>> increment its reference count and later we drop it but without a
>> refcount taken on the root anon_vma, why is it guaranteed to stay
>> around?
>
> Because anon_vma_fork takes a reference count on the root anon_vma,
> the VMA we take a refcount on will either have a refcount on the
> root, or it is the root.
>
Sorry, I'm still not getting it. anon_vma_fork keeps the refcount around
during fork but what about during exit? Lets say anon_vma_unlink is called
on the following arrangement;
root_anon_vma->refcounted_anon_vma
We walk the list but the root_anon_vma doesn't have a refcount so it
gets freed. drop_anon_vma gets called on refcounted_anon_vma which does
if (atomic_dec_and_lock(&anon_vma->ksm_refcount, &anon_vma->root->lock))
but the root anon_vma is now gone. Are you depending on the lifecycle of
anon_vma's within KSM for this to work? If so, then the migration-related
fixes in mmotm that take a refcount on anon_vma during migration will also
need to take a refcount on the root.
--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 5/5] extend KSM refcounts to the anon_vma root
2010-05-13 11:26 ` Mel Gorman
@ 2010-05-13 13:11 ` Rik van Riel
2010-05-13 13:24 ` Mel Gorman
0 siblings, 1 reply; 30+ messages in thread
From: Rik van Riel @ 2010-05-13 13:11 UTC (permalink / raw)
To: Mel Gorman
Cc: Andrew Morton, Andrea Arcangeli, Minchan Kim, Linux-MM,
KAMEZAWA Hiroyuki, LKML, Linus Torvalds
On 05/13/2010 07:26 AM, Mel Gorman wrote:
> On Wed, May 12, 2010 at 05:09:18PM -0400, Rik van Riel wrote:
>> On 05/12/2010 05:07 PM, Mel Gorman wrote:
>>> On Wed, May 12, 2010 at 01:41:11PM -0400, Rik van Riel wrote:
>>>> Subject: extend KSM refcounts to the anon_vma root
>>>>
>>>> KSM reference counts can cause an anon_vma to exist after the processe
>>>> it belongs to have already exited. Because the anon_vma lock now lives
>>>> in the root anon_vma, we need to ensure that the root anon_vma stays
>>>> around until after all the "child" anon_vmas have been freed.
>>>>
>>>> The obvious way to do this is to have a "child" anon_vma take a
>>>> reference to the root in anon_vma_fork. When the anon_vma is freed
>>>> at munmap or process exit, we drop the refcount in anon_vma_unlink
>>>> and possibly free the root anon_vma.
>>>>
>>>> The KSM anon_vma reference count function also needs to be modified
>>>> to deal with the possibility of freeing 2 levels of anon_vma. The
>>>> easiest way to do this is to break out the KSM magic and make it
>>>> generic.
>>>>
>>>> When compiling without CONFIG_KSM, this code is compiled out.
>>>>
>>>> Signed-off-by: Rik van Riel<riel@redhat.com>
>>>> ---
>>>> include/linux/rmap.h | 12 ++++++++++++
>>>> mm/ksm.c | 17 ++++++-----------
>>>> mm/rmap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
>>>> 3 files changed, 62 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
>>>> index 33ffe14..387d40c 100644
>>>> --- a/include/linux/rmap.h
>>>> +++ b/include/linux/rmap.h
>>>> @@ -126,6 +126,18 @@ int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
>>>> void __anon_vma_link(struct vm_area_struct *);
>>>> void anon_vma_free(struct anon_vma *);
>>>>
>>>> +#ifdef CONFIG_KSM
>>>> +static inline void get_anon_vma(struct anon_vma *anon_vma)
>>>> +{
>>>> + atomic_inc(&anon_vma->ksm_refcount);
>>>> +}
>>>> +
>>>> +void drop_anon_vma(struct anon_vma *);
>>>> +#else
>>>> +#define get_anon_vma(x) do {} while(0)
>>>> +#define drop_anon_vma(x) do {} while(0)
>>>> +#endif
>>>> +
>>>> static inline void anon_vma_merge(struct vm_area_struct *vma,
>>>> struct vm_area_struct *next)
>>>> {
>>>> diff --git a/mm/ksm.c b/mm/ksm.c
>>>> index 7ca0dd7..9f2acc9 100644
>>>> --- a/mm/ksm.c
>>>> +++ b/mm/ksm.c
>>>> @@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_item *rmap_item,
>>>> struct anon_vma *anon_vma)
>>>> {
>>>> rmap_item->anon_vma = anon_vma;
>>>> - atomic_inc(&anon_vma->ksm_refcount);
>>>> + get_anon_vma(anon_vma);
>>>> }
>>>
>>> I'm not quite getting this. Here, we get the local anon_vma so we
>>> increment its reference count and later we drop it but without a
>>> refcount taken on the root anon_vma, why is it guaranteed to stay
>>> around?
>>
>> Because anon_vma_fork takes a reference count on the root anon_vma,
>> the VMA we take a refcount on will either have a refcount on the
>> root, or it is the root.
>>
>
> Sorry, I'm still not getting it. anon_vma_fork keeps the refcount around
> during fork but what about during exit?
It is kept around all the way from fork until exit.
--
All rights reversed
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 5/5] extend KSM refcounts to the anon_vma root
2010-05-13 13:11 ` Rik van Riel
@ 2010-05-13 13:24 ` Mel Gorman
0 siblings, 0 replies; 30+ messages in thread
From: Mel Gorman @ 2010-05-13 13:24 UTC (permalink / raw)
To: Rik van Riel
Cc: Andrew Morton, Andrea Arcangeli, Minchan Kim, Linux-MM,
KAMEZAWA Hiroyuki, LKML, Linus Torvalds
On Thu, May 13, 2010 at 09:11:30AM -0400, Rik van Riel wrote:
> On 05/13/2010 07:26 AM, Mel Gorman wrote:
>> On Wed, May 12, 2010 at 05:09:18PM -0400, Rik van Riel wrote:
>>> On 05/12/2010 05:07 PM, Mel Gorman wrote:
>>>> On Wed, May 12, 2010 at 01:41:11PM -0400, Rik van Riel wrote:
>>>>> Subject: extend KSM refcounts to the anon_vma root
>>>>>
>>>>> KSM reference counts can cause an anon_vma to exist after the processe
>>>>> it belongs to have already exited. Because the anon_vma lock now lives
>>>>> in the root anon_vma, we need to ensure that the root anon_vma stays
>>>>> around until after all the "child" anon_vmas have been freed.
>>>>>
>>>>> The obvious way to do this is to have a "child" anon_vma take a
>>>>> reference to the root in anon_vma_fork. When the anon_vma is freed
>>>>> at munmap or process exit, we drop the refcount in anon_vma_unlink
>>>>> and possibly free the root anon_vma.
>>>>>
>>>>> The KSM anon_vma reference count function also needs to be modified
>>>>> to deal with the possibility of freeing 2 levels of anon_vma. The
>>>>> easiest way to do this is to break out the KSM magic and make it
>>>>> generic.
>>>>>
>>>>> When compiling without CONFIG_KSM, this code is compiled out.
>>>>>
>>>>> Signed-off-by: Rik van Riel<riel@redhat.com>
>>>>> ---
>>>>> include/linux/rmap.h | 12 ++++++++++++
>>>>> mm/ksm.c | 17 ++++++-----------
>>>>> mm/rmap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
>>>>> 3 files changed, 62 insertions(+), 12 deletions(-)
>>>>>
>>>>> diff --git a/include/linux/rmap.h b/include/linux/rmap.h
>>>>> index 33ffe14..387d40c 100644
>>>>> --- a/include/linux/rmap.h
>>>>> +++ b/include/linux/rmap.h
>>>>> @@ -126,6 +126,18 @@ int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
>>>>> void __anon_vma_link(struct vm_area_struct *);
>>>>> void anon_vma_free(struct anon_vma *);
>>>>>
>>>>> +#ifdef CONFIG_KSM
>>>>> +static inline void get_anon_vma(struct anon_vma *anon_vma)
>>>>> +{
>>>>> + atomic_inc(&anon_vma->ksm_refcount);
>>>>> +}
>>>>> +
>>>>> +void drop_anon_vma(struct anon_vma *);
>>>>> +#else
>>>>> +#define get_anon_vma(x) do {} while(0)
>>>>> +#define drop_anon_vma(x) do {} while(0)
>>>>> +#endif
>>>>> +
>>>>> static inline void anon_vma_merge(struct vm_area_struct *vma,
>>>>> struct vm_area_struct *next)
>>>>> {
>>>>> diff --git a/mm/ksm.c b/mm/ksm.c
>>>>> index 7ca0dd7..9f2acc9 100644
>>>>> --- a/mm/ksm.c
>>>>> +++ b/mm/ksm.c
>>>>> @@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_item *rmap_item,
>>>>> struct anon_vma *anon_vma)
>>>>> {
>>>>> rmap_item->anon_vma = anon_vma;
>>>>> - atomic_inc(&anon_vma->ksm_refcount);
>>>>> + get_anon_vma(anon_vma);
>>>>> }
>>>>
>>>> I'm not quite getting this. Here, we get the local anon_vma so we
>>>> increment its reference count and later we drop it but without a
>>>> refcount taken on the root anon_vma, why is it guaranteed to stay
>>>> around?
>>>
>>> Because anon_vma_fork takes a reference count on the root anon_vma,
>>> the VMA we take a refcount on will either have a refcount on the
>>> root, or it is the root.
>>>
>>
>> Sorry, I'm still not getting it. anon_vma_fork keeps the refcount around
>> during fork but what about during exit?
>
> It is kept around all the way from fork until exit.
>
Ok, now I get it. I was thinking in terms of a reference count being for
the duration of a specific operation. In this case, the root anon_vma
has an elevated reference count for the lifetime of the anon_vma forest.
Thanks.
--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 5/5] extend KSM refcounts to the anon_vma root
2010-05-26 15:24 ` [PATCH -v2 0/5] always lock the root anon_vma Rik van Riel
@ 2010-05-26 15:27 ` Rik van Riel
0 siblings, 0 replies; 30+ messages in thread
From: Rik van Riel @ 2010-05-26 15:27 UTC (permalink / raw)
To: Andrew Morton
Cc: Mel Gorman, Andrea Arcangeli, Minchan Kim, Linux-MM,
KAMEZAWA Hiroyuki, LKML, Linus Torvalds, Lee Schermerhorn
Subject: extend KSM refcounts to the anon_vma root
KSM reference counts can cause an anon_vma to exist after the processe
it belongs to have already exited. Because the anon_vma lock now lives
in the root anon_vma, we need to ensure that the root anon_vma stays
around until after all the "child" anon_vmas have been freed.
The obvious way to do this is to have a "child" anon_vma take a
reference to the root in anon_vma_fork. When the anon_vma is freed
at munmap or process exit, we drop the refcount in anon_vma_unlink
and possibly free the root anon_vma.
The KSM anon_vma reference count function also needs to be modified
to deal with the possibility of freeing 2 levels of anon_vma. The
easiest way to do this is to break out the KSM magic and make it
generic.
When compiling without CONFIG_KSM, this code is compiled out.
Signed-off-by: Rik van Riel <riel@redhat.com>
---
v2:
- merge with -mm and the compaction code
- improve the anon_vma refcount comment in anon_vma_fork with the
refcount lifetime
include/linux/rmap.h | 15 +++++++++++++++
mm/ksm.c | 17 ++++++-----------
mm/migrate.c | 10 +++-------
mm/rmap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 68 insertions(+), 19 deletions(-)
Index: linux-2.6.34/include/linux/rmap.h
===================================================================
--- linux-2.6.34.orig/include/linux/rmap.h
+++ linux-2.6.34/include/linux/rmap.h
@@ -81,6 +81,13 @@ static inline int anonvma_external_refco
{
return atomic_read(&anon_vma->external_refcount);
}
+
+static inline void get_anon_vma(struct anon_vma *anon_vma)
+{
+ atomic_inc(&anon_vma->external_refcount);
+}
+
+void drop_anon_vma(struct anon_vma *);
#else
static inline void anonvma_external_refcount_init(struct anon_vma *anon_vma)
{
@@ -90,6 +97,14 @@ static inline int anonvma_external_refco
{
return 0;
}
+
+static inline void get_anon_vma(struct anon_vma *anon_vma)
+{
+}
+
+static inline void drop_anon_vma(struct anon_vma *anon_vma)
+{
+}
#endif /* CONFIG_KSM */
static inline struct anon_vma *page_anon_vma(struct page *page)
Index: linux-2.6.34/mm/ksm.c
===================================================================
--- linux-2.6.34.orig/mm/ksm.c
+++ linux-2.6.34/mm/ksm.c
@@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_it
struct anon_vma *anon_vma)
{
rmap_item->anon_vma = anon_vma;
- atomic_inc(&anon_vma->external_refcount);
+ get_anon_vma(anon_vma);
}
-static void drop_anon_vma(struct rmap_item *rmap_item)
+static void ksm_drop_anon_vma(struct rmap_item *rmap_item)
{
struct anon_vma *anon_vma = rmap_item->anon_vma;
- if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
- int empty = list_empty(&anon_vma->head);
- anon_vma_unlock(anon_vma);
- if (empty)
- anon_vma_free(anon_vma);
- }
+ drop_anon_vma(anon_vma);
}
/*
@@ -415,7 +410,7 @@ static void break_cow(struct rmap_item *
* It is not an accident that whenever we want to break COW
* to undo, we also need to drop a reference to the anon_vma.
*/
- drop_anon_vma(rmap_item);
+ ksm_drop_anon_vma(rmap_item);
down_read(&mm->mmap_sem);
if (ksm_test_exit(mm))
@@ -470,7 +465,7 @@ static void remove_node_from_stable_tree
ksm_pages_sharing--;
else
ksm_pages_shared--;
- drop_anon_vma(rmap_item);
+ ksm_drop_anon_vma(rmap_item);
rmap_item->address &= PAGE_MASK;
cond_resched();
}
@@ -558,7 +553,7 @@ static void remove_rmap_item_from_tree(s
else
ksm_pages_shared--;
- drop_anon_vma(rmap_item);
+ ksm_drop_anon_vma(rmap_item);
rmap_item->address &= PAGE_MASK;
} else if (rmap_item->address & UNSTABLE_FLAG) {
Index: linux-2.6.34/mm/rmap.c
===================================================================
--- linux-2.6.34.orig/mm/rmap.c
+++ linux-2.6.34/mm/rmap.c
@@ -235,6 +235,12 @@ int anon_vma_fork(struct vm_area_struct
* lock any of the anon_vmas in this anon_vma tree.
*/
anon_vma->root = pvma->anon_vma->root;
+ /*
+ * With KSM refcounts, an anon_vma can stay around longer than the
+ * process it belongs to. The root anon_vma needs to be pinned
+ * until this anon_vma is freed, because the lock lives in the root.
+ */
+ get_anon_vma(anon_vma->root);
/* Mark this anon_vma as the one where our new (COWed) pages go. */
vma->anon_vma = anon_vma;
anon_vma_chain_link(vma, avc, anon_vma);
@@ -264,8 +270,11 @@ static void anon_vma_unlink(struct anon_
empty = list_empty(&anon_vma->head) && !anonvma_external_refcount(anon_vma);
anon_vma_unlock(anon_vma);
- if (empty)
+ if (empty) {
+ /* We no longer need the root anon_vma */
+ drop_anon_vma(anon_vma->root);
anon_vma_free(anon_vma);
+ }
}
void unlink_anon_vmas(struct vm_area_struct *vma)
@@ -1389,6 +1398,40 @@ int try_to_munlock(struct page *page)
return try_to_unmap_file(page, TTU_MUNLOCK);
}
+#if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION)
+/*
+ * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root
+ * if necessary. Be careful to do all the tests under the lock. Once
+ * we know we are the last user, nobody else can get a reference and we
+ * can do the freeing without the lock.
+ */
+void drop_anon_vma(struct anon_vma *anon_vma)
+{
+ if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
+ struct anon_vma *root = anon_vma->root;
+ int empty = list_empty(&anon_vma->head);
+ int last_root_user = 0;
+ int root_empty = 0;
+
+ /*
+ * The refcount on a non-root anon_vma got dropped. Drop
+ * the refcount on the root and check if we need to free it.
+ */
+ if (empty && anon_vma != root) {
+ last_root_user = atomic_dec_and_test(&root->external_refcount);
+ root_empty = list_empty(&root->head);
+ }
+ anon_vma_unlock(anon_vma);
+
+ if (empty) {
+ anon_vma_free(anon_vma);
+ if (root_empty && last_root_user)
+ anon_vma_free(root);
+ }
+ }
+}
+#endif
+
#ifdef CONFIG_MIGRATION
/*
* rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
Index: linux-2.6.34/mm/migrate.c
===================================================================
--- linux-2.6.34.orig/mm/migrate.c
+++ linux-2.6.34/mm/migrate.c
@@ -639,7 +639,7 @@ static int unmap_and_move(new_page_t get
* exist when the page is remapped later
*/
anon_vma = page_anon_vma(page);
- atomic_inc(&anon_vma->external_refcount);
+ get_anon_vma(anon_vma);
}
}
@@ -682,12 +682,8 @@ skip_unmap:
rcu_unlock:
/* Drop an anon_vma reference if we took one */
- if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
- int empty = list_empty(&anon_vma->head);
- anon_vma_unlock(anon_vma);
- if (empty)
- anon_vma_free(anon_vma);
- }
+ if (anon_vma)
+ drop_anon_vma(anon_vma);
if (rcu_locked)
rcu_read_unlock();
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH -v3 0/5] always lock the root anon_vma
@ 2010-05-26 19:38 Rik van Riel
2010-05-26 19:38 ` [PATCH 1/5] rename anon_vma_lock to vma_lock_anon_vma Rik van Riel
` (4 more replies)
0 siblings, 5 replies; 30+ messages in thread
From: Rik van Riel @ 2010-05-26 19:38 UTC (permalink / raw)
To: akpm
Cc: linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli, Minchan Kim,
KAMEZAWA Hiroyuki, Lee Schermerhorn
Andrew, here are the patches to always lock the root anon_vma,
ported to the latest -mm tree.
These patches implement Linus's idea of always locking the root
anon_vma and contain all the fixes and improvements suggested
by Andrea.
This should fix the last bits of the anon_vma locking.
v3 is identical to v2, except I gathered up the Acked-by:s that
were on list already.
Patches 4 and 5 still need some reviewing and acks...
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 1/5] rename anon_vma_lock to vma_lock_anon_vma
2010-05-26 19:38 [PATCH -v3 0/5] always lock the root anon_vma Rik van Riel
@ 2010-05-26 19:38 ` Rik van Riel
2010-05-26 20:33 ` Larry Woodman
2010-05-27 13:44 ` Minchan Kim
2010-05-26 19:39 ` [PATCH 2/5] change direct call of spin_lock(anon_vma->lock) to inline function Rik van Riel
` (3 subsequent siblings)
4 siblings, 2 replies; 30+ messages in thread
From: Rik van Riel @ 2010-05-26 19:38 UTC (permalink / raw)
To: akpm
Cc: linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli, Minchan Kim,
KAMEZAWA Hiroyuki, Lee Schermerhorn
Subject: rename anon_vma_lock to vma_lock_anon_vma
Rename anon_vma_lock to vma_lock_anon_vma. This matches the
naming style used in page_lock_anon_vma and will come in really
handy further down in this patch series.
Signed-off-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/rmap.h | 4 ++--
mm/mmap.c | 14 +++++++-------
2 files changed, 9 insertions(+), 9 deletions(-)
Index: linux-2.6.34/include/linux/rmap.h
===================================================================
--- linux-2.6.34.orig/include/linux/rmap.h
+++ linux-2.6.34/include/linux/rmap.h
@@ -99,14 +99,14 @@ static inline struct anon_vma *page_anon
return page_rmapping(page);
}
-static inline void anon_vma_lock(struct vm_area_struct *vma)
+static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;
if (anon_vma)
spin_lock(&anon_vma->lock);
}
-static inline void anon_vma_unlock(struct vm_area_struct *vma)
+static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;
if (anon_vma)
Index: linux-2.6.34/mm/mmap.c
===================================================================
--- linux-2.6.34.orig/mm/mmap.c
+++ linux-2.6.34/mm/mmap.c
@@ -452,12 +452,12 @@ static void vma_link(struct mm_struct *m
spin_lock(&mapping->i_mmap_lock);
vma->vm_truncate_count = mapping->truncate_count;
}
- anon_vma_lock(vma);
+ vma_lock_anon_vma(vma);
__vma_link(mm, vma, prev, rb_link, rb_parent);
__vma_link_file(vma);
- anon_vma_unlock(vma);
+ vma_unlock_anon_vma(vma);
if (mapping)
spin_unlock(&mapping->i_mmap_lock);
@@ -1710,7 +1710,7 @@ int expand_upwards(struct vm_area_struct
*/
if (unlikely(anon_vma_prepare(vma)))
return -ENOMEM;
- anon_vma_lock(vma);
+ vma_lock_anon_vma(vma);
/*
* vma->vm_start/vm_end cannot change under us because the caller
@@ -1721,7 +1721,7 @@ int expand_upwards(struct vm_area_struct
if (address < PAGE_ALIGN(address+4))
address = PAGE_ALIGN(address+4);
else {
- anon_vma_unlock(vma);
+ vma_unlock_anon_vma(vma);
return -ENOMEM;
}
error = 0;
@@ -1737,7 +1737,7 @@ int expand_upwards(struct vm_area_struct
if (!error)
vma->vm_end = address;
}
- anon_vma_unlock(vma);
+ vma_unlock_anon_vma(vma);
return error;
}
#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
@@ -1762,7 +1762,7 @@ static int expand_downwards(struct vm_ar
if (error)
return error;
- anon_vma_lock(vma);
+ vma_lock_anon_vma(vma);
/*
* vma->vm_start/vm_end cannot change under us because the caller
@@ -1783,7 +1783,7 @@ static int expand_downwards(struct vm_ar
vma->vm_pgoff -= grow;
}
}
- anon_vma_unlock(vma);
+ vma_unlock_anon_vma(vma);
return error;
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 2/5] change direct call of spin_lock(anon_vma->lock) to inline function
2010-05-26 19:38 [PATCH -v3 0/5] always lock the root anon_vma Rik van Riel
2010-05-26 19:38 ` [PATCH 1/5] rename anon_vma_lock to vma_lock_anon_vma Rik van Riel
@ 2010-05-26 19:39 ` Rik van Riel
2010-05-26 20:33 ` Larry Woodman
` (2 more replies)
2010-05-26 19:40 ` [PATCH 3/5] track the root (oldest) anon_vma Rik van Riel
` (2 subsequent siblings)
4 siblings, 3 replies; 30+ messages in thread
From: Rik van Riel @ 2010-05-26 19:39 UTC (permalink / raw)
To: akpm
Cc: linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli, Minchan Kim,
KAMEZAWA Hiroyuki, Lee Schermerhorn
Subject: change direct call of spin_lock(anon_vma->lock) to inline function
Subsitute a direct call of spin_lock(anon_vma->lock) with
an inline function doing exactly the same.
This makes it easier to do the substitution to the root
anon_vma lock in a following patch.
We will deal with the handful of special locks (nested,
dec_and_lock, etc) separately.
Signed-off-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/rmap.h | 10 ++++++++++
mm/ksm.c | 18 +++++++++---------
mm/migrate.c | 2 +-
mm/mmap.c | 2 +-
mm/rmap.c | 22 +++++++++++-----------
5 files changed, 32 insertions(+), 22 deletions(-)
Index: linux-2.6.34/include/linux/rmap.h
===================================================================
--- linux-2.6.34.orig/include/linux/rmap.h
+++ linux-2.6.34/include/linux/rmap.h
@@ -113,6 +113,16 @@ static inline void vma_unlock_anon_vma(s
spin_unlock(&anon_vma->lock);
}
+static inline void anon_vma_lock(struct anon_vma *anon_vma)
+{
+ spin_lock(&anon_vma->lock);
+}
+
+static inline void anon_vma_unlock(struct anon_vma *anon_vma)
+{
+ spin_unlock(&anon_vma->lock);
+}
+
/*
* anon_vma helper functions.
*/
Index: linux-2.6.34/mm/ksm.c
===================================================================
--- linux-2.6.34.orig/mm/ksm.c
+++ linux-2.6.34/mm/ksm.c
@@ -327,7 +327,7 @@ static void drop_anon_vma(struct rmap_it
if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) {
int empty = list_empty(&anon_vma->head);
- spin_unlock(&anon_vma->lock);
+ anon_vma_unlock(anon_vma);
if (empty)
anon_vma_free(anon_vma);
}
@@ -1566,7 +1566,7 @@ again:
struct anon_vma_chain *vmac;
struct vm_area_struct *vma;
- spin_lock(&anon_vma->lock);
+ anon_vma_lock(anon_vma);
list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
vma = vmac->vma;
if (rmap_item->address < vma->vm_start ||
@@ -1589,7 +1589,7 @@ again:
if (!search_new_forks || !mapcount)
break;
}
- spin_unlock(&anon_vma->lock);
+ anon_vma_unlock(anon_vma);
if (!mapcount)
goto out;
}
@@ -1619,7 +1619,7 @@ again:
struct anon_vma_chain *vmac;
struct vm_area_struct *vma;
- spin_lock(&anon_vma->lock);
+ anon_vma_lock(anon_vma);
list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
vma = vmac->vma;
if (rmap_item->address < vma->vm_start ||
@@ -1637,11 +1637,11 @@ again:
ret = try_to_unmap_one(page, vma,
rmap_item->address, flags);
if (ret != SWAP_AGAIN || !page_mapped(page)) {
- spin_unlock(&anon_vma->lock);
+ anon_vma_unlock(anon_vma);
goto out;
}
}
- spin_unlock(&anon_vma->lock);
+ anon_vma_unlock(anon_vma);
}
if (!search_new_forks++)
goto again;
@@ -1671,7 +1671,7 @@ again:
struct anon_vma_chain *vmac;
struct vm_area_struct *vma;
- spin_lock(&anon_vma->lock);
+ anon_vma_lock(anon_vma);
list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
vma = vmac->vma;
if (rmap_item->address < vma->vm_start ||
@@ -1688,11 +1688,11 @@ again:
ret = rmap_one(page, vma, rmap_item->address, arg);
if (ret != SWAP_AGAIN) {
- spin_unlock(&anon_vma->lock);
+ anon_vma_unlock(anon_vma);
goto out;
}
}
- spin_unlock(&anon_vma->lock);
+ anon_vma_unlock(anon_vma);
}
if (!search_new_forks++)
goto again;
Index: linux-2.6.34/mm/mmap.c
===================================================================
--- linux-2.6.34.orig/mm/mmap.c
+++ linux-2.6.34/mm/mmap.c
@@ -2589,7 +2589,7 @@ static void vm_unlock_anon_vma(struct an
if (!__test_and_clear_bit(0, (unsigned long *)
&anon_vma->head.next))
BUG();
- spin_unlock(&anon_vma->lock);
+ anon_vma_unlock(anon_vma);
}
}
Index: linux-2.6.34/mm/rmap.c
===================================================================
--- linux-2.6.34.orig/mm/rmap.c
+++ linux-2.6.34/mm/rmap.c
@@ -134,7 +134,7 @@ int anon_vma_prepare(struct vm_area_stru
allocated = anon_vma;
}
- spin_lock(&anon_vma->lock);
+ anon_vma_lock(anon_vma);
/* page_table_lock to protect against threads */
spin_lock(&mm->page_table_lock);
if (likely(!vma->anon_vma)) {
@@ -147,7 +147,7 @@ int anon_vma_prepare(struct vm_area_stru
avc = NULL;
}
spin_unlock(&mm->page_table_lock);
- spin_unlock(&anon_vma->lock);
+ anon_vma_unlock(anon_vma);
if (unlikely(allocated))
anon_vma_free(allocated);
@@ -170,9 +170,9 @@ static void anon_vma_chain_link(struct v
avc->anon_vma = anon_vma;
list_add(&avc->same_vma, &vma->anon_vma_chain);
- spin_lock(&anon_vma->lock);
+ anon_vma_lock(anon_vma);
list_add_tail(&avc->same_anon_vma, &anon_vma->head);
- spin_unlock(&anon_vma->lock);
+ anon_vma_unlock(anon_vma);
}
/*
@@ -246,12 +246,12 @@ static void anon_vma_unlink(struct anon_
if (!anon_vma)
return;
- spin_lock(&anon_vma->lock);
+ anon_vma_lock(anon_vma);
list_del(&anon_vma_chain->same_anon_vma);
/* We must garbage collect the anon_vma if it's empty */
empty = list_empty(&anon_vma->head) && !anonvma_external_refcount(anon_vma);
- spin_unlock(&anon_vma->lock);
+ anon_vma_unlock(anon_vma);
if (empty)
anon_vma_free(anon_vma);
@@ -303,10 +303,10 @@ again:
goto out;
anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
- spin_lock(&anon_vma->lock);
+ anon_vma_lock(anon_vma);
if (page_rmapping(page) != anon_vma) {
- spin_unlock(&anon_vma->lock);
+ anon_vma_unlock(anon_vma);
goto again;
}
@@ -318,7 +318,7 @@ out:
void page_unlock_anon_vma(struct anon_vma *anon_vma)
{
- spin_unlock(&anon_vma->lock);
+ anon_vma_unlock(anon_vma);
rcu_read_unlock();
}
@@ -1396,7 +1396,7 @@ static int rmap_walk_anon(struct page *p
anon_vma = page_anon_vma(page);
if (!anon_vma)
return ret;
- spin_lock(&anon_vma->lock);
+ anon_vma_lock(anon_vma);
list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
struct vm_area_struct *vma = avc->vma;
unsigned long address = vma_address(page, vma);
@@ -1406,7 +1406,7 @@ static int rmap_walk_anon(struct page *p
if (ret != SWAP_AGAIN)
break;
}
- spin_unlock(&anon_vma->lock);
+ anon_vma_unlock(anon_vma);
return ret;
}
Index: linux-2.6.34/mm/migrate.c
===================================================================
--- linux-2.6.34.orig/mm/migrate.c
+++ linux-2.6.34/mm/migrate.c
@@ -684,7 +684,7 @@ rcu_unlock:
/* Drop an anon_vma reference if we took one */
if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) {
int empty = list_empty(&anon_vma->head);
- spin_unlock(&anon_vma->lock);
+ anon_vma_unlock(anon_vma);
if (empty)
anon_vma_free(anon_vma);
}
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 3/5] track the root (oldest) anon_vma
2010-05-26 19:38 [PATCH -v3 0/5] always lock the root anon_vma Rik van Riel
2010-05-26 19:38 ` [PATCH 1/5] rename anon_vma_lock to vma_lock_anon_vma Rik van Riel
2010-05-26 19:39 ` [PATCH 2/5] change direct call of spin_lock(anon_vma->lock) to inline function Rik van Riel
@ 2010-05-26 19:40 ` Rik van Riel
2010-05-26 20:34 ` Larry Woodman
2010-05-27 13:48 ` Minchan Kim
2010-05-26 19:40 ` [PATCH 4/5] always lock " Rik van Riel
2010-05-26 19:41 ` [PATCH 5/5] extend KSM refcounts to the anon_vma root Rik van Riel
4 siblings, 2 replies; 30+ messages in thread
From: Rik van Riel @ 2010-05-26 19:40 UTC (permalink / raw)
To: akpm
Cc: linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli, Minchan Kim,
KAMEZAWA Hiroyuki, Lee Schermerhorn
Subject: track the root (oldest) anon_vma
Track the root (oldest) anon_vma in each anon_vma tree. Because we only
take the lock on the root anon_vma, we cannot use the lock on higher-up
anon_vmas to lock anything. This makes it impossible to do an indirect
lookup of the root anon_vma, since the data structures could go away from
under us.
However, a direct pointer is safe because the root anon_vma is always the
last one that gets freed on munmap or exit, by virtue of the same_vma list
order and unlink_anon_vmas walking the list forward.
Signed-off-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
include/linux/rmap.h | 1 +
mm/rmap.c | 18 ++++++++++++++++--
2 files changed, 17 insertions(+), 2 deletions(-)
Index: linux-2.6.34/include/linux/rmap.h
===================================================================
--- linux-2.6.34.orig/include/linux/rmap.h
+++ linux-2.6.34/include/linux/rmap.h
@@ -26,6 +26,7 @@
*/
struct anon_vma {
spinlock_t lock; /* Serialize access to vma list */
+ struct anon_vma *root; /* Root of this anon_vma tree */
#if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION)
/*
Index: linux-2.6.34/mm/rmap.c
===================================================================
--- linux-2.6.34.orig/mm/rmap.c
+++ linux-2.6.34/mm/rmap.c
@@ -132,6 +132,11 @@ int anon_vma_prepare(struct vm_area_stru
if (unlikely(!anon_vma))
goto out_enomem_free_avc;
allocated = anon_vma;
+ /*
+ * This VMA had no anon_vma yet. This anon_vma is
+ * the root of any anon_vma tree that might form.
+ */
+ anon_vma->root = anon_vma;
}
anon_vma_lock(anon_vma);
@@ -224,9 +229,15 @@ int anon_vma_fork(struct vm_area_struct
avc = anon_vma_chain_alloc();
if (!avc)
goto out_error_free_anon_vma;
- anon_vma_chain_link(vma, avc, anon_vma);
+
+ /*
+ * The root anon_vm's spinlock is the lock actually used when we
+ * lock any of the anon_vmas in this anon_vma tree.
+ */
+ anon_vma->root = pvma->anon_vma->root;
/* Mark this anon_vma as the one where our new (COWed) pages go. */
vma->anon_vma = anon_vma;
+ anon_vma_chain_link(vma, avc, anon_vma);
return 0;
@@ -261,7 +272,10 @@ void unlink_anon_vmas(struct vm_area_str
{
struct anon_vma_chain *avc, *next;
- /* Unlink each anon_vma chained to the VMA. */
+ /*
+ * Unlink each anon_vma chained to the VMA. This list is ordered
+ * from newest to oldest, ensuring the root anon_vma gets freed last.
+ */
list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
anon_vma_unlink(avc);
list_del(&avc->same_vma);
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 4/5] always lock the root (oldest) anon_vma
2010-05-26 19:38 [PATCH -v3 0/5] always lock the root anon_vma Rik van Riel
` (2 preceding siblings ...)
2010-05-26 19:40 ` [PATCH 3/5] track the root (oldest) anon_vma Rik van Riel
@ 2010-05-26 19:40 ` Rik van Riel
2010-05-26 20:36 ` Larry Woodman
` (3 more replies)
2010-05-26 19:41 ` [PATCH 5/5] extend KSM refcounts to the anon_vma root Rik van Riel
4 siblings, 4 replies; 30+ messages in thread
From: Rik van Riel @ 2010-05-26 19:40 UTC (permalink / raw)
To: akpm
Cc: linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli, Minchan Kim,
KAMEZAWA Hiroyuki, Lee Schermerhorn
Subject: always lock the root (oldest) anon_vma
Always (and only) lock the root (oldest) anon_vma whenever we do something in an
anon_vma. The recently introduced anon_vma scalability is due to the rmap code
scanning only the VMAs that need to be scanned. Many common operations still
took the anon_vma lock on the root anon_vma, so always taking that lock is not
expected to introduce any scalability issues.
However, always taking the same lock does mean we only need to take one lock,
which means rmap_walk on pages from any anon_vma in the vma is excluded from
occurring during an munmap, expand_stack or other operation that needs to
exclude rmap_walk and similar functions.
Also add the proper locking to vma_adjust.
Signed-off-by: Rik van Riel <riel@redhat.com>
---
v3:
- fix locking inversion in vma_adjust, spotted by Andrea
- mm_take_all locks needs to use the bitflag in the root anon_vma,
since that is the one that gets locked (Andrea Arcangeli)
v2:
- conditionally take the anon_vma lock in vma_adjust, like introduced
in 252c5f94d944487e9f50ece7942b0fbf659c5c31 (with a proper comment)
include/linux/rmap.h | 8 ++++----
mm/ksm.c | 2 +-
mm/migrate.c | 2 +-
mm/mmap.c | 30 ++++++++++++++++++++++--------
4 files changed, 28 insertions(+), 14 deletions(-)
Index: linux-2.6.34/include/linux/rmap.h
===================================================================
--- linux-2.6.34.orig/include/linux/rmap.h
+++ linux-2.6.34/include/linux/rmap.h
@@ -104,24 +104,24 @@ static inline void vma_lock_anon_vma(str
{
struct anon_vma *anon_vma = vma->anon_vma;
if (anon_vma)
- spin_lock(&anon_vma->lock);
+ spin_lock(&anon_vma->root->lock);
}
static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;
if (anon_vma)
- spin_unlock(&anon_vma->lock);
+ spin_unlock(&anon_vma->root->lock);
}
static inline void anon_vma_lock(struct anon_vma *anon_vma)
{
- spin_lock(&anon_vma->lock);
+ spin_lock(&anon_vma->root->lock);
}
static inline void anon_vma_unlock(struct anon_vma *anon_vma)
{
- spin_unlock(&anon_vma->lock);
+ spin_unlock(&anon_vma->root->lock);
}
/*
Index: linux-2.6.34/mm/ksm.c
===================================================================
--- linux-2.6.34.orig/mm/ksm.c
+++ linux-2.6.34/mm/ksm.c
@@ -325,7 +325,7 @@ static void drop_anon_vma(struct rmap_it
{
struct anon_vma *anon_vma = rmap_item->anon_vma;
- if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) {
+ if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
int empty = list_empty(&anon_vma->head);
anon_vma_unlock(anon_vma);
if (empty)
Index: linux-2.6.34/mm/mmap.c
===================================================================
--- linux-2.6.34.orig/mm/mmap.c
+++ linux-2.6.34/mm/mmap.c
@@ -506,6 +506,7 @@ int vma_adjust(struct vm_area_struct *vm
struct vm_area_struct *importer = NULL;
struct address_space *mapping = NULL;
struct prio_tree_root *root = NULL;
+ struct anon_vma *anon_vma = NULL;
struct file *file = vma->vm_file;
long adjust_next = 0;
int remove_next = 0;
@@ -578,6 +579,17 @@ again: remove_next = 1 + (end > next->
}
}
+ /*
+ * When changing only vma->vm_end, we don't really need anon_vma
+ * lock. This is a fairly rare case by itself, but the anon_vma
+ * lock may be shared between many sibling processes. Skipping
+ * the lock for brk adjustments makes a difference sometimes.
+ */
+ if (vma->anon_vma && (insert || importer || start != vma->vm_start)) {
+ anon_vma = vma->anon_vma;
+ anon_vma_lock(anon_vma);
+ }
+
if (root) {
flush_dcache_mmap_lock(mapping);
vma_prio_tree_remove(vma, root);
@@ -617,6 +629,8 @@ again: remove_next = 1 + (end > next->
__insert_vm_struct(mm, insert);
}
+ if (anon_vma)
+ anon_vma_unlock(anon_vma);
if (mapping)
spin_unlock(&mapping->i_mmap_lock);
@@ -2466,23 +2480,23 @@ static DEFINE_MUTEX(mm_all_locks_mutex);
static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
{
- if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+ if (!test_bit(0, (unsigned long *) &anon_vma->root->head.next)) {
/*
* The LSB of head.next can't change from under us
* because we hold the mm_all_locks_mutex.
*/
- spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
+ spin_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem);
/*
* We can safely modify head.next after taking the
- * anon_vma->lock. If some other vma in this mm shares
+ * anon_vma->root->lock. If some other vma in this mm shares
* the same anon_vma we won't take it again.
*
* No need of atomic instructions here, head.next
* can't change from under us thanks to the
- * anon_vma->lock.
+ * anon_vma->root->lock.
*/
if (__test_and_set_bit(0, (unsigned long *)
- &anon_vma->head.next))
+ &anon_vma->root->head.next))
BUG();
}
}
@@ -2573,7 +2587,7 @@ out_unlock:
static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
{
- if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
+ if (test_bit(0, (unsigned long *) &anon_vma->root->head.next)) {
/*
* The LSB of head.next can't change to 0 from under
* us because we hold the mm_all_locks_mutex.
@@ -2584,10 +2598,10 @@ static void vm_unlock_anon_vma(struct an
*
* No need of atomic instructions here, head.next
* can't change from under us until we release the
- * anon_vma->lock.
+ * anon_vma->root->lock.
*/
if (!__test_and_clear_bit(0, (unsigned long *)
- &anon_vma->head.next))
+ &anon_vma->root->head.next))
BUG();
anon_vma_unlock(anon_vma);
}
Index: linux-2.6.34/mm/migrate.c
===================================================================
--- linux-2.6.34.orig/mm/migrate.c
+++ linux-2.6.34/mm/migrate.c
@@ -682,7 +682,7 @@ skip_unmap:
rcu_unlock:
/* Drop an anon_vma reference if we took one */
- if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) {
+ if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
int empty = list_empty(&anon_vma->head);
anon_vma_unlock(anon_vma);
if (empty)
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* [PATCH 5/5] extend KSM refcounts to the anon_vma root
2010-05-26 19:38 [PATCH -v3 0/5] always lock the root anon_vma Rik van Riel
` (3 preceding siblings ...)
2010-05-26 19:40 ` [PATCH 4/5] always lock " Rik van Riel
@ 2010-05-26 19:41 ` Rik van Riel
2010-05-26 20:47 ` Larry Woodman
` (3 more replies)
4 siblings, 4 replies; 30+ messages in thread
From: Rik van Riel @ 2010-05-26 19:41 UTC (permalink / raw)
To: akpm
Cc: linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli, Minchan Kim,
KAMEZAWA Hiroyuki, Lee Schermerhorn
Subject: extend KSM refcounts to the anon_vma root
KSM reference counts can cause an anon_vma to exist after the processe
it belongs to have already exited. Because the anon_vma lock now lives
in the root anon_vma, we need to ensure that the root anon_vma stays
around until after all the "child" anon_vmas have been freed.
The obvious way to do this is to have a "child" anon_vma take a
reference to the root in anon_vma_fork. When the anon_vma is freed
at munmap or process exit, we drop the refcount in anon_vma_unlink
and possibly free the root anon_vma.
The KSM anon_vma reference count function also needs to be modified
to deal with the possibility of freeing 2 levels of anon_vma. The
easiest way to do this is to break out the KSM magic and make it
generic.
When compiling without CONFIG_KSM, this code is compiled out.
Signed-off-by: Rik van Riel <riel@redhat.com>
---
v2:
- merge with -mm and the compaction code
- improve the anon_vma refcount comment in anon_vma_fork with the
refcount lifetime
include/linux/rmap.h | 15 +++++++++++++++
mm/ksm.c | 17 ++++++-----------
mm/migrate.c | 10 +++-------
mm/rmap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 68 insertions(+), 19 deletions(-)
Index: linux-2.6.34/include/linux/rmap.h
===================================================================
--- linux-2.6.34.orig/include/linux/rmap.h
+++ linux-2.6.34/include/linux/rmap.h
@@ -81,6 +81,13 @@ static inline int anonvma_external_refco
{
return atomic_read(&anon_vma->external_refcount);
}
+
+static inline void get_anon_vma(struct anon_vma *anon_vma)
+{
+ atomic_inc(&anon_vma->external_refcount);
+}
+
+void drop_anon_vma(struct anon_vma *);
#else
static inline void anonvma_external_refcount_init(struct anon_vma *anon_vma)
{
@@ -90,6 +97,14 @@ static inline int anonvma_external_refco
{
return 0;
}
+
+static inline void get_anon_vma(struct anon_vma *anon_vma)
+{
+}
+
+static inline void drop_anon_vma(struct anon_vma *anon_vma)
+{
+}
#endif /* CONFIG_KSM */
static inline struct anon_vma *page_anon_vma(struct page *page)
Index: linux-2.6.34/mm/ksm.c
===================================================================
--- linux-2.6.34.orig/mm/ksm.c
+++ linux-2.6.34/mm/ksm.c
@@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_it
struct anon_vma *anon_vma)
{
rmap_item->anon_vma = anon_vma;
- atomic_inc(&anon_vma->external_refcount);
+ get_anon_vma(anon_vma);
}
-static void drop_anon_vma(struct rmap_item *rmap_item)
+static void ksm_drop_anon_vma(struct rmap_item *rmap_item)
{
struct anon_vma *anon_vma = rmap_item->anon_vma;
- if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
- int empty = list_empty(&anon_vma->head);
- anon_vma_unlock(anon_vma);
- if (empty)
- anon_vma_free(anon_vma);
- }
+ drop_anon_vma(anon_vma);
}
/*
@@ -415,7 +410,7 @@ static void break_cow(struct rmap_item *
* It is not an accident that whenever we want to break COW
* to undo, we also need to drop a reference to the anon_vma.
*/
- drop_anon_vma(rmap_item);
+ ksm_drop_anon_vma(rmap_item);
down_read(&mm->mmap_sem);
if (ksm_test_exit(mm))
@@ -470,7 +465,7 @@ static void remove_node_from_stable_tree
ksm_pages_sharing--;
else
ksm_pages_shared--;
- drop_anon_vma(rmap_item);
+ ksm_drop_anon_vma(rmap_item);
rmap_item->address &= PAGE_MASK;
cond_resched();
}
@@ -558,7 +553,7 @@ static void remove_rmap_item_from_tree(s
else
ksm_pages_shared--;
- drop_anon_vma(rmap_item);
+ ksm_drop_anon_vma(rmap_item);
rmap_item->address &= PAGE_MASK;
} else if (rmap_item->address & UNSTABLE_FLAG) {
Index: linux-2.6.34/mm/rmap.c
===================================================================
--- linux-2.6.34.orig/mm/rmap.c
+++ linux-2.6.34/mm/rmap.c
@@ -235,6 +235,12 @@ int anon_vma_fork(struct vm_area_struct
* lock any of the anon_vmas in this anon_vma tree.
*/
anon_vma->root = pvma->anon_vma->root;
+ /*
+ * With KSM refcounts, an anon_vma can stay around longer than the
+ * process it belongs to. The root anon_vma needs to be pinned
+ * until this anon_vma is freed, because the lock lives in the root.
+ */
+ get_anon_vma(anon_vma->root);
/* Mark this anon_vma as the one where our new (COWed) pages go. */
vma->anon_vma = anon_vma;
anon_vma_chain_link(vma, avc, anon_vma);
@@ -264,8 +270,11 @@ static void anon_vma_unlink(struct anon_
empty = list_empty(&anon_vma->head) && !anonvma_external_refcount(anon_vma);
anon_vma_unlock(anon_vma);
- if (empty)
+ if (empty) {
+ /* We no longer need the root anon_vma */
+ drop_anon_vma(anon_vma->root);
anon_vma_free(anon_vma);
+ }
}
void unlink_anon_vmas(struct vm_area_struct *vma)
@@ -1389,6 +1398,40 @@ int try_to_munlock(struct page *page)
return try_to_unmap_file(page, TTU_MUNLOCK);
}
+#if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION)
+/*
+ * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root
+ * if necessary. Be careful to do all the tests under the lock. Once
+ * we know we are the last user, nobody else can get a reference and we
+ * can do the freeing without the lock.
+ */
+void drop_anon_vma(struct anon_vma *anon_vma)
+{
+ if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
+ struct anon_vma *root = anon_vma->root;
+ int empty = list_empty(&anon_vma->head);
+ int last_root_user = 0;
+ int root_empty = 0;
+
+ /*
+ * The refcount on a non-root anon_vma got dropped. Drop
+ * the refcount on the root and check if we need to free it.
+ */
+ if (empty && anon_vma != root) {
+ last_root_user = atomic_dec_and_test(&root->external_refcount);
+ root_empty = list_empty(&root->head);
+ }
+ anon_vma_unlock(anon_vma);
+
+ if (empty) {
+ anon_vma_free(anon_vma);
+ if (root_empty && last_root_user)
+ anon_vma_free(root);
+ }
+ }
+}
+#endif
+
#ifdef CONFIG_MIGRATION
/*
* rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
Index: linux-2.6.34/mm/migrate.c
===================================================================
--- linux-2.6.34.orig/mm/migrate.c
+++ linux-2.6.34/mm/migrate.c
@@ -639,7 +639,7 @@ static int unmap_and_move(new_page_t get
* exist when the page is remapped later
*/
anon_vma = page_anon_vma(page);
- atomic_inc(&anon_vma->external_refcount);
+ get_anon_vma(anon_vma);
}
}
@@ -682,12 +682,8 @@ skip_unmap:
rcu_unlock:
/* Drop an anon_vma reference if we took one */
- if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
- int empty = list_empty(&anon_vma->head);
- anon_vma_unlock(anon_vma);
- if (empty)
- anon_vma_free(anon_vma);
- }
+ if (anon_vma)
+ drop_anon_vma(anon_vma);
if (rcu_locked)
rcu_read_unlock();
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 1/5] rename anon_vma_lock to vma_lock_anon_vma
2010-05-26 19:38 ` [PATCH 1/5] rename anon_vma_lock to vma_lock_anon_vma Rik van Riel
@ 2010-05-26 20:33 ` Larry Woodman
2010-05-27 13:44 ` Minchan Kim
1 sibling, 0 replies; 30+ messages in thread
From: Larry Woodman @ 2010-05-26 20:33 UTC (permalink / raw)
To: Rik van Riel
Cc: akpm, linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli,
Minchan Kim, KAMEZAWA Hiroyuki, Lee Schermerhorn
On Wed, 2010-05-26 at 15:38 -0400, Rik van Riel wrote:
> Subject: rename anon_vma_lock to vma_lock_anon_vma
>
> Rename anon_vma_lock to vma_lock_anon_vma. This matches the
> naming style used in page_lock_anon_vma and will come in really
> handy further down in this patch series.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
> Acked-by: Mel Gorman <mel@csn.ul.ie>
> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested and Acked-by: Larry Woodman <lwoodman@redhat.com>
> ---
> include/linux/rmap.h | 4 ++--
> mm/mmap.c | 14 +++++++-------
> 2 files changed, 9 insertions(+), 9 deletions(-)
>
> Index: linux-2.6.34/include/linux/rmap.h
> ===================================================================
> --- linux-2.6.34.orig/include/linux/rmap.h
> +++ linux-2.6.34/include/linux/rmap.h
> @@ -99,14 +99,14 @@ static inline struct anon_vma *page_anon
> return page_rmapping(page);
> }
>
> -static inline void anon_vma_lock(struct vm_area_struct *vma)
> +static inline void vma_lock_anon_vma(struct vm_area_struct *vma)
> {
> struct anon_vma *anon_vma = vma->anon_vma;
> if (anon_vma)
> spin_lock(&anon_vma->lock);
> }
>
> -static inline void anon_vma_unlock(struct vm_area_struct *vma)
> +static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
> {
> struct anon_vma *anon_vma = vma->anon_vma;
> if (anon_vma)
> Index: linux-2.6.34/mm/mmap.c
> ===================================================================
> --- linux-2.6.34.orig/mm/mmap.c
> +++ linux-2.6.34/mm/mmap.c
> @@ -452,12 +452,12 @@ static void vma_link(struct mm_struct *m
> spin_lock(&mapping->i_mmap_lock);
> vma->vm_truncate_count = mapping->truncate_count;
> }
> - anon_vma_lock(vma);
> + vma_lock_anon_vma(vma);
>
> __vma_link(mm, vma, prev, rb_link, rb_parent);
> __vma_link_file(vma);
>
> - anon_vma_unlock(vma);
> + vma_unlock_anon_vma(vma);
> if (mapping)
> spin_unlock(&mapping->i_mmap_lock);
>
> @@ -1710,7 +1710,7 @@ int expand_upwards(struct vm_area_struct
> */
> if (unlikely(anon_vma_prepare(vma)))
> return -ENOMEM;
> - anon_vma_lock(vma);
> + vma_lock_anon_vma(vma);
>
> /*
> * vma->vm_start/vm_end cannot change under us because the caller
> @@ -1721,7 +1721,7 @@ int expand_upwards(struct vm_area_struct
> if (address < PAGE_ALIGN(address+4))
> address = PAGE_ALIGN(address+4);
> else {
> - anon_vma_unlock(vma);
> + vma_unlock_anon_vma(vma);
> return -ENOMEM;
> }
> error = 0;
> @@ -1737,7 +1737,7 @@ int expand_upwards(struct vm_area_struct
> if (!error)
> vma->vm_end = address;
> }
> - anon_vma_unlock(vma);
> + vma_unlock_anon_vma(vma);
> return error;
> }
> #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
> @@ -1762,7 +1762,7 @@ static int expand_downwards(struct vm_ar
> if (error)
> return error;
>
> - anon_vma_lock(vma);
> + vma_lock_anon_vma(vma);
>
> /*
> * vma->vm_start/vm_end cannot change under us because the caller
> @@ -1783,7 +1783,7 @@ static int expand_downwards(struct vm_ar
> vma->vm_pgoff -= grow;
> }
> }
> - anon_vma_unlock(vma);
> + vma_unlock_anon_vma(vma);
> return error;
> }
>
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 2/5] change direct call of spin_lock(anon_vma->lock) to inline function
2010-05-26 19:39 ` [PATCH 2/5] change direct call of spin_lock(anon_vma->lock) to inline function Rik van Riel
@ 2010-05-26 20:33 ` Larry Woodman
2010-05-27 13:46 ` Minchan Kim
2010-06-01 22:04 ` Andrew Morton
2 siblings, 0 replies; 30+ messages in thread
From: Larry Woodman @ 2010-05-26 20:33 UTC (permalink / raw)
To: Rik van Riel
Cc: akpm, linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli,
Minchan Kim, KAMEZAWA Hiroyuki, Lee Schermerhorn
On Wed, 2010-05-26 at 15:39 -0400, Rik van Riel wrote:
> Subject: change direct call of spin_lock(anon_vma->lock) to inline function
>
> Subsitute a direct call of spin_lock(anon_vma->lock) with
> an inline function doing exactly the same.
>
> This makes it easier to do the substitution to the root
> anon_vma lock in a following patch.
>
> We will deal with the handful of special locks (nested,
> dec_and_lock, etc) separately.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
> Acked-by: Mel Gorman <mel@csn.ul.ie>
> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested and Acked-by: Larry Woodman <lwoodman@redhat.com>
> ---
> include/linux/rmap.h | 10 ++++++++++
> mm/ksm.c | 18 +++++++++---------
> mm/migrate.c | 2 +-
> mm/mmap.c | 2 +-
> mm/rmap.c | 22 +++++++++++-----------
> 5 files changed, 32 insertions(+), 22 deletions(-)
>
> Index: linux-2.6.34/include/linux/rmap.h
> ===================================================================
> --- linux-2.6.34.orig/include/linux/rmap.h
> +++ linux-2.6.34/include/linux/rmap.h
> @@ -113,6 +113,16 @@ static inline void vma_unlock_anon_vma(s
> spin_unlock(&anon_vma->lock);
> }
>
> +static inline void anon_vma_lock(struct anon_vma *anon_vma)
> +{
> + spin_lock(&anon_vma->lock);
> +}
> +
> +static inline void anon_vma_unlock(struct anon_vma *anon_vma)
> +{
> + spin_unlock(&anon_vma->lock);
> +}
> +
> /*
> * anon_vma helper functions.
> */
> Index: linux-2.6.34/mm/ksm.c
> ===================================================================
> --- linux-2.6.34.orig/mm/ksm.c
> +++ linux-2.6.34/mm/ksm.c
> @@ -327,7 +327,7 @@ static void drop_anon_vma(struct rmap_it
>
> if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) {
> int empty = list_empty(&anon_vma->head);
> - spin_unlock(&anon_vma->lock);
> + anon_vma_unlock(anon_vma);
> if (empty)
> anon_vma_free(anon_vma);
> }
> @@ -1566,7 +1566,7 @@ again:
> struct anon_vma_chain *vmac;
> struct vm_area_struct *vma;
>
> - spin_lock(&anon_vma->lock);
> + anon_vma_lock(anon_vma);
> list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
> vma = vmac->vma;
> if (rmap_item->address < vma->vm_start ||
> @@ -1589,7 +1589,7 @@ again:
> if (!search_new_forks || !mapcount)
> break;
> }
> - spin_unlock(&anon_vma->lock);
> + anon_vma_unlock(anon_vma);
> if (!mapcount)
> goto out;
> }
> @@ -1619,7 +1619,7 @@ again:
> struct anon_vma_chain *vmac;
> struct vm_area_struct *vma;
>
> - spin_lock(&anon_vma->lock);
> + anon_vma_lock(anon_vma);
> list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
> vma = vmac->vma;
> if (rmap_item->address < vma->vm_start ||
> @@ -1637,11 +1637,11 @@ again:
> ret = try_to_unmap_one(page, vma,
> rmap_item->address, flags);
> if (ret != SWAP_AGAIN || !page_mapped(page)) {
> - spin_unlock(&anon_vma->lock);
> + anon_vma_unlock(anon_vma);
> goto out;
> }
> }
> - spin_unlock(&anon_vma->lock);
> + anon_vma_unlock(anon_vma);
> }
> if (!search_new_forks++)
> goto again;
> @@ -1671,7 +1671,7 @@ again:
> struct anon_vma_chain *vmac;
> struct vm_area_struct *vma;
>
> - spin_lock(&anon_vma->lock);
> + anon_vma_lock(anon_vma);
> list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
> vma = vmac->vma;
> if (rmap_item->address < vma->vm_start ||
> @@ -1688,11 +1688,11 @@ again:
>
> ret = rmap_one(page, vma, rmap_item->address, arg);
> if (ret != SWAP_AGAIN) {
> - spin_unlock(&anon_vma->lock);
> + anon_vma_unlock(anon_vma);
> goto out;
> }
> }
> - spin_unlock(&anon_vma->lock);
> + anon_vma_unlock(anon_vma);
> }
> if (!search_new_forks++)
> goto again;
> Index: linux-2.6.34/mm/mmap.c
> ===================================================================
> --- linux-2.6.34.orig/mm/mmap.c
> +++ linux-2.6.34/mm/mmap.c
> @@ -2589,7 +2589,7 @@ static void vm_unlock_anon_vma(struct an
> if (!__test_and_clear_bit(0, (unsigned long *)
> &anon_vma->head.next))
> BUG();
> - spin_unlock(&anon_vma->lock);
> + anon_vma_unlock(anon_vma);
> }
> }
>
> Index: linux-2.6.34/mm/rmap.c
> ===================================================================
> --- linux-2.6.34.orig/mm/rmap.c
> +++ linux-2.6.34/mm/rmap.c
> @@ -134,7 +134,7 @@ int anon_vma_prepare(struct vm_area_stru
> allocated = anon_vma;
> }
>
> - spin_lock(&anon_vma->lock);
> + anon_vma_lock(anon_vma);
> /* page_table_lock to protect against threads */
> spin_lock(&mm->page_table_lock);
> if (likely(!vma->anon_vma)) {
> @@ -147,7 +147,7 @@ int anon_vma_prepare(struct vm_area_stru
> avc = NULL;
> }
> spin_unlock(&mm->page_table_lock);
> - spin_unlock(&anon_vma->lock);
> + anon_vma_unlock(anon_vma);
>
> if (unlikely(allocated))
> anon_vma_free(allocated);
> @@ -170,9 +170,9 @@ static void anon_vma_chain_link(struct v
> avc->anon_vma = anon_vma;
> list_add(&avc->same_vma, &vma->anon_vma_chain);
>
> - spin_lock(&anon_vma->lock);
> + anon_vma_lock(anon_vma);
> list_add_tail(&avc->same_anon_vma, &anon_vma->head);
> - spin_unlock(&anon_vma->lock);
> + anon_vma_unlock(anon_vma);
> }
>
> /*
> @@ -246,12 +246,12 @@ static void anon_vma_unlink(struct anon_
> if (!anon_vma)
> return;
>
> - spin_lock(&anon_vma->lock);
> + anon_vma_lock(anon_vma);
> list_del(&anon_vma_chain->same_anon_vma);
>
> /* We must garbage collect the anon_vma if it's empty */
> empty = list_empty(&anon_vma->head) && !anonvma_external_refcount(anon_vma);
> - spin_unlock(&anon_vma->lock);
> + anon_vma_unlock(anon_vma);
>
> if (empty)
> anon_vma_free(anon_vma);
> @@ -303,10 +303,10 @@ again:
> goto out;
>
> anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
> - spin_lock(&anon_vma->lock);
> + anon_vma_lock(anon_vma);
>
> if (page_rmapping(page) != anon_vma) {
> - spin_unlock(&anon_vma->lock);
> + anon_vma_unlock(anon_vma);
> goto again;
> }
>
> @@ -318,7 +318,7 @@ out:
>
> void page_unlock_anon_vma(struct anon_vma *anon_vma)
> {
> - spin_unlock(&anon_vma->lock);
> + anon_vma_unlock(anon_vma);
> rcu_read_unlock();
> }
>
> @@ -1396,7 +1396,7 @@ static int rmap_walk_anon(struct page *p
> anon_vma = page_anon_vma(page);
> if (!anon_vma)
> return ret;
> - spin_lock(&anon_vma->lock);
> + anon_vma_lock(anon_vma);
> list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
> struct vm_area_struct *vma = avc->vma;
> unsigned long address = vma_address(page, vma);
> @@ -1406,7 +1406,7 @@ static int rmap_walk_anon(struct page *p
> if (ret != SWAP_AGAIN)
> break;
> }
> - spin_unlock(&anon_vma->lock);
> + anon_vma_unlock(anon_vma);
> return ret;
> }
>
> Index: linux-2.6.34/mm/migrate.c
> ===================================================================
> --- linux-2.6.34.orig/mm/migrate.c
> +++ linux-2.6.34/mm/migrate.c
> @@ -684,7 +684,7 @@ rcu_unlock:
> /* Drop an anon_vma reference if we took one */
> if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) {
> int empty = list_empty(&anon_vma->head);
> - spin_unlock(&anon_vma->lock);
> + anon_vma_unlock(anon_vma);
> if (empty)
> anon_vma_free(anon_vma);
> }
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 3/5] track the root (oldest) anon_vma
2010-05-26 19:40 ` [PATCH 3/5] track the root (oldest) anon_vma Rik van Riel
@ 2010-05-26 20:34 ` Larry Woodman
2010-05-27 13:48 ` Minchan Kim
1 sibling, 0 replies; 30+ messages in thread
From: Larry Woodman @ 2010-05-26 20:34 UTC (permalink / raw)
To: Rik van Riel
Cc: akpm, linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli,
Minchan Kim, KAMEZAWA Hiroyuki, Lee Schermerhorn
On Wed, 2010-05-26 at 15:40 -0400, Rik van Riel wrote:
> Subject: track the root (oldest) anon_vma
>
> Track the root (oldest) anon_vma in each anon_vma tree. Because we only
> take the lock on the root anon_vma, we cannot use the lock on higher-up
> anon_vmas to lock anything. This makes it impossible to do an indirect
> lookup of the root anon_vma, since the data structures could go away from
> under us.
>
> However, a direct pointer is safe because the root anon_vma is always the
> last one that gets freed on munmap or exit, by virtue of the same_vma list
> order and unlink_anon_vmas walking the list forward.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
> Acked-by: Mel Gorman <mel@csn.ul.ie>
> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Tested and Acked-by: Larry Woodman <lwoodman@redhat.com>
> ---
> include/linux/rmap.h | 1 +
> mm/rmap.c | 18 ++++++++++++++++--
> 2 files changed, 17 insertions(+), 2 deletions(-)
>
> Index: linux-2.6.34/include/linux/rmap.h
> ===================================================================
> --- linux-2.6.34.orig/include/linux/rmap.h
> +++ linux-2.6.34/include/linux/rmap.h
> @@ -26,6 +26,7 @@
> */
> struct anon_vma {
> spinlock_t lock; /* Serialize access to vma list */
> + struct anon_vma *root; /* Root of this anon_vma tree */
> #if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION)
>
> /*
> Index: linux-2.6.34/mm/rmap.c
> ===================================================================
> --- linux-2.6.34.orig/mm/rmap.c
> +++ linux-2.6.34/mm/rmap.c
> @@ -132,6 +132,11 @@ int anon_vma_prepare(struct vm_area_stru
> if (unlikely(!anon_vma))
> goto out_enomem_free_avc;
> allocated = anon_vma;
> + /*
> + * This VMA had no anon_vma yet. This anon_vma is
> + * the root of any anon_vma tree that might form.
> + */
> + anon_vma->root = anon_vma;
> }
>
> anon_vma_lock(anon_vma);
> @@ -224,9 +229,15 @@ int anon_vma_fork(struct vm_area_struct
> avc = anon_vma_chain_alloc();
> if (!avc)
> goto out_error_free_anon_vma;
> - anon_vma_chain_link(vma, avc, anon_vma);
> +
> + /*
> + * The root anon_vm's spinlock is the lock actually used when we
> + * lock any of the anon_vmas in this anon_vma tree.
> + */
> + anon_vma->root = pvma->anon_vma->root;
> /* Mark this anon_vma as the one where our new (COWed) pages go. */
> vma->anon_vma = anon_vma;
> + anon_vma_chain_link(vma, avc, anon_vma);
>
> return 0;
>
> @@ -261,7 +272,10 @@ void unlink_anon_vmas(struct vm_area_str
> {
> struct anon_vma_chain *avc, *next;
>
> - /* Unlink each anon_vma chained to the VMA. */
> + /*
> + * Unlink each anon_vma chained to the VMA. This list is ordered
> + * from newest to oldest, ensuring the root anon_vma gets freed last.
> + */
> list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
> anon_vma_unlink(avc);
> list_del(&avc->same_vma);
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 4/5] always lock the root (oldest) anon_vma
2010-05-26 19:40 ` [PATCH 4/5] always lock " Rik van Riel
@ 2010-05-26 20:36 ` Larry Woodman
2010-05-27 0:57 ` KAMEZAWA Hiroyuki
` (2 subsequent siblings)
3 siblings, 0 replies; 30+ messages in thread
From: Larry Woodman @ 2010-05-26 20:36 UTC (permalink / raw)
To: Rik van Riel
Cc: akpm, linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli,
Minchan Kim, KAMEZAWA Hiroyuki, Lee Schermerhorn
On Wed, 2010-05-26 at 15:40 -0400, Rik van Riel wrote:
> Subject: always lock the root (oldest) anon_vma
>
> Always (and only) lock the root (oldest) anon_vma whenever we do something in an
> anon_vma. The recently introduced anon_vma scalability is due to the rmap code
> scanning only the VMAs that need to be scanned. Many common operations still
> took the anon_vma lock on the root anon_vma, so always taking that lock is not
> expected to introduce any scalability issues.
>
> However, always taking the same lock does mean we only need to take one lock,
> which means rmap_walk on pages from any anon_vma in the vma is excluded from
> occurring during an munmap, expand_stack or other operation that needs to
> exclude rmap_walk and similar functions.
>
> Also add the proper locking to vma_adjust.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
Tested and Acked-by: Larry Woodman <lwoodman@redhat.com>
> ---
> v3:
> - fix locking inversion in vma_adjust, spotted by Andrea
> - mm_take_all locks needs to use the bitflag in the root anon_vma,
> since that is the one that gets locked (Andrea Arcangeli)
> v2:
> - conditionally take the anon_vma lock in vma_adjust, like introduced
> in 252c5f94d944487e9f50ece7942b0fbf659c5c31 (with a proper comment)
>
> include/linux/rmap.h | 8 ++++----
> mm/ksm.c | 2 +-
> mm/migrate.c | 2 +-
> mm/mmap.c | 30 ++++++++++++++++++++++--------
> 4 files changed, 28 insertions(+), 14 deletions(-)
>
> Index: linux-2.6.34/include/linux/rmap.h
> ===================================================================
> --- linux-2.6.34.orig/include/linux/rmap.h
> +++ linux-2.6.34/include/linux/rmap.h
> @@ -104,24 +104,24 @@ static inline void vma_lock_anon_vma(str
> {
> struct anon_vma *anon_vma = vma->anon_vma;
> if (anon_vma)
> - spin_lock(&anon_vma->lock);
> + spin_lock(&anon_vma->root->lock);
> }
>
> static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
> {
> struct anon_vma *anon_vma = vma->anon_vma;
> if (anon_vma)
> - spin_unlock(&anon_vma->lock);
> + spin_unlock(&anon_vma->root->lock);
> }
>
> static inline void anon_vma_lock(struct anon_vma *anon_vma)
> {
> - spin_lock(&anon_vma->lock);
> + spin_lock(&anon_vma->root->lock);
> }
>
> static inline void anon_vma_unlock(struct anon_vma *anon_vma)
> {
> - spin_unlock(&anon_vma->lock);
> + spin_unlock(&anon_vma->root->lock);
> }
>
> /*
> Index: linux-2.6.34/mm/ksm.c
> ===================================================================
> --- linux-2.6.34.orig/mm/ksm.c
> +++ linux-2.6.34/mm/ksm.c
> @@ -325,7 +325,7 @@ static void drop_anon_vma(struct rmap_it
> {
> struct anon_vma *anon_vma = rmap_item->anon_vma;
>
> - if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) {
> + if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
> int empty = list_empty(&anon_vma->head);
> anon_vma_unlock(anon_vma);
> if (empty)
> Index: linux-2.6.34/mm/mmap.c
> ===================================================================
> --- linux-2.6.34.orig/mm/mmap.c
> +++ linux-2.6.34/mm/mmap.c
> @@ -506,6 +506,7 @@ int vma_adjust(struct vm_area_struct *vm
> struct vm_area_struct *importer = NULL;
> struct address_space *mapping = NULL;
> struct prio_tree_root *root = NULL;
> + struct anon_vma *anon_vma = NULL;
> struct file *file = vma->vm_file;
> long adjust_next = 0;
> int remove_next = 0;
> @@ -578,6 +579,17 @@ again: remove_next = 1 + (end > next->
> }
> }
>
> + /*
> + * When changing only vma->vm_end, we don't really need anon_vma
> + * lock. This is a fairly rare case by itself, but the anon_vma
> + * lock may be shared between many sibling processes. Skipping
> + * the lock for brk adjustments makes a difference sometimes.
> + */
> + if (vma->anon_vma && (insert || importer || start != vma->vm_start)) {
> + anon_vma = vma->anon_vma;
> + anon_vma_lock(anon_vma);
> + }
> +
> if (root) {
> flush_dcache_mmap_lock(mapping);
> vma_prio_tree_remove(vma, root);
> @@ -617,6 +629,8 @@ again: remove_next = 1 + (end > next->
> __insert_vm_struct(mm, insert);
> }
>
> + if (anon_vma)
> + anon_vma_unlock(anon_vma);
> if (mapping)
> spin_unlock(&mapping->i_mmap_lock);
>
> @@ -2466,23 +2480,23 @@ static DEFINE_MUTEX(mm_all_locks_mutex);
>
> static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
> {
> - if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
> + if (!test_bit(0, (unsigned long *) &anon_vma->root->head.next)) {
> /*
> * The LSB of head.next can't change from under us
> * because we hold the mm_all_locks_mutex.
> */
> - spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
> + spin_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem);
> /*
> * We can safely modify head.next after taking the
> - * anon_vma->lock. If some other vma in this mm shares
> + * anon_vma->root->lock. If some other vma in this mm shares
> * the same anon_vma we won't take it again.
> *
> * No need of atomic instructions here, head.next
> * can't change from under us thanks to the
> - * anon_vma->lock.
> + * anon_vma->root->lock.
> */
> if (__test_and_set_bit(0, (unsigned long *)
> - &anon_vma->head.next))
> + &anon_vma->root->head.next))
> BUG();
> }
> }
> @@ -2573,7 +2587,7 @@ out_unlock:
>
> static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
> {
> - if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
> + if (test_bit(0, (unsigned long *) &anon_vma->root->head.next)) {
> /*
> * The LSB of head.next can't change to 0 from under
> * us because we hold the mm_all_locks_mutex.
> @@ -2584,10 +2598,10 @@ static void vm_unlock_anon_vma(struct an
> *
> * No need of atomic instructions here, head.next
> * can't change from under us until we release the
> - * anon_vma->lock.
> + * anon_vma->root->lock.
> */
> if (!__test_and_clear_bit(0, (unsigned long *)
> - &anon_vma->head.next))
> + &anon_vma->root->head.next))
> BUG();
> anon_vma_unlock(anon_vma);
> }
> Index: linux-2.6.34/mm/migrate.c
> ===================================================================
> --- linux-2.6.34.orig/mm/migrate.c
> +++ linux-2.6.34/mm/migrate.c
> @@ -682,7 +682,7 @@ skip_unmap:
> rcu_unlock:
>
> /* Drop an anon_vma reference if we took one */
> - if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) {
> + if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
> int empty = list_empty(&anon_vma->head);
> anon_vma_unlock(anon_vma);
> if (empty)
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 5/5] extend KSM refcounts to the anon_vma root
2010-05-26 19:41 ` [PATCH 5/5] extend KSM refcounts to the anon_vma root Rik van Riel
@ 2010-05-26 20:47 ` Larry Woodman
2010-05-27 14:02 ` Minchan Kim
` (2 subsequent siblings)
3 siblings, 0 replies; 30+ messages in thread
From: Larry Woodman @ 2010-05-26 20:47 UTC (permalink / raw)
To: Rik van Riel
Cc: akpm, linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli,
Minchan Kim, KAMEZAWA Hiroyuki, Lee Schermerhorn
On Wed, 2010-05-26 at 15:41 -0400, Rik van Riel wrote:
> Subject: extend KSM refcounts to the anon_vma root
>
> KSM reference counts can cause an anon_vma to exist after the processe
> it belongs to have already exited. Because the anon_vma lock now lives
> in the root anon_vma, we need to ensure that the root anon_vma stays
> around until after all the "child" anon_vmas have been freed.
>
> The obvious way to do this is to have a "child" anon_vma take a
> reference to the root in anon_vma_fork. When the anon_vma is freed
> at munmap or process exit, we drop the refcount in anon_vma_unlink
> and possibly free the root anon_vma.
>
> The KSM anon_vma reference count function also needs to be modified
> to deal with the possibility of freeing 2 levels of anon_vma. The
> easiest way to do this is to break out the KSM magic and make it
> generic.
>
> When compiling without CONFIG_KSM, this code is compiled out.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
Tested and Acked-by: Larry Woodman <lwoodman@redhat.com>
> ---
> v2:
> - merge with -mm and the compaction code
> - improve the anon_vma refcount comment in anon_vma_fork with the
> refcount lifetime
>
> include/linux/rmap.h | 15 +++++++++++++++
> mm/ksm.c | 17 ++++++-----------
> mm/migrate.c | 10 +++-------
> mm/rmap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
> 4 files changed, 68 insertions(+), 19 deletions(-)
>
> Index: linux-2.6.34/include/linux/rmap.h
> ===================================================================
> --- linux-2.6.34.orig/include/linux/rmap.h
> +++ linux-2.6.34/include/linux/rmap.h
> @@ -81,6 +81,13 @@ static inline int anonvma_external_refco
> {
> return atomic_read(&anon_vma->external_refcount);
> }
> +
> +static inline void get_anon_vma(struct anon_vma *anon_vma)
> +{
> + atomic_inc(&anon_vma->external_refcount);
> +}
> +
> +void drop_anon_vma(struct anon_vma *);
> #else
> static inline void anonvma_external_refcount_init(struct anon_vma *anon_vma)
> {
> @@ -90,6 +97,14 @@ static inline int anonvma_external_refco
> {
> return 0;
> }
> +
> +static inline void get_anon_vma(struct anon_vma *anon_vma)
> +{
> +}
> +
> +static inline void drop_anon_vma(struct anon_vma *anon_vma)
> +{
> +}
> #endif /* CONFIG_KSM */
>
> static inline struct anon_vma *page_anon_vma(struct page *page)
> Index: linux-2.6.34/mm/ksm.c
> ===================================================================
> --- linux-2.6.34.orig/mm/ksm.c
> +++ linux-2.6.34/mm/ksm.c
> @@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_it
> struct anon_vma *anon_vma)
> {
> rmap_item->anon_vma = anon_vma;
> - atomic_inc(&anon_vma->external_refcount);
> + get_anon_vma(anon_vma);
> }
>
> -static void drop_anon_vma(struct rmap_item *rmap_item)
> +static void ksm_drop_anon_vma(struct rmap_item *rmap_item)
> {
> struct anon_vma *anon_vma = rmap_item->anon_vma;
>
> - if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
> - int empty = list_empty(&anon_vma->head);
> - anon_vma_unlock(anon_vma);
> - if (empty)
> - anon_vma_free(anon_vma);
> - }
> + drop_anon_vma(anon_vma);
> }
>
> /*
> @@ -415,7 +410,7 @@ static void break_cow(struct rmap_item *
> * It is not an accident that whenever we want to break COW
> * to undo, we also need to drop a reference to the anon_vma.
> */
> - drop_anon_vma(rmap_item);
> + ksm_drop_anon_vma(rmap_item);
>
> down_read(&mm->mmap_sem);
> if (ksm_test_exit(mm))
> @@ -470,7 +465,7 @@ static void remove_node_from_stable_tree
> ksm_pages_sharing--;
> else
> ksm_pages_shared--;
> - drop_anon_vma(rmap_item);
> + ksm_drop_anon_vma(rmap_item);
> rmap_item->address &= PAGE_MASK;
> cond_resched();
> }
> @@ -558,7 +553,7 @@ static void remove_rmap_item_from_tree(s
> else
> ksm_pages_shared--;
>
> - drop_anon_vma(rmap_item);
> + ksm_drop_anon_vma(rmap_item);
> rmap_item->address &= PAGE_MASK;
>
> } else if (rmap_item->address & UNSTABLE_FLAG) {
> Index: linux-2.6.34/mm/rmap.c
> ===================================================================
> --- linux-2.6.34.orig/mm/rmap.c
> +++ linux-2.6.34/mm/rmap.c
> @@ -235,6 +235,12 @@ int anon_vma_fork(struct vm_area_struct
> * lock any of the anon_vmas in this anon_vma tree.
> */
> anon_vma->root = pvma->anon_vma->root;
> + /*
> + * With KSM refcounts, an anon_vma can stay around longer than the
> + * process it belongs to. The root anon_vma needs to be pinned
> + * until this anon_vma is freed, because the lock lives in the root.
> + */
> + get_anon_vma(anon_vma->root);
> /* Mark this anon_vma as the one where our new (COWed) pages go. */
> vma->anon_vma = anon_vma;
> anon_vma_chain_link(vma, avc, anon_vma);
> @@ -264,8 +270,11 @@ static void anon_vma_unlink(struct anon_
> empty = list_empty(&anon_vma->head) && !anonvma_external_refcount(anon_vma);
> anon_vma_unlock(anon_vma);
>
> - if (empty)
> + if (empty) {
> + /* We no longer need the root anon_vma */
> + drop_anon_vma(anon_vma->root);
> anon_vma_free(anon_vma);
> + }
> }
>
> void unlink_anon_vmas(struct vm_area_struct *vma)
> @@ -1389,6 +1398,40 @@ int try_to_munlock(struct page *page)
> return try_to_unmap_file(page, TTU_MUNLOCK);
> }
>
> +#if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION)
> +/*
> + * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root
> + * if necessary. Be careful to do all the tests under the lock. Once
> + * we know we are the last user, nobody else can get a reference and we
> + * can do the freeing without the lock.
> + */
> +void drop_anon_vma(struct anon_vma *anon_vma)
> +{
> + if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
> + struct anon_vma *root = anon_vma->root;
> + int empty = list_empty(&anon_vma->head);
> + int last_root_user = 0;
> + int root_empty = 0;
> +
> + /*
> + * The refcount on a non-root anon_vma got dropped. Drop
> + * the refcount on the root and check if we need to free it.
> + */
> + if (empty && anon_vma != root) {
> + last_root_user = atomic_dec_and_test(&root->external_refcount);
> + root_empty = list_empty(&root->head);
> + }
> + anon_vma_unlock(anon_vma);
> +
> + if (empty) {
> + anon_vma_free(anon_vma);
> + if (root_empty && last_root_user)
> + anon_vma_free(root);
> + }
> + }
> +}
> +#endif
> +
> #ifdef CONFIG_MIGRATION
> /*
> * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
> Index: linux-2.6.34/mm/migrate.c
> ===================================================================
> --- linux-2.6.34.orig/mm/migrate.c
> +++ linux-2.6.34/mm/migrate.c
> @@ -639,7 +639,7 @@ static int unmap_and_move(new_page_t get
> * exist when the page is remapped later
> */
> anon_vma = page_anon_vma(page);
> - atomic_inc(&anon_vma->external_refcount);
> + get_anon_vma(anon_vma);
> }
> }
>
> @@ -682,12 +682,8 @@ skip_unmap:
> rcu_unlock:
>
> /* Drop an anon_vma reference if we took one */
> - if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
> - int empty = list_empty(&anon_vma->head);
> - anon_vma_unlock(anon_vma);
> - if (empty)
> - anon_vma_free(anon_vma);
> - }
> + if (anon_vma)
> + drop_anon_vma(anon_vma);
>
> if (rcu_locked)
> rcu_read_unlock();
>
> --
> To unsubscribe, send a message with 'unsubscribe linux-mm' in
> the body to majordomo@kvack.org. For more info on Linux MM,
> see: http://www.linux-mm.org/ .
> Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 4/5] always lock the root (oldest) anon_vma
2010-05-26 19:40 ` [PATCH 4/5] always lock " Rik van Riel
2010-05-26 20:36 ` Larry Woodman
@ 2010-05-27 0:57 ` KAMEZAWA Hiroyuki
2010-05-27 13:55 ` Minchan Kim
2010-05-27 17:48 ` Mel Gorman
3 siblings, 0 replies; 30+ messages in thread
From: KAMEZAWA Hiroyuki @ 2010-05-27 0:57 UTC (permalink / raw)
To: Rik van Riel
Cc: akpm, linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli,
Minchan Kim, Lee Schermerhorn
On Wed, 26 May 2010 15:40:44 -0400
Rik van Riel <riel@redhat.com> wrote:
> Subject: always lock the root (oldest) anon_vma
>
> Always (and only) lock the root (oldest) anon_vma whenever we do something in an
> anon_vma. The recently introduced anon_vma scalability is due to the rmap code
> scanning only the VMAs that need to be scanned. Many common operations still
> took the anon_vma lock on the root anon_vma, so always taking that lock is not
> expected to introduce any scalability issues.
>
> However, always taking the same lock does mean we only need to take one lock,
> which means rmap_walk on pages from any anon_vma in the vma is excluded from
> occurring during an munmap, expand_stack or other operation that needs to
> exclude rmap_walk and similar functions.
>
> Also add the proper locking to vma_adjust.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 1/5] rename anon_vma_lock to vma_lock_anon_vma
2010-05-26 19:38 ` [PATCH 1/5] rename anon_vma_lock to vma_lock_anon_vma Rik van Riel
2010-05-26 20:33 ` Larry Woodman
@ 2010-05-27 13:44 ` Minchan Kim
1 sibling, 0 replies; 30+ messages in thread
From: Minchan Kim @ 2010-05-27 13:44 UTC (permalink / raw)
To: Rik van Riel
Cc: akpm, linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli,
KAMEZAWA Hiroyuki, Lee Schermerhorn
On Wed, May 26, 2010 at 03:38:53PM -0400, Rik van Riel wrote:
> Subject: rename anon_vma_lock to vma_lock_anon_vma
>
> Rename anon_vma_lock to vma_lock_anon_vma. This matches the
> naming style used in page_lock_anon_vma and will come in really
> handy further down in this patch series.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
> Acked-by: Mel Gorman <mel@csn.ul.ie>
> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
--
Kind regards,
Minchan Kim
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 2/5] change direct call of spin_lock(anon_vma->lock) to inline function
2010-05-26 19:39 ` [PATCH 2/5] change direct call of spin_lock(anon_vma->lock) to inline function Rik van Riel
2010-05-26 20:33 ` Larry Woodman
@ 2010-05-27 13:46 ` Minchan Kim
2010-06-01 22:04 ` Andrew Morton
2 siblings, 0 replies; 30+ messages in thread
From: Minchan Kim @ 2010-05-27 13:46 UTC (permalink / raw)
To: Rik van Riel
Cc: akpm, linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli,
KAMEZAWA Hiroyuki, Lee Schermerhorn
On Wed, May 26, 2010 at 03:39:26PM -0400, Rik van Riel wrote:
> Subject: change direct call of spin_lock(anon_vma->lock) to inline function
>
> Subsitute a direct call of spin_lock(anon_vma->lock) with
> an inline function doing exactly the same.
>
> This makes it easier to do the substitution to the root
> anon_vma lock in a following patch.
>
> We will deal with the handful of special locks (nested,
> dec_and_lock, etc) separately.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
> Acked-by: Mel Gorman <mel@csn.ul.ie>
> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
--
Kind regards,
Minchan Kim
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 3/5] track the root (oldest) anon_vma
2010-05-26 19:40 ` [PATCH 3/5] track the root (oldest) anon_vma Rik van Riel
2010-05-26 20:34 ` Larry Woodman
@ 2010-05-27 13:48 ` Minchan Kim
1 sibling, 0 replies; 30+ messages in thread
From: Minchan Kim @ 2010-05-27 13:48 UTC (permalink / raw)
To: Rik van Riel
Cc: akpm, linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli,
KAMEZAWA Hiroyuki, Lee Schermerhorn
On Wed, May 26, 2010 at 03:40:10PM -0400, Rik van Riel wrote:
> Subject: track the root (oldest) anon_vma
>
> Track the root (oldest) anon_vma in each anon_vma tree. Because we only
> take the lock on the root anon_vma, we cannot use the lock on higher-up
> anon_vmas to lock anything. This makes it impossible to do an indirect
> lookup of the root anon_vma, since the data structures could go away from
> under us.
>
> However, a direct pointer is safe because the root anon_vma is always the
> last one that gets freed on munmap or exit, by virtue of the same_vma list
> order and unlink_anon_vmas walking the list forward.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
> Acked-by: Mel Gorman <mel@csn.ul.ie>
> Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Except below one minor type.
> ---
> include/linux/rmap.h | 1 +
> mm/rmap.c | 18 ++++++++++++++++--
> 2 files changed, 17 insertions(+), 2 deletions(-)
>
> Index: linux-2.6.34/include/linux/rmap.h
> ===================================================================
> --- linux-2.6.34.orig/include/linux/rmap.h
> +++ linux-2.6.34/include/linux/rmap.h
> @@ -26,6 +26,7 @@
> */
> struct anon_vma {
> spinlock_t lock; /* Serialize access to vma list */
> + struct anon_vma *root; /* Root of this anon_vma tree */
> #if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION)
>
> /*
> Index: linux-2.6.34/mm/rmap.c
> ===================================================================
> --- linux-2.6.34.orig/mm/rmap.c
> +++ linux-2.6.34/mm/rmap.c
> @@ -132,6 +132,11 @@ int anon_vma_prepare(struct vm_area_stru
> if (unlikely(!anon_vma))
> goto out_enomem_free_avc;
> allocated = anon_vma;
> + /*
> + * This VMA had no anon_vma yet. This anon_vma is
> + * the root of any anon_vma tree that might form.
> + */
> + anon_vma->root = anon_vma;
> }
>
> anon_vma_lock(anon_vma);
> @@ -224,9 +229,15 @@ int anon_vma_fork(struct vm_area_struct
> avc = anon_vma_chain_alloc();
> if (!avc)
> goto out_error_free_anon_vma;
> - anon_vma_chain_link(vma, avc, anon_vma);
> +
> + /*
> + * The root anon_vm's spinlock is the lock actually used when we
anon_vma's
--
Kind regards,
Minchan Kim
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 4/5] always lock the root (oldest) anon_vma
2010-05-26 19:40 ` [PATCH 4/5] always lock " Rik van Riel
2010-05-26 20:36 ` Larry Woodman
2010-05-27 0:57 ` KAMEZAWA Hiroyuki
@ 2010-05-27 13:55 ` Minchan Kim
2010-05-27 17:48 ` Mel Gorman
3 siblings, 0 replies; 30+ messages in thread
From: Minchan Kim @ 2010-05-27 13:55 UTC (permalink / raw)
To: Rik van Riel
Cc: akpm, linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli,
KAMEZAWA Hiroyuki, Lee Schermerhorn
On Wed, May 26, 2010 at 03:40:44PM -0400, Rik van Riel wrote:
> Subject: always lock the root (oldest) anon_vma
>
> Always (and only) lock the root (oldest) anon_vma whenever we do something in an
> anon_vma. The recently introduced anon_vma scalability is due to the rmap code
> scanning only the VMAs that need to be scanned. Many common operations still
> took the anon_vma lock on the root anon_vma, so always taking that lock is not
> expected to introduce any scalability issues.
>
> However, always taking the same lock does mean we only need to take one lock,
> which means rmap_walk on pages from any anon_vma in the vma is excluded from
> occurring during an munmap, expand_stack or other operation that needs to
> exclude rmap_walk and similar functions.
>
> Also add the proper locking to vma_adjust.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Nitpick:
It would be better to modify comment about head of anon_vma in rmap.h, too.
/*
* NOTE: the LSB of the head.next is set by
-> root->hext.next
--
Kind regards,
Minchan Kim
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 5/5] extend KSM refcounts to the anon_vma root
2010-05-26 19:41 ` [PATCH 5/5] extend KSM refcounts to the anon_vma root Rik van Riel
2010-05-26 20:47 ` Larry Woodman
@ 2010-05-27 14:02 ` Minchan Kim
2010-05-27 14:09 ` Rik van Riel
2010-05-27 14:31 ` Minchan Kim
2010-05-27 17:50 ` Mel Gorman
3 siblings, 1 reply; 30+ messages in thread
From: Minchan Kim @ 2010-05-27 14:02 UTC (permalink / raw)
To: Rik van Riel
Cc: akpm, linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli,
KAMEZAWA Hiroyuki, Lee Schermerhorn
On Wed, May 26, 2010 at 03:41:24PM -0400, Rik van Riel wrote:
> Subject: extend KSM refcounts to the anon_vma root
>
> KSM reference counts can cause an anon_vma to exist after the processe
> it belongs to have already exited. Because the anon_vma lock now lives
> in the root anon_vma, we need to ensure that the root anon_vma stays
> around until after all the "child" anon_vmas have been freed.
>
> The obvious way to do this is to have a "child" anon_vma take a
> reference to the root in anon_vma_fork. When the anon_vma is freed
> at munmap or process exit, we drop the refcount in anon_vma_unlink
> and possibly free the root anon_vma.
>
> The KSM anon_vma reference count function also needs to be modified
> to deal with the possibility of freeing 2 levels of anon_vma. The
> easiest way to do this is to break out the KSM magic and make it
> generic.
>
> When compiling without CONFIG_KSM, this code is compiled out.
Hi, Rik.
Hmm, I can understand this point.
Now, rmap code always depeneds on root anon_vma's lock.
I think it doesn't depends on KSM and MIGRATION.
If we don't use KSM and MIGRATION and it is compiled out,
Can root's anon_vma disappear during rmap walking?
who prevent it?
What am I missing?
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
> ---
> v2:
> - merge with -mm and the compaction code
> - improve the anon_vma refcount comment in anon_vma_fork with the
> refcount lifetime
>
> include/linux/rmap.h | 15 +++++++++++++++
> mm/ksm.c | 17 ++++++-----------
> mm/migrate.c | 10 +++-------
> mm/rmap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++-
> 4 files changed, 68 insertions(+), 19 deletions(-)
>
> Index: linux-2.6.34/include/linux/rmap.h
> ===================================================================
> --- linux-2.6.34.orig/include/linux/rmap.h
> +++ linux-2.6.34/include/linux/rmap.h
> @@ -81,6 +81,13 @@ static inline int anonvma_external_refco
> {
> return atomic_read(&anon_vma->external_refcount);
> }
> +
> +static inline void get_anon_vma(struct anon_vma *anon_vma)
> +{
> + atomic_inc(&anon_vma->external_refcount);
> +}
> +
> +void drop_anon_vma(struct anon_vma *);
> #else
> static inline void anonvma_external_refcount_init(struct anon_vma *anon_vma)
> {
> @@ -90,6 +97,14 @@ static inline int anonvma_external_refco
> {
> return 0;
> }
> +
> +static inline void get_anon_vma(struct anon_vma *anon_vma)
> +{
> +}
> +
> +static inline void drop_anon_vma(struct anon_vma *anon_vma)
> +{
> +}
> #endif /* CONFIG_KSM */
>
> static inline struct anon_vma *page_anon_vma(struct page *page)
> Index: linux-2.6.34/mm/ksm.c
> ===================================================================
> --- linux-2.6.34.orig/mm/ksm.c
> +++ linux-2.6.34/mm/ksm.c
> @@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_it
> struct anon_vma *anon_vma)
> {
> rmap_item->anon_vma = anon_vma;
> - atomic_inc(&anon_vma->external_refcount);
> + get_anon_vma(anon_vma);
> }
>
> -static void drop_anon_vma(struct rmap_item *rmap_item)
> +static void ksm_drop_anon_vma(struct rmap_item *rmap_item)
> {
> struct anon_vma *anon_vma = rmap_item->anon_vma;
>
> - if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
> - int empty = list_empty(&anon_vma->head);
> - anon_vma_unlock(anon_vma);
> - if (empty)
> - anon_vma_free(anon_vma);
> - }
> + drop_anon_vma(anon_vma);
> }
>
> /*
> @@ -415,7 +410,7 @@ static void break_cow(struct rmap_item *
> * It is not an accident that whenever we want to break COW
> * to undo, we also need to drop a reference to the anon_vma.
> */
> - drop_anon_vma(rmap_item);
> + ksm_drop_anon_vma(rmap_item);
>
> down_read(&mm->mmap_sem);
> if (ksm_test_exit(mm))
> @@ -470,7 +465,7 @@ static void remove_node_from_stable_tree
> ksm_pages_sharing--;
> else
> ksm_pages_shared--;
> - drop_anon_vma(rmap_item);
> + ksm_drop_anon_vma(rmap_item);
> rmap_item->address &= PAGE_MASK;
> cond_resched();
> }
> @@ -558,7 +553,7 @@ static void remove_rmap_item_from_tree(s
> else
> ksm_pages_shared--;
>
> - drop_anon_vma(rmap_item);
> + ksm_drop_anon_vma(rmap_item);
> rmap_item->address &= PAGE_MASK;
>
> } else if (rmap_item->address & UNSTABLE_FLAG) {
> Index: linux-2.6.34/mm/rmap.c
> ===================================================================
> --- linux-2.6.34.orig/mm/rmap.c
> +++ linux-2.6.34/mm/rmap.c
> @@ -235,6 +235,12 @@ int anon_vma_fork(struct vm_area_struct
> * lock any of the anon_vmas in this anon_vma tree.
> */
> anon_vma->root = pvma->anon_vma->root;
> + /*
> + * With KSM refcounts, an anon_vma can stay around longer than the
> + * process it belongs to. The root anon_vma needs to be pinned
> + * until this anon_vma is freed, because the lock lives in the root.
> + */
> + get_anon_vma(anon_vma->root);
> /* Mark this anon_vma as the one where our new (COWed) pages go. */
> vma->anon_vma = anon_vma;
> anon_vma_chain_link(vma, avc, anon_vma);
> @@ -264,8 +270,11 @@ static void anon_vma_unlink(struct anon_
> empty = list_empty(&anon_vma->head) && !anonvma_external_refcount(anon_vma);
> anon_vma_unlock(anon_vma);
>
> - if (empty)
> + if (empty) {
> + /* We no longer need the root anon_vma */
> + drop_anon_vma(anon_vma->root);
> anon_vma_free(anon_vma);
> + }
> }
>
> void unlink_anon_vmas(struct vm_area_struct *vma)
> @@ -1389,6 +1398,40 @@ int try_to_munlock(struct page *page)
> return try_to_unmap_file(page, TTU_MUNLOCK);
> }
>
> +#if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION)
> +/*
> + * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root
> + * if necessary. Be careful to do all the tests under the lock. Once
> + * we know we are the last user, nobody else can get a reference and we
> + * can do the freeing without the lock.
> + */
> +void drop_anon_vma(struct anon_vma *anon_vma)
> +{
> + if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
> + struct anon_vma *root = anon_vma->root;
> + int empty = list_empty(&anon_vma->head);
> + int last_root_user = 0;
> + int root_empty = 0;
> +
> + /*
> + * The refcount on a non-root anon_vma got dropped. Drop
> + * the refcount on the root and check if we need to free it.
> + */
> + if (empty && anon_vma != root) {
> + last_root_user = atomic_dec_and_test(&root->external_refcount);
> + root_empty = list_empty(&root->head);
> + }
> + anon_vma_unlock(anon_vma);
> +
> + if (empty) {
> + anon_vma_free(anon_vma);
> + if (root_empty && last_root_user)
> + anon_vma_free(root);
> + }
> + }
> +}
> +#endif
> +
> #ifdef CONFIG_MIGRATION
> /*
> * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
> Index: linux-2.6.34/mm/migrate.c
> ===================================================================
> --- linux-2.6.34.orig/mm/migrate.c
> +++ linux-2.6.34/mm/migrate.c
> @@ -639,7 +639,7 @@ static int unmap_and_move(new_page_t get
> * exist when the page is remapped later
> */
> anon_vma = page_anon_vma(page);
> - atomic_inc(&anon_vma->external_refcount);
> + get_anon_vma(anon_vma);
> }
> }
>
> @@ -682,12 +682,8 @@ skip_unmap:
> rcu_unlock:
>
> /* Drop an anon_vma reference if we took one */
> - if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
> - int empty = list_empty(&anon_vma->head);
> - anon_vma_unlock(anon_vma);
> - if (empty)
> - anon_vma_free(anon_vma);
> - }
> + if (anon_vma)
> + drop_anon_vma(anon_vma);
>
> if (rcu_locked)
> rcu_read_unlock();
--
Kind regards,
Minchan Kim
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 5/5] extend KSM refcounts to the anon_vma root
2010-05-27 14:02 ` Minchan Kim
@ 2010-05-27 14:09 ` Rik van Riel
0 siblings, 0 replies; 30+ messages in thread
From: Rik van Riel @ 2010-05-27 14:09 UTC (permalink / raw)
To: Minchan Kim
Cc: akpm, linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli,
KAMEZAWA Hiroyuki, Lee Schermerhorn
On 05/27/2010 10:02 AM, Minchan Kim wrote:
> Hmm, I can understand this point.
> Now, rmap code always depeneds on root anon_vma's lock.
> I think it doesn't depends on KSM and MIGRATION.
>
> If we don't use KSM and MIGRATION and it is compiled out,
> Can root's anon_vma disappear during rmap walking?
> who prevent it?
>
> What am I missing?
unlink_anon_vmas walks the list in the order from newest
to oldest, ie. the root always gets unlinked (and potentially
freed) last.
--
All rights reversed
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 5/5] extend KSM refcounts to the anon_vma root
2010-05-26 19:41 ` [PATCH 5/5] extend KSM refcounts to the anon_vma root Rik van Riel
2010-05-26 20:47 ` Larry Woodman
2010-05-27 14:02 ` Minchan Kim
@ 2010-05-27 14:31 ` Minchan Kim
2010-05-27 17:50 ` Mel Gorman
3 siblings, 0 replies; 30+ messages in thread
From: Minchan Kim @ 2010-05-27 14:31 UTC (permalink / raw)
To: Rik van Riel
Cc: akpm, linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli,
KAMEZAWA Hiroyuki, Lee Schermerhorn
On Wed, May 26, 2010 at 03:41:24PM -0400, Rik van Riel wrote:
> Subject: extend KSM refcounts to the anon_vma root
>
> KSM reference counts can cause an anon_vma to exist after the processe
> it belongs to have already exited. Because the anon_vma lock now lives
> in the root anon_vma, we need to ensure that the root anon_vma stays
> around until after all the "child" anon_vmas have been freed.
>
> The obvious way to do this is to have a "child" anon_vma take a
> reference to the root in anon_vma_fork. When the anon_vma is freed
> at munmap or process exit, we drop the refcount in anon_vma_unlink
> and possibly free the root anon_vma.
>
> The KSM anon_vma reference count function also needs to be modified
> to deal with the possibility of freeing 2 levels of anon_vma. The
> easiest way to do this is to break out the KSM magic and make it
> generic.
>
> When compiling without CONFIG_KSM, this code is compiled out.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
Reviewed-by: Minchan Kim <minchan.kim@gmail.com>
Now I understand this patch.
Thanks, Rik.
--
Kind regards,
Minchan Kim
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 4/5] always lock the root (oldest) anon_vma
2010-05-26 19:40 ` [PATCH 4/5] always lock " Rik van Riel
` (2 preceding siblings ...)
2010-05-27 13:55 ` Minchan Kim
@ 2010-05-27 17:48 ` Mel Gorman
3 siblings, 0 replies; 30+ messages in thread
From: Mel Gorman @ 2010-05-27 17:48 UTC (permalink / raw)
To: Rik van Riel
Cc: akpm, linux-kernel, linux-mm, Andrea Arcangeli, Minchan Kim,
KAMEZAWA Hiroyuki, Lee Schermerhorn
On Wed, May 26, 2010 at 03:40:44PM -0400, Rik van Riel wrote:
> Subject: always lock the root (oldest) anon_vma
>
> Always (and only) lock the root (oldest) anon_vma whenever we do something in an
> anon_vma. The recently introduced anon_vma scalability is due to the rmap code
> scanning only the VMAs that need to be scanned. Many common operations still
> took the anon_vma lock on the root anon_vma, so always taking that lock is not
> expected to introduce any scalability issues.
>
> However, always taking the same lock does mean we only need to take one lock,
> which means rmap_walk on pages from any anon_vma in the vma is excluded from
> occurring during an munmap, expand_stack or other operation that needs to
> exclude rmap_walk and similar functions.
>
> Also add the proper locking to vma_adjust.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
> ---
> v3:
> - fix locking inversion in vma_adjust, spotted by Andrea
> - mm_take_all locks needs to use the bitflag in the root anon_vma,
> since that is the one that gets locked (Andrea Arcangeli)
> v2:
> - conditionally take the anon_vma lock in vma_adjust, like introduced
> in 252c5f94d944487e9f50ece7942b0fbf659c5c31 (with a proper comment)
>
> include/linux/rmap.h | 8 ++++----
> mm/ksm.c | 2 +-
> mm/migrate.c | 2 +-
> mm/mmap.c | 30 ++++++++++++++++++++++--------
> 4 files changed, 28 insertions(+), 14 deletions(-)
>
> Index: linux-2.6.34/include/linux/rmap.h
> ===================================================================
> --- linux-2.6.34.orig/include/linux/rmap.h
> +++ linux-2.6.34/include/linux/rmap.h
> @@ -104,24 +104,24 @@ static inline void vma_lock_anon_vma(str
> {
> struct anon_vma *anon_vma = vma->anon_vma;
> if (anon_vma)
> - spin_lock(&anon_vma->lock);
> + spin_lock(&anon_vma->root->lock);
> }
>
> static inline void vma_unlock_anon_vma(struct vm_area_struct *vma)
> {
> struct anon_vma *anon_vma = vma->anon_vma;
> if (anon_vma)
> - spin_unlock(&anon_vma->lock);
> + spin_unlock(&anon_vma->root->lock);
> }
>
> static inline void anon_vma_lock(struct anon_vma *anon_vma)
> {
> - spin_lock(&anon_vma->lock);
> + spin_lock(&anon_vma->root->lock);
> }
>
> static inline void anon_vma_unlock(struct anon_vma *anon_vma)
> {
> - spin_unlock(&anon_vma->lock);
> + spin_unlock(&anon_vma->root->lock);
> }
>
> /*
> Index: linux-2.6.34/mm/ksm.c
> ===================================================================
> --- linux-2.6.34.orig/mm/ksm.c
> +++ linux-2.6.34/mm/ksm.c
> @@ -325,7 +325,7 @@ static void drop_anon_vma(struct rmap_it
> {
> struct anon_vma *anon_vma = rmap_item->anon_vma;
>
> - if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) {
> + if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
> int empty = list_empty(&anon_vma->head);
> anon_vma_unlock(anon_vma);
> if (empty)
> Index: linux-2.6.34/mm/mmap.c
> ===================================================================
> --- linux-2.6.34.orig/mm/mmap.c
> +++ linux-2.6.34/mm/mmap.c
> @@ -506,6 +506,7 @@ int vma_adjust(struct vm_area_struct *vm
> struct vm_area_struct *importer = NULL;
> struct address_space *mapping = NULL;
> struct prio_tree_root *root = NULL;
> + struct anon_vma *anon_vma = NULL;
> struct file *file = vma->vm_file;
> long adjust_next = 0;
> int remove_next = 0;
> @@ -578,6 +579,17 @@ again: remove_next = 1 + (end > next->
> }
> }
>
> + /*
> + * When changing only vma->vm_end, we don't really need anon_vma
> + * lock. This is a fairly rare case by itself, but the anon_vma
> + * lock may be shared between many sibling processes. Skipping
> + * the lock for brk adjustments makes a difference sometimes.
> + */
"sometimes" is a bit. We know when it makes a difference - for
brk-intensive workloads e.g. aim7. It's only worth mentioning because
git blame will no longer point to Lee's analysis in commit
252c5f94d944487e9f50ece7942b0fbf659c5c31.
Whether you update it or not;
Acked-by: Mel Gorman <mel@csn.ul.ie>
> + if (vma->anon_vma && (insert || importer || start != vma->vm_start)) {
> + anon_vma = vma->anon_vma;
> + anon_vma_lock(anon_vma);
> + }
> +
> if (root) {
> flush_dcache_mmap_lock(mapping);
> vma_prio_tree_remove(vma, root);
> @@ -617,6 +629,8 @@ again: remove_next = 1 + (end > next->
> __insert_vm_struct(mm, insert);
> }
>
> + if (anon_vma)
> + anon_vma_unlock(anon_vma);
> if (mapping)
> spin_unlock(&mapping->i_mmap_lock);
>
> @@ -2466,23 +2480,23 @@ static DEFINE_MUTEX(mm_all_locks_mutex);
>
> static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
> {
> - if (!test_bit(0, (unsigned long *) &anon_vma->head.next)) {
> + if (!test_bit(0, (unsigned long *) &anon_vma->root->head.next)) {
> /*
> * The LSB of head.next can't change from under us
> * because we hold the mm_all_locks_mutex.
> */
> - spin_lock_nest_lock(&anon_vma->lock, &mm->mmap_sem);
> + spin_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem);
> /*
> * We can safely modify head.next after taking the
> - * anon_vma->lock. If some other vma in this mm shares
> + * anon_vma->root->lock. If some other vma in this mm shares
> * the same anon_vma we won't take it again.
> *
> * No need of atomic instructions here, head.next
> * can't change from under us thanks to the
> - * anon_vma->lock.
> + * anon_vma->root->lock.
> */
> if (__test_and_set_bit(0, (unsigned long *)
> - &anon_vma->head.next))
> + &anon_vma->root->head.next))
> BUG();
> }
> }
> @@ -2573,7 +2587,7 @@ out_unlock:
>
> static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
> {
> - if (test_bit(0, (unsigned long *) &anon_vma->head.next)) {
> + if (test_bit(0, (unsigned long *) &anon_vma->root->head.next)) {
> /*
> * The LSB of head.next can't change to 0 from under
> * us because we hold the mm_all_locks_mutex.
> @@ -2584,10 +2598,10 @@ static void vm_unlock_anon_vma(struct an
> *
> * No need of atomic instructions here, head.next
> * can't change from under us until we release the
> - * anon_vma->lock.
> + * anon_vma->root->lock.
> */
> if (!__test_and_clear_bit(0, (unsigned long *)
> - &anon_vma->head.next))
> + &anon_vma->root->head.next))
> BUG();
> anon_vma_unlock(anon_vma);
> }
> Index: linux-2.6.34/mm/migrate.c
> ===================================================================
> --- linux-2.6.34.orig/mm/migrate.c
> +++ linux-2.6.34/mm/migrate.c
> @@ -682,7 +682,7 @@ skip_unmap:
> rcu_unlock:
>
> /* Drop an anon_vma reference if we took one */
> - if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->lock)) {
> + if (anon_vma && atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) {
> int empty = list_empty(&anon_vma->head);
> anon_vma_unlock(anon_vma);
> if (empty)
>
--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 5/5] extend KSM refcounts to the anon_vma root
2010-05-26 19:41 ` [PATCH 5/5] extend KSM refcounts to the anon_vma root Rik van Riel
` (2 preceding siblings ...)
2010-05-27 14:31 ` Minchan Kim
@ 2010-05-27 17:50 ` Mel Gorman
3 siblings, 0 replies; 30+ messages in thread
From: Mel Gorman @ 2010-05-27 17:50 UTC (permalink / raw)
To: Rik van Riel
Cc: akpm, linux-kernel, linux-mm, Andrea Arcangeli, Minchan Kim,
KAMEZAWA Hiroyuki, Lee Schermerhorn
On Wed, May 26, 2010 at 03:41:24PM -0400, Rik van Riel wrote:
> Subject: extend KSM refcounts to the anon_vma root
>
> KSM reference counts can cause an anon_vma to exist after the processe
> it belongs to have already exited. Because the anon_vma lock now lives
> in the root anon_vma, we need to ensure that the root anon_vma stays
> around until after all the "child" anon_vmas have been freed.
>
> The obvious way to do this is to have a "child" anon_vma take a
> reference to the root in anon_vma_fork. When the anon_vma is freed
> at munmap or process exit, we drop the refcount in anon_vma_unlink
> and possibly free the root anon_vma.
>
> The KSM anon_vma reference count function also needs to be modified
> to deal with the possibility of freeing 2 levels of anon_vma. The
> easiest way to do this is to break out the KSM magic and make it
> generic.
>
> When compiling without CONFIG_KSM, this code is compiled out.
>
> Signed-off-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 2/5] change direct call of spin_lock(anon_vma->lock) to inline function
2010-05-26 19:39 ` [PATCH 2/5] change direct call of spin_lock(anon_vma->lock) to inline function Rik van Riel
2010-05-26 20:33 ` Larry Woodman
2010-05-27 13:46 ` Minchan Kim
@ 2010-06-01 22:04 ` Andrew Morton
2010-06-02 8:27 ` Peter Zijlstra
2 siblings, 1 reply; 30+ messages in thread
From: Andrew Morton @ 2010-06-01 22:04 UTC (permalink / raw)
To: Rik van Riel
Cc: linux-kernel, linux-mm, Mel Gorman, Andrea Arcangeli, Minchan Kim,
KAMEZAWA Hiroyuki, Lee Schermerhorn, Peter Zijlstra
On Wed, 26 May 2010 15:39:26 -0400
Rik van Riel <riel@redhat.com> wrote:
> @@ -303,10 +303,10 @@ again:
> goto out;
>
> anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
> - spin_lock(&anon_vma->lock);
> + anon_vma_lock(anon_vma);
>
> if (page_rmapping(page) != anon_vma) {
> - spin_unlock(&anon_vma->lock);
> + anon_vma_unlock(anon_vma);
> goto again;
> }
>
This bit is dependent upon Peter's
mm-revalidate-anon_vma-in-page_lock_anon_vma.patch (below). I've been
twiddling thumbs for weeks awaiting the updated version of that patch
(hint).
Do we think that this patch series is needed in 2.6.35? If so, why?
And if so I guess we'll need to route around
mm-revalidate-anon_vma-in-page_lock_anon_vma.patch, or just merge it
as-is.
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
There is nothing preventing the anon_vma from being detached while we are
spinning to acquire the lock. Most (all?) current users end up calling
something like vma_address(page, vma) on it, which has a fairly good
chance of weeding out wonky vmas.
However suppose the anon_vma got freed and re-used while we were waiting
to acquire the lock, and the new anon_vma fits with the page->index
(because that is the only thing vma_address() uses to determine if the
page fits in a particular vma, we could end up traversing faulty anon_vma
chains.
Close this hole for good by re-validating that page->mapping still holds
the very same anon_vma pointer after we acquire the lock, if not be
utterly paranoid and retry the whole operation (which will very likely
bail, because it's unlikely the page got attached to a different anon_vma
in the meantime).
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Hugh Dickins <hugh.dickins@tiscali.co.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
mm/rmap.c | 7 +++++++
1 file changed, 7 insertions(+)
diff -puN mm/rmap.c~mm-revalidate-anon_vma-in-page_lock_anon_vma mm/rmap.c
--- a/mm/rmap.c~mm-revalidate-anon_vma-in-page_lock_anon_vma
+++ a/mm/rmap.c
@@ -370,6 +370,7 @@ struct anon_vma *page_lock_anon_vma(stru
unsigned long anon_mapping;
rcu_read_lock();
+again:
anon_mapping = (unsigned long) ACCESS_ONCE(page->mapping);
if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)
goto out;
@@ -378,6 +379,12 @@ struct anon_vma *page_lock_anon_vma(stru
anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
spin_lock(&anon_vma->lock);
+
+ if (page_rmapping(page) != anon_vma) {
+ spin_unlock(&anon_vma->lock);
+ goto again;
+ }
+
return anon_vma;
out:
rcu_read_unlock();
_
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH 2/5] change direct call of spin_lock(anon_vma->lock) to inline function
2010-06-01 22:04 ` Andrew Morton
@ 2010-06-02 8:27 ` Peter Zijlstra
0 siblings, 0 replies; 30+ messages in thread
From: Peter Zijlstra @ 2010-06-02 8:27 UTC (permalink / raw)
To: Andrew Morton
Cc: Rik van Riel, linux-kernel, linux-mm, Mel Gorman,
Andrea Arcangeli, Minchan Kim, KAMEZAWA Hiroyuki,
Lee Schermerhorn
On Tue, 2010-06-01 at 15:04 -0700, Andrew Morton wrote:
> On Wed, 26 May 2010 15:39:26 -0400
> Rik van Riel <riel@redhat.com> wrote:
>
> > @@ -303,10 +303,10 @@ again:
> > goto out;
> >
> > anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
> > - spin_lock(&anon_vma->lock);
> > + anon_vma_lock(anon_vma);
> >
> > if (page_rmapping(page) != anon_vma) {
> > - spin_unlock(&anon_vma->lock);
> > + anon_vma_unlock(anon_vma);
> > goto again;
> > }
> >
>
> This bit is dependent upon Peter's
> mm-revalidate-anon_vma-in-page_lock_anon_vma.patch (below). I've been
> twiddling thumbs for weeks awaiting the updated version of that patch
> (hint).
Yeah, drop it, the updated patch is only a comment trying to explain why
the current code is ok.
> Do we think that this patch series is needed in 2.6.35? If so, why?
> And if so I guess we'll need to route around
> mm-revalidate-anon_vma-in-page_lock_anon_vma.patch, or just merge it
> as-is.
>
I don't actually think that patch of mine is needed, the reject Rik's
patch generates without it is rather trivial to fix up, if you want I
can send you a fixed up version.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 30+ messages in thread
end of thread, other threads:[~2010-06-02 8:27 UTC | newest]
Thread overview: 30+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-26 19:38 [PATCH -v3 0/5] always lock the root anon_vma Rik van Riel
2010-05-26 19:38 ` [PATCH 1/5] rename anon_vma_lock to vma_lock_anon_vma Rik van Riel
2010-05-26 20:33 ` Larry Woodman
2010-05-27 13:44 ` Minchan Kim
2010-05-26 19:39 ` [PATCH 2/5] change direct call of spin_lock(anon_vma->lock) to inline function Rik van Riel
2010-05-26 20:33 ` Larry Woodman
2010-05-27 13:46 ` Minchan Kim
2010-06-01 22:04 ` Andrew Morton
2010-06-02 8:27 ` Peter Zijlstra
2010-05-26 19:40 ` [PATCH 3/5] track the root (oldest) anon_vma Rik van Riel
2010-05-26 20:34 ` Larry Woodman
2010-05-27 13:48 ` Minchan Kim
2010-05-26 19:40 ` [PATCH 4/5] always lock " Rik van Riel
2010-05-26 20:36 ` Larry Woodman
2010-05-27 0:57 ` KAMEZAWA Hiroyuki
2010-05-27 13:55 ` Minchan Kim
2010-05-27 17:48 ` Mel Gorman
2010-05-26 19:41 ` [PATCH 5/5] extend KSM refcounts to the anon_vma root Rik van Riel
2010-05-26 20:47 ` Larry Woodman
2010-05-27 14:02 ` Minchan Kim
2010-05-27 14:09 ` Rik van Riel
2010-05-27 14:31 ` Minchan Kim
2010-05-27 17:50 ` Mel Gorman
-- strict thread matches above, loose matches on Subject: below --
2010-05-12 17:38 [PATCH 0/5] always lock the root anon_vma Rik van Riel
2010-05-12 17:40 ` [PATCH 4/5] always lock the root (oldest) anon_vma Rik van Riel
2010-05-12 21:02 ` Mel Gorman
2010-05-12 21:08 ` Rik van Riel
2010-05-13 9:54 ` Mel Gorman
2010-05-13 14:33 ` [PATCH -v2 " Rik van Riel
2010-05-13 21:09 ` Andrew Morton
2010-05-26 4:00 ` Rik van Riel
2010-05-26 15:24 ` [PATCH -v2 0/5] always lock the root anon_vma Rik van Riel
2010-05-26 15:27 ` [PATCH 5/5] extend KSM refcounts to the anon_vma root Rik van Riel
2010-05-12 17:41 ` Rik van Riel
2010-05-12 21:07 ` Mel Gorman
2010-05-12 21:09 ` Rik van Riel
2010-05-13 11:26 ` Mel Gorman
2010-05-13 13:11 ` Rik van Riel
2010-05-13 13:24 ` Mel Gorman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).