* [RFC PATCH] mm: thp: make swap configurable
@ 2011-10-10 13:58 Bob Liu
2011-10-10 14:18 ` Andrea Arcangeli
2011-10-12 9:59 ` Johannes Weiner
0 siblings, 2 replies; 6+ messages in thread
From: Bob Liu @ 2011-10-10 13:58 UTC (permalink / raw)
To: aarcange; +Cc: linux-mm, akpm, hannes, riel, Bob Liu
Currently THP pages are swapped by default and the user has no control over it.
But some applications are swap sensitive, so this patch adds a boot parameter
and a sysfs file to make it configurable.
Signed-off-by: Bob Liu <lliubbo@gmail.com>
---
Documentation/vm/transhuge.txt | 9 +++++++++
include/linux/huge_mm.h | 5 +++++
mm/huge_memory.c | 26 ++++++++++++++++++++++++++
mm/swap_state.c | 10 ++++++----
4 files changed, 46 insertions(+), 4 deletions(-)
diff --git a/Documentation/vm/transhuge.txt b/Documentation/vm/transhuge.txt
index 29bdf62..1c7d8e9 100644
--- a/Documentation/vm/transhuge.txt
+++ b/Documentation/vm/transhuge.txt
@@ -116,6 +116,12 @@ echo always >/sys/kernel/mm/transparent_hugepage/defrag
echo madvise >/sys/kernel/mm/transparent_hugepage/defrag
echo never >/sys/kernel/mm/transparent_hugepage/defrag
+Swap for Transparent Hugepage is enabled by default; you can disable it
+with:
+echo 1 > /sys/kernel/mm/transparent_hugepage/disable_swap
+and reenable by:
+echo 0 > /sys/kernel/mm/transparent_hugepage/disable_swap
+
khugepaged will be automatically started when
transparent_hugepage/enabled is set to "always" or "madvise, and it'll
be automatically shutdown if it's set to "never".
@@ -159,6 +165,9 @@ Support by passing the parameter "transparent_hugepage=always" or
"transparent_hugepage=madvise" or "transparent_hugepage=never"
(without "") to the kernel command line.
+You can disable swap for Transparent Hugepage by passing the parameter
+"disable_transparent_hugepage_swap" (without "") on the kernel command line.
+
== Need of application restart ==
The transparent_hugepage/enabled values only affect future
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 48c32eb..229ef7b 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -31,6 +31,7 @@ enum transparent_hugepage_flag {
TRANSPARENT_HUGEPAGE_DEFRAG_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
+ TRANSPARENT_HUGEPAGE_SWAP_DISABLE_FLAG,
#ifdef CONFIG_DEBUG_VM
TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG,
#endif
@@ -65,6 +66,9 @@ extern pmd_t *page_check_address_pmd(struct page *page,
(transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG) && \
(__vma)->vm_flags & VM_HUGEPAGE))
+#define transparent_hugepage_swap_disable() \
+ (transparent_hugepage_flags & \
+ (1<<TRANSPARENT_HUGEPAGE_SWAP_DISABLE_FLAG))
#ifdef CONFIG_DEBUG_VM
#define transparent_hugepage_debug_cow() \
(transparent_hugepage_flags & \
@@ -148,6 +152,7 @@ static inline struct page *compound_trans_head(struct page *page)
#define hpage_nr_pages(x) 1
#define transparent_hugepage_enabled(__vma) 0
+#define transparent_hugepage_swap_disable() 0
#define transparent_hugepage_flags 0UL
static inline int split_huge_page(struct page *page)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e2d1587..31aba4b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -293,6 +293,22 @@ static ssize_t defrag_store(struct kobject *kobj,
static struct kobj_attribute defrag_attr =
__ATTR(defrag, 0644, defrag_show, defrag_store);
+static ssize_t disable_swap_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return single_flag_show(kobj, attr, buf,
+ TRANSPARENT_HUGEPAGE_SWAP_DISABLE_FLAG);
+}
+static ssize_t disable_swap_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ return single_flag_store(kobj, attr, buf, count,
+ TRANSPARENT_HUGEPAGE_SWAP_DISABLE_FLAG);
+}
+static struct kobj_attribute swap_attr =
+ __ATTR(disable_swap, 0644, disable_swap_show, disable_swap_store);
+
#ifdef CONFIG_DEBUG_VM
static ssize_t debug_cow_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
@@ -314,6 +330,7 @@ static struct kobj_attribute debug_cow_attr =
static struct attribute *hugepage_attr[] = {
&enabled_attr.attr,
&defrag_attr.attr,
+ &swap_attr.attr,
#ifdef CONFIG_DEBUG_VM
&debug_cow_attr.attr,
#endif
@@ -1408,6 +1425,15 @@ out:
return ret;
}
+static __init int disable_transparent_hugepage_swap(char *str)
+{
+ set_bit(TRANSPARENT_HUGEPAGE_SWAP_DISABLE_FLAG, &transparent_hugepage_flags);
+ printk(KERN_INFO "disable swap for transparent hugepage.\n");
+
+ return 0;
+}
+early_param("disable_transparent_hugepage_swap", disable_transparent_hugepage_swap);
+
#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \
VM_HUGETLB|VM_SHARED|VM_MAYSHARE)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 4668046..3dfc4be 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -18,6 +18,7 @@
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/migrate.h>
+#include <linux/mm_inline.h>
#include <linux/page_cgroup.h>
#include <asm/pgtable.h>
@@ -155,10 +156,11 @@ int add_to_swap(struct page *page)
return 0;
if (unlikely(PageTransHuge(page)))
- if (unlikely(split_huge_page(page))) {
- swapcache_free(entry, NULL);
- return 0;
- }
+ if(!transparent_hugepage_swap_disable())
+ if (unlikely(split_huge_page(page))) {
+ swapcache_free(entry, NULL);
+ return 0;
+ }
/*
* Radix-tree node allocations from PF_MEMALLOC contexts could
--
1.5.6.3
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [RFC PATCH] mm: thp: make swap configurable
2011-10-10 13:58 [RFC PATCH] mm: thp: make swap configurable Bob Liu
@ 2011-10-10 14:18 ` Andrea Arcangeli
2011-10-11 9:24 ` Bob Liu
2011-10-12 9:59 ` Johannes Weiner
1 sibling, 1 reply; 6+ messages in thread
From: Andrea Arcangeli @ 2011-10-10 14:18 UTC (permalink / raw)
To: Bob Liu; +Cc: linux-mm, akpm, hannes, riel
Hi Bob,
On Mon, Oct 10, 2011 at 09:58:06PM +0800, Bob Liu wrote:
> Currently THP do swap by default, user has no control of it.
> But some applications are swap sensitive, this patch add a boot param
> and sys file to make it configurable.
Why don't you use mlock or swapoff -a? I doubt we want to handle THP
pages differently from regular pages with regard to swap or anything
else, the value is to behave as close as possible to regular
pages. What you want you can already achieve by other means I think.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC PATCH] mm: thp: make swap configurable
2011-10-10 14:18 ` Andrea Arcangeli
@ 2011-10-11 9:24 ` Bob Liu
2011-10-11 13:40 ` Rik van Riel
2011-10-11 21:01 ` Andrea Arcangeli
0 siblings, 2 replies; 6+ messages in thread
From: Bob Liu @ 2011-10-11 9:24 UTC (permalink / raw)
To: Andrea Arcangeli; +Cc: linux-mm, akpm, hannes, riel
Hi Andrea
On Mon, Oct 10, 2011 at 10:18 PM, Andrea Arcangeli <aarcange@redhat.com> wrote:
> Hi Bob,
>
> On Mon, Oct 10, 2011 at 09:58:06PM +0800, Bob Liu wrote:
>> Currently THP do swap by default, user has no control of it.
>> But some applications are swap sensitive, this patch add a boot param
>> and sys file to make it configurable.
>
> Why don't you use mlock or swapoff -a? I doubt we want to handle THP
> pages differently from regular pages with regard to swap or anything
> else, the value is to behave as close as possible to regular
> pages. What you want you can already achieve by other means I think.
>
Thanks for your reply.
Yes, mlock() can do it, but it would require a lot of changes in every
user application.
If some of the applications are huge and complicated (or not even open source), it's
hard to modify them.
Adding this patch makes things simple and THP more flexible.
As for swapoff -a, it would also disable swap for normal 4k pages.
A simple use case is like this:
a lot of swap-sensitive apps run on a machine and use THP, so we
need to disable swap for them.
But these apps are huge and complicated, so it's hard to modify them to use mlock().
In addition, there are also some normal, not swap-sensitive apps
that don't use THP running on
the same machine; we can still reclaim their memory by swapping when
memory is low.
--
Thanks,
--Bob
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC PATCH] mm: thp: make swap configurable
2011-10-11 9:24 ` Bob Liu
@ 2011-10-11 13:40 ` Rik van Riel
2011-10-11 21:01 ` Andrea Arcangeli
1 sibling, 0 replies; 6+ messages in thread
From: Rik van Riel @ 2011-10-11 13:40 UTC (permalink / raw)
To: Bob Liu; +Cc: Andrea Arcangeli, linux-mm, akpm, hannes
On 10/11/2011 05:24 AM, Bob Liu wrote:
> Yes, mlock() can do it but it will require a lot of changes in every
> user application.
> If some of the applications are hugh and complicated(even not opensource), it's
> hard to modify them.
> Add this patch can make things simple and thp more flexible.
>
> For using swapoff -a, it will disable swap for 4k normal pages.
>
> A simple use case is like this:
> a lot of swap sensitive apps run on a machine, it will use thp so we
> need to disable swap.
> But this apps are hugh and complicated, it's hard to modify them by mlock().
>
> In addition, there are also some normal and not swap sensitive apps
> which don't use thp run on
> the same machine, we can still reclaim their memory by swap when lack
> of memory.
The normal applications could end up getting transparent
huge pages automatically, which would also disable swap
for them.
At that point, you could run out of memory, because you
disabled swap for 2MB pages.
How do you plan to avoid that?
--
All rights reversed
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC PATCH] mm: thp: make swap configurable
2011-10-11 9:24 ` Bob Liu
2011-10-11 13:40 ` Rik van Riel
@ 2011-10-11 21:01 ` Andrea Arcangeli
1 sibling, 0 replies; 6+ messages in thread
From: Andrea Arcangeli @ 2011-10-11 21:01 UTC (permalink / raw)
To: Bob Liu; +Cc: linux-mm, akpm, hannes, riel
Hi Bob,
On Tue, Oct 11, 2011 at 05:24:26PM +0800, Bob Liu wrote:
> Thanks for your reply.
>
> Yes, mlock() can do it but it will require a lot of changes in every
> user application.
> If some of the applications are hugh and complicated(even not opensource), it's
> hard to modify them.
> Add this patch can make things simple and thp more flexible.
>
> For using swapoff -a, it will disable swap for 4k normal pages.
>
> A simple use case is like this:
> a lot of swap sensitive apps run on a machine, it will use thp so we
> need to disable swap.
> But this apps are hugh and complicated, it's hard to modify them by mlock().
>
> In addition, there are also some normal and not swap sensitive apps
> which don't use thp run on
> the same machine, we can still reclaim their memory by swap when lack
> of memory.
I'm not convinced. If you need to disable swap selectively to certain
apps but you can't modify them I'd suggest to add a
mlock-equal-privileged prctl(PR_SWAP_ENABLE/DISABLE) that applies to
all anonymous memory and tmpfs. Probably not to filebacked memory in
case MAP_SHARED is used for all I/O. This seems too limited, it may
happen to work well for a specific application but it's not generic
enough. Another user could have a binary application with a ton of
tmpfs shared memory that he can't modify (MAP_SHARED on /dev/zero for
example) and he wants to mlock it but he can't. Or maybe another user
has an application with <2M anonymous memory scattered in the middle
of MAP_SHARED segments (so that can't be mapped by THP because of
strict hardware limits) and he wants it to remain locked in ram too
and not be swapped out for that specific app. So I prefer a solution
that treats all anonymous memory and tmpfs memory equal (the only two
entities in the kernel that will be paged out to swap). Or at the very
least all anonymous memory equal... so it remains transparent as much
as possible :).
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC PATCH] mm: thp: make swap configurable
2011-10-10 13:58 [RFC PATCH] mm: thp: make swap configurable Bob Liu
2011-10-10 14:18 ` Andrea Arcangeli
@ 2011-10-12 9:59 ` Johannes Weiner
1 sibling, 0 replies; 6+ messages in thread
From: Johannes Weiner @ 2011-10-12 9:59 UTC (permalink / raw)
To: Bob Liu; +Cc: aarcange, linux-mm, akpm, hannes, riel
On Mon, Oct 10, 2011 at 09:58:06PM +0800, Bob Liu wrote:
> Currently THP do swap by default, user has no control of it.
> But some applications are swap sensitive, this patch add a boot param
> and sys file to make it configurable.
What's special about THP compared to regular-sized anon pages?
> @@ -155,10 +156,11 @@ int add_to_swap(struct page *page)
> return 0;
>
> if (unlikely(PageTransHuge(page)))
> - if (unlikely(split_huge_page(page))) {
> - swapcache_free(entry, NULL);
> - return 0;
> - }
> + if(!transparent_hugepage_swap_disable())
> + if (unlikely(split_huge_page(page))) {
> + swapcache_free(entry, NULL);
> + return 0;
> + }
>
> /*
> * Radix-tree node allocations from PF_MEMALLOC contexts could
That will just prevent the splitting and then add the huge page to the
swap cache, for which it is not prepared.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2011-10-12 10:00 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-10-10 13:58 [RFC PATCH] mm: thp: make swap configurable Bob Liu
2011-10-10 14:18 ` Andrea Arcangeli
2011-10-11 9:24 ` Bob Liu
2011-10-11 13:40 ` Rik van Riel
2011-10-11 21:01 ` Andrea Arcangeli
2011-10-12 9:59 ` Johannes Weiner
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).