From: Lin Feng <linfeng@cn.fujitsu.com>
To: akpm@linux-foundation.org, mgorman@suse.de, bcrl@kvack.org,
viro@zeniv.linux.org.uk
Cc: khlebnikov@openvz.org, walken@google.com,
kamezawa.hiroyu@jp.fujitsu.com, minchan@kernel.org,
riel@redhat.com, rientjes@google.com,
isimatu.yasuaki@jp.fujitsu.com, wency@cn.fujitsu.com,
laijs@cn.fujitsu.com, jiang.liu@huawei.com, zab@redhat.com,
jmoyer@redhat.com, linux-mm@kvack.org, linux-aio@kvack.org,
linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
Lin Feng <linfeng@cn.fujitsu.com>
Subject: [PATCH V2 1/2] mm: hotplug: implement non-movable version of get_user_pages() called get_user_pages_non_movable()
Date: Tue, 5 Feb 2013 17:21:52 +0800 [thread overview]
Message-ID: <1360056113-14294-2-git-send-email-linfeng@cn.fujitsu.com> (raw)
In-Reply-To: <1360056113-14294-1-git-send-email-linfeng@cn.fujitsu.com>
get_user_pages() always tries to allocate pages from the movable zone, which is
not reliable for the memory hotremove framework in some cases.
This patch introduces a new library function called get_user_pages_non_movable()
that pins pages only from non-movable zones.
It is a wrapper around get_user_pages() that makes sure all pages come from
non-movable zones, using additional page migration where necessary.
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mel Gorman <mgorman@suse.de>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Zach Brown <zab@redhat.com>
Reviewed-by: Tang Chen <tangchen@cn.fujitsu.com>
Reviewed-by: Gu Zheng <guz.fnst@cn.fujitsu.com>
Signed-off-by: Lin Feng <linfeng@cn.fujitsu.com>
---
include/linux/mm.h | 3 ++
include/linux/mmzone.h | 4 ++
mm/memory.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++
mm/page_isolation.c | 5 +++
4 files changed, 95 insertions(+), 0 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 12f5a09..3ff9eba 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1049,6 +1049,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
struct page **pages, struct vm_area_struct **vmas);
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages);
+int get_user_pages_non_movable(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, int nr_pages, int write, int force,
+ struct page **pages, struct vm_area_struct **vmas);
struct kvec;
int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
struct page **pages);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e25ab6f..1506351 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -841,6 +841,10 @@ static inline int is_normal_idx(enum zone_type idx)
return (idx == ZONE_NORMAL);
}
+/*
+ * Tell whether @zone is the movable zone: evaluates to 1 when
+ * zone_idx(@zone) is ZONE_MOVABLE and to 0 for every other zone.
+ */
+static inline int zone_is_movable(struct zone *zone)
+{
+	if (zone_idx(zone) != ZONE_MOVABLE)
+		return 0;
+	return 1;
+}
/**
* is_highmem - helper function to quickly check if a struct zone is a
* highmem zone or not. This is an attempt to keep references
diff --git a/mm/memory.c b/mm/memory.c
index bb1369f..ede53cc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -58,6 +58,8 @@
#include <linux/elf.h>
#include <linux/gfp.h>
#include <linux/migrate.h>
+#include <linux/page-isolation.h>
+#include <linux/mm_inline.h>
#include <linux/string.h>
#include <asm/io.h>
@@ -1995,6 +1997,87 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
}
EXPORT_SYMBOL(get_user_pages);
+#ifdef CONFIG_MEMORY_HOTREMOVE
+/**
+ * get_user_pages_non_movable() - get_user_pages() variant that only leaves
+ * pages pinned when none of them is in the movable zone.
+ *
+ * All arguments are passed unchanged to get_user_pages(). The function is
+ * designed for the memory hotremove framework: some users of get_user_pages()
+ * pin user pages for a long time (aio ring pages are the case found so far),
+ * and such a pin on a page in the movable zone is fatal for hotremove.
+ *
+ * The function first calls get_user_pages() to get the candidate pages and
+ * then checks the zone of every returned page. If none is in the movable
+ * zone, the pins are kept and the count is returned. Otherwise the movable
+ * pages are isolated onto a private list, every pin taken by get_user_pages()
+ * is dropped, the isolated pages are handed to migrate_pages() with
+ * alloc_migrate_target() as the allocation callback, and on success the whole
+ * sequence is run again from the start.
+ *
+ * NOTE(review): the original comment stated "it will at most retry once", but
+ * the code jumps back to the retry label whenever migrate_pages() returns 0
+ * and keeps no retry counter - confirm whether a bound is intended.
+ *
+ * Return: the result of get_user_pages() if it is <= 0 or if no returned page
+ * is in the movable zone; 0 if migrate_prep(), isolate_lru_page() or
+ * migrate_pages() fails, in which case no page is left pinned.
+ */
+int get_user_pages_non_movable(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, int nr_pages, int write, int force,
+ struct page **pages, struct vm_area_struct **vmas)
+{
+ int ret, i, isolate_err, migrate_pre_flag;
+ LIST_HEAD(pagelist); /* movable pages isolated during the current pass */
+
+retry:
+ ret = get_user_pages(tsk, mm, start, nr_pages, write, force, pages,
+ vmas);
+ if (ret <= 0) /* error or nothing pinned: hand the result straight back */
+ return ret;
+
+ isolate_err = 0; /* set when isolate_lru_page() fails in this pass */
+ migrate_pre_flag = 0; /* set once migrate_prep() has succeeded in this pass */
+
+ for (i = 0; i < ret; i++) {
+ if (zone_is_movable(page_zone(pages[i]))) {
+ if (!migrate_pre_flag) { /* call migrate_prep() only once per pass */
+ if (migrate_prep())
+ goto release_page;
+ migrate_pre_flag = 1;
+ }
+
+ if (!isolate_lru_page(pages[i])) { /* 0 is treated as "page isolated" */
+ inc_zone_page_state(pages[i], NR_ISOLATED_ANON +
+ page_is_file_cache(pages[i])); /* counter index offset by page_is_file_cache() */
+ list_add_tail(&pages[i]->lru, &pagelist);
+ } else {
+ isolate_err = 1;
+ goto release_page;
+ }
+ }
+ }
+
+ /* No page was in the movable zone: keep the pins and report the count */
+ if (i == ret && list_empty(&pagelist))
+ return ret;
+
+release_page:
+ /* Undo the effects of the preceding get_user_pages(); nothing stays pinned */
+ release_pages(pages, ret, 1);
+
+ if (migrate_pre_flag && !isolate_err) { /* skipped if prep or any isolation failed */
+ ret = migrate_pages(&pagelist, alloc_migrate_target, 1,
+ false, MIGRATE_SYNC, MR_SYSCALL);
+ /* migrate_pages() returned 0: start over and pin the pages again */
+ if (!ret)
+ goto retry;
+ }
+
+ putback_lru_pages(&pagelist); /* hand back whatever is still on pagelist */
+ /* Preparation, isolation or migration failed: nothing is pinned, report 0 */
+ return 0;
+}
+#else /* !CONFIG_MEMORY_HOTREMOVE: plain pass-through to get_user_pages() */
+inline int get_user_pages_non_movable(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, int nr_pages, int write, int force,
+ struct page **pages, struct vm_area_struct **vmas)
+{
+ return get_user_pages(tsk, mm, start, nr_pages, write, force, pages,
+ vmas);
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
+EXPORT_SYMBOL(get_user_pages_non_movable);
+
/**
* get_dump_page() - pin user page in memory while writing it to core dump
* @addr: user address
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 383bdbb..1b7bd17 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -247,6 +247,9 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn,
return ret ? 0 : -EBUSY;
}
+/**
+ * @private: 0 allows the new page to be allocated from the movable zone; any nonzero value clears __GFP_MOVABLE to forbid that
+ */
struct page *alloc_migrate_target(struct page *page, unsigned long private,
int **resultp)
{
@@ -254,6 +257,8 @@ struct page *alloc_migrate_target(struct page *page, unsigned long private,
if (PageHighMem(page))
gfp_mask |= __GFP_HIGHMEM;
+ if (unlikely(private != 0))
+ gfp_mask &= ~__GFP_MOVABLE;
return alloc_page(gfp_mask);
}
--
1.7.1
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2013-02-05 9:21 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-02-05 9:21 [PATCH V2 0/2] mm: hotplug: implement non-movable version of get_user_pages() to kill long-time pin pages Lin Feng
2013-02-05 9:21 ` Lin Feng [this message]
2013-02-05 12:01 ` [PATCH V2 1/2] mm: hotplug: implement non-movable version of get_user_pages() called get_user_pages_non_movable() Mel Gorman
2013-02-06 0:42 ` Minchan Kim
2013-02-06 0:52 ` Benjamin LaHaise
2013-02-06 9:56 ` Mel Gorman
2013-02-08 2:32 ` Minchan Kim
2013-05-13 9:11 ` Tang Chen
2013-05-13 9:19 ` Mel Gorman
2013-05-13 14:37 ` Benjamin LaHaise
2013-05-13 14:54 ` Jeff Moyer
2013-05-13 15:01 ` Benjamin LaHaise
2013-05-14 1:24 ` Tang Chen
2013-05-14 13:58 ` Benjamin LaHaise
2013-05-14 15:16 ` chen tang
2013-05-15 2:09 ` Tang Chen
2013-05-15 7:21 ` Tang Chen
2013-05-14 3:55 ` Tang Chen
2013-05-15 13:24 ` Mel Gorman
2013-05-16 5:54 ` Tang Chen
2013-05-17 0:23 ` [WiP]: aio support for migrating pages (Re: [PATCH V2 1/2] mm: hotplug: implement non-movable version of get_user_pages() called get_user_pages_non_movable()) Benjamin LaHaise
2013-05-17 3:28 ` Tang Chen
2013-05-17 14:37 ` Benjamin LaHaise
2013-05-21 2:07 ` Tang Chen
2013-05-21 2:27 ` Benjamin LaHaise
2013-06-11 9:42 ` Tang Chen
2013-06-11 14:45 ` Benjamin LaHaise
2013-06-28 9:24 ` Gu Zheng
2013-07-01 7:23 ` Gu Zheng
2013-07-02 18:00 ` Benjamin LaHaise
2013-07-03 1:53 ` Gu Zheng
2013-07-04 6:51 ` Gu Zheng
2013-07-04 11:41 ` Benjamin LaHaise
2013-07-05 3:21 ` Gu Zheng
2013-05-17 18:17 ` Zach Brown
2013-05-17 18:30 ` Benjamin LaHaise
2013-02-20 11:37 ` [PATCH V2 1/2] mm: hotplug: implement non-movable version of get_user_pages() called get_user_pages_non_movable() Wanpeng Li
2013-02-20 11:37 ` Wanpeng Li
[not found] ` <20130220113757.GA10124@hacker.(null)>
2013-02-20 12:39 ` Lin Feng
2013-02-05 9:21 ` [PATCH V2 2/2] fs/aio.c: use get_user_pages_non_movable() to pin ring pages when support memory hotremove Lin Feng
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1360056113-14294-2-git-send-email-linfeng@cn.fujitsu.com \
--to=linfeng@cn.fujitsu.com \
--cc=akpm@linux-foundation.org \
--cc=bcrl@kvack.org \
--cc=isimatu.yasuaki@jp.fujitsu.com \
--cc=jiang.liu@huawei.com \
--cc=jmoyer@redhat.com \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=khlebnikov@openvz.org \
--cc=laijs@cn.fujitsu.com \
--cc=linux-aio@kvack.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mgorman@suse.de \
--cc=minchan@kernel.org \
--cc=riel@redhat.com \
--cc=rientjes@google.com \
--cc=viro@zeniv.linux.org.uk \
--cc=walken@google.com \
--cc=wency@cn.fujitsu.com \
--cc=zab@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).