From: Minchan Kim <minchan@kernel.org>
To: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org,
	Michael Kerrisk <mtk.manpages@gmail.com>,
	Arun Sharma <asharma@fb.com>,
	John Stultz <john.stultz@linaro.org>, Mel Gorman <mel@csn.ul.ie>,
	Hugh Dickins <hughd@google.com>,
	Dave Hansen <dave@linux.vnet.ibm.com>,
	Rik van Riel <riel@redhat.com>, Neil Brown <neilb@suse.de>,
	Mike Hommey <mh@glandium.org>, Taras Glek <tglek@mozilla.com>,
	KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	Jason Evans <je@fb.com>,
	sanjay@google.com, Paul Turner <pjt@google.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Michel Lespinasse <walken@google.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Minchan Kim <minchan@kernel.org>
Subject: [RFC v7 02/11] add vrange basic data structure and functions
Date: Tue, 12 Mar 2013 16:38:26 +0900
Message-ID: <1363073915-25000-3-git-send-email-minchan@kernel.org>
In-Reply-To: <1363073915-25000-1-git-send-email-minchan@kernel.org>

This patch adds the vrange data structure (an interval tree) and
related functions.

vrange uses the generic interval tree as its main data structure
because it manages address ranges, which is exactly what the generic
interval tree is designed for.
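
For reviewers unfamiliar with the generic interval tree, the overlap
walk used throughout this patch follows the interval_tree_iter_first()/
interval_tree_iter_next() pattern. A minimal sketch only, assuming an
already populated rb_root:

	/*
	 * Sketch: visit every node whose [node->start, node->last]
	 * overlaps the query range [start, end]. The next node is
	 * fetched before the current one is used, so the current
	 * node may be removed inside the loop.
	 */
	struct interval_tree_node *node, *next;

	node = interval_tree_iter_first(root, start, end);
	while (node) {
		next = interval_tree_iter_next(node, start, end);
		/* ... inspect, resize or remove 'node' here ... */
		node = next;
	}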

add_vrange/remove_vrange are the core functions behind the system call
that will be introduced in the next patch.

1. add_vrange inserts a new address range into the interval tree.
   If the new range overlaps one or more existing volatile ranges,
   the existing ranges are merged and expanded to cover the new range.
   If any merged range was already in the purged state, the resulting
   range inherits the purged state.
   This is coarse-grained and we need finer-grained purged state
   handling within a vrange (TODO).

   If the new address range lies entirely inside an existing range,
   it is ignored.

2. remove_vrange removes an address range and returns the purged
   state of the removed ranges (see the usage sketch below).
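
The system-call wiring arrives in the next patch; the wrapper below is
a hypothetical caller, shown only to illustrate the intended calling
pattern of add_vrange()/remove_vrange() (vrange_mark_and_check() is not
part of this series):

	/*
	 * Illustration only: mark [start, end] volatile, later take
	 * the range back and learn whether any of it was purged.
	 */
	static int vrange_mark_and_check(struct mm_struct *mm,
					 unsigned long start,
					 unsigned long end)
	{
		int ret;

		ret = add_vrange(mm, start, end);	/* mark volatile */
		if (ret)
			return ret;			/* -ENOMEM */

		/* ... pages in the range may be purged under pressure ... */

		ret = remove_vrange(mm, start, end);	/* mark non-volatile */
		return ret;	/* < 0 on error, > 0 if purged, 0 otherwise */
	}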

This patch copies some parts from John Stultz's work, but with different semantics.

Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Minchan Kim <minchan@kernel.org>
---
 include/linux/mm_types.h |   5 ++
 include/linux/vrange.h   |  45 ++++++++++++++
 init/main.c              |   2 +
 kernel/fork.c            |   3 +
 mm/Makefile              |   2 +-
 mm/vrange.c              | 157 +++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 213 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/vrange.h
 create mode 100644 mm/vrange.c

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ace9a5f..080bf74 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -13,6 +13,7 @@
 #include <linux/page-debug-flags.h>
 #include <linux/uprobes.h>
 #include <linux/page-flags-layout.h>
+#include <linux/mutex.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
 
@@ -351,6 +352,10 @@ struct mm_struct {
 						 */
 
 
+#ifdef CONFIG_MMU
+	struct rb_root v_rb;		/* vrange rb tree */
+	struct mutex v_lock;		/* Protect v_rb */
+#endif
 	unsigned long hiwater_rss;	/* High-watermark of RSS usage */
 	unsigned long hiwater_vm;	/* High-water virtual memory usage */
 
diff --git a/include/linux/vrange.h b/include/linux/vrange.h
new file mode 100644
index 0000000..74b5e37
--- /dev/null
+++ b/include/linux/vrange.h
@@ -0,0 +1,45 @@
+#ifndef _LINUX_VRANGE_H
+#define _LINUX_VRANGE_H
+
+#include <linux/mutex.h>
+#include <linux/interval_tree.h>
+#include <linux/mm.h>
+
+struct vrange {
+	struct interval_tree_node node;
+	bool purged;
+};
+
+#define vrange_entry(ptr) \
+	container_of(ptr, struct vrange, node.rb)
+
+#ifdef CONFIG_MMU
+struct mm_struct;
+
+static inline void mm_init_vrange(struct mm_struct *mm)
+{
+	mm->v_rb = RB_ROOT;
+	mutex_init(&mm->v_lock);
+}
+
+static inline void vrange_lock(struct mm_struct *mm)
+{
+	mutex_lock(&mm->v_lock);
+}
+
+static inline void vrange_unlock(struct mm_struct *mm)
+{
+	mutex_unlock(&mm->v_lock);
+}
+
+extern void exit_vrange(struct mm_struct *mm);
+void vrange_init(void);
+
+#else
+
+static inline void vrange_init(void) {}
+static inline void mm_init_vrange(struct mm_struct *mm) {}
+static inline void exit_vrange(struct mm_struct *mm) {}
+
+#endif
+#endif /* _LINUX_VRANGE_H */
diff --git a/init/main.c b/init/main.c
index 63534a1..0b9e0b5 100644
--- a/init/main.c
+++ b/init/main.c
@@ -72,6 +72,7 @@
 #include <linux/ptrace.h>
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
+#include <linux/vrange.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -605,6 +606,7 @@ asmlinkage void __init start_kernel(void)
 	calibrate_delay();
 	pidmap_init();
 	anon_vma_init();
+	vrange_init();
 #ifdef CONFIG_X86
 	if (efi_enabled(EFI_RUNTIME_SERVICES))
 		efi_enter_virtual_mode();
diff --git a/kernel/fork.c b/kernel/fork.c
index 8d932b1..e3aa120 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -70,6 +70,7 @@
 #include <linux/khugepaged.h>
 #include <linux/signalfd.h>
 #include <linux/uprobes.h>
+#include <linux/vrange.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -541,6 +542,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
 	spin_lock_init(&mm->page_table_lock);
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
 	mm->cached_hole_size = ~0UL;
+	mm_init_vrange(mm);
 	mm_init_aio(mm);
 	mm_init_owner(mm, p);
 
@@ -612,6 +614,7 @@ void mmput(struct mm_struct *mm)
 
 	if (atomic_dec_and_test(&mm->mm_users)) {
 		uprobe_clear_state(mm);
+		exit_vrange(mm);
 		exit_aio(mm);
 		ksm_exit(mm);
 		khugepaged_exit(mm); /* must run before exit_mmap */
diff --git a/mm/Makefile b/mm/Makefile
index 3a46287..a31235e 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -5,7 +5,7 @@
 mmu-y			:= nommu.o
 mmu-$(CONFIG_MMU)	:= fremap.o highmem.o madvise.o memory.o mincore.o \
 			   mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
-			   vmalloc.o pagewalk.o pgtable-generic.o
+			   vmalloc.o pagewalk.o pgtable-generic.o vrange.o
 
 ifdef CONFIG_CROSS_MEMORY_ATTACH
 mmu-$(CONFIG_MMU)	+= process_vm_access.o
diff --git a/mm/vrange.c b/mm/vrange.c
new file mode 100644
index 0000000..e265c82
--- /dev/null
+++ b/mm/vrange.c
@@ -0,0 +1,157 @@
+/*
+ * mm/vrange.c
+ */
+
+#include <linux/vrange.h>
+#include <linux/slab.h>
+
+static struct kmem_cache *vrange_cachep;
+
+void __init vrange_init(void)
+{
+	vrange_cachep = KMEM_CACHE(vrange, SLAB_PANIC);
+}
+
+static inline void __set_vrange(struct vrange *range,
+		unsigned long start_idx, unsigned long end_idx)
+{
+	range->node.start = start_idx;
+	range->node.last = end_idx;
+}
+
+static void __add_range(struct vrange *range,
+				struct rb_root *root)
+{
+	interval_tree_insert(&range->node, root);
+}
+
+static void __remove_range(struct vrange *range,
+				struct rb_root *root)
+{
+	interval_tree_remove(&range->node, root);
+}
+
+static struct vrange *alloc_vrange(void)
+{
+	return kmem_cache_alloc(vrange_cachep, GFP_KERNEL);
+}
+
+static void free_vrange(struct vrange *range)
+{
+	kmem_cache_free(vrange_cachep, range);
+}
+
+static inline void range_resize(struct rb_root *root,
+		struct vrange *range,
+		unsigned long start, unsigned long end)
+{
+	__remove_range(range, root);
+	__set_vrange(range, start, end);
+	__add_range(range, root);
+}
+
+int add_vrange(struct mm_struct *mm,
+			unsigned long start, unsigned long end)
+{
+	struct rb_root *root;
+	struct vrange *new_range, *range;
+	struct interval_tree_node *node, *next;
+	int purged = 0;
+
+	new_range = alloc_vrange();
+	if (!new_range)
+		return -ENOMEM;
+
+	root = &mm->v_rb;
+	vrange_lock(mm);
+	node = interval_tree_iter_first(root, start, end);
+	while (node) {
+		next = interval_tree_iter_next(node, start, end);
+
+		range = container_of(node, struct vrange, node);
+		if (node->start < start && node->last > end) {
+			free_vrange(new_range);
+			goto out;
+		}
+
+		start = min_t(unsigned long, start, node->start);
+		end = max_t(unsigned long, end, node->last);
+
+		purged |= range->purged;
+		__remove_range(range, root);
+		free_vrange(range);
+
+		node = next;
+	}
+
+	__set_vrange(new_range, start, end);
+	new_range->purged = purged;
+
+	__add_range(new_range, root);
+out:
+	vrange_unlock(mm);
+	return 0;
+}
+
+int remove_vrange(struct mm_struct *mm,
+		unsigned long start, unsigned long end)
+{
+	struct rb_root *root;
+	struct vrange *new_range, *range;
+	struct interval_tree_node *node, *next;
+	int ret	= 0;
+	bool used_new = false;
+
+	new_range = alloc_vrange();
+	if (!new_range)
+		return -ENOMEM;
+
+	root = &mm->v_rb;
+	vrange_lock(mm);
+
+	node = interval_tree_iter_first(root, start, end);
+	while (node) {
+		next = interval_tree_iter_next(node, start, end);
+
+		range = container_of(node, struct vrange, node);
+		ret |= range->purged;
+
+		if (start <= node->start && end >= node->last) {
+			__remove_range(range, root);
+			free_vrange(range);
+		} else if (node->start >= start) {
+			range_resize(root, range, end, node->last);
+		} else if (node->last <= end) {
+			range_resize(root, range, node->start, start);
+		} else {
+			used_new = true;
+			__set_vrange(new_range, end, node->last);
+			new_range->purged = range->purged;
+			range_resize(root, range, node->start, start);
+			__add_range(new_range, root);
+			break;
+		}
+
+		node = next;
+	}
+
+	vrange_unlock(mm);
+	if (!used_new)
+		free_vrange(new_range);
+
+	return ret;
+}
+
+void exit_vrange(struct mm_struct *mm)
+{
+	struct vrange *range;
+	struct rb_node *next;
+
+	next = rb_first(&mm->v_rb);
+	while (next) {
+		range = vrange_entry(next);
+		next = rb_next(next);
+		__remove_range(range, &mm->v_rb);
+		free_vrange(range);
+	}
+}
-- 
1.8.1.1
