From: Izik Eidus <ieidus@redhat.com>
To: hugh@veritas.com
Cc: linux-kernel@vger.kernel.org, aarcange@redhat.com,
akpm@linux-foundation.org, nickpiggin@yahoo.com.au,
chrisw@redhat.com, linux-mm@kvack.org, riel@redhat.com,
Izik Eidus <ieidus@redhat.com>
Subject: [PATCH 1/4] madvice: add MADV_SHAREABLE and MADV_UNSHAREABLE calls.
Date: Thu, 14 May 2009 03:30:45 +0300 [thread overview]
Message-ID: <1242261048-4487-2-git-send-email-ieidus@redhat.com> (raw)
In-Reply-To: <1242261048-4487-1-git-send-email-ieidus@redhat.com>
This patch adds the MADV_SHAREABLE and MADV_UNSHAREABLE madvise calls.
These calls set or clear the VM_MERGEABLE flag on vm memory areas; the flag
specifies whether the memory inside the vma is allowed to be dynamically
shared with other memory.
(This is needed for ksm vma scanning support.)
Signed-off-by: Izik Eidus <ieidus@redhat.com>
---
include/asm-generic/mman.h | 2 +
include/linux/mm.h | 2 +
include/linux/sched.h | 2 +
mm/madvise.c | 116 +++++++++++++++++++++++++++++++++----------
4 files changed, 95 insertions(+), 27 deletions(-)
diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h
index 5e3dde2..830295d 100644
--- a/include/asm-generic/mman.h
+++ b/include/asm-generic/mman.h
@@ -34,6 +34,8 @@
#define MADV_REMOVE 9 /* remove these pages & resources */
#define MADV_DONTFORK 10 /* don't inherit across fork */
#define MADV_DOFORK 11 /* do inherit across fork */
+#define MADV_SHAREABLE 12 /* can share identical pages */
+#define MADV_UNSHAREABLE 13 /* can not share identical pages */
/* compatibility flags */
#define MAP_FILE 0
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a0ddfb5..61328a4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -106,6 +106,8 @@ extern unsigned int kobjsize(const void *objp);
#define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */
#define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */
+#define VM_MERGEABLE 0x80000000 /* Memory may be merged */
+
#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4c38bc..7dc786a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -439,6 +439,8 @@ extern int get_dumpable(struct mm_struct *mm);
# define MMF_DUMP_MASK_DEFAULT_ELF 0
#endif
+#define MMF_VM_MERGEABLE 9
+
struct sighand_struct {
atomic_t count;
struct k_sigaction action[_NSIG];
diff --git a/mm/madvise.c b/mm/madvise.c
index b9ce574..bd215ce 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -30,36 +30,12 @@ static int madvise_need_mmap_write(int behavior)
}
}
-/*
- * We can potentially split a vm area into separate
- * areas, each area with its own behavior.
- */
-static long madvise_behavior(struct vm_area_struct * vma,
- struct vm_area_struct **prev,
- unsigned long start, unsigned long end, int behavior)
+static int handle_vmas(struct vm_area_struct *vma, struct vm_area_struct **prev,
+ unsigned long start, unsigned long end, int new_flags)
{
struct mm_struct * mm = vma->vm_mm;
- int error = 0;
pgoff_t pgoff;
- int new_flags = vma->vm_flags;
-
- switch (behavior) {
- case MADV_NORMAL:
- new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
- break;
- case MADV_SEQUENTIAL:
- new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ;
- break;
- case MADV_RANDOM:
- new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ;
- break;
- case MADV_DONTFORK:
- new_flags |= VM_DONTCOPY;
- break;
- case MADV_DOFORK:
- new_flags &= ~VM_DONTCOPY;
- break;
- }
+ int error = 0;
if (new_flags == vma->vm_flags) {
*prev = vma;
@@ -101,6 +77,37 @@ out:
}
/*
+ * Turn @behavior into new vm_flags and pass them to handle_vmas(). We can
+ * potentially split a vm area into separate areas, each with its own behavior.
+ */
+static long madvise_behavior(struct vm_area_struct * vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end, int behavior)
+{
+ int new_flags = vma->vm_flags; /* NOTE(review): int may be too narrow for vm_flags (VM_MERGEABLE is 0x80000000) - confirm */
+
+ switch (behavior) { /* any other behavior leaves new_flags unchanged */
+ case MADV_NORMAL: /* drop both read-pattern hints */
+ new_flags = new_flags & ~VM_RAND_READ & ~VM_SEQ_READ;
+ break;
+ case MADV_SEQUENTIAL: /* sequential hint replaces random hint */
+ new_flags = (new_flags & ~VM_RAND_READ) | VM_SEQ_READ;
+ break;
+ case MADV_RANDOM: /* random hint replaces sequential hint */
+ new_flags = (new_flags & ~VM_SEQ_READ) | VM_RAND_READ;
+ break;
+ case MADV_DONTFORK:
+ new_flags |= VM_DONTCOPY;
+ break;
+ case MADV_DOFORK:
+ new_flags &= ~VM_DONTCOPY;
+ break;
+ }
+
+ return handle_vmas(vma, prev, start, end, new_flags); /* result is returned to the caller as-is */
+}
+
+/*
* Schedule all required I/O operations. Do not wait for completion.
*/
static long madvise_willneed(struct vm_area_struct * vma,
@@ -208,6 +215,54 @@ static long madvise_remove(struct vm_area_struct *vma,
return error;
}
+/*
+ * Set (MADV_SHAREABLE) or clear (MADV_UNSHAREABLE) VM_MERGEABLE on the range
+ * via handle_vmas(), keeping the mm's MMF_VM_MERGEABLE bit in step. Returns
+ * -EINVAL for any other behavior, or when KSM is not configured.
+ */
+static long madvise_shareable(struct vm_area_struct *vma,
+ struct vm_area_struct **prev,
+ unsigned long start, unsigned long end,
+ int behavior)
+{
+ int ret;
+ struct mm_struct *mm;
+
+ switch (behavior) {
+#if defined(CONFIG_KSM) || defined(CONFIG_KSM_MODULE)
+ case MADV_SHAREABLE:
+ ret = handle_vmas(vma, prev, start, end,
+ vma->vm_flags | VM_MERGEABLE);
+
+ if (!ret) { /* flag applied: record that this mm has a mergeable area */
+ mm = vma->vm_mm; /* NOTE(review): vma is read after handle_vmas() - confirm it is still valid here */
+ set_bit(MMF_VM_MERGEABLE, &mm->flags);
+ }
+
+ return ret;
+ case MADV_UNSHAREABLE:
+ ret = handle_vmas(vma, prev, start, end,
+ vma->vm_flags & ~VM_MERGEABLE);
+
+ if (!ret) {
+ mm = vma->vm_mm; /* NOTE(review): same question as above - is vma still valid? */
+ vma = mm->mmap; /* restart from the head of the mm's vma list */
+ while (vma) { /* look for any vma that is still mergeable */
+ if (vma->vm_flags & VM_MERGEABLE)
+ break;
+ vma = vma->vm_next;
+ }
+ if (!vma) /* list exhausted: no mergeable vma remains in this mm */
+ clear_bit(MMF_VM_MERGEABLE, &mm->flags);
+ }
+
+ return ret;
+#endif
+ default: /* also the only path when KSM is compiled out */
+ return -EINVAL;
+ }
+}
+
static long
madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
unsigned long start, unsigned long end, int behavior)
@@ -238,6 +293,11 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
error = madvise_dontneed(vma, prev, start, end);
break;
+ case MADV_SHAREABLE:
+ case MADV_UNSHAREABLE:
+ error = madvise_shareable(vma, prev, start, end, behavior);
+ break;
+
default:
error = -EINVAL;
break;
@@ -269,6 +329,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
* so the kernel can free resources associated with it.
* MADV_REMOVE - the application wants to free up the given range of
* pages and associated backing store.
+ * MADV_SHAREABLE - the application agrees that pages in the given
+ * range can be shared w/ other pages of identical content.
*
* return values:
* zero - success
--
1.5.6.5
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2009-05-14 0:31 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-05-14 0:30 [PATCH 0/4] RFC - ksm api change into madvise Izik Eidus
2009-05-14 0:30 ` Izik Eidus [this message]
2009-05-14 0:30 ` [PATCH 2/4] mmlist: share mmlist with ksm Izik Eidus
2009-05-14 0:30 ` [PATCH 3/4] ksm: change ksm api to use madvise instead of ioctls Izik Eidus
2009-05-14 0:30 ` [PATCH 4/4] ksm: add support for scanning procsses that were not modifided to use ksm Izik Eidus
2009-06-08 16:18 ` [PATCH 0/4] RFC - ksm api change into madvise Hugh Dickins
2009-06-08 16:35 ` [PATCH mmotm] ksm: stop scan skipping pages Hugh Dickins
2009-06-08 17:42 ` Izik Eidus
2009-06-08 18:01 ` Hugh Dickins
2009-06-08 20:12 ` Izik Eidus
2009-06-08 21:05 ` Hugh Dickins
2009-06-08 17:17 ` [PATCH 0/4] RFC - ksm api change into madvise Izik Eidus
2009-06-08 18:32 ` Hugh Dickins
2009-06-08 20:10 ` Izik Eidus
2009-06-09 4:48 ` Izik Eidus
2009-06-09 17:24 ` Hugh Dickins
2009-06-09 19:27 ` Hugh Dickins
2009-06-10 6:28 ` Izik Eidus
2009-06-11 16:57 ` Hugh Dickins
2009-06-12 21:49 ` Izik Eidus
2009-06-08 22:57 ` Andrea Arcangeli
2009-06-13 15:04 ` Hugh Dickins
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1242261048-4487-2-git-send-email-ieidus@redhat.com \
--to=ieidus@redhat.com \
--cc=aarcange@redhat.com \
--cc=akpm@linux-foundation.org \
--cc=chrisw@redhat.com \
--cc=hugh@veritas.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=nickpiggin@yahoo.com.au \
--cc=riel@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).