From: Nathan Lynch <ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
To: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
Subject: [PATCH 5/8] checkpoint/restart of anonymous hugetlb mappings
Date: Tue, 14 Sep 2010 15:02:07 -0500 [thread overview]
Message-ID: <1284494530-25946-6-git-send-email-ntl@pobox.com> (raw)
In-Reply-To: <1284494530-25946-1-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
Support checkpoint and restore of both private and shared
hugepage-backed mappings established via mmap(MAP_HUGETLB). Introduce
APIs for checkpoint and restart of individual huge pages which are to
be used by the sysv SHM_HUGETLB c/r code.
Signed-off-by: Nathan Lynch <ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
---
include/linux/checkpoint.h | 4 +-
include/linux/checkpoint_hdr.h | 16 +++
include/linux/hugetlb.h | 11 ++
mm/checkpoint.c | 13 ++
mm/hugetlb.c | 257 ++++++++++++++++++++++++++++++++++++++++
5 files changed, 300 insertions(+), 1 deletions(-)
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index 4e25042..d9a65a7 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -299,12 +299,14 @@ extern unsigned long generic_vma_restore(struct mm_struct *mm,
extern int private_vma_restore(struct ckpt_ctx *ctx, struct mm_struct *mm,
struct file *file, struct ckpt_hdr_vma *h);
+extern int hugetlb_restore(struct ckpt_ctx *ctx, struct mm_struct *mm,
+ struct ckpt_hdr_vma *hdr);
+
extern int checkpoint_memory_contents(struct ckpt_ctx *ctx,
struct vm_area_struct *vma,
struct inode *inode);
extern int restore_memory_contents(struct ckpt_ctx *ctx, struct inode *inode);
-
#define CKPT_VMA_NOT_SUPPORTED \
(VM_IO | VM_HUGETLB | VM_NONLINEAR | VM_PFNMAP | \
VM_RESERVED | VM_HUGETLB | VM_NONLINEAR | \
diff --git a/include/linux/checkpoint_hdr.h b/include/linux/checkpoint_hdr.h
index f4f9577..bda5d74 100644
--- a/include/linux/checkpoint_hdr.h
+++ b/include/linux/checkpoint_hdr.h
@@ -151,6 +151,8 @@ enum {
#define CKPT_HDR_VMA CKPT_HDR_VMA
CKPT_HDR_PGARR,
#define CKPT_HDR_PGARR CKPT_HDR_PGARR
+ CKPT_HDR_HPAGE,
+#define CKPT_HDR_HPAGE CKPT_HDR_HPAGE
CKPT_HDR_MM_CONTEXT,
#define CKPT_HDR_MM_CONTEXT CKPT_HDR_MM_CONTEXT
@@ -881,6 +883,10 @@ enum vma_type {
#define CKPT_VMA_SHM_IPC CKPT_VMA_SHM_IPC
CKPT_VMA_SHM_IPC_SKIP, /* shared sysvipc (skip contents) */
#define CKPT_VMA_SHM_IPC_SKIP CKPT_VMA_SHM_IPC_SKIP
+ CKPT_VMA_HUGETLB,
+#define CKPT_VMA_HUGETLB CKPT_VMA_HUGETLB
+ CKPT_VMA_HUGETLB_SKIP,
+#define CKPT_VMA_HUGETLB_SKIP CKPT_VMA_HUGETLB_SKIP
CKPT_VMA_MAX,
#define CKPT_VMA_MAX CKPT_VMA_MAX
};
@@ -907,6 +913,16 @@ struct ckpt_hdr_pgarr {
__u64 nr_pages; /* number of pages to saved */
} __attribute__((aligned(8)));
+/*
+ * huge page: record header; hugetlb_dump_contents() writes one of these
+ * ahead of each huge page's data, plus a final one as an end marker
+ */
+struct ckpt_hdr_hpage {
+ struct ckpt_hdr h; /* common object header (type and len) */
+ union {
+ __u64 vaddr; /* user virtual address of the huge page */
+ __u64 index; /* NOTE(review): presumably an offset into the backing object for the sysv SHM_HUGETLB code; no user in this patch, confirm */
+ };
+ __u16 shift; /* huge page shift of the saved page, as given by huge_page_shift() */
+} __attribute__((aligned(8)));
+
/* signals */
struct ckpt_sigset {
__u8 sigset[CKPT_ARCH_NSIG / 8];
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 78b4bc6..3808c04 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -47,6 +47,8 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
struct vm_area_struct *vma,
int acctflags);
void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
+int hugetlb_restore_page(struct ckpt_ctx *ctx, struct page *page);
+int hugetlb_checkpoint_page(struct ckpt_ctx *ctx, struct page *page);
extern unsigned long hugepages_treat_as_movable;
extern const unsigned long hugetlb_zero, hugetlb_infinity;
@@ -323,6 +325,15 @@ static inline unsigned int pages_per_huge_page(struct hstate *h)
{
return 1;
}
+
+/*
+ * Inline fallback for hugetlb_restore_page(): nothing is read from the
+ * image and the caller is told the operation is not implemented.
+ * NOTE(review): appears to belong to the half of this header used when
+ * huge page support is compiled out; confirm against the full file.
+ */
+static inline int hugetlb_restore_page(struct ckpt_ctx *ctx,
+				       struct page *page)
+{
+	return -ENOSYS;
+}
+
+/*
+ * Inline fallback for hugetlb_checkpoint_page(): nothing is written to the
+ * image and the caller is told the operation is not implemented.
+ * NOTE(review): appears to belong to the half of this header used when
+ * huge page support is compiled out; confirm against the full file.
+ */
+static inline int hugetlb_checkpoint_page(struct ckpt_ctx *ctx,
+					  struct page *page)
+{
+	return -ENOSYS;
+}
#endif
#endif /* _LINUX_HUGETLB_H */
diff --git a/mm/checkpoint.c b/mm/checkpoint.c
index 70300e8..8d9a168 100644
--- a/mm/checkpoint.c
+++ b/mm/checkpoint.c
@@ -1021,6 +1021,8 @@ static unsigned long calc_map_flags_bits(unsigned long orig_vm_flags)
vm_flags |= MAP_PRIVATE;
if (orig_vm_flags & VM_NORESERVE)
vm_flags |= MAP_NORESERVE;
+ if (orig_vm_flags & VM_HUGETLB)
+ vm_flags |= MAP_HUGETLB;
return vm_flags;
}
@@ -1180,6 +1182,17 @@ static struct restore_vma_ops restore_vma_ops[] = {
.vma_type = CKPT_VMA_SHM_IPC_SKIP,
.restore = ipcshm_restore,
},
+ /* hugetlb */
+ {
+ .vma_name = "HUGETLB",
+ .vma_type = CKPT_VMA_HUGETLB,
+ .restore = hugetlb_restore,
+ },
+ {
+ .vma_name = "HUGETLB (SKIP)",
+ .vma_type = CKPT_VMA_HUGETLB_SKIP,
+ .restore = hugetlb_restore,
+ },
};
/**
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6034dc9..3b5942c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -8,7 +8,10 @@
#include <linux/mm.h>
#include <linux/seq_file.h>
#include <linux/sysctl.h>
+#include <linux/checkpoint.h>
+#include <linux/file.h>
#include <linux/highmem.h>
+#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/nodemask.h>
#include <linux/pagemap.h>
@@ -2057,10 +2060,264 @@ static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return 0;
}
+/*
+ * Debug helper: log the address and page shift carried by a huge page
+ * header.  The __u64 field is cast explicitly so that the argument always
+ * matches the %llx conversion, whatever underlying type __u64 has on the
+ * architecture being built.
+ */
+#define ckpt_debug_hpage_hdr(hdr) \
+	ckpt_debug("vaddr=%#llx shift=%hu\n", \
+		   (unsigned long long)(hdr)->vaddr, (hdr)->shift)
+
+/*
+ * Prepare a huge page header for writing to the checkpoint image.
+ * @hdr:   header to initialise (callers in this file keep it on the stack)
+ * @shift: huge page shift to record in the header
+ *
+ * The address/index field is left at zero; the caller fills it in for each
+ * page before writing the header out.
+ */
+static void ckpt_hdr_hpage_init(struct ckpt_hdr_hpage *hdr, unsigned long shift)
+{
+	/*
+	 * The structure is 8-byte aligned and ends in a 16-bit member, so it
+	 * carries trailing padding.  Clear the whole object first so that no
+	 * uninitialised stack bytes end up in the image when the header is
+	 * written out with a length of sizeof(*hdr).
+	 */
+	memset(hdr, 0, sizeof(*hdr));
+
+	hdr->h.type = CKPT_HDR_HPAGE;
+	hdr->h.len = sizeof(*hdr);
+	hdr->shift = shift;
+	/* hdr->vaddr stays 0 here: to be filled in by user */
+}
+
+/*
+ * Write the contents of one huge page to the checkpoint image.
+ * @ctx:  checkpoint context; ctx->scratch_page serves as a bounce buffer
+ * @head: first page of the huge page to save
+ *
+ * The huge page is emitted one PAGE_SIZE piece at a time: each constituent
+ * page is copied into the scratch page under a short atomic mapping and
+ * then handed to ckpt_kwrite().
+ *
+ * Returns the negative value reported by ckpt_kwrite() as soon as a write
+ * fails, otherwise the result of the last ckpt_kwrite() call.
+ */
+int hugetlb_checkpoint_page(struct ckpt_ctx *ctx, struct page *head)
+{
+	unsigned int count = pages_per_huge_page(page_hstate(head));
+	struct page *subpage = head;
+	int err = 0;
+	int idx = 0;
+
+	while (idx < count) {
+		void *kaddr;
+
+		/* yield before entering the atomic mapping, never inside it */
+		cond_resched();
+
+		kaddr = kmap_atomic(subpage, KM_USER1);
+		copy_page(ctx->scratch_page, kaddr);
+		kunmap_atomic(kaddr, KM_USER1);
+
+		err = ckpt_kwrite(ctx, ctx->scratch_page, PAGE_SIZE);
+		if (err < 0)
+			return err;
+
+		idx++;
+		subpage = mem_map_next(subpage, head, idx);
+	}
+
+	return err;
+}
+
+/*
+ * Sentinel stored in the vaddr field of a huge page header to mark the end
+ * of a run of huge page records.  It is an all-ones 64-bit constant so that
+ * it matches the __u64 field regardless of the width of 'unsigned long',
+ * and it is parenthesised so it expands safely inside larger expressions.
+ */
+#define CKPT_HDR_HPAGE_LAST (~0ULL)
+
+/* Return true if @hdr is the end-of-run marker rather than a page record. */
+static bool ckpt_hdr_hpage_last(const struct ckpt_hdr_hpage *hdr)
+{
+	return hdr->vaddr == CKPT_HDR_HPAGE_LAST;
+}
+
+/*
+ * Save the populated huge pages of @vma to the checkpoint image.
+ * @ctx: checkpoint context
+ * @vma: hugetlb mapping to walk
+ *
+ * The mapping is walked one huge page at a time.  For every page that is
+ * present a header carrying its virtual address is written, followed by the
+ * page contents; addresses with nothing mapped are skipped.  A final header
+ * whose address is CKPT_HDR_HPAGE_LAST terminates the sequence.
+ *
+ * Returns a negative error code on failure, otherwise the result of writing
+ * the terminating header.
+ */
+static int hugetlb_dump_contents(struct ckpt_ctx *ctx, struct vm_area_struct *vma)
+{
+	struct ckpt_hdr_hpage hdr;
+	unsigned long pageshift;
+	unsigned long pagesize;
+	unsigned long addr;
+	int ret = 0;	/* defined even if the loop below never runs */
+
+	pageshift = huge_page_shift(hstate_vma(vma));
+	pagesize = vma_kernel_pagesize(vma);
+
+	ckpt_hdr_hpage_init(&hdr, pageshift);
+
+	for (addr = vma->vm_start; addr < vma->vm_end; addr += pagesize) {
+		struct page *page = NULL;
+
+		down_read(&vma->vm_mm->mmap_sem);
+		ret = __get_user_pages(ctx->tsk, vma->vm_mm,
+				       addr, 1, FOLL_DUMP | FOLL_GET,
+				       &page, NULL);
+		up_read(&vma->vm_mm->mmap_sem);
+
+		/* FOLL_DUMP gives -EFAULT for holes */
+		if (ret == -EFAULT)
+			ret = 0;
+
+		/* only pages that are actually present get a record */
+		if (ret >= 0 && page) {
+			hdr.vaddr = addr;
+
+			ckpt_debug_hpage_hdr(&hdr);
+
+			ret = ckpt_write_obj(ctx, &hdr.h);
+			if (ret >= 0)
+				ret = hugetlb_checkpoint_page(ctx, page);
+		}
+
+		/* drop the reference taken by FOLL_GET on every path */
+		if (page)
+			page_cache_release(page);
+		if (ret < 0)
+			return ret;
+	}
+
+	hdr.vaddr = CKPT_HDR_HPAGE_LAST;
+	return ckpt_write_obj(ctx, &hdr.h);
+}
+
+/*
+ * ->checkpoint method for hugetlb mappings.
+ * @ctx: checkpoint context
+ * @vma: hugetlb VMA being saved; it must have a backing file
+ *
+ * The backing file is visited and its inode is looked up in (or added to)
+ * the object table.  The first VMA to reference a given inode is saved as
+ * CKPT_VMA_HUGETLB and is followed by the page contents; later VMAs that
+ * reference the same inode are saved as CKPT_VMA_HUGETLB_SKIP without
+ * contents.
+ *
+ * Returns 0 or the result of hugetlb_dump_contents() on success, and the
+ * failing helper's return value otherwise.
+ */
+static int hugetlb_vm_op_checkpoint(struct ckpt_ctx *ctx, struct vm_area_struct *vma)
+{
+	struct file *filp;
+	enum vma_type type;
+	int is_first;
+	int objref;
+	int err;
+
+	BUG_ON(!(vma->vm_flags & VM_HUGETLB));
+	BUG_ON(!vma->vm_file);
+
+	filp = vma->vm_file;
+
+	err = ckpt_obj_visit(ctx, filp, CKPT_OBJ_FILE);
+	if (err < 0)
+		return err;
+
+	objref = ckpt_obj_lookup_add(ctx, filp->f_dentry->d_inode,
+				     CKPT_OBJ_INODE, &is_first);
+	if (objref < 0)
+		return objref;
+
+	if (is_first)
+		type = CKPT_VMA_HUGETLB;
+	else
+		type = CKPT_VMA_HUGETLB_SKIP;
+
+	err = generic_vma_checkpoint(ctx, vma, type, 0, objref);
+	if (err)
+		return err;
+
+	/* contents are dumped only once per inode */
+	if (type != CKPT_VMA_HUGETLB)
+		return err;
+
+	return hugetlb_dump_contents(ctx, vma);
+}
+
+/*
+ * Fill one huge page with data read from the checkpoint image.
+ * @ctx:  restart context; ctx->scratch_page serves as a bounce buffer
+ * @head: first page of the huge page to fill
+ *
+ * The data is consumed one PAGE_SIZE piece at a time: each piece is read
+ * into the scratch page with ckpt_kread() and then copied into the matching
+ * constituent page under a short atomic mapping.
+ *
+ * Returns the negative value reported by ckpt_kread() as soon as a read
+ * fails, otherwise the result of the last ckpt_kread() call.
+ */
+int hugetlb_restore_page(struct ckpt_ctx *ctx, struct page *head)
+{
+	unsigned int count = pages_per_huge_page(page_hstate(head));
+	struct page *subpage = head;
+	int err = 0;
+	int idx = 0;
+
+	while (idx < count) {
+		void *kaddr;
+
+		err = ckpt_kread(ctx, ctx->scratch_page, PAGE_SIZE);
+		if (err < 0)
+			return err;
+
+		/* yield before entering the atomic mapping, never inside it */
+		cond_resched();
+
+		kaddr = kmap_atomic(subpage, KM_USER1);
+		copy_page(kaddr, ctx->scratch_page);
+		kunmap_atomic(kaddr, KM_USER1);
+
+		idx++;
+		subpage = mem_map_next(subpage, head, idx);
+	}
+
+	return err;
+}
+
+/*
+ * Read huge page records from the image and copy them into the current
+ * task's address space.
+ * @ctx: restart context
+ *
+ * Records are consumed until the end marker (see ckpt_hdr_hpage_last()) is
+ * found.  For each record the page at the recorded address is faulted in
+ * and pinned, checked, filled by hugetlb_restore_page() and released.
+ *
+ * The address and shift come from the checkpoint image and are therefore
+ * not trusted: the pinned page must be the head of a huge page whose size
+ * matches the recorded shift, otherwise -EINVAL is returned.
+ *
+ * Returns a negative error code on failure; otherwise the result of the
+ * last hugetlb_restore_page() call (0 if there were no page records).
+ */
+static int hugetlb_restore_contents(struct ckpt_ctx *ctx)
+{
+	struct mm_struct *mm = current->mm;
+	int ret = 0;
+
+	while (1) {
+		struct ckpt_hdr_hpage *hdr;
+		struct page *page = NULL;
+		unsigned long addr;
+		unsigned int shift;
+		bool last;
+
+		hdr = ckpt_read_obj_type(ctx, sizeof(*hdr), CKPT_HDR_HPAGE);
+		if (IS_ERR(hdr)) {
+			ret = PTR_ERR(hdr);
+			break;
+		}
+
+		ckpt_debug_hpage_hdr(hdr);
+		last = ckpt_hdr_hpage_last(hdr);
+		addr = (unsigned long)hdr->vaddr;
+		shift = hdr->shift;
+
+		/* everything needed has been copied out of the header */
+		ckpt_hdr_put(ctx, hdr);
+
+		if (last)
+			break;
+
+		down_read(&mm->mmap_sem);
+		ret = get_user_pages(current, mm, addr, 1, 1, 1,
+				     &page, NULL);
+		up_read(&mm->mmap_sem);
+
+		if (ret < 0)
+			break;
+		if (ret != 1) {
+			/* nothing was pinned, so there is no page to fill */
+			ret = -EFAULT;
+			break;
+		}
+
+		/*
+		 * Only the head page of a huge page may be handed to
+		 * hugetlb_restore_page(), which derives the amount of data
+		 * to read from the page itself.  Also refuse a record whose
+		 * shift differs from the local huge page size, since the
+		 * amount of data that follows it would not match.
+		 */
+		if (!PageHuge(page) || compound_head(page) != page ||
+		    shift != huge_page_shift(page_hstate(page)))
+			ret = -EINVAL;
+		else
+			ret = hugetlb_restore_page(ctx, page);
+
+		page_cache_release(page);
+
+		if (ret < 0)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Restore one hugetlb VMA described by @hdr into @mm.
+ * @ctx: restart context
+ * @mm:  address space receiving the mapping
+ * @hdr: VMA header read from the image
+ *
+ * The backing file is looked up in the object table under hdr->ino_objref.
+ * If it is not there yet, the header must be of type CKPT_VMA_HUGETLB: a
+ * new anonymous hugetlb file is created, registered under that objref, and
+ * the page contents are restored after the VMA has been mapped.  If the
+ * file is already known, the header must be of type CKPT_VMA_HUGETLB_SKIP
+ * and the existing file is mapped without reading any contents.
+ *
+ * Returns 0 or the result of hugetlb_restore_contents() on success, and a
+ * negative error code otherwise.
+ */
+int hugetlb_restore(struct ckpt_ctx *ctx, struct mm_struct *mm, struct ckpt_hdr_vma *hdr)
+{
+	unsigned long mapped;
+	struct file *filp;
+	int err = 0;
+
+	if ((hdr->vm_flags & VM_HUGETLB) == 0)
+		return -EINVAL;
+
+	filp = ckpt_obj_try_fetch(ctx, hdr->ino_objref, CKPT_OBJ_FILE);
+	if (IS_ERR(filp)) {
+		/* -EINVAL is taken to mean "no file yet"; the rest is fatal */
+		if (PTR_ERR(filp) != -EINVAL)
+			return PTR_ERR(filp);
+		filp = NULL;
+	}
+
+	/* To do: don't assume same default_hstate on source and destination */
+	if (filp) {
+		if (hdr->vma_type != CKPT_VMA_HUGETLB_SKIP)
+			return -EINVAL;
+		/* take the reference that is dropped at the end */
+		get_file(filp);
+	} else {
+		struct user_struct *user = NULL;
+		unsigned long len;
+
+		if (hdr->vma_type != CKPT_VMA_HUGETLB)
+			return -EINVAL;
+
+		/* see sys_mmap_pgoff */
+		len = hdr->vm_end - hdr->vm_start;
+		len = ALIGN(len, huge_page_size(&default_hstate));
+		filp = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
+					  &user, HUGETLB_ANONHUGE_INODE);
+		if (IS_ERR(filp))
+			return PTR_ERR(filp);
+
+		err = ckpt_obj_insert(ctx, filp, hdr->ino_objref, CKPT_OBJ_FILE);
+		if (err < 0)
+			goto out_fput;
+	}
+
+	mapped = generic_vma_restore(mm, filp, hdr);
+	if (IS_ERR((void *)mapped))
+		err = PTR_ERR((void *)mapped);
+	else if (hdr->vma_type == CKPT_VMA_HUGETLB)
+		err = hugetlb_restore_contents(ctx);
+
+out_fput:
+	fput(filp);
+	return err;
+}
+
const struct vm_operations_struct hugetlb_vm_ops = {
.fault = hugetlb_vm_op_fault,
.open = hugetlb_vm_op_open,
.close = hugetlb_vm_op_close,
+#ifdef CONFIG_CHECKPOINT
+ .checkpoint = hugetlb_vm_op_checkpoint, /* only wired up when checkpoint/restart is configured */
+#endif /* CONFIG_CHECKPOINT */
};
static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
--
1.7.2.2
next prev parent reply other threads:[~2010-09-14 20:02 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-09-14 20:02 [PATCH 0/8] checkpoint/restart: sysvshm fixes and hugetlb support Nathan Lynch
[not found] ` <1284494530-25946-1-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-14 20:02 ` [PATCH 1/8] sysvshm: check for hugetlb before assuming shmem Nathan Lynch
2010-09-14 20:02 ` [PATCH 2/8] sysvshm: report error on failure to reattach, avoid crash Nathan Lynch
[not found] ` <1284494530-25946-3-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-15 2:55 ` Matt Helsley
[not found] ` <20100915025502.GG8957-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2010-09-15 3:04 ` Matt Helsley
2010-09-17 0:17 ` Oren Laadan
2010-09-14 20:02 ` [PATCH 3/8] checkpoint/sysvshm: release rwsem earlier during restore Nathan Lynch
2010-09-14 20:02 ` [PATCH 4/8] checkpoint/ipc: allow shmat callers to specify ipc namespace Nathan Lynch
2010-09-14 20:02 ` Nathan Lynch [this message]
[not found] ` <1284494530-25946-6-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-17 0:44 ` [PATCH 5/8] checkpoint/restart of anonymous hugetlb mappings Oren Laadan
[not found] ` <4C92BA08.70106-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-09-17 20:23 ` Nathan Lynch
[not found] ` <1284754993.4109.397.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-11-01 17:44 ` Oren Laadan
2010-09-14 20:02 ` [PATCH 6/8] remove VM_HUGETLB and VM_RESERVED from CKPT_VMA_NOT_SUPPORTED Nathan Lynch
[not found] ` <1284494530-25946-7-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-17 3:35 ` Serge E. Hallyn
2010-09-14 20:02 ` [PATCH 7/8] hugetlbfs checkpoint/restart hooks Nathan Lynch
2010-09-14 20:02 ` [PATCH 8/8] checkpoint/restart of SysV SHM_HUGETLB regions Nathan Lynch
[not found] ` <1284494530-25946-9-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-17 0:40 ` Oren Laadan
[not found] ` <4C92B903.20304-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-09-17 19:03 ` Nathan Lynch
2010-09-17 0:37 ` [PATCH 0/8] checkpoint/restart: sysvshm fixes and hugetlb support Oren Laadan
[not found] ` <4C92B831.40400-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-10-06 19:43 ` Nathan Lynch
2010-11-01 17:45 ` Oren Laadan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1284494530-25946-6-git-send-email-ntl@pobox.com \
--to=ntl-e+axbwqsrlaavxtiumwx3w@public.gmane.org \
--cc=containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox