From: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
To: Nathan Lynch <ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
Subject: Re: [PATCH 8/8] checkpoint/restart of SysV SHM_HUGETLB regions
Date: Thu, 16 Sep 2010 20:40:35 -0400 [thread overview]
Message-ID: <4C92B903.20304@cs.columbia.edu> (raw)
In-Reply-To: <1284494530-25946-9-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
On 09/14/2010 04:02 PM, Nathan Lynch wrote:
> Large page-backed shm regions require special handling, especially
> during restart. The association of a large page with a shm region's
> inode can occur only in the context of a process causing a fault with
> the region mapped into its mm. In order to restore that association,
> temporarily shmat-attach the restored SHM_HUGETLB region to the
> restarting process's mm, using the just-restored ipc namespace
> instead of the current one (the nsproxy switch hasn't occured yet).
>
> Since the temporary shmat of the region during restart causes some of
> the shm attributes to be updated, re-restore them from the ipc_shm
> checkpoint header after unmapping.
Would it work to just move the original call to load_ipc_shm_hdr()
further down in restore_ipc_shm(), especially since the mutex is
not needed anymore - that way you don't need to re-restore them ?
I'm not too familiar with HUGETLB code otherwise, so hoping that
others review those parts while I find time to study it ...
Thanks,
Oren.
>
> Signed-off-by: Nathan Lynch <ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
> ---
> ipc/checkpoint_shm.c | 154 ++++++++++++++++++++++++++++++++++++++++++++++----
> 1 files changed, 142 insertions(+), 12 deletions(-)
>
> diff --git a/ipc/checkpoint_shm.c b/ipc/checkpoint_shm.c
> index 69ba35a..7f9d701 100644
> --- a/ipc/checkpoint_shm.c
> +++ b/ipc/checkpoint_shm.c
> @@ -32,6 +32,69 @@
> * ipc checkpoint
> */
>
> +#define CKPT_HDR_HPAGE_LAST ~(0UL)
> +static bool ckpt_hdr_hpage_last(const struct ckpt_hdr_hpage *hdr)
> +{
> + return hdr->index == CKPT_HDR_HPAGE_LAST;
> +}
> +
> +static void ckpt_hdr_hpage_init(struct ckpt_hdr_hpage *hdr, unsigned long shift)
> +{
> + hdr->h.type = CKPT_HDR_HPAGE;
> + hdr->h.len = sizeof(struct ckpt_hdr_hpage);
> + hdr->shift = shift;
> + hdr->index = 0; /* to be filled in by user */
> +}
> +
> +static int shm_hugetlb_checkpoint_contents(struct ckpt_ctx *ctx, struct file *filp)
> +{
> + struct hstate *h = hstate_file(filp);
> + struct address_space *mapping = filp->f_mapping;
> + struct inode *inode = mapping->host;
> + struct ckpt_hdr_hpage hdr;
> + unsigned long end_index;
> + unsigned long index;
> + ssize_t retval = 0;
> + loff_t isize;
> +
> + isize = i_size_read(inode);
> + if (isize == 0)
> + goto out;
> +
> + end_index = (isize - 1) >> huge_page_shift(h);
> +
> + ckpt_hdr_hpage_init(&hdr, huge_page_shift(h));
> +
> + for (index = 0; index < end_index + 1; index++) {
> + struct page *page;
> +
> + page = find_get_page(mapping, index);
> +
> + /* skip holes */
> + if (!page)
> + continue;
> +
> + hdr.index = index;
> +
> + retval = ckpt_write_obj(ctx, &hdr.h);
> + if (retval < 0)
> + goto release;
> +
> + retval = hugetlb_checkpoint_page(ctx, page);
> +release:
> + page_cache_release(page);
> + if (retval < 0)
> + break;
> + }
> +
> + if (retval < 0)
> + goto out;
> + hdr.index = CKPT_HDR_HPAGE_LAST;
> + retval = ckpt_write_obj(ctx, &hdr.h);
> +out:
> + return retval;
> +}
> +
> /* called with the msgids->rw_mutex is read-held */
> static int fill_ipc_shm_hdr(struct ckpt_ctx *ctx,
> struct ckpt_hdr_ipc_shm *h,
> @@ -59,10 +122,8 @@ static int fill_ipc_shm_hdr(struct ckpt_ctx *ctx,
>
> h->flags = 0;
>
> - /* check if shm was setup with SHM_HUGETLB (unsupported yet) */
> if (is_file_hugepages(shp->shm_file)) {
> - pr_warning("c/r: unsupported SHM_HUGETLB\n");
> - ret = -ENOSYS;
> + h->flags |= SHM_HUGETLB;
> } else {
> struct shmem_inode_info *info;
>
> @@ -117,7 +178,10 @@ int checkpoint_ipc_shm(int id, void *p, void *data)
> if (ret < 0)
> goto out;
>
> - ret = checkpoint_memory_contents(ctx, NULL, inode);
> + if (is_file_hugepages(shp->shm_file))
> + ret = shm_hugetlb_checkpoint_contents(ctx, shp->shm_file);
> + else
> + ret = checkpoint_memory_contents(ctx, NULL, inode);
> out:
> ckpt_hdr_put(ctx, h);
> return ret;
> @@ -149,6 +213,75 @@ struct dq_ipcshm_del {
> int id;
> };
>
> +static void __load_ipc_shm_hdr(const struct ckpt_hdr_ipc_shm *h, struct shmid_kernel *shp)
> +{
> + shp->shm_atim = h->shm_atim;
> + shp->shm_dtim = h->shm_dtim;
> + shp->shm_ctim = h->shm_ctim;
> + shp->shm_cprid = h->shm_cprid;
> + shp->shm_lprid = h->shm_lprid;
> +}
> +
> +static int shm_hugetlb_restore_contents(struct ckpt_ctx *ctx, struct ipc_namespace *ipcns, struct shmid_kernel *shp, const struct ckpt_hdr_ipc_shm *hdr)
> +{
> + unsigned long start;
> + int ret;
> +
> + ret = do_shmat_ns_pgoff(ipcns, shp->shm_perm.id, (char __user *)0,
> + 0, &start, 0, 0);
> + if (ret != 0)
> + return ret;
> +
> + ckpt_debug("temporarily using %#lx for huge shm restore\n", start);
> +
> + while (1) {
> + struct ckpt_hdr_hpage *hdr;
> + unsigned long hpagesize;
> + unsigned long index;
> + unsigned long addr;
> + struct page *page;
> + bool last;
> +
> + hdr = ckpt_read_obj_type(ctx, sizeof(*hdr), CKPT_HDR_HPAGE);
> + if (IS_ERR(hdr)) {
> + ret = PTR_ERR(hdr);
> + break;
> + }
> +
> + last = ckpt_hdr_hpage_last(hdr);
> + index = (unsigned long)hdr->index;
> + hpagesize = 1UL << hdr->shift;
> +
> + ckpt_hdr_put(ctx, hdr);
> +
> + if (last)
> + break;
> +
> + addr = start + (hpagesize * index);
> +
> + down_read(¤t->mm->mmap_sem);
> + ret = get_user_pages(current, current->mm, addr, 1, 1, 1,
> + &page, NULL);
> + up_read(¤t->mm->mmap_sem);
> +
> + if (ret < 0)
> + break;
> +
> + ret = hugetlb_restore_page(ctx, page);
> +
> + page_cache_release(page);
> +
> + if (ret < 0)
> + break;
> + }
> +
> + sys_shmdt((void __user *)start);
> +
> + __load_ipc_shm_hdr(hdr, shp);
> +
> + return ret;
> +}
> +
> static int _ipc_shm_delete(struct ipc_namespace *ns, int id)
> {
> mm_segment_t old_fs;
> @@ -190,11 +323,7 @@ static int load_ipc_shm_hdr(struct ckpt_ctx *ctx,
> if (h->shm_cprid < 0 || h->shm_lprid < 0)
> return -EINVAL;
>
> - shp->shm_atim = h->shm_atim;
> - shp->shm_dtim = h->shm_dtim;
> - shp->shm_ctim = h->shm_ctim;
> - shp->shm_cprid = h->shm_cprid;
> - shp->shm_lprid = h->shm_lprid;
> + __load_ipc_shm_hdr(h, shp);
>
> return 0;
> }
> @@ -224,8 +353,6 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
> ret = -ENOSYS;
> if (h->mlock_uid != (unsigned int) -1) /* FIXME: support SHM_LOCK */
> goto out;
> - if (h->flags & SHM_HUGETLB) /* FIXME: support SHM_HUGETLB */
> - goto out;
>
> shmflag = h->flags | h->perms.mode | IPC_CREAT | IPC_EXCL;
> ckpt_debug("shm: do_shmget size %lld flag %#x id %d\n",
> @@ -294,7 +421,10 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
> ret = ckpt_obj_insert(ctx, file, h->objref, CKPT_OBJ_FILE);
> if (ret < 0)
> goto fput;
> - ret = restore_memory_contents(ctx, file->f_dentry->d_inode);
> + if (is_file_hugepages(file))
> + ret = shm_hugetlb_restore_contents(ctx, ns, shp, h);
> + else
> + ret = restore_memory_contents(ctx, file->f_dentry->d_inode);
> fput:
> fput(file);
>
next prev parent reply other threads:[~2010-09-17 0:40 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-09-14 20:02 [PATCH 0/8] checkpoint/restart: sysvshm fixes and hugetlb support Nathan Lynch
[not found] ` <1284494530-25946-1-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-14 20:02 ` [PATCH 1/8] sysvshm: check for hugetlb before assuming shmem Nathan Lynch
2010-09-14 20:02 ` [PATCH 2/8] sysvshm: report error on failure to reattach, avoid crash Nathan Lynch
[not found] ` <1284494530-25946-3-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-15 2:55 ` Matt Helsley
[not found] ` <20100915025502.GG8957-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2010-09-15 3:04 ` Matt Helsley
2010-09-17 0:17 ` Oren Laadan
2010-09-14 20:02 ` [PATCH 3/8] checkpoint/sysvshm: release rwsem earlier during restore Nathan Lynch
2010-09-14 20:02 ` [PATCH 4/8] checkpoint/ipc: allow shmat callers to specify ipc namespace Nathan Lynch
2010-09-14 20:02 ` [PATCH 5/8] checkpoint/restart of anonymous hugetlb mappings Nathan Lynch
[not found] ` <1284494530-25946-6-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-17 0:44 ` Oren Laadan
[not found] ` <4C92BA08.70106-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-09-17 20:23 ` Nathan Lynch
[not found] ` <1284754993.4109.397.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-11-01 17:44 ` Oren Laadan
2010-09-14 20:02 ` [PATCH 6/8] remove VM_HUGETLB and VM_RESERVED from CKPT_VMA_NOT_SUPPORTED Nathan Lynch
[not found] ` <1284494530-25946-7-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-17 3:35 ` Serge E. Hallyn
2010-09-14 20:02 ` [PATCH 7/8] hugetlbfs checkpoint/restart hooks Nathan Lynch
2010-09-14 20:02 ` [PATCH 8/8] checkpoint/restart of SysV SHM_HUGETLB regions Nathan Lynch
[not found] ` <1284494530-25946-9-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-17 0:40 ` Oren Laadan [this message]
[not found] ` <4C92B903.20304-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-09-17 19:03 ` Nathan Lynch
2010-09-17 0:37 ` [PATCH 0/8] checkpoint/restart: sysvshm fixes and hugetlb support Oren Laadan
[not found] ` <4C92B831.40400-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-10-06 19:43 ` Nathan Lynch
2010-11-01 17:45 ` Oren Laadan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4C92B903.20304@cs.columbia.edu \
--to=orenl-eqauephvms7envbuuze7ea@public.gmane.org \
--cc=containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
--cc=ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox