Linux Container Development
 help / color / mirror / Atom feed
From: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
To: Nathan Lynch <ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
Subject: Re: [PATCH 8/8] checkpoint/restart of SysV SHM_HUGETLB regions
Date: Thu, 16 Sep 2010 20:40:35 -0400	[thread overview]
Message-ID: <4C92B903.20304@cs.columbia.edu> (raw)
In-Reply-To: <1284494530-25946-9-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>



On 09/14/2010 04:02 PM, Nathan Lynch wrote:
> Large page-backed shm regions require special handling, especially
> during restart.  The association of a large page with a shm region's
> inode can occur only in the context of a process causing a fault with
> the region mapped into its mm.  In order to restore that association,
> temporarily shmat-attach the restored SHM_HUGETLB region to the
> restarting process's mm, using the just-restored ipc namespace
> instead of the current one (the nsproxy switch hasn't occured yet).
> 
> Since the temporary shmat of the region during restart causes some of
> the shm attributes to be updated, re-restore them from the ipc_shm
> checkpoint header after unmapping.

Would it work to just move the original call to load_ipc_shm_hdr()
further down in restore_ipc_shm(), especially since the mutex is
not needed anymore - that way you don't need to re-restore them ?

I'm not too familiar with HUGETLB code otherwise, so hoping that
others review those parts while I find time to study it ...

Thanks,

Oren.

> 
> Signed-off-by: Nathan Lynch <ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
> ---
>  ipc/checkpoint_shm.c |  154 ++++++++++++++++++++++++++++++++++++++++++++++----
>  1 files changed, 142 insertions(+), 12 deletions(-)
> 
> diff --git a/ipc/checkpoint_shm.c b/ipc/checkpoint_shm.c
> index 69ba35a..7f9d701 100644
> --- a/ipc/checkpoint_shm.c
> +++ b/ipc/checkpoint_shm.c
> @@ -32,6 +32,69 @@
>   * ipc checkpoint
>   */
>  
> +#define CKPT_HDR_HPAGE_LAST ~(0UL)
> +static bool ckpt_hdr_hpage_last(const struct ckpt_hdr_hpage *hdr)
> +{
> +	return hdr->index == CKPT_HDR_HPAGE_LAST;
> +}
> +
> +static void ckpt_hdr_hpage_init(struct ckpt_hdr_hpage *hdr, unsigned long shift)
> +{
> +	hdr->h.type = CKPT_HDR_HPAGE;
> +	hdr->h.len = sizeof(struct ckpt_hdr_hpage);
> +	hdr->shift = shift;
> +	hdr->index = 0; /* to be filled in by user */
> +}
> +
> +static int shm_hugetlb_checkpoint_contents(struct ckpt_ctx *ctx, struct file *filp)
> +{
> +	struct hstate *h = hstate_file(filp);
> +	struct address_space *mapping = filp->f_mapping;
> +	struct inode *inode = mapping->host;
> +	struct ckpt_hdr_hpage hdr;
> +	unsigned long end_index;
> +	unsigned long index;
> +	ssize_t retval = 0;
> +	loff_t isize;
> +
> +	isize = i_size_read(inode);
> +	if (isize == 0)
> +		goto out;
> +
> +	end_index = (isize - 1) >> huge_page_shift(h);
> +
> +	ckpt_hdr_hpage_init(&hdr, huge_page_shift(h));
> +
> +	for (index = 0; index < end_index + 1; index++) {
> +		struct page *page;
> +
> +		page = find_get_page(mapping, index);
> +
> +		/* skip holes */
> +		if (!page)
> +			continue;
> +
> +		hdr.index = index;
> +
> +		retval = ckpt_write_obj(ctx, &hdr.h);
> +		if (retval < 0)
> +			goto release;
> +
> +		retval = hugetlb_checkpoint_page(ctx, page);
> +release:
> +		page_cache_release(page);
> +		if (retval < 0)
> +			break;
> +	}
> +
> +	if (retval < 0)
> +		goto out;
> +	hdr.index = CKPT_HDR_HPAGE_LAST;
> +	retval = ckpt_write_obj(ctx, &hdr.h);
> +out:
> +	return retval;
> +}
> +
>  /* called with the msgids->rw_mutex is read-held */
>  static int fill_ipc_shm_hdr(struct ckpt_ctx *ctx,
>  			    struct ckpt_hdr_ipc_shm *h,
> @@ -59,10 +122,8 @@ static int fill_ipc_shm_hdr(struct ckpt_ctx *ctx,
>  
>  	h->flags = 0;
>  
> -	/* check if shm was setup with SHM_HUGETLB (unsupported yet) */
>  	if (is_file_hugepages(shp->shm_file)) {
> -		pr_warning("c/r: unsupported SHM_HUGETLB\n");
> -		ret = -ENOSYS;
> +		h->flags |= SHM_HUGETLB;
>  	} else {
>  		struct shmem_inode_info *info;
>  
> @@ -117,7 +178,10 @@ int checkpoint_ipc_shm(int id, void *p, void *data)
>  	if (ret < 0)
>  		goto out;
>  
> -	ret = checkpoint_memory_contents(ctx, NULL, inode);
> +	if (is_file_hugepages(shp->shm_file))
> +		ret = shm_hugetlb_checkpoint_contents(ctx, shp->shm_file);
> +	else
> +		ret = checkpoint_memory_contents(ctx, NULL, inode);
>   out:
>  	ckpt_hdr_put(ctx, h);
>  	return ret;
> @@ -149,6 +213,75 @@ struct dq_ipcshm_del {
>  	int id;
>  };
>  
> +static void __load_ipc_shm_hdr(const struct ckpt_hdr_ipc_shm *h, struct shmid_kernel *shp)
> +{
> +	shp->shm_atim = h->shm_atim;
> +	shp->shm_dtim = h->shm_dtim;
> +	shp->shm_ctim = h->shm_ctim;
> +	shp->shm_cprid = h->shm_cprid;
> +	shp->shm_lprid = h->shm_lprid;
> +}
> +
> +static int shm_hugetlb_restore_contents(struct ckpt_ctx *ctx, struct ipc_namespace *ipcns, struct shmid_kernel *shp, const struct ckpt_hdr_ipc_shm *hdr)
> +{
> +	unsigned long start;
> +	int ret;
> +
> +	ret = do_shmat_ns_pgoff(ipcns, shp->shm_perm.id, (char __user *)0,
> +				0, &start, 0, 0);
> +	if (ret != 0)
> +		return ret;
> +
> +	ckpt_debug("temporarily using %#lx for huge shm restore\n", start);
> +
> +	while (1) {
> +		struct ckpt_hdr_hpage *hdr;
> +		unsigned long hpagesize;
> +		unsigned long index;
> +		unsigned long addr;
> +		struct page *page;
> +		bool last;
> +
> +		hdr = ckpt_read_obj_type(ctx, sizeof(*hdr), CKPT_HDR_HPAGE);
> +		if (IS_ERR(hdr)) {
> +			ret = PTR_ERR(hdr);
> +			break;
> +		}
> +
> +		last = ckpt_hdr_hpage_last(hdr);
> +		index = (unsigned long)hdr->index;
> +		hpagesize = 1UL << hdr->shift;
> +
> +		ckpt_hdr_put(ctx, hdr);
> +
> +		if (last)
> +			break;
> +
> +		addr = start + (hpagesize * index);
> +
> +		down_read(&current->mm->mmap_sem);
> +		ret = get_user_pages(current, current->mm, addr, 1, 1, 1,
> +				     &page, NULL);
> +		up_read(&current->mm->mmap_sem);
> +
> +		if (ret < 0)
> +			break;
> +
> +		ret = hugetlb_restore_page(ctx, page);
> +
> +		page_cache_release(page);
> +
> +		if (ret < 0)
> +			break;
> +	}
> +
> +	sys_shmdt((void __user *)start);
> +
> +	__load_ipc_shm_hdr(hdr, shp);
> +
> +	return ret;
> +}
> +
>  static int _ipc_shm_delete(struct ipc_namespace *ns, int id)
>  {
>  	mm_segment_t old_fs;
> @@ -190,11 +323,7 @@ static int load_ipc_shm_hdr(struct ckpt_ctx *ctx,
>  	if (h->shm_cprid < 0 || h->shm_lprid < 0)
>  		return -EINVAL;
>  
> -	shp->shm_atim = h->shm_atim;
> -	shp->shm_dtim = h->shm_dtim;
> -	shp->shm_ctim = h->shm_ctim;
> -	shp->shm_cprid = h->shm_cprid;
> -	shp->shm_lprid = h->shm_lprid;
> +	__load_ipc_shm_hdr(h, shp);
>  
>  	return 0;
>  }
> @@ -224,8 +353,6 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
>  	ret = -ENOSYS;
>  	if (h->mlock_uid != (unsigned int) -1)	/* FIXME: support SHM_LOCK */
>  		goto out;
> -	if (h->flags & SHM_HUGETLB)	/* FIXME: support SHM_HUGETLB */
> -		goto out;
>  
>  	shmflag = h->flags | h->perms.mode | IPC_CREAT | IPC_EXCL;
>  	ckpt_debug("shm: do_shmget size %lld flag %#x id %d\n",
> @@ -294,7 +421,10 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
>  	ret = ckpt_obj_insert(ctx, file, h->objref, CKPT_OBJ_FILE);
>  	if (ret < 0)
>  		goto fput;
> -	ret = restore_memory_contents(ctx, file->f_dentry->d_inode);
> +	if (is_file_hugepages(file))
> +		ret = shm_hugetlb_restore_contents(ctx, ns, shp, h);
> +	else
> +		ret = restore_memory_contents(ctx, file->f_dentry->d_inode);
>  fput:
>  	fput(file);
>  

  parent reply	other threads:[~2010-09-17  0:40 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-09-14 20:02 [PATCH 0/8] checkpoint/restart: sysvshm fixes and hugetlb support Nathan Lynch
     [not found] ` <1284494530-25946-1-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-14 20:02   ` [PATCH 1/8] sysvshm: check for hugetlb before assuming shmem Nathan Lynch
2010-09-14 20:02   ` [PATCH 2/8] sysvshm: report error on failure to reattach, avoid crash Nathan Lynch
     [not found]     ` <1284494530-25946-3-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-15  2:55       ` Matt Helsley
     [not found]         ` <20100915025502.GG8957-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2010-09-15  3:04           ` Matt Helsley
2010-09-17  0:17           ` Oren Laadan
2010-09-14 20:02   ` [PATCH 3/8] checkpoint/sysvshm: release rwsem earlier during restore Nathan Lynch
2010-09-14 20:02   ` [PATCH 4/8] checkpoint/ipc: allow shmat callers to specify ipc namespace Nathan Lynch
2010-09-14 20:02   ` [PATCH 5/8] checkpoint/restart of anonymous hugetlb mappings Nathan Lynch
     [not found]     ` <1284494530-25946-6-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-17  0:44       ` Oren Laadan
     [not found]         ` <4C92BA08.70106-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-09-17 20:23           ` Nathan Lynch
     [not found]             ` <1284754993.4109.397.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-11-01 17:44               ` Oren Laadan
2010-09-14 20:02   ` [PATCH 6/8] remove VM_HUGETLB and VM_RESERVED from CKPT_VMA_NOT_SUPPORTED Nathan Lynch
     [not found]     ` <1284494530-25946-7-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-17  3:35       ` Serge E. Hallyn
2010-09-14 20:02   ` [PATCH 7/8] hugetlbfs checkpoint/restart hooks Nathan Lynch
2010-09-14 20:02   ` [PATCH 8/8] checkpoint/restart of SysV SHM_HUGETLB regions Nathan Lynch
     [not found]     ` <1284494530-25946-9-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-17  0:40       ` Oren Laadan [this message]
     [not found]         ` <4C92B903.20304-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-09-17 19:03           ` Nathan Lynch
2010-09-17  0:37   ` [PATCH 0/8] checkpoint/restart: sysvshm fixes and hugetlb support Oren Laadan
     [not found]     ` <4C92B831.40400-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-10-06 19:43       ` Nathan Lynch
2010-11-01 17:45         ` Oren Laadan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4C92B903.20304@cs.columbia.edu \
    --to=orenl-eqauephvms7envbuuze7ea@public.gmane.org \
    --cc=containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
    --cc=ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox