All of lore.kernel.org
 help / color / mirror / Atom feed
From: Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
To: Nathan Lynch <ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
Cc: containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
Subject: Re: [PATCH 8/8] checkpoint/restart of SysV SHM_HUGETLB regions
Date: Thu, 16 Sep 2010 20:40:35 -0400	[thread overview]
Message-ID: <4C92B903.20304@cs.columbia.edu> (raw)
In-Reply-To: <1284494530-25946-9-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>



On 09/14/2010 04:02 PM, Nathan Lynch wrote:
> Large page-backed shm regions require special handling, especially
> during restart.  The association of a large page with a shm region's
> inode can occur only in the context of a process causing a fault with
> the region mapped into its mm.  In order to restore that association,
> temporarily shmat-attach the restored SHM_HUGETLB region to the
> restarting process's mm, using the just-restored ipc namespace
> instead of the current one (the nsproxy switch hasn't occured yet).
> 
> Since the temporary shmat of the region during restart causes some of
> the shm attributes to be updated, re-restore them from the ipc_shm
> checkpoint header after unmapping.

Would it work to just move the original call to load_ipc_shm_hdr()
further down in restore_ipc_shm(), especially since the mutex is
not needed anymore - that way you don't need to re-restore them ?

I'm not too familiar with HUGETLB code otherwise, so hoping that
others review those parts while I find time to study it ...

Thanks,

Oren.

> 
> Signed-off-by: Nathan Lynch <ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
> ---
>  ipc/checkpoint_shm.c |  154 ++++++++++++++++++++++++++++++++++++++++++++++----
>  1 files changed, 142 insertions(+), 12 deletions(-)
> 
> diff --git a/ipc/checkpoint_shm.c b/ipc/checkpoint_shm.c
> index 69ba35a..7f9d701 100644
> --- a/ipc/checkpoint_shm.c
> +++ b/ipc/checkpoint_shm.c
> @@ -32,6 +32,69 @@
>   * ipc checkpoint
>   */
>  
> +#define CKPT_HDR_HPAGE_LAST ~(0UL)
> +static bool ckpt_hdr_hpage_last(const struct ckpt_hdr_hpage *hdr)
> +{
> +	return hdr->index == CKPT_HDR_HPAGE_LAST;
> +}
> +
> +static void ckpt_hdr_hpage_init(struct ckpt_hdr_hpage *hdr, unsigned long shift)
> +{
> +	hdr->h.type = CKPT_HDR_HPAGE;
> +	hdr->h.len = sizeof(struct ckpt_hdr_hpage);
> +	hdr->shift = shift;
> +	hdr->index = 0; /* to be filled in by user */
> +}
> +
> +static int shm_hugetlb_checkpoint_contents(struct ckpt_ctx *ctx, struct file *filp)
> +{
> +	struct hstate *h = hstate_file(filp);
> +	struct address_space *mapping = filp->f_mapping;
> +	struct inode *inode = mapping->host;
> +	struct ckpt_hdr_hpage hdr;
> +	unsigned long end_index;
> +	unsigned long index;
> +	ssize_t retval = 0;
> +	loff_t isize;
> +
> +	isize = i_size_read(inode);
> +	if (isize == 0)
> +		goto out;
> +
> +	end_index = (isize - 1) >> huge_page_shift(h);
> +
> +	ckpt_hdr_hpage_init(&hdr, huge_page_shift(h));
> +
> +	for (index = 0; index < end_index + 1; index++) {
> +		struct page *page;
> +
> +		page = find_get_page(mapping, index);
> +
> +		/* skip holes */
> +		if (!page)
> +			continue;
> +
> +		hdr.index = index;
> +
> +		retval = ckpt_write_obj(ctx, &hdr.h);
> +		if (retval < 0)
> +			goto release;
> +
> +		retval = hugetlb_checkpoint_page(ctx, page);
> +release:
> +		page_cache_release(page);
> +		if (retval < 0)
> +			break;
> +	}
> +
> +	if (retval < 0)
> +		goto out;
> +	hdr.index = CKPT_HDR_HPAGE_LAST;
> +	retval = ckpt_write_obj(ctx, &hdr.h);
> +out:
> +	return retval;
> +}
> +
>  /* called with the msgids->rw_mutex is read-held */
>  static int fill_ipc_shm_hdr(struct ckpt_ctx *ctx,
>  			    struct ckpt_hdr_ipc_shm *h,
> @@ -59,10 +122,8 @@ static int fill_ipc_shm_hdr(struct ckpt_ctx *ctx,
>  
>  	h->flags = 0;
>  
> -	/* check if shm was setup with SHM_HUGETLB (unsupported yet) */
>  	if (is_file_hugepages(shp->shm_file)) {
> -		pr_warning("c/r: unsupported SHM_HUGETLB\n");
> -		ret = -ENOSYS;
> +		h->flags |= SHM_HUGETLB;
>  	} else {
>  		struct shmem_inode_info *info;
>  
> @@ -117,7 +178,10 @@ int checkpoint_ipc_shm(int id, void *p, void *data)
>  	if (ret < 0)
>  		goto out;
>  
> -	ret = checkpoint_memory_contents(ctx, NULL, inode);
> +	if (is_file_hugepages(shp->shm_file))
> +		ret = shm_hugetlb_checkpoint_contents(ctx, shp->shm_file);
> +	else
> +		ret = checkpoint_memory_contents(ctx, NULL, inode);
>   out:
>  	ckpt_hdr_put(ctx, h);
>  	return ret;
> @@ -149,6 +213,75 @@ struct dq_ipcshm_del {
>  	int id;
>  };
>  
> +static void __load_ipc_shm_hdr(const struct ckpt_hdr_ipc_shm *h, struct shmid_kernel *shp)
> +{
> +	shp->shm_atim = h->shm_atim;
> +	shp->shm_dtim = h->shm_dtim;
> +	shp->shm_ctim = h->shm_ctim;
> +	shp->shm_cprid = h->shm_cprid;
> +	shp->shm_lprid = h->shm_lprid;
> +}
> +
> +static int shm_hugetlb_restore_contents(struct ckpt_ctx *ctx, struct ipc_namespace *ipcns, struct shmid_kernel *shp, const struct ckpt_hdr_ipc_shm *hdr)
> +{
> +	unsigned long start;
> +	int ret;
> +
> +	ret = do_shmat_ns_pgoff(ipcns, shp->shm_perm.id, (char __user *)0,
> +				0, &start, 0, 0);
> +	if (ret != 0)
> +		return ret;
> +
> +	ckpt_debug("temporarily using %#lx for huge shm restore\n", start);
> +
> +	while (1) {
> +		struct ckpt_hdr_hpage *hdr;
> +		unsigned long hpagesize;
> +		unsigned long index;
> +		unsigned long addr;
> +		struct page *page;
> +		bool last;
> +
> +		hdr = ckpt_read_obj_type(ctx, sizeof(*hdr), CKPT_HDR_HPAGE);
> +		if (IS_ERR(hdr)) {
> +			ret = PTR_ERR(hdr);
> +			break;
> +		}
> +
> +		last = ckpt_hdr_hpage_last(hdr);
> +		index = (unsigned long)hdr->index;
> +		hpagesize = 1UL << hdr->shift;
> +
> +		ckpt_hdr_put(ctx, hdr);
> +
> +		if (last)
> +			break;
> +
> +		addr = start + (hpagesize * index);
> +
> +		down_read(&current->mm->mmap_sem);
> +		ret = get_user_pages(current, current->mm, addr, 1, 1, 1,
> +				     &page, NULL);
> +		up_read(&current->mm->mmap_sem);
> +
> +		if (ret < 0)
> +			break;
> +
> +		ret = hugetlb_restore_page(ctx, page);
> +
> +		page_cache_release(page);
> +
> +		if (ret < 0)
> +			break;
> +	}
> +
> +	sys_shmdt((void __user *)start);
> +
> +	__load_ipc_shm_hdr(hdr, shp);
> +
> +	return ret;
> +}
> +
>  static int _ipc_shm_delete(struct ipc_namespace *ns, int id)
>  {
>  	mm_segment_t old_fs;
> @@ -190,11 +323,7 @@ static int load_ipc_shm_hdr(struct ckpt_ctx *ctx,
>  	if (h->shm_cprid < 0 || h->shm_lprid < 0)
>  		return -EINVAL;
>  
> -	shp->shm_atim = h->shm_atim;
> -	shp->shm_dtim = h->shm_dtim;
> -	shp->shm_ctim = h->shm_ctim;
> -	shp->shm_cprid = h->shm_cprid;
> -	shp->shm_lprid = h->shm_lprid;
> +	__load_ipc_shm_hdr(h, shp);
>  
>  	return 0;
>  }
> @@ -224,8 +353,6 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
>  	ret = -ENOSYS;
>  	if (h->mlock_uid != (unsigned int) -1)	/* FIXME: support SHM_LOCK */
>  		goto out;
> -	if (h->flags & SHM_HUGETLB)	/* FIXME: support SHM_HUGETLB */
> -		goto out;
>  
>  	shmflag = h->flags | h->perms.mode | IPC_CREAT | IPC_EXCL;
>  	ckpt_debug("shm: do_shmget size %lld flag %#x id %d\n",
> @@ -294,7 +421,10 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
>  	ret = ckpt_obj_insert(ctx, file, h->objref, CKPT_OBJ_FILE);
>  	if (ret < 0)
>  		goto fput;
> -	ret = restore_memory_contents(ctx, file->f_dentry->d_inode);
> +	if (is_file_hugepages(file))
> +		ret = shm_hugetlb_restore_contents(ctx, ns, shp, h);
> +	else
> +		ret = restore_memory_contents(ctx, file->f_dentry->d_inode);
>  fput:
>  	fput(file);
>  

  parent reply	other threads:[~2010-09-17  0:40 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-09-14 20:02 [PATCH 0/8] checkpoint/restart: sysvshm fixes and hugetlb support Nathan Lynch
     [not found] ` <1284494530-25946-1-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-14 20:02   ` [PATCH 1/8] sysvshm: check for hugetlb before assuming shmem Nathan Lynch
2010-09-14 20:02   ` [PATCH 2/8] sysvshm: report error on failure to reattach, avoid crash Nathan Lynch
     [not found]     ` <1284494530-25946-3-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-15  2:55       ` Matt Helsley
     [not found]         ` <20100915025502.GG8957-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2010-09-15  3:04           ` Matt Helsley
2010-09-17  0:17           ` Oren Laadan
2010-09-14 20:02   ` [PATCH 3/8] checkpoint/sysvshm: release rwsem earlier during restore Nathan Lynch
2010-09-14 20:02   ` [PATCH 4/8] checkpoint/ipc: allow shmat callers to specify ipc namespace Nathan Lynch
2010-09-14 20:02   ` [PATCH 5/8] checkpoint/restart of anonymous hugetlb mappings Nathan Lynch
     [not found]     ` <1284494530-25946-6-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-17  0:44       ` Oren Laadan
     [not found]         ` <4C92BA08.70106-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-09-17 20:23           ` Nathan Lynch
     [not found]             ` <1284754993.4109.397.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2010-11-01 17:44               ` Oren Laadan
2010-09-14 20:02   ` [PATCH 6/8] remove VM_HUGETLB and VM_RESERVED from CKPT_VMA_NOT_SUPPORTED Nathan Lynch
     [not found]     ` <1284494530-25946-7-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-17  3:35       ` Serge E. Hallyn
2010-09-14 20:02   ` [PATCH 7/8] hugetlbfs checkpoint/restart hooks Nathan Lynch
2010-09-14 20:02   ` [PATCH 8/8] checkpoint/restart of SysV SHM_HUGETLB regions Nathan Lynch
     [not found]     ` <1284494530-25946-9-git-send-email-ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>
2010-09-17  0:40       ` Oren Laadan [this message]
     [not found]         ` <4C92B903.20304-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-09-17 19:03           ` Nathan Lynch
2010-09-17  0:37   ` [PATCH 0/8] checkpoint/restart: sysvshm fixes and hugetlb support Oren Laadan
     [not found]     ` <4C92B831.40400-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2010-10-06 19:43       ` Nathan Lynch
2010-11-01 17:45         ` Oren Laadan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4C92B903.20304@cs.columbia.edu \
    --to=orenl-eqauephvms7envbuuze7ea@public.gmane.org \
    --cc=containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org \
    --cc=ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.