public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Andrey Mirkin <major@openvz.org>
To: devel@openvz.org, Louis.Rilling@kerlabs.com
Cc: containers@lists.linux-foundation.org, linux-kernel@vger.kernel.org
Subject: Re: [Devel] Re: [PATCH 08/10] Introduce functions to restart a process
Date: Thu, 23 Oct 2008 14:56:26 +0400	[thread overview]
Message-ID: <200810231456.27902.major@openvz.org> (raw)
In-Reply-To: <20081020132536.GS15171@hawkmoon.kerlabs.com>

On Monday 20 October 2008 17:25 Louis Rilling wrote:
> On Sat, Oct 18, 2008 at 03:11:36AM +0400, Andrey Mirkin wrote:
> > Functions to restart process, restore its state, fpu and registers are
> > added.
>
> [...]
>
> > diff --git a/checkpoint/rst_process.c b/checkpoint/rst_process.c
> > new file mode 100644
> > index 0000000..b9f745e
> > --- /dev/null
> > +++ b/checkpoint/rst_process.c
> > @@ -0,0 +1,277 @@
> > +/*
> > + *  Copyright (C) 2008 Parallels, Inc.
> > + *
> > + *  Author: Andrey Mirkin <major@openvz.org>
> > + *
> > + *  This program is free software; you can redistribute it and/or
> > + *  modify it under the terms of the GNU General Public License as
> > + *  published by the Free Software Foundation, version 2 of the
> > + *  License.
> > + *
> > + */
> > +
> > +#include <linux/sched.h>
> > +#include <linux/fs.h>
> > +#include <linux/file.h>
> > +#include <linux/version.h>
> > +#include <linux/module.h>
> > +
> > +#include "checkpoint.h"
> > +#include "cpt_image.h"
> > +
> > +#define HOOK_RESERVE	256
> > +
> > +struct thr_context {
> > +	struct completion complete;
> > +	int error;
> > +	struct cpt_context *ctx;
> > +	struct task_struct *tsk;
> > +};
> > +
> > +int local_kernel_thread(int (*fn)(void *), void * arg, unsigned long flags, pid_t pid)
> > +{
> > +	pid_t ret;
> > +
> > +	if (current->fs == NULL) {
> > +		/* do_fork_pid() hates processes without fs, oopses. */
> > +		eprintk("local_kernel_thread: current->fs==NULL\n");
> > +		return -EINVAL;
> > +	}
> > +	if (!try_module_get(THIS_MODULE))
> > +		return -EBUSY;
> > +	ret = kernel_thread(fn, arg, flags);
> > +	if (ret < 0)
> > +		module_put(THIS_MODULE);
> > +	return ret;
> > +}
> > +
> > +static unsigned int decode_task_flags(unsigned int task_flags)
> > +{
> > +	unsigned int flags = 0;
> > +
> > +	if (task_flags & (1 << CPT_PF_EXITING))
> > +		flags |= PF_EXITING;
> > +	if (task_flags & (1 << CPT_PF_FORKNOEXEC))
> > +		flags |= PF_FORKNOEXEC;
> > +	if (task_flags & (1 << CPT_PF_SUPERPRIV))
> > +		flags |= PF_SUPERPRIV;
> > +	if (task_flags & (1 << CPT_PF_DUMPCORE))
> > +		flags |= PF_DUMPCORE;
> > +	if (task_flags & (1 << CPT_PF_SIGNALED))
> > +		flags |= PF_SIGNALED;
> > +
> > +	return flags;
> > +
> > +}
> > +
> > +int rst_restore_task_struct(struct task_struct *tsk, struct cpt_task_image *ti,
> > +			    struct cpt_context *ctx)
> > +{
> > +	int i;
> > +
> > +	/* Restore only saved flags, comm and tls for now */
> > +	tsk->flags = decode_task_flags(ti->cpt_flags);
> > +	clear_tsk_thread_flag(tsk, TIF_FREEZE);
> > +	memcpy(tsk->comm, ti->cpt_comm, TASK_COMM_LEN);
> > +	for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++) {
> > +		tsk->thread.tls_array[i].a = ti->cpt_tls[i] & 0xFFFFFFFF;
> > +		tsk->thread.tls_array[i].b = ti->cpt_tls[i] >> 32;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int rst_restore_fpustate(struct task_struct *tsk, struct cpt_task_image *ti,
> > +				struct cpt_context *ctx)
> > +{
> > +	struct cpt_obj_bits hdr;
> > +	int err;
> > +	char *buf;
> > +
> > +	clear_stopped_child_used_math(tsk);
> > +
> > +	err = rst_get_object(CPT_OBJ_BITS, &hdr, sizeof(hdr), ctx);
> > +	if (err < 0)
> > +		return err;
> > +
> > +	buf = kmalloc(hdr.cpt_size, GFP_KERNEL);
> > +	if (!buf)
> > +		return -ENOMEM;
> > +
> > +	err = ctx->read(buf, hdr.cpt_size, ctx);
> > +	if (err)
> > +		goto out;
> > +
> > +	if (hdr.cpt_content == CPT_CONTENT_X86_FPUSTATE && cpu_has_fxsr) {
> > +		memcpy(&tsk->thread.xstate, buf,
> > +				sizeof(struct i387_fxsave_struct));
> > +		if (ti->cpt_flags & CPT_PF_USED_MATH)
> > +			set_stopped_child_used_math(tsk);
> > +	}
> > +#ifndef CONFIG_X86_64
> > +	else if (hdr.cpt_content == CPT_CONTENT_X86_FPUSTATE_OLD &&
> > +			!cpu_has_fxsr) {
> > +		memcpy(&tsk->thread.xstate, buf,
> > +				sizeof(struct i387_fsave_struct));
> > +		if (ti->cpt_flags & CPT_PF_USED_MATH)
> > +			set_stopped_child_used_math(tsk);
> > +	}
> > +#endif
> > +
> > +out:
> > +	kfree(buf);
> > +	return err;
> > +}
> > +
> > +static u32 decode_segment(u32 segid)
> > +{
> > +	if (segid == CPT_SEG_ZERO)
> > +		return 0;
> > +
> > +	/* TLS descriptors */
> > +	if (segid <= CPT_SEG_TLS3)
> > +		return ((GDT_ENTRY_TLS_MIN + segid - CPT_SEG_TLS1) << 3) + 3;
> > +
> > +	/* LDT descriptor, it is just an index to LDT array */
> > +	if (segid >= CPT_SEG_LDT)
> > +		return ((segid - CPT_SEG_LDT) << 3) | 7;
> > +
> > +	/* Check for one of standard descriptors */
> > +	if (segid == CPT_SEG_USER32_DS)
> > +		return __USER_DS;
> > +	if (segid == CPT_SEG_USER32_CS)
> > +		return __USER_CS;
> > +
> > +	eprintk("Invalid segment reg %d\n", segid);
> > +	return 0;
> > +}
> > +
> > +static int rst_restore_registers(struct task_struct *tsk, struct cpt_context *ctx)
> > +{
> > +	struct cpt_x86_regs ri;
> > +	struct pt_regs *regs = task_pt_regs(tsk);
> > +	extern char i386_ret_from_resume;
> > +	int err;
> > +
> > +	err = rst_get_object(CPT_OBJ_X86_REGS, &ri, sizeof(ri), ctx);
> > +	if (err < 0)
> > +		return err;
> > +
> > +	tsk->thread.sp = (unsigned long) regs;
> > +	tsk->thread.sp0 = (unsigned long) (regs+1);
> > +	tsk->thread.ip = (unsigned long) &i386_ret_from_resume;
> > +
> > +	tsk->thread.gs = decode_segment(ri.cpt_gs);
> > +	tsk->thread.debugreg0 = ri.cpt_debugreg[0];
> > +	tsk->thread.debugreg1 = ri.cpt_debugreg[1];
> > +	tsk->thread.debugreg2 = ri.cpt_debugreg[2];
> > +	tsk->thread.debugreg3 = ri.cpt_debugreg[3];
> > +	tsk->thread.debugreg6 = ri.cpt_debugreg[6];
> > +	tsk->thread.debugreg7 = ri.cpt_debugreg[7];
> > +
> > +	regs->bx = ri.cpt_bx;
> > +	regs->cx = ri.cpt_cx;
> > +	regs->dx = ri.cpt_dx;
> > +	regs->si = ri.cpt_si;
> > +	regs->di = ri.cpt_di;
> > +	regs->bp = ri.cpt_bp;
> > +	regs->ax = ri.cpt_ax;
> > +	regs->orig_ax = ri.cpt_orig_ax;
> > +	regs->ip = ri.cpt_ip;
> > +	regs->flags = ri.cpt_flags;
> > +	regs->sp = ri.cpt_sp;
> > +
> > +	regs->cs = decode_segment(ri.cpt_cs);
> > +	regs->ss = decode_segment(ri.cpt_ss);
> > +	regs->ds = decode_segment(ri.cpt_ds);
> > +	regs->es = decode_segment(ri.cpt_es);
> > +	regs->fs = decode_segment(ri.cpt_fs);
> > +
> > +	tsk->thread.sp -= HOOK_RESERVE;
> > +	memset((void*)tsk->thread.sp, 0, HOOK_RESERVE);
> > +
> > +	return 0;
> > +}
> > +
> > +static int restart_thread(void *arg)
> > +{
> > +	struct thr_context *thr_ctx = arg;
> > +	struct cpt_context *ctx;
> > +	struct cpt_task_image *ti;
> > +	int err;
> > +
> > +	current->state = TASK_UNINTERRUPTIBLE;
> > +
> > +	ctx = thr_ctx->ctx;
> > +	ti = kmalloc(sizeof(*ti), GFP_KERNEL);
> > +	if (!ti)
> > +		return -ENOMEM;
> > +
> > +	err = rst_get_object(CPT_OBJ_TASK, ti, sizeof(*ti), ctx);
> > +	if (!err)
> > +		err = rst_restore_task_struct(current, ti, ctx);
> > +	/* Restore mm here */
> > +	if (!err)
> > +		err = rst_restore_fpustate(current, ti, ctx);
> > +	if (!err)
> > +		err = rst_restore_registers(current, ctx);
> > +
> > +	thr_ctx->error = err;
> > +	complete(&thr_ctx->complete);
> > +
> > +	if (!err && (ti->cpt_state & (EXIT_ZOMBIE|EXIT_DEAD))) {
> > +		do_exit(ti->cpt_exit_code);
> > +	} else {
> > +		__set_current_state(TASK_UNINTERRUPTIBLE);
> > +	}
> > +
> > +	kfree(ti);
> > +	schedule();
> > +
> > +	eprintk("leaked %d/%d %p\n", task_pid_nr(current), task_pid_vnr(current), current->mm);
> > +
> > +	module_put(THIS_MODULE);
>
> I'm sorry, I still do not understand what you are doing with this
> self-module pinning stuff. AFAICS, we should not get here unless there is a
> bug. So the checkpoint module ref count is never decreased, right?
>
> Could you explain what this self-module pinning is for? As I already told
> you, this looks like a bogus solution to avoid unloading the checkpoint
> module during restart.

Actually, right now the module ref count increase/decrease is not needed.
But in some cases restore work must be done only after the process has been
unfrozen. In that case we should grab a module reference during process
creation and put it after this special work is done.
I will rework this part and send it in the next version to make it clearer
how it will be used in the future.

Andrey

  reply	other threads:[~2008-10-23 10:56 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-10-17 23:11 [PATCH 0/10] OpenVZ kernel based checkpointing/restart (v2) Andrey Mirkin
2008-10-17 23:11 ` [PATCH 01/10] Introduce trivial sys_checkpoint and sys_restore system calls Andrey Mirkin
2008-10-17 23:11   ` [PATCH 02/10] Make checkpoint/restart functionality modular Andrey Mirkin
2008-10-17 23:11     ` [PATCH 03/10] Introduce context structure needed during checkpointing/restart Andrey Mirkin
2008-10-17 23:11       ` [PATCH 04/10] Introduce container dump function Andrey Mirkin
2008-10-17 23:11         ` [PATCH 05/10] Introduce function to dump process Andrey Mirkin
2008-10-17 23:11           ` [PATCH 06/10] Introduce functions to dump mm Andrey Mirkin
2008-10-17 23:11             ` [PATCH 07/10] Introduce function for restarting a container Andrey Mirkin
2008-10-17 23:11               ` [PATCH 08/10] Introduce functions to restart a process Andrey Mirkin
2008-10-17 23:11                 ` [PATCH 09/10] Introduce functions to restore mm Andrey Mirkin
2008-10-17 23:11                   ` [PATCH 10/10] Add support for multiple processes Andrey Mirkin
2008-10-27 15:58                     ` Oren Laadan
2008-10-30  4:55                       ` [Devel] " Andrey Mirkin
2008-10-20  9:23                 ` [PATCH 08/10] Introduce functions to restart a process Cedric Le Goater
2008-10-22  8:49                   ` [Devel] " Andrey Mirkin
2008-10-22  9:25                     ` Louis Rilling
2008-10-22 10:06                       ` Greg Kurz
2008-10-22 10:44                         ` Louis Rilling
2008-10-22 12:44                           ` Greg Kurz
2008-10-22 10:12                       ` Andrey Mirkin
2008-10-22 10:46                         ` Louis Rilling
2008-10-23  8:53                           ` Andrey Mirkin
2008-10-22 15:25                         ` Oren Laadan
2008-10-23  9:00                           ` Andrey Mirkin
2008-10-23 13:57                             ` Dave Hansen
2008-10-24  3:57                               ` Andrey Mirkin
2008-10-25 21:10                                 ` Oren Laadan
2008-10-29 14:52                                   ` Andrey Mirkin
2008-10-30 15:59                                     ` Oren Laadan
2008-10-22 12:47                     ` Cedric Le Goater
2008-10-23  9:54                       ` Andrey Mirkin
2008-10-23 13:49                         ` Dave Hansen
2008-10-24  4:04                           ` Andrey Mirkin
2008-10-20 13:25                 ` Louis Rilling
2008-10-23 10:56                   ` Andrey Mirkin [this message]
2008-10-20 12:25             ` [PATCH 06/10] Introduce functions to dump mm Louis Rilling
2008-10-22  8:58               ` [Devel] " Andrey Mirkin
2008-10-20 17:21             ` Dave Hansen
2008-10-23  8:43               ` [Devel] " Andrey Mirkin
2008-10-23 13:51                 ` Dave Hansen
2008-10-24  4:07                   ` Andrey Mirkin
2008-10-20 11:02           ` [PATCH 05/10] Introduce function to dump process Louis Rilling
2008-10-24  4:15             ` [Devel] " Andrey Mirkin
2008-10-20 17:48           ` Serge E. Hallyn
2008-10-24  4:40             ` [Devel] " Andrey Mirkin
2008-10-20 17:02       ` [PATCH 03/10] Introduce context structure needed during checkpointing/restart Dave Hansen
2008-10-29 15:30         ` [Devel] " Andrey Mirkin
2008-10-20 16:51     ` [PATCH 02/10] Make checkpoint/restart functionality modular Dave Hansen
2008-10-20 16:59     ` Serge E. Hallyn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200810231456.27902.major@openvz.org \
    --to=major@openvz.org \
    --cc=Louis.Rilling@kerlabs.com \
    --cc=containers@lists.linux-foundation.org \
    --cc=devel@openvz.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox