All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC] Send checkpoint and restart debug info to a log file (v2)
@ 2009-10-21 21:05 Serge E. Hallyn
       [not found] ` <20091021210507.GA2098-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Serge E. Hallyn @ 2009-10-21 21:05 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Linux Containers

Until now, debug data was sent to syslog.  That's not really
right.

This patch sends ckpt_debug output to a logfile provided by
the caller.  If no logfile is provided, then no debug output
is needed.  So the user can pass in -1 for the logfd to say
so.

Note that this does not address the potential (inevitable?)
lockup of writing out a debug msg while checkpointing the
debug file.  In practice so far it seems to work rather well.
(with quite a bit of testing)

This also means that we have to be more careful than we have
been about not writing out sensitive data.

This is pure RFC, not meant to be pretty.

The split into ckpt_debug and ckpt_err (see changelog) suggests
that we should rearrange (and be more consistent about) how and
when we print out debug info.  Left as an exercise for later.

Changelog:
	Oct 21: split ckpt_debug into ckpt_debug and ckpt_err.
		Git rid of the split by memory debug info etc.
		Since userspace actively asks for debug info, I
		also made it not depend on CONFIG_CHECKPOINT_DEBUG.
		We may want to put it back again to limit kernel
		size, but for now it's a distraction, and I'm not
		convinced it makes sense

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 arch/s390/kernel/compat_wrapper.S |    2 +
 arch/x86/mm/checkpoint.c          |   11 ++---
 checkpoint/checkpoint.c           |   22 ++++----
 checkpoint/files.c                |   29 +++++------
 checkpoint/memory.c               |   43 ++++++++-------
 checkpoint/namespace.c            |    3 -
 checkpoint/objhash.c              |   33 +++++++------
 checkpoint/process.c              |   85 +++++++++++++++----------------
 checkpoint/restart.c              |  101 +++++++++++++++++--------------------
 checkpoint/signal.c               |    5 +--
 checkpoint/sys.c                  |   94 ++++++++++++++++++++++------------
 drivers/char/tty_io.c             |   22 ++++----
 include/linux/checkpoint.h        |   85 ++++++++++++++++---------------
 include/linux/checkpoint_types.h  |    5 +-
 include/linux/syscalls.h          |    5 +-
 ipc/checkpoint.c                  |   13 ++---
 ipc/checkpoint_msg.c              |   13 ++---
 ipc/checkpoint_sem.c              |   13 ++---
 ipc/checkpoint_shm.c              |   15 ++---
 kernel/cred.c                     |    2 +-
 lib/Kconfig.debug                 |    4 +-
 mm/filemap.c                      |    2 +-
 mm/shmem.c                        |    2 +-
 net/checkpoint.c                  |   46 ++++++++--------
 net/ipv4/checkpoint.c             |   26 ++++-----
 net/unix/checkpoint.c             |   65 +++++++++++++-----------
 26 files changed, 380 insertions(+), 366 deletions(-)

diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
index e882f99..414226e 100644
--- a/arch/s390/kernel/compat_wrapper.S
+++ b/arch/s390/kernel/compat_wrapper.S
@@ -1846,6 +1846,7 @@ sys_checkpoint_wrapper:
 	lgfr	%r2,%r2			# pid_t
 	lgfr	%r3,%r3			# int
 	llgfr	%r4,%r4			# unsigned long
+	lgfr	%r5,%r5			# int
 	jg	compat_sys_checkpoint
 
 	.globl sys_restore_wrapper
@@ -1853,4 +1854,5 @@ sys_restore_wrapper:
 	lgfr	%r2,%r2			# int
 	lgfr	%r3,%r3			# int
 	llgfr	%r4,%r4			# unsigned long
+	lgfr	%r5,%r5			# int
 	jg	compat_sys_restore
diff --git a/arch/x86/mm/checkpoint.c b/arch/x86/mm/checkpoint.c
index 9dd8e12..533e783 100644
--- a/arch/x86/mm/checkpoint.c
+++ b/arch/x86/mm/checkpoint.c
@@ -8,9 +8,6 @@
  *  distribution for more details.
  */
 
-/* default debug level for output */
-#define CKPT_DFLAG  CKPT_DSYS
-
 #include <asm/desc.h>
 #include <asm/i387.h>
 #include <asm/elf.h>
@@ -292,7 +289,7 @@ int checkpoint_cpu(struct ckpt_ctx *ctx, struct task_struct *t)
 	save_cpu_debug(h, t);
 	save_cpu_fpu(h, t);
 
-	ckpt_debug("math %d debug %d\n", h->used_math, !!h->debugreg7);
+	ckpt_debug(ctx, "math %d debug %d\n", h->used_math, !!h->debugreg7);
 
 	ret = ckpt_write_obj(ctx, &h->h);
 	if (ret < 0)
@@ -341,7 +338,7 @@ int checkpoint_mm_context(struct ckpt_ctx *ctx, struct mm_struct *mm)
 	h->ldt_entry_size = LDT_ENTRY_SIZE;
 	h->nldt = mm->context.size;
 
-	ckpt_debug("nldt %d vdso %#llx\n", h->nldt, h->vdso);
+	ckpt_debug(ctx, "nldt %d vdso %#llx\n", h->nldt, h->vdso);
 
 	ret = ckpt_write_obj(ctx, &h->h);
 	ckpt_hdr_put(ctx, h);
@@ -522,7 +519,7 @@ int restore_cpu(struct ckpt_ctx *ctx)
 	if (IS_ERR(h))
 		return PTR_ERR(h);
 
-	ckpt_debug("math %d debug %d\n", h->used_math, !!h->debugreg7);
+	ckpt_debug(ctx, "math %d debug %d\n", h->used_math, !!h->debugreg7);
 
 	ret = load_cpu_regs(h, t);
 	if (ret < 0)
@@ -572,7 +569,7 @@ int restore_mm_context(struct ckpt_ctx *ctx, struct mm_struct *mm)
 	if (IS_ERR(h))
 		return PTR_ERR(h);
 
-	ckpt_debug("nldt %d vdso %#lx (%p)\n",
+	ckpt_debug(ctx, "nldt %d vdso %#lx (%p)\n",
 		 h->nldt, (unsigned long) h->vdso, mm->context.vdso);
 
 	ret = -EINVAL;
diff --git a/checkpoint/checkpoint.c b/checkpoint/checkpoint.c
index 6eb8f3b..883ccb9 100644
--- a/checkpoint/checkpoint.c
+++ b/checkpoint/checkpoint.c
@@ -8,9 +8,6 @@
  *  distribution for more details.
  */
 
-/* default debug level for output */
-#define CKPT_DFLAG  CKPT_DSYS
-
 #include <linux/version.h>
 #include <linux/sched.h>
 #include <linux/freezer.h>
@@ -38,7 +35,7 @@ static atomic_t ctx_count = ATOMIC_INIT(0);
  */
 int ckpt_write_obj(struct ckpt_ctx *ctx, struct ckpt_hdr *h)
 {
-	_ckpt_debug(CKPT_DRW, "type %d len %d\n", h->type, h->len);
+	ckpt_debug(ctx, "type %d len %d\n", h->type, h->len);
 	return ckpt_kwrite(ctx, h, h->len);
 }
 
@@ -63,7 +60,7 @@ int ckpt_write_obj_type(struct ckpt_ctx *ctx, void *ptr, int len, int type)
 	h->type = type;
 	h->len = len + sizeof(*h);
 
-	_ckpt_debug(CKPT_DRW, "type %d len %d\n", h->type, h->len);
+	ckpt_debug(ctx, "type %d len %d\n", h->type, h->len);
 	ret = ckpt_kwrite(ctx, h, sizeof(*h));
 	if (ret < 0)
 		goto out;
@@ -213,7 +210,7 @@ static void __ckpt_generate_err(struct ckpt_ctx *ctx, char *fmt0,
 	va_end(aq);
 	kfree(format);
 
-	ckpt_debug("c/r: checkpoint error: %s\n", str);
+	ckpt_debug(ctx, "c/r: checkpoint error: %s\n", str);
 }
 
 /**
@@ -392,7 +389,7 @@ static int checkpoint_all_tasks(struct ckpt_ctx *ctx)
 	int n, ret = 0;
 
 	for (n = 0; n < ctx->nr_tasks; n++) {
-		ckpt_debug("dumping task #%d\n", n);
+		ckpt_debug(ctx, "dumping task #%d\n", n);
 		ret = checkpoint_task(ctx, ctx->tasks_arr[n]);
 		if (ret < 0)
 			break;
@@ -407,7 +404,8 @@ static int may_checkpoint_task(struct ckpt_ctx *ctx, struct task_struct *t)
 	struct nsproxy *nsproxy;
 	int ret = 0;
 
-	ckpt_debug("check %d\n", task_pid_nr_ns(t, ctx->root_nsproxy->pid_ns));
+	ckpt_debug(ctx, "check %d\n",
+		   task_pid_nr_ns(t, ctx->root_nsproxy->pid_ns));
 
 	if (t->exit_state == EXIT_DEAD) {
 		__ckpt_write_err(ctx, "TE", "task state EXIT_DEAD\n", -EBUSY);
@@ -499,7 +497,8 @@ static int checkpoint_pids(struct ckpt_ctx *ctx)
 			h[n].vpgid = task_pgrp_nr_ns(task, ns);
 			h[n].vsid = task_session_nr_ns(task, ns);
 			h[n].vppid = task_tgid_nr_ns(task->real_parent, ns);
-			ckpt_debug("task[%d]: vpid %d vtgid %d parent %d\n",
+			ckpt_debug(ctx,
+				   "task[%d]: vpid %d vtgid %d parent %d\n",
 				   pos, h[n].vpid, h[n].vtgid, h[n].vppid);
 			pos++;
 		}
@@ -522,7 +521,7 @@ static int collect_objects(struct ckpt_ctx *ctx)
 	int n, ret = 0;
 
 	for (n = 0; n < ctx->nr_tasks; n++) {
-		ckpt_debug("dumping task #%d\n", n);
+		ckpt_debug(ctx, "dumping task #%d\n", n);
 		ret = ckpt_collect_task(ctx, ctx->tasks_arr[n]);
 		if (ret < 0) {
 			ctx->tsk = ctx->tasks_arr[n];
@@ -579,7 +578,8 @@ static int tree_count_tasks(struct ckpt_ctx *ctx)
 	data.ctx = ctx;
 	data.nr = 0;
 
-	return walk_task_subtree(ctx->root_task, __tree_count_tasks, &data);
+	return walk_task_subtree(ctx, ctx->root_task, __tree_count_tasks,
+				 &data);
 }
 
 /*
diff --git a/checkpoint/files.c b/checkpoint/files.c
index f6de07e..44b8db0 100644
--- a/checkpoint/files.c
+++ b/checkpoint/files.c
@@ -8,9 +8,6 @@
  *  distribution for more details.
  */
 
-/* default debug level for output */
-#define CKPT_DFLAG  CKPT_DFILE
-
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sched.h>
@@ -159,7 +156,7 @@ int checkpoint_file_common(struct ckpt_ctx *ctx, struct file *file,
 	if (h->f_credref < 0)
 		return h->f_credref;
 
-	ckpt_debug("file %s credref %d", file->f_dentry->d_name.name,
+	ckpt_debug(ctx, "file %s credref %d", file->f_dentry->d_name.name,
 		h->f_credref);
 
 	/* FIX: need also file->f_owner, etc */
@@ -204,12 +201,13 @@ int checkpoint_file(struct ckpt_ctx *ctx, void *ptr)
 	if (!file->f_op || !file->f_op->checkpoint) {
 		ckpt_write_err(ctx, "TEPS", "f_op lacks checkpoint",
 			       -EBADF, file, file->f_op);
-		ckpt_debug("f_op lacks checkpoint handler: %pS\n", file->f_op);
+		ckpt_err(ctx, "f_op lacks checkpoint handler: %pS\n",
+			   file->f_op);
 		return -EBADF;
 	}
 	if (d_unlinked(file->f_dentry)) {
 		ckpt_write_err(ctx, "TEP", "unlinked file", -EBADF, file);
-		ckpt_debug("unlinked files are unsupported\n");
+		ckpt_err(ctx, "unlinked files are unsupported\n");
 		return -EBADF;
 	}
 
@@ -264,7 +262,8 @@ static int checkpoint_file_desc(struct ckpt_ctx *ctx,
 	 * a reference to it, dump its state while at it.
 	 */
 	objref = checkpoint_obj(ctx, file, CKPT_OBJ_FILE);
-	ckpt_debug("fd %d objref %d file %p coe %d)\n", fd, objref, file, coe);
+	ckpt_debug(ctx, "fd %d objref %d file %p coe %d)\n", fd, objref, file,
+		   coe);
 	if (objref < 0) {
 		ret = objref;
 		goto out;
@@ -306,7 +305,7 @@ static int do_checkpoint_file_table(struct ckpt_ctx *ctx,
 	if (ret < 0)
 		goto out;
 
-	ckpt_debug("nfds %d\n", nfds);
+	ckpt_debug(ctx, "nfds %d\n", nfds);
 	for (n = 0; n < nfds; n++) {
 		ret = checkpoint_file_desc(ctx, files, fdtable[n]);
 		if (ret < 0)
@@ -314,7 +313,7 @@ static int do_checkpoint_file_table(struct ckpt_ctx *ctx,
 	}
 
 	ret = deferqueue_run(ctx->files_deferq);
-	ckpt_debug("files_deferq ran %d entries\n", ret);
+	ckpt_debug(ctx, "files_deferq ran %d entries\n", ret);
 	if (ret > 0)
 		ret = 0;
  out:
@@ -452,7 +451,7 @@ struct file *restore_open_fname(struct ckpt_ctx *ctx, int flags)
 	if (IS_ERR(h))
 		return (struct file *) h;
 	fname = (char *) (h + 1);
-	ckpt_debug("fname '%s' flags %#x\n", fname, flags);
+	ckpt_debug(ctx, "fname '%s' flags %#x\n", fname, flags);
 
 	file = filp_open(fname, flags, 0);
 	ckpt_hdr_put(ctx, h);
@@ -623,7 +622,7 @@ static struct file *do_restore_file(struct ckpt_ctx *ctx)
 	h = ckpt_read_buf_type(ctx, PAGE_SIZE, CKPT_HDR_FILE);
 	if (IS_ERR(h))
 		return (struct file *) h;
-	ckpt_debug("flags %#x mode %#x type %d\n",
+	ckpt_debug(ctx, "flags %#x mode %#x type %d\n",
 		 h->f_flags, h->f_mode, h->f_type);
 
 	if (h->f_type >= CKPT_FILE_MAX)
@@ -662,7 +661,7 @@ static int restore_file_desc(struct ckpt_ctx *ctx)
 	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_FILE_DESC);
 	if (IS_ERR(h))
 		return PTR_ERR(h);
-	ckpt_debug("ref %d fd %d c.o.e %d\n",
+	ckpt_debug(ctx, "ref %d fd %d c.o.e %d\n",
 		 h->fd_objref, h->fd_descriptor, h->fd_close_on_exec);
 
 	ret = -EINVAL;
@@ -681,7 +680,7 @@ static int restore_file_desc(struct ckpt_ctx *ctx)
 		goto out;
 	}
 
-	ckpt_debug("newfd got %d wanted %d\n", newfd, h->fd_descriptor);
+	ckpt_debug(ctx, "newfd got %d wanted %d\n", newfd, h->fd_descriptor);
 
 	/* reposition if newfd isn't desired fd */
 	if (newfd != h->fd_descriptor) {
@@ -709,7 +708,7 @@ static struct files_struct *do_restore_file_table(struct ckpt_ctx *ctx)
 	if (IS_ERR(h))
 		return (struct files_struct *) h;
 
-	ckpt_debug("nfds %d\n", h->fdt_nfds);
+	ckpt_debug(ctx, "nfds %d\n", h->fdt_nfds);
 
 	ret = -EMFILE;
 	if (h->fdt_nfds < 0 || h->fdt_nfds > sysctl_nr_open)
@@ -734,7 +733,7 @@ static struct files_struct *do_restore_file_table(struct ckpt_ctx *ctx)
 	}
 
 	ret = deferqueue_run(ctx->files_deferq);
-	ckpt_debug("files_deferq ran %d entries\n", ret);
+	ckpt_debug(ctx, "files_deferq ran %d entries\n", ret);
 	if (ret > 0)
 		ret = 0;
  out:
diff --git a/checkpoint/memory.c b/checkpoint/memory.c
index 0da948f..2e87554 100644
--- a/checkpoint/memory.c
+++ b/checkpoint/memory.c
@@ -8,9 +8,6 @@
  *  distribution for more details.
  */
 
-/* default debug level for output */
-#define CKPT_DFLAG  CKPT_DMEM
-
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -98,7 +95,6 @@ static inline struct ckpt_pgarr *pgarr_from_pool(struct ckpt_ctx *ctx)
 /* release pages referenced by a page-array */
 static void pgarr_release_pages(struct ckpt_pgarr *pgarr)
 {
-	ckpt_debug("total pages %d\n", pgarr->nr_used);
 	/*
 	 * both checkpoint and restart use 'nr_used', however we only
 	 * collect pages during checkpoint; in restart we simply return
@@ -197,8 +193,11 @@ static void pgarr_reset_all(struct ckpt_ctx *ctx)
 {
 	struct ckpt_pgarr *pgarr;
 
-	list_for_each_entry(pgarr, &ctx->pgarr_list, list)
+	list_for_each_entry(pgarr, &ctx->pgarr_list, list) {
+		ckpt_debug(ctx, "call pgarr_release_pages: total pages %d\n",
+			   pgarr->nr_used);
 		pgarr_release_pages(pgarr);
+	}
 	list_splice_init(&ctx->pgarr_list, &ctx->pgarr_pool);
 }
 
@@ -369,8 +368,7 @@ static int vma_fill_pgarr(struct ckpt_ctx *ctx,
 				return PTR_ERR(page);
 
 			if (page) {
-				_ckpt_debug(CKPT_DPAGE,
-					    "got page %#lx\n", addr);
+				ckpt_debug(ctx, "got page %#lx\n", addr);
 				pgarr->pages[pgarr->nr_used] = page;
 				pgarr->vaddrs[pgarr->nr_used] = addr;
 				pgarr->nr_used++;
@@ -508,7 +506,7 @@ int checkpoint_memory_contents(struct ckpt_ctx *ctx,
 		else if (cnt < 0)
 			return cnt;
 
-		ckpt_debug("collected %d pages\n", cnt);
+		ckpt_debug(ctx, "collected %d pages\n", cnt);
 
 		h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_PGARR);
 		if (!h)
@@ -551,7 +549,7 @@ int generic_vma_checkpoint(struct ckpt_ctx *ctx, struct vm_area_struct *vma,
 	struct ckpt_hdr_vma *h;
 	int ret;
 
-	ckpt_debug("vma %#lx-%#lx flags %#lx type %d\n",
+	ckpt_debug(ctx, "vma %#lx-%#lx flags %#lx type %d\n",
 		 vma->vm_start, vma->vm_end, vma->vm_flags, type);
 
 	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_VMA);
@@ -615,7 +613,7 @@ int shmem_vma_checkpoint(struct ckpt_ctx *ctx, struct vm_area_struct *vma,
 	struct file *file = vma->vm_file;
 	int ret;
 
-	ckpt_debug("type %d, ino_ref %d\n", type, ino_objref);
+	ckpt_debug(ctx, "type %d, ino_ref %d\n", type, ino_objref);
 	BUG_ON(!(vma->vm_flags & (VM_SHARED | VM_MAYSHARE)));
 	BUG_ON(!file);
 
@@ -694,7 +692,7 @@ static int do_checkpoint_mm(struct ckpt_ctx *ctx, struct mm_struct *mm)
 
 	/* write the vma's */
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		ckpt_debug("vma %#lx-%#lx flags %#lx\n",
+		ckpt_debug(ctx, "vma %#lx-%#lx flags %#lx\n",
 			 vma->vm_start, vma->vm_end, vma->vm_flags);
 		if (vma->vm_flags & CKPT_VMA_NOT_SUPPORTED) {
 			ckpt_write_err(ctx, "TE", "vma: bad flags (%#lx)\n",
@@ -895,7 +893,8 @@ static int read_pages_contents(struct ckpt_ctx *ctx, struct inode *inode)
 		for (i = 0; i < pgarr->nr_used; i++) {
 			struct page *page;
 
-			_ckpt_debug(CKPT_DPAGE, "got page %#lx\n", vaddrs[i]);
+			ckpt_debug(ctx, "got page %#lx\n",
+				    vaddrs[i]);
 			if (inode)
 				page = bring_shared_page(vaddrs[i], inode);
 			else
@@ -942,7 +941,8 @@ int restore_memory_contents(struct ckpt_ctx *ctx, struct inode *inode)
 		if (IS_ERR(h))
 			break;
 
-		ckpt_debug("total pages %ld\n", (unsigned long) h->nr_pages);
+		ckpt_debug(ctx, "total pages %ld\n",
+			   (unsigned long) h->nr_pages);
 
 		nr_pages = h->nr_pages;
 		ckpt_hdr_put(ctx, h);
@@ -1012,11 +1012,13 @@ static unsigned long calc_map_flags_bits(unsigned long orig_vm_flags)
 
 /**
  * generic_vma_restore - restore a vma
+ * @ctx - checkpoint context
  * @mm - address space
  * @file - file to map (NULL for anonymous)
  * @h - vma header data
  */
-unsigned long generic_vma_restore(struct mm_struct *mm,
+unsigned long generic_vma_restore(struct ckpt_ctx *ctx,
+				  struct mm_struct *mm,
 				  struct file *file,
 				  struct ckpt_hdr_vma *h)
 {
@@ -1038,7 +1040,7 @@ unsigned long generic_vma_restore(struct mm_struct *mm,
 	addr = do_mmap_pgoff(file, vm_start, vm_size,
 			     vm_prot, vm_flags, vm_pgoff);
 	up_write(&mm->mmap_sem);
-	ckpt_debug("size %#lx prot %#lx flag %#lx pgoff %#lx => %#lx\n",
+	ckpt_debug(ctx, "size %#lx prot %#lx flag %#lx pgoff %#lx => %#lx\n",
 		 vm_size, vm_prot, vm_flags, vm_pgoff, addr);
 
 	return addr;
@@ -1059,7 +1061,7 @@ int private_vma_restore(struct ckpt_ctx *ctx, struct mm_struct *mm,
 	if (h->vm_flags & (VM_SHARED | VM_MAYSHARE))
 		return -EINVAL;
 
-	addr = generic_vma_restore(mm, file, h);
+	addr = generic_vma_restore(ctx, mm, file, h);
 	if (IS_ERR((void *) addr))
 		return PTR_ERR((void *) addr);
 
@@ -1175,7 +1177,8 @@ static int restore_vma(struct ckpt_ctx *ctx, struct mm_struct *mm)
 	if (IS_ERR(h))
 		return PTR_ERR(h);
 
-	ckpt_debug("vma %#lx-%#lx flags %#lx type %d vmaref %d inoref %d\n",
+	ckpt_debug(ctx,
+		   "vma %#lx-%#lx flags %#lx type %d vmaref %d inoref %d\n",
 		   (unsigned long) h->vm_start, (unsigned long) h->vm_end,
 		   (unsigned long) h->vm_flags, (int) h->vma_type,
 		   (int) h->vma_objref, (int) h->ino_objref);
@@ -1196,10 +1199,10 @@ static int restore_vma(struct ckpt_ctx *ctx, struct mm_struct *mm)
 	BUG_ON(ops->vma_type != h->vma_type);
 
 	if (ops->restore) {
-		ckpt_debug("vma type %s\n", ops->vma_name);
+		ckpt_debug(ctx, "vma type %s\n", ops->vma_name);
 		ret = ops->restore(ctx, mm, h);
 	} else {
-		ckpt_debug("vma ignored\n");
+		ckpt_debug(ctx, "vma ignored\n");
 		ret = 0;
 	}
  out:
@@ -1237,7 +1240,7 @@ static struct mm_struct *do_restore_mm(struct ckpt_ctx *ctx)
 	if (IS_ERR(h))
 		return (struct mm_struct *) h;
 
-	ckpt_debug("map_count %d\n", h->map_count);
+	ckpt_debug(ctx, "map_count %d\n", h->map_count);
 
 	/* XXX need more sanity checks */
 
diff --git a/checkpoint/namespace.c b/checkpoint/namespace.c
index 89af2c0..5795bb8 100644
--- a/checkpoint/namespace.c
+++ b/checkpoint/namespace.c
@@ -8,9 +8,6 @@
  *  distribution for more details.
  */
 
-/* default debug level for output */
-#define CKPT_DFLAG  CKPT_DSYS
-
 #include <linux/nsproxy.h>
 #include <linux/user_namespace.h>
 
diff --git a/checkpoint/objhash.c b/checkpoint/objhash.c
index 730dd82..89018e4 100644
--- a/checkpoint/objhash.c
+++ b/checkpoint/objhash.c
@@ -8,9 +8,6 @@
  *  distribution for more details.
  */
 
-/* default debug level for output */
-#define CKPT_DFLAG  CKPT_DOBJ
-
 #include <linux/kernel.h>
 #include <linux/hash.h>
 #include <linux/file.h>
@@ -624,7 +621,7 @@ int ckpt_obj_collect(struct ckpt_ctx *ctx, void *ptr, enum obj_type type)
 	obj = obj_lookup_add(ctx, ptr, type, &first);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
-	ckpt_debug("%s objref %d first %d\n",
+	ckpt_debug(ctx, "%s objref %d first %d\n",
 		   obj->ops->obj_name, obj->objref, first);
 	return (first ? obj->objref : 0);
 }
@@ -645,7 +642,8 @@ int ckpt_obj_lookup(struct ckpt_ctx *ctx, void *ptr, enum obj_type type)
 	obj = obj_find_by_ptr(ctx, ptr);
 	BUG_ON(obj && obj->ops->obj_type != type);
 	if (obj)
-		ckpt_debug("%s objref %d\n", obj->ops->obj_name, obj->objref);
+		ckpt_debug(ctx, "%s objref %d\n", obj->ops->obj_name,
+			   obj->objref);
 	return obj ? obj->objref : 0;
 }
 
@@ -682,7 +680,7 @@ int ckpt_obj_lookup_add(struct ckpt_ctx *ctx, void *ptr,
 	obj = obj_lookup_add(ctx, ptr, type, first);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
-	ckpt_debug("%s objref %d first %d\n",
+	ckpt_debug(ctx, "%s objref %d first %d\n",
 		   obj->ops->obj_name, obj->objref, *first);
 
 	if (*first && obj_reverse_leak(ctx, obj))
@@ -780,7 +778,7 @@ int ckpt_obj_visit(struct ckpt_ctx *ctx, void *ptr, enum obj_type type)
 			return -EBUSY;
 		}
 	} else {
-		ckpt_debug("visit %s objref %d\n",
+		ckpt_debug(ctx, "visit %s objref %d\n",
 			   obj->ops->obj_name, obj->objref);
 		obj->flags |= CKPT_OBJ_VISITED;
 	}
@@ -823,6 +821,7 @@ static void ckpt_obj_users_inc(struct ckpt_ctx *ctx, void *ptr, int increment)
 
 /**
  * obj_sock_adjust_users - remove implicit reference on DEAD sockets
+ * @ctx: checkpoint context
  * @obj: CKPT_OBJ_SOCK object to adjust
  *
  * Sockets that have been disconnected from their struct file have
@@ -830,13 +829,14 @@ static void ckpt_obj_users_inc(struct ckpt_ctx *ctx, void *ptr, int increment)
  * assumption of such a reference is therefore incorrect, so we correct
  * it here.
  */
-static inline void obj_sock_adjust_users(struct ckpt_obj *obj)
+static inline void obj_sock_adjust_users(struct ckpt_ctx *ctx,
+					 struct ckpt_obj *obj)
 {
 	struct sock *sk = (struct sock *)obj->ptr;
 
 	if (sock_flag(sk, SOCK_DEAD)) {
 		obj->users--;
-		ckpt_debug("Adjusting SOCK %i count to %i\n",
+		ckpt_debug(ctx, "Adjusting SOCK %i count to %i\n",
 			   obj->objref, obj->users);
 	}
 }
@@ -858,6 +858,8 @@ int ckpt_obj_contained(struct ckpt_ctx *ctx)
 
 	/* account for ctx->file reference (if in the table already) */
 	ckpt_obj_users_inc(ctx, ctx->file, 1);
+	if (ctx->logfile)
+		ckpt_obj_users_inc(ctx, ctx->logfile, 1);
 	/* account for ctx->root_nsproxy reference (if in the table already) */
 	ckpt_obj_users_inc(ctx, ctx->root_nsproxy, 1);
 
@@ -866,10 +868,10 @@ int ckpt_obj_contained(struct ckpt_ctx *ctx)
 			continue;
 
 		if (obj->ops->obj_type == CKPT_OBJ_SOCK)
-			obj_sock_adjust_users(obj);
+			obj_sock_adjust_users(ctx, obj);
 
 		if (obj->ops->ref_users(obj->ptr) != obj->users) {
-			ckpt_debug("usage leak: %s\n", obj->ops->obj_name);
+			ckpt_debug(ctx, "usage leak: %s\n", obj->ops->obj_name);
 			ckpt_write_err(ctx, "OP", "leak: usage (%d != %d (%s)",
 				       obj->objref, obj->ptr,
 				       obj->ops->ref_users(obj->ptr),
@@ -894,7 +896,7 @@ int ckpt_obj_visited(struct ckpt_ctx *ctx)
 
 	hlist_for_each_entry(obj, node, &ctx->obj_hash->list, next) {
 		if (!(obj->flags & CKPT_OBJ_VISITED)) {
-			ckpt_debug("reverse leak: %s (%d)\n",
+			ckpt_debug(ctx, "reverse leak: %s (%d)\n",
 				   obj->ops->obj_name, obj->objref);
 			ckpt_write_err(ctx, "OP", "leak: not visited (%s)",
 				       obj->objref, obj->ptr,
@@ -929,7 +931,8 @@ int restore_obj(struct ckpt_ctx *ctx, struct ckpt_hdr_objref *h)
 	struct ckpt_obj *obj;
 	void *ptr = NULL;
 
-	ckpt_debug("len %d ref %d type %d\n", h->h.len, h->objref, h->objtype);
+	ckpt_debug(ctx, "len %d ref %d type %d\n", h->h.len, h->objref,
+		   h->objtype);
 	if (h->objtype >= CKPT_OBJ_MAX)
 		return -EINVAL;
 	if (h->objref <= 0)
@@ -985,7 +988,7 @@ int ckpt_obj_insert(struct ckpt_ctx *ctx, void *ptr,
 	obj = obj_new(ctx, ptr, objref, type);
 	if (IS_ERR(obj))
 		return PTR_ERR(obj);
-	ckpt_debug("%s objref %d\n", obj->ops->obj_name, objref);
+	ckpt_debug(ctx, "%s objref %d\n", obj->ops->obj_name, objref);
 	return obj->objref;
 }
 
@@ -1007,6 +1010,6 @@ void *ckpt_obj_fetch(struct ckpt_ctx *ctx, int objref, enum obj_type type)
 	obj = obj_find_by_objref(ctx, objref);
 	if (!obj)
 		return ERR_PTR(-EINVAL);
-	ckpt_debug("%s ref %d\n", obj->ops->obj_name, obj->objref);
+	ckpt_debug(ctx, "%s ref %d\n", obj->ops->obj_name, obj->objref);
 	return (obj->ops->obj_type == type ? obj->ptr : ERR_PTR(-ENOMSG));
 }
diff --git a/checkpoint/process.c b/checkpoint/process.c
index 6b2ef4c..8e22fcd 100644
--- a/checkpoint/process.c
+++ b/checkpoint/process.c
@@ -8,9 +8,6 @@
  *  distribution for more details.
  */
 
-/* default debug level for output */
-#define CKPT_DFLAG  CKPT_DSYS
-
 #include <linux/sched.h>
 #include <linux/nsproxy.h>
 #include <linux/posix-timers.h>
@@ -174,7 +171,7 @@ static int checkpoint_task_ns(struct ckpt_ctx *ctx, struct task_struct *t)
 	ns_objref = checkpoint_obj(ctx, nsproxy, CKPT_OBJ_NS);
 	put_nsproxy(nsproxy);
 
-	ckpt_debug("nsproxy: objref %d\n", ns_objref);
+	ckpt_debug(ctx, "nsproxy: objref %d\n", ns_objref);
 	if (ns_objref < 0)
 		return ns_objref;
 
@@ -245,30 +242,30 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
 	 */
 
 	ret = checkpoint_task_creds(ctx, t);
-	ckpt_debug("cred: objref %d\n", ret);
+	ckpt_debug(ctx, "cred: objref %d\n", ret);
 	if (!ret) {
 		ret = checkpoint_task_ns(ctx, t);
-		ckpt_debug("ns: objref %d\n", ret);
+		ckpt_debug(ctx, "ns: objref %d\n", ret);
 	}
 	if (ret < 0)
 		return ret;
 
 	files_objref = checkpoint_obj_file_table(ctx, t);
-	ckpt_debug("files: objref %d\n", files_objref);
+	ckpt_debug(ctx, "files: objref %d\n", files_objref);
 	if (files_objref < 0) {
 		ckpt_write_err(ctx, "TE", "files_struct", files_objref);
 		return files_objref;
 	}
 
 	mm_objref = checkpoint_obj_mm(ctx, t);
-	ckpt_debug("mm: objref %d\n", mm_objref);
+	ckpt_debug(ctx, "mm: objref %d\n", mm_objref);
 	if (mm_objref < 0) {
 		ckpt_write_err(ctx, "TE", "mm_struct", mm_objref);
 		return mm_objref;
 	}
 
 	sighand_objref = checkpoint_obj_sighand(ctx, t);
-	ckpt_debug("sighand: objref %d\n", sighand_objref);
+	ckpt_debug(ctx, "sighand: objref %d\n", sighand_objref);
 	if (sighand_objref < 0) {
 		ckpt_write_err(ctx, "TE", "sighand_struct", sighand_objref);
 		return sighand_objref;
@@ -283,7 +280,7 @@ static int checkpoint_task_objs(struct ckpt_ctx *ctx, struct task_struct *t)
 	 */
 	signal_objref = ckpt_obj_lookup_add(ctx, t->signal,
 					    CKPT_OBJ_SIGNAL, &first);
-	ckpt_debug("signal: objref %d\n", signal_objref);
+	ckpt_debug(ctx, "signal: objref %d\n", signal_objref);
 	if (signal_objref < 0)
 		return signal_objref;
 
@@ -330,7 +327,7 @@ int checkpoint_restart_block(struct ckpt_ctx *ctx, struct task_struct *t)
 	if (fn == do_no_restart_syscall) {
 
 		h->function_type = CKPT_RESTART_BLOCK_NONE;
-		ckpt_debug("restart_block: non\n");
+		ckpt_debug(ctx, "restart_block: non\n");
 
 	} else if (fn == hrtimer_nanosleep_restart) {
 
@@ -338,7 +335,7 @@ int checkpoint_restart_block(struct ckpt_ctx *ctx, struct task_struct *t)
 		h->arg_0 = restart_block->nanosleep.index;
 		h->arg_1 = (unsigned long) restart_block->nanosleep.rmtp;
 		expire = restart_block->nanosleep.expires;
-		ckpt_debug("restart_block: hrtimer expire %lld now %lld\n",
+		ckpt_debug(ctx, "restart_block: hrtimer expire %lld now %lld\n",
 			 expire, base);
 
 	} else if (fn == posix_cpu_nsleep_restart) {
@@ -350,7 +347,7 @@ int checkpoint_restart_block(struct ckpt_ctx *ctx, struct task_struct *t)
 		ts.tv_sec = restart_block->arg2;
 		ts.tv_nsec = restart_block->arg3;
 		expire = timespec_to_ns(&ts);
-		ckpt_debug("restart_block: posix_cpu expire %lld now %lld\n",
+		ckpt_debug(ctx, "restart_block: posix_cpu expire %lld now %lld\n",
 			 expire, base);
 
 #ifdef CONFIG_COMPAT
@@ -361,7 +358,7 @@ int checkpoint_restart_block(struct ckpt_ctx *ctx, struct task_struct *t)
 		h->arg_1 = (unsigned long)restart_block->nanosleep.rmtp;
 		h->arg_2 = (unsigned long)restart_block->nanosleep.compat_rmtp;
 		expire = restart_block->nanosleep.expires;
-		ckpt_debug("restart_block: compat expire %lld now %lld\n",
+		ckpt_debug(ctx, "restart_block: compat expire %lld now %lld\n",
 			 expire, base);
 
 	} else if (fn == compat_clock_nanosleep_restart) {
@@ -371,7 +368,7 @@ int checkpoint_restart_block(struct ckpt_ctx *ctx, struct task_struct *t)
 		h->arg_1 = (unsigned long)restart_block->nanosleep.rmtp;
 		h->arg_2 = (unsigned long)restart_block->nanosleep.compat_rmtp;
 		expire = restart_block->nanosleep.expires;
-		ckpt_debug("restart_block: compat_clock expire %lld now %lld\n",
+		ckpt_debug(ctx, "restart_block: compat_clock expire %lld now %lld\n",
 			 expire, base);
 
 #endif
@@ -383,7 +380,7 @@ int checkpoint_restart_block(struct ckpt_ctx *ctx, struct task_struct *t)
 		h->arg_2 = restart_block->futex.flags;
 		h->arg_3 = restart_block->futex.bitset;
 		expire = restart_block->futex.time;
-		ckpt_debug("restart_block: futex expire %lld now %lld\n",
+		ckpt_debug(ctx, "restart_block: futex expire %lld now %lld\n",
 			 expire, base);
 
 	} else if (fn == do_restart_poll) {
@@ -396,7 +393,7 @@ int checkpoint_restart_block(struct ckpt_ctx *ctx, struct task_struct *t)
 		ts.tv_sec = restart_block->poll.tv_sec;
 		ts.tv_nsec = restart_block->poll.tv_nsec;
 		expire = timespec_to_ns(&ts);
-		ckpt_debug("restart_block: poll expire %lld now %lld\n",
+		ckpt_debug(ctx, "restart_block: poll expire %lld now %lld\n",
 			 expire, base);
 
 	} else {
@@ -408,13 +405,13 @@ int checkpoint_restart_block(struct ckpt_ctx *ctx, struct task_struct *t)
 	/* common to all restart blocks: */
 	h->arg_4 = (base < expire ? expire - base : 0);
 
-	ckpt_debug("restart_block: args %#llx %#llx %#llx %#llx %#llx\n",
+	ckpt_debug(ctx, "restart_block: args %#llx %#llx %#llx %#llx %#llx\n",
 		 h->arg_0, h->arg_1, h->arg_2, h->arg_3, h->arg_4);
 
 	ret = ckpt_write_obj(ctx, &h->h);
 	ckpt_hdr_put(ctx, h);
 
-	ckpt_debug("restart_block ret %d\n", ret);
+	ckpt_debug(ctx, "restart_block ret %d\n", ret);
 	return ret;
 }
 
@@ -426,7 +423,7 @@ int checkpoint_task(struct ckpt_ctx *ctx, struct task_struct *t)
 	ctx->tsk = t;
 
 	ret = checkpoint_task_struct(ctx, t);
-	ckpt_debug("task %d\n", ret);
+	ckpt_debug(ctx, "task %d\n", ret);
 
 	if (ret < 0)
 		goto out;
@@ -436,23 +433,23 @@ int checkpoint_task(struct ckpt_ctx *ctx, struct task_struct *t)
 		return 0;
 
 	ret = checkpoint_task_objs(ctx, t);
-	ckpt_debug("objs %d\n", ret);
+	ckpt_debug(ctx, "objs %d\n", ret);
 	if (ret < 0)
 		goto out;
 	ret = checkpoint_thread(ctx, t);
-	ckpt_debug("thread %d\n", ret);
+	ckpt_debug(ctx, "thread %d\n", ret);
 	if (ret < 0)
 		goto out;
 	ret = checkpoint_restart_block(ctx, t);
-	ckpt_debug("restart-blocks %d\n", ret);
+	ckpt_debug(ctx, "restart-blocks %d\n", ret);
 	if (ret < 0)
 		goto out;
 	ret = checkpoint_cpu(ctx, t);
-	ckpt_debug("cpu %d\n", ret);
+	ckpt_debug(ctx, "cpu %d\n", ret);
 	if (ret < 0)
 		goto out;
 	ret = checkpoint_task_signal(ctx, t);
-	ckpt_debug("task-signal %d\n", ret);
+	ckpt_debug(ctx, "task-signal %d\n", ret);
  out:
 	ctx->tsk = NULL;
 	return ret;
@@ -564,7 +561,7 @@ static int restore_task_ns(struct ckpt_ctx *ctx)
 		switch_task_namespaces(current, nsproxy);
 	}
  out:
-	ckpt_debug("nsproxy: ret %d (%p)\n", ret, task_nsproxy(current));
+	ckpt_debug(ctx, "nsproxy: ret %d (%p)\n", ret, task_nsproxy(current));
 	ckpt_hdr_put(ctx, h);
 	return ret;
 }
@@ -581,14 +578,14 @@ static int restore_task_creds(struct ckpt_ctx *ctx)
 
 	realcred = ckpt_obj_fetch(ctx, h->cred_ref, CKPT_OBJ_CRED);
 	if (IS_ERR(realcred)) {
-		ckpt_debug("Error %ld fetching realcred (ref %d)\n",
+		ckpt_err(ctx, "Error %ld fetching realcred (ref %d)\n",
 			PTR_ERR(realcred), h->cred_ref);
 		ret = PTR_ERR(realcred);
 		goto out;
 	}
 	ecred = ckpt_obj_fetch(ctx, h->ecred_ref, CKPT_OBJ_CRED);
 	if (IS_ERR(ecred)) {
-		ckpt_debug("Error %ld fetching ecred (ref %d)\n",
+		ckpt_err(ctx, "Error %ld fetching ecred (ref %d)\n",
 			PTR_ERR(ecred), h->ecred_ref);
 		ret = PTR_ERR(ecred);
 		goto out;
@@ -597,7 +594,7 @@ static int restore_task_creds(struct ckpt_ctx *ctx)
 	ctx->ecred = ecred;
 
 out:
-	ckpt_debug("Returning %d\n", ret);
+	ckpt_debug(ctx, "Returning %d\n", ret);
 	ckpt_hdr_put(ctx, h);
 	return ret;
 }
@@ -614,38 +611,38 @@ static int restore_task_objs(struct ckpt_ctx *ctx)
 	 */
 	ret = restore_task_creds(ctx);
 	if (ret < 0) {
-		ckpt_debug("restore_task_creds returned %d\n", ret);
+		ckpt_err(ctx, "restore_task_creds returned %d\n", ret);
 		return ret;
 	}
 	ret = restore_task_ns(ctx);
 	if (ret < 0) {
-		ckpt_debug("restore_task_ns returned %d\n", ret);
+		ckpt_err(ctx, "restore_task_ns returned %d\n", ret);
 		return ret;
 	}
 
 	h = ckpt_read_obj_type(ctx, sizeof(*h), CKPT_HDR_TASK_OBJS);
 	if (IS_ERR(h)) {
-		ckpt_debug("Error fetching task obj\n");
+		ckpt_err(ctx, "Error fetching task obj\n");
 		return PTR_ERR(h);
 	}
 
 	ret = restore_obj_file_table(ctx, h->files_objref);
-	ckpt_debug("file_table: ret %d (%p)\n", ret, current->files);
+	ckpt_debug(ctx, "file_table: ret %d (%p)\n", ret, current->files);
 	if (ret < 0)
 		goto out;
 
 	ret = restore_obj_mm(ctx, h->mm_objref);
-	ckpt_debug("mm: ret %d (%p)\n", ret, current->mm);
+	ckpt_debug(ctx, "mm: ret %d (%p)\n", ret, current->mm);
 	if (ret < 0)
 		goto out;
 
 	ret = restore_obj_sighand(ctx, h->sighand_objref);
-	ckpt_debug("sighand: ret %d (%p)\n", ret, current->sighand);
+	ckpt_debug(ctx, "sighand: ret %d (%p)\n", ret, current->sighand);
 	if (ret < 0)
 		goto out;
 
 	ret = restore_obj_signal(ctx, h->signal_objref);
-	ckpt_debug("signal: ret %d (%p)\n", ret, current->signal);
+	ckpt_debug(ctx, "signal: ret %d (%p)\n", ret, current->signal);
  out:
 	ckpt_hdr_put(ctx, h);
 	return ret;
@@ -694,9 +691,9 @@ int restore_restart_block(struct ckpt_ctx *ctx)
 	expire = ktime_to_ns(ctx->ktime_begin) + h->arg_4;
 	restart_block.fn = NULL;
 
-	ckpt_debug("restart_block: expire %lld begin %lld\n",
+	ckpt_debug(ctx, "restart_block: expire %lld begin %lld\n",
 		 expire, ktime_to_ns(ctx->ktime_begin));
-	ckpt_debug("restart_block: args %#llx %#llx %#llx %#llx %#llx\n",
+	ckpt_debug(ctx, "restart_block: args %#llx %#llx %#llx %#llx %#llx\n",
 		 h->arg_0, h->arg_1, h->arg_2, h->arg_3, h->arg_4);
 
 	switch (h->function_type) {
@@ -876,7 +873,7 @@ int restore_task(struct ckpt_ctx *ctx)
 	int ret;
 
 	ret = restore_task_struct(ctx);
-	ckpt_debug("task %d\n", ret);
+	ckpt_debug(ctx, "task %d\n", ret);
 	if (ret < 0)
 		goto out;
 
@@ -888,23 +885,23 @@ int restore_task(struct ckpt_ctx *ctx)
 	if (ret < 0)
 		goto out;
 	ret = restore_task_objs(ctx);
-	ckpt_debug("objs %d\n", ret);
+	ckpt_debug(ctx, "objs %d\n", ret);
 	if (ret < 0)
 		goto out;
 	ret = restore_thread(ctx);
-	ckpt_debug("thread %d\n", ret);
+	ckpt_debug(ctx, "thread %d\n", ret);
 	if (ret < 0)
 		goto out;
 	ret = restore_restart_block(ctx);
-	ckpt_debug("restart-blocks %d\n", ret);
+	ckpt_debug(ctx, "restart-blocks %d\n", ret);
 	if (ret < 0)
 		goto out;
 	ret = restore_cpu(ctx);
-	ckpt_debug("cpu %d\n", ret);
+	ckpt_debug(ctx, "cpu %d\n", ret);
 	if (ret < 0)
 		goto out;
 	ret = restore_creds(ctx);
-	ckpt_debug("creds: ret %d\n", ret);
+	ckpt_debug(ctx, "creds: ret %d\n", ret);
 	if (ret < 0)
 		goto out;
 
diff --git a/checkpoint/restart.c b/checkpoint/restart.c
index 32a9fc5..4664b64 100644
--- a/checkpoint/restart.c
+++ b/checkpoint/restart.c
@@ -8,9 +8,6 @@
  *  distribution for more details.
  */
 
-/* default debug level for output */
-#define CKPT_DFLAG  CKPT_DSYS
-
 #include <linux/version.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
@@ -64,7 +61,7 @@ static int restore_debug_task(struct ckpt_ctx *ctx, int flags)
 
 	s = kmalloc(sizeof(*s), GFP_KERNEL);
 	if (!s) {
-		ckpt_debug("no memory to register ?!\n");
+		ckpt_err(ctx, "no memory to register ?!\n");
 		return -ENOMEM;
 	}
 	s->pid = current->pid;
@@ -137,15 +134,16 @@ void restore_debug_free(struct ckpt_ctx *ctx)
 	 */
 	list_for_each_entry(s, &ctx->task_status, list)
 		count++;
-	ckpt_debug("%d tasks registered, nr_tasks was %d nr_total %d\n",
+	ckpt_debug(ctx, "%d tasks registered, nr_tasks was %d nr_total %d\n",
 		   count, ctx->nr_tasks, atomic_read(&ctx->nr_total));
 
-	ckpt_debug("active pid was %d, ctx->errno %d\n", ctx->active_pid,
+	ckpt_debug(ctx, "active pid was %d, ctx->errno %d\n", ctx->active_pid,
 		   ctx->errno);
-	ckpt_debug("kflags %lu uflags %lu oflags %lu", ctx->kflags,
+	ckpt_debug(ctx, "kflags %lu uflags %lu oflags %lu", ctx->kflags,
 		   ctx->uflags, ctx->oflags);
 	for (i = 0; i < ctx->active_pid; i++)
-		ckpt_debug("task[%d] to run %d\n", i, ctx->pids_arr[i].vpid);
+		ckpt_debug(ctx, "task[%d] to run %d\n", i,
+			   ctx->pids_arr[i].vpid);
 
 	list_for_each_entry_safe(s, p, &ctx->task_status, list) {
 		if (s->flags & RESTART_DBG_COORD)
@@ -170,7 +168,8 @@ void restore_debug_free(struct ckpt_ctx *ctx)
 			state = "Exited";
 		else
 			state = "??????";
-		ckpt_debug("pid %d type %s state %s\n", s->pid, which, state);
+		ckpt_debug(ctx, "pid %d type %s state %s\n", s->pid, which,
+			   state);
 		list_del(&s->list);
 		kfree(s);
 	}
@@ -194,13 +193,13 @@ static int _ckpt_read_err(struct ckpt_ctx *ctx, struct ckpt_hdr *h)
 	len = h->len - sizeof(*h);
 	ptr = kzalloc(len + 1, GFP_KERNEL);
 	if (!ptr) {
-		ckpt_debug("insufficient memory to report image error\n");
+		ckpt_err(ctx, "insufficient memory to report image error\n");
 		return -ENOMEM;
 	}
 
 	ret = ckpt_kread(ctx, ptr, len);
 	if (ret >= 0) {
-		ckpt_debug("%s\n", &ptr[1]);
+		ckpt_err(ctx, "%s\n", &ptr[1]);
 		ret = -EIO;
 	}
 
@@ -224,7 +223,7 @@ static int _ckpt_read_objref(struct ckpt_ctx *ctx, struct ckpt_hdr *hh)
 
 	*h = *hh;	/* yay ! */
 
-	_ckpt_debug(CKPT_DOBJ, "shared len %d type %d\n", h->len, h->type);
+	ckpt_debug(ctx, "shared len %d type %d\n", h->len, h->type);
 	ret = ckpt_kread(ctx, (h + 1), hh->len - sizeof(struct ckpt_hdr));
 	if (ret < 0)
 		goto out;
@@ -255,8 +254,7 @@ static int _ckpt_read_obj(struct ckpt_ctx *ctx, struct ckpt_hdr *h,
 	ret = ckpt_kread(ctx, h, sizeof(*h));
 	if (ret < 0)
 		return ret;
-	_ckpt_debug(CKPT_DRW, "type %d len %d(%d,%d)\n",
-		    h->type, h->len, len, max);
+	ckpt_debug(ctx, "type %d len %d(%d,%d)\n", h->type, h->len, len, max);
 	if (h->len < sizeof(*h))
 		return -EINVAL;
 
@@ -363,8 +361,7 @@ static void *ckpt_read_obj(struct ckpt_ctx *ctx, int len, int max)
 	ret = ckpt_kread(ctx, &hh, sizeof(hh));
 	if (ret < 0)
 		return ERR_PTR(ret);
-	_ckpt_debug(CKPT_DRW, "type %d len %d(%d,%d)\n",
-		    hh.type, hh.len, len, max);
+	ckpt_debug(ctx, "type %d len %d(%d,%d)\n", hh.type, hh.len, len, max);
 	if (hh.len < sizeof(*h))
 		return ERR_PTR(-EINVAL);
 
@@ -706,7 +703,7 @@ static inline void _restore_notify_error(struct ckpt_ctx *ctx, int errno)
 {
 	/* first to fail: notify everyone (racy but harmless) */
 	if (!ckpt_test_ctx_error(ctx)) {
-		ckpt_debug("setting restart error %d\n", errno); \
+		ckpt_err(ctx, "setting restart error %d\n", errno);
 		ckpt_set_ctx_error(ctx, errno);
 		complete(&ctx->complete);
 		wake_up_all(&ctx->waitq);
@@ -720,7 +717,8 @@ static inline void _restore_notify_error(struct ckpt_ctx *ctx, int errno)
 */
 #define restore_notify_error(ctx, errno) \
 do { \
-	ckpt_debug("restart error %d, root pid %d\n", errno, ctx->root_pid); \
+	ckpt_err(ctx, "restart error %d, root pid %d\n", errno, \
+		   ctx->root_pid); \
 	_restore_notify_error(ctx, errno); \
 } while(0)
 
@@ -744,7 +742,8 @@ static int set_task_ctx(struct task_struct *task, struct ckpt_ctx *ctx)
 		task->checkpoint_ctx = ckpt_ctx_get(ctx);
 		ret = 0;
 	} else {
-		ckpt_debug("task %d has checkpoint_ctx\n", task_pid_vnr(task));
+		ckpt_err(ctx, "task %d has checkpoint_ctx\n",
+			   task_pid_vnr(task));
 		ret = 1;
 	}
 	task_unlock(task);
@@ -760,7 +759,7 @@ static void clear_task_ctx(struct task_struct *task)
 	task->checkpoint_ctx = NULL;
 	task_unlock(task);
 
-	ckpt_debug("task %d clear checkpoint_ctx\n", task_pid_vnr(task));
+	ckpt_debug(old, "task %d clear checkpoint_ctx\n", task_pid_vnr(task));
 	ckpt_ctx_put(old);
 }
 
@@ -794,7 +793,7 @@ static int restore_activate_next(struct ckpt_ctx *ctx)
 		rcu_read_unlock();
 
 		if (!task) {
-			ckpt_debug("could not find task %d\n", pid);
+			ckpt_err(ctx, "could not find task %d\n", pid);
 			restore_notify_error(ctx, -ESRCH);
 			return -ESRCH;
 		}
@@ -811,11 +810,11 @@ static int wait_task_active(struct ckpt_ctx *ctx)
 	pid_t pid = task_pid_vnr(current);
 	int ret;
 
-	ckpt_debug("pid %d waiting\n", pid);
+	ckpt_debug(ctx, "pid %d waiting\n", pid);
 	ret = wait_event_interruptible(ctx->waitq,
 				       is_task_active(ctx, pid) ||
 				       ckpt_test_ctx_error(ctx));
-	ckpt_debug("active %d < %d (ret %d, errno %d)\n",
+	ckpt_debug(ctx, "active %d < %d (ret %d, errno %d)\n",
 		   ctx->active_pid, ctx->nr_pids, ret, ctx->errno);
 	if (!ret && ckpt_test_ctx_error(ctx))
 		ret = -EBUSY;
@@ -824,9 +823,9 @@ static int wait_task_active(struct ckpt_ctx *ctx)
 
 static int wait_task_sync(struct ckpt_ctx *ctx)
 {
-	ckpt_debug("pid %d syncing\n", task_pid_vnr(current));
+	ckpt_debug(ctx, "pid %d syncing\n", task_pid_vnr(current));
 	wait_event_interruptible(ctx->waitq, ckpt_test_ctx_complete(ctx));
-	ckpt_debug("task sync done (errno %d)\n", ctx->errno);
+	ckpt_debug(ctx, "task sync done (errno %d)\n", ctx->errno);
 	if (ckpt_test_ctx_error(ctx))
 		return -EBUSY;
 	return 0;
@@ -844,16 +843,12 @@ static struct ckpt_ctx *wait_checkpoint_ctx(void)
 	 * reference to its restart context.
 	 */
 	ret = wait_event_interruptible(waitq, current->checkpoint_ctx);
-	if (ret < 0) {
-		ckpt_debug("wait_checkpoint_ctx: failed (%d)\n", ret);
+	if (ret < 0)
 		return ERR_PTR(ret);
-	}
 
 	ctx = get_task_ctx(current);
-	if (!ctx) {
-		ckpt_debug("wait_checkpoint_ctx: checkpoint_ctx missing\n");
+	if (!ctx)
 		return ERR_PTR(-EAGAIN);
-	}
 
 	return ctx;
 }
@@ -1009,15 +1004,15 @@ static int __prepare_descendants(struct task_struct *task, void *data)
 {
 	struct ckpt_ctx *ctx = (struct ckpt_ctx *) data;
 
-	ckpt_debug("consider task %d\n", task_pid_vnr(task));
+	ckpt_debug(ctx, "consider task %d\n", task_pid_vnr(task));
 
 	if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) {
-		ckpt_debug("stranger task %d\n", task_pid_vnr(task));
+		ckpt_err(ctx, "stranger task %d\n", task_pid_vnr(task));
 		return -EPERM;
 	}
 
 	if (task_ptrace(task) & PT_PTRACED) {
-		ckpt_debug("ptraced task %d\n", task_pid_vnr(task));
+		ckpt_err(ctx, "ptraced task %d\n", task_pid_vnr(task));
 		return -EBUSY;
 	}
 
@@ -1034,7 +1029,7 @@ static int __prepare_descendants(struct task_struct *task, void *data)
 	if (!task->exit_state) {
 		if (set_task_ctx(task, ctx))
 			return -EBUSY;
-		ckpt_debug("prepare task %d\n", task_pid_vnr(task));
+		ckpt_debug(ctx, "prepare task %d\n", task_pid_vnr(task));
 		wake_up_process(task);
 		return 1;
 	}
@@ -1054,8 +1049,8 @@ static int prepare_descendants(struct ckpt_ctx *ctx, struct task_struct *root)
 {
 	int nr_pids;
 
-	nr_pids = walk_task_subtree(root, __prepare_descendants, ctx);
-	ckpt_debug("nr %d/%d\n", ctx->nr_pids, nr_pids);
+	nr_pids = walk_task_subtree(ctx, root, __prepare_descendants, ctx);
+	ckpt_debug(ctx, "nr %d/%d\n", ctx->nr_pids, nr_pids);
 	if (nr_pids < 0)
 		return nr_pids;
 
@@ -1082,7 +1077,7 @@ static int wait_all_tasks_finish(struct ckpt_ctx *ctx)
 		return ret;
 
 	ret = wait_for_completion_interruptible(&ctx->complete);
-	ckpt_debug("final sync kflags %#lx (ret %d)\n", ctx->kflags, ret);
+	ckpt_debug(ctx, "final sync kflags %#lx (ret %d)\n", ctx->kflags, ret);
 	/*
 	 * Usually when restart fails, the restarting task will first
 	 * set @ctx->errno before waking us up. In the rare event that
@@ -1160,7 +1155,7 @@ static int __destroy_descendants(struct task_struct *task, void *data)
 
 static void destroy_descendants(struct ckpt_ctx *ctx)
 {
-	walk_task_subtree(ctx->root_task, __destroy_descendants, ctx);
+	walk_task_subtree(ctx, ctx->root_task, __destroy_descendants, ctx);
 }
 
 static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
@@ -1173,15 +1168,15 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
 	restore_debug_running(ctx);
 
 	ret = restore_read_header(ctx);
-	ckpt_debug("restore header: %d\n", ret);
+	ckpt_debug(ctx, "restore header: %d\n", ret);
 	if (ret < 0)
 		return ret;
 	ret = restore_container(ctx);
-	ckpt_debug("restore container: %d\n", ret);
+	ckpt_debug(ctx, "restore container: %d\n", ret);
 	if (ret < 0)
 		return ret;
 	ret = restore_read_tree(ctx);
-	ckpt_debug("restore tree: %d\n", ret);
+	ckpt_debug(ctx, "restore tree: %d\n", ret);
 	if (ret < 0)
 		return ret;
 
@@ -1198,14 +1193,12 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
 	 * that ancestor won't proceed deeper to interfere with our
 	 * descendants that are restarting.
 	 */
-	if (set_task_ctx(current, ctx)) {
+	if (set_task_ctx(current, ctx))
 		/*
 		 * We are a bad-behaving descendant: an ancestor must
 		 * have prepare_descendants() us as part of a restart.
 		 */
-		ckpt_debug("coord already has checkpoint_ctx\n");
 		return -EBUSY;
-	}
 
 	/*
 	 * From now on we are committed to the restart. If anything
@@ -1215,39 +1208,39 @@ static int do_restore_coord(struct ckpt_ctx *ctx, pid_t pid)
 
 	if (ctx->uflags & RESTART_TASKSELF) {
 		ret = pre_restore_task();
-		ckpt_debug("pre restore task: %d\n", ret);
+		ckpt_debug(ctx, "pre restore task: %d\n", ret);
 		if (ret < 0)
 			goto out;
 		ret = restore_task(ctx);
-		ckpt_debug("restore task: %d\n", ret);
+		ckpt_debug(ctx, "restore task: %d\n", ret);
 		if (ret < 0)
 			goto out;
 	} else {
 		/* prepare descendants' t->checkpoint_ctx point to coord */
 		ret = prepare_descendants(ctx, ctx->root_task);
-		ckpt_debug("restore prepare: %d\n", ret);
+		ckpt_debug(ctx, "restore prepare: %d\n", ret);
 		if (ret < 0)
 			goto out;
 		/* wait for all other tasks to complete do_restore_task() */
 		ret = wait_all_tasks_finish(ctx);
-		ckpt_debug("restore finish: %d\n", ret);
+		ckpt_debug(ctx, "restore finish: %d\n", ret);
 		if (ret < 0)
 			goto out;
 	}
 
 	ret = deferqueue_run(ctx->deferqueue);  /* run deferred work */
-	ckpt_debug("restore deferqueue: %d\n", ret);
+	ckpt_debug(ctx, "restore deferqueue: %d\n", ret);
 	if (ret < 0)
 		goto out;
 
 	ret = restore_read_tail(ctx);
-	ckpt_debug("restore tail: %d\n", ret);
+	ckpt_debug(ctx, "restore tail: %d\n", ret);
 	if (ret < 0)
 		goto out;
 
 	if (ctx->uflags & RESTART_FROZEN) {
 		ret = cgroup_freezer_make_frozen(ctx->root_task);
-		ckpt_debug("freezing restart tasks ... %d\n", ret);
+		ckpt_debug(ctx, "freezing restart tasks ... %d\n", ret);
 	}
  out:
 	if (ctx->uflags & RESTART_TASKSELF)
@@ -1344,14 +1337,14 @@ long do_restart(struct ckpt_ctx *ctx, pid_t pid, unsigned long flags)
 	if (!ctx || (ctx->uflags & RESTART_TASKSELF)) {
 		if (ret < 0) {
 			/* partial restore is undefined: terminate */
-			ckpt_debug("restart err %ld, exiting\n", ret);
+			ckpt_err(ctx, "restart err %ld, exiting\n", ret);
 			force_sig(SIGKILL, current);
 		} else {
 			ret = restore_retval();
 		}
 	}
 
-	ckpt_debug("sys_restart returns %ld\n", ret);
+	ckpt_debug(ctx, "sys_restart returns %ld\n", ret);
 	return ret;
 }
 
diff --git a/checkpoint/signal.c b/checkpoint/signal.c
index cdfb142..97e878b 100644
--- a/checkpoint/signal.c
+++ b/checkpoint/signal.c
@@ -8,9 +8,6 @@
  *  distribution for more details.
  */
 
-/* default debug level for output */
-#define CKPT_DFLAG  CKPT_DSYS
-
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/errno.h>
@@ -367,7 +364,7 @@ static int checkpoint_signal(struct ckpt_ctx *ctx, struct task_struct *t)
 		 * the signal from this very expiry won't be sent before
 		 * we release t->sighand->siglock).
 		 */
-		ckpt_debug("active ! %lld\n", delta.tv64);
+		ckpt_debug(ctx, "active ! %lld\n", delta.tv64);
 		if (delta.tv64 <= 0)
 			delta.tv64 = NSEC_PER_USEC;
 		h->it_real_value = ktime_to_ns(delta);
diff --git a/checkpoint/sys.c b/checkpoint/sys.c
index 260a1ee..e0d8192 100644
--- a/checkpoint/sys.c
+++ b/checkpoint/sys.c
@@ -8,9 +8,6 @@
  *  distribution for more details.
  */
 
-/* default debug level for output */
-#define CKPT_DFLAG  CKPT_DSYS
-
 #include <linux/sched.h>
 #include <linux/nsproxy.h>
 #include <linux/kernel.h>
@@ -204,6 +201,8 @@ static void ckpt_ctx_free(struct ckpt_ctx *ctx)
 
 	if (ctx->file)
 		fput(ctx->file);
+	if (ctx->logfile)
+		fput(ctx->logfile);
 
 	ckpt_obj_hash_free(ctx);
 	path_put(&ctx->fs_mnt);
@@ -225,7 +224,7 @@ static void ckpt_ctx_free(struct ckpt_ctx *ctx)
 }
 
 static struct ckpt_ctx *ckpt_ctx_alloc(int fd, unsigned long uflags,
-				       unsigned long kflags)
+				       unsigned long kflags, int logfd)
 {
 	struct ckpt_ctx *ctx;
 	int err;
@@ -238,6 +237,9 @@ static struct ckpt_ctx *ckpt_ctx_alloc(int fd, unsigned long uflags,
 	ctx->kflags = kflags;
 	ctx->ktime_begin = ktime_get();
 
+	/* If logfd's a bad fd that's fine, no log output required... */
+	ctx->logfile = fget(logfd);
+
 	atomic_set(&ctx->refcount, 0);
 	INIT_LIST_HEAD(&ctx->pgarr_list);
 	INIT_LIST_HEAD(&ctx->pgarr_pool);
@@ -285,7 +287,7 @@ void ckpt_ctx_put(struct ckpt_ctx *ctx)
 		ckpt_ctx_free(ctx);
 }
 
-int walk_task_subtree(struct task_struct *root,
+int walk_task_subtree(struct ckpt_ctx *ctx, struct task_struct *root,
 		      int (*func)(struct task_struct *, void *),
 		      void *data)
 {
@@ -335,10 +337,59 @@ int walk_task_subtree(struct task_struct *root,
 	}
 	read_unlock(&tasklist_lock);
 
-	ckpt_debug("total %d ret %d\n", total, ret);
+	ckpt_debug(ctx, "total %d ret %d\n", total, ret);
 	return (ret < 0 ? ret : total);
 }
 
+int do_ckpt_debug(struct ckpt_ctx *ctx, char *fmt, int p1, int p2,
+	const char *f, int l, ...)
+{
+	mm_segment_t fs;
+	struct file *file = ctx->logfile;
+	int ret, count;
+	char buf[200], *bufp = buf;
+	va_list args;
+
+	/* write the prefix */
+	count = snprintf(buf, 200, "[%d:%d:%s:%d] ", p1, p2, f, l);
+	if (count > 200) {
+		bufp = kmalloc(count, GFP_KERNEL);
+		if (!bufp)
+			return -ENOMEM; /* ignored */
+		count = snprintf(bufp, count, "[%d:%d:%s:%d] ", p1, p2, f, l);
+	}
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+	ret = _ckpt_kwrite(file, bufp, count);
+	set_fs(fs);
+	if (bufp != buf)
+		kfree(bufp);
+	if (ret < 0)
+		return ret;
+	bufp = buf;
+
+	/* now write the actual error msg */
+	va_start(args, l);
+	count = vsnprintf(bufp, 200, fmt, args);
+	va_end(args);
+	if (count > 200) {
+		bufp = kmalloc(count, GFP_KERNEL);
+		if (!bufp)
+			return -ENOMEM; /* ignored */
+		va_start(args, l);
+		count = vsnprintf(bufp, count, fmt, args);
+		va_end(args);
+	}
+
+	fs = get_fs();
+	set_fs(KERNEL_DS);
+	ret = _ckpt_kwrite(file, bufp, count);
+	set_fs(fs);
+
+	if (bufp != buf)
+		kfree(bufp);
+	return ret;
+}
 
 /**
  * sys_checkpoint - checkpoint a container
@@ -349,7 +400,8 @@ int walk_task_subtree(struct task_struct *root,
  * Returns positive identifier on success, 0 when returning from restart
  * or negative value on error
  */
-SYSCALL_DEFINE3(checkpoint, pid_t, pid, int, fd, unsigned long, flags)
+SYSCALL_DEFINE4(checkpoint, pid_t, pid, int, fd, unsigned long, flags,
+		int, logfd)
 {
 	struct ckpt_ctx *ctx;
 	long ret;
@@ -362,7 +414,7 @@ SYSCALL_DEFINE3(checkpoint, pid_t, pid, int, fd, unsigned long, flags)
 
 	if (pid == 0)
 		pid = task_pid_vnr(current);
-	ctx = ckpt_ctx_alloc(fd, flags, CKPT_CTX_CHECKPOINT);
+	ctx = ckpt_ctx_alloc(fd, flags, CKPT_CTX_CHECKPOINT, logfd);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
@@ -384,7 +436,7 @@ SYSCALL_DEFINE3(checkpoint, pid_t, pid, int, fd, unsigned long, flags)
  * Returns negative value on error, or otherwise returns in the realm
  * of the original checkpoint
  */
-SYSCALL_DEFINE3(restart, pid_t, pid, int, fd, unsigned long, flags)
+SYSCALL_DEFINE4(restart, pid_t, pid, int, fd, unsigned long, flags, int, logfd)
 {
 	struct ckpt_ctx *ctx = NULL;
 	long ret;
@@ -397,7 +449,7 @@ SYSCALL_DEFINE3(restart, pid_t, pid, int, fd, unsigned long, flags)
 		return -EPERM;
 
 	if (pid)
-		ctx = ckpt_ctx_alloc(fd, flags, CKPT_CTX_RESTART);
+		ctx = ckpt_ctx_alloc(fd, flags, CKPT_CTX_RESTART, logfd);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
 
@@ -406,25 +458,3 @@ SYSCALL_DEFINE3(restart, pid_t, pid, int, fd, unsigned long, flags)
 	ckpt_ctx_put(ctx);
 	return ret;
 }
-
-
-/* 'ckpt_debug_level' controls the verbosity level of c/r code */
-#ifdef CONFIG_CHECKPOINT_DEBUG
-
-/* FIX: allow to change during runtime */
-unsigned long __read_mostly ckpt_debug_level = CKPT_DDEFAULT;
-
-static __init int ckpt_debug_setup(char *s)
-{
-	long val, ret;
-
-	ret = strict_strtoul(s, 10, &val);
-	if (ret < 0)
-		return ret;
-	ckpt_debug_level = val;
-	return 0;
-}
-
-__setup("ckpt_debug=", ckpt_debug_setup);
-
-#endif /* CONFIG_CHECKPOINT_DEBUG */
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 1b220c1..2fd2cd4 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -2664,7 +2664,7 @@ static int tty_file_checkpoint(struct ckpt_ctx *ctx, struct file *file)
 	 */
 
 	real_tty = tty_pair_get_tty(tty);
-	ckpt_debug("tty: %p, real_tty: %p\n", tty, real_tty);
+	ckpt_debug(ctx, "tty: %p, real_tty: %p\n", tty, real_tty);
 
 	master_objref = checkpoint_obj(ctx, real_tty->link, CKPT_OBJ_TTY);
 	if (master_objref < 0)
@@ -2672,7 +2672,7 @@ static int tty_file_checkpoint(struct ckpt_ctx *ctx, struct file *file)
 	slave_objref = checkpoint_obj(ctx, real_tty, CKPT_OBJ_TTY);
 	if (slave_objref < 0)
 		return slave_objref;
-	ckpt_debug("master %p %d, slave %p %d\n",
+	ckpt_debug(ctx, "master %p %d, slave %p %d\n",
 		   real_tty->link, master_objref, real_tty, slave_objref);
 
 	h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_FILE);
@@ -2704,7 +2704,7 @@ static int tty_file_collect(struct ckpt_ctx *ctx, struct file *file)
 	if (!tty_can_checkpoint(ctx, tty))
 		return -ENOSYS;
 
-	ckpt_debug("collecting tty: %p\n", tty);
+	ckpt_debug(ctx, "collecting tty: %p\n", tty);
 	ret = ckpt_obj_collect(ctx, tty, CKPT_OBJ_TTY);
 	if (ret < 0)
 		return ret;
@@ -2714,7 +2714,7 @@ static int tty_file_collect(struct ckpt_ctx *ctx, struct file *file)
 			ckpt_write_err(ctx, "TP", "tty: missing link\n", tty);
 			return -EIO;
 		}
-		ckpt_debug("collecting slave tty: %p\n", tty->link);
+		ckpt_debug(ctx, "collecting slave tty: %p\n", tty->link);
 		ret = ckpt_obj_collect(ctx, tty->link, CKPT_OBJ_TTY);
 	}
 
@@ -2781,7 +2781,7 @@ static int checkpoint_tty_ldisc(struct ckpt_ctx *ctx, struct tty_struct *tty)
 	if (ret < 0)
 		return ret;
 
-	ckpt_debug("datalen %d\n", datalen);
+	ckpt_debug(ctx, "datalen %d\n", datalen);
 	if (datalen) {
 		ret = ckpt_write_buffer(ctx, NULL, datalen);
 		if (ret < 0)
@@ -2836,7 +2836,7 @@ static int do_checkpoint_tty(struct ckpt_ctx *ctx, struct tty_struct *tty)
 
 	/* if master, reserve an objref (see do_restore_tty) */
 	h->file_objref = (master ? ckpt_obj_reserve(ctx) : 0);
-	ckpt_debug("link %d file %d\n", h->link_objref, h->file_objref);
+	ckpt_debug(ctx, "link %d file %d\n", h->link_objref, h->file_objref);
 
 	h->index = tty->index;
 	h->ldisc = tty->ldisc->ops->num;
@@ -2889,7 +2889,7 @@ struct file *tty_file_restore(struct ckpt_ctx *ctx, struct ckpt_hdr_file *ptr)
 		return ERR_PTR(-EINVAL);
 
 	tty = ckpt_obj_fetch(ctx, h->tty_objref, CKPT_OBJ_TTY);
-	ckpt_debug("tty %p objref %d\n", tty, h->tty_objref);
+	ckpt_debug(ctx, "tty %p objref %d\n", tty, h->tty_objref);
 
 	/* at this point the tty should have been restore already */
 	if (IS_ERR(tty))
@@ -2910,13 +2910,13 @@ struct file *tty_file_restore(struct ckpt_ctx *ctx, struct ckpt_hdr_file *ptr)
 					typeof(*file), f_u.fu_list);
 		get_file(file);
 		file_list_unlock();
-		ckpt_debug("master file %p\n", file);
+		ckpt_debug(ctx, "master file %p\n", file);
 	} else {
 		sprintf(slavepath, "/dev/pts/%d", tty->index);
 		slavelock = test_bit(TTY_PTY_LOCK, &tty->link->flags);
 		clear_bit(TTY_PTY_LOCK, &tty->link->flags);
 		file = filp_open(slavepath, O_RDWR | O_NOCTTY, 0);
-		ckpt_debug("slave file %p (idnex %d)\n", file, tty->index);
+		ckpt_debug(ctx, "slave file %p (idnex %d)\n", file, tty->index);
 		if (IS_ERR(file))
 			return file;
 		if (slavelock)
@@ -3026,7 +3026,7 @@ static struct tty_struct *do_restore_tty(struct ckpt_ctx *ctx)
 	if (test_bit(TTY_HUPPED, (unsigned long *) &h->flags) && master)
 		goto out;
 
-	ckpt_debug("sanity checks passed, link %d\n", h->link_objref);
+	ckpt_debug(ctx, "sanity checks passed, link %d\n", h->link_objref);
 
 	/*
 	 * If we ever support more than PTYs, this would be tty-type
@@ -3060,7 +3060,7 @@ static struct tty_struct *do_restore_tty(struct ckpt_ctx *ctx)
 		tty = tty->link;
 	}
 
-	ckpt_debug("tty %p (hup %d)\n",
+	ckpt_debug(ctx, "tty %p (hup %d)\n",
 		   tty, test_bit(TTY_HUPPED, (unsigned long *) &h->flags));
 
 	/* we now have the desired tty: restore its state as per @h */
diff --git a/include/linux/checkpoint.h b/include/linux/checkpoint.h
index dfcb59b..d6fdf90 100644
--- a/include/linux/checkpoint.h
+++ b/include/linux/checkpoint.h
@@ -12,13 +12,17 @@
 
 #define CHECKPOINT_VERSION  3
 
+#define CKPT_LOG_ALL		0x8
+
 /* checkpoint user flags */
 #define CHECKPOINT_SUBTREE	0x1
+#define CHECKPOINT_LOG_ALL	CKPT_LOG_ALL
 
 /* restart user flags */
 #define RESTART_TASKSELF	0x1
 #define RESTART_FROZEN		0x2
 #define RESTART_GHOST		0x4
+#define RESTART_LOG_ALL		CKPT_LOG_ALL
 
 #ifdef __KERNEL__
 #ifdef CONFIG_CHECKPOINT
@@ -44,13 +48,15 @@
 #define CKPT_CTX_ERROR		(1 << CKPT_CTX_ERROR_BIT)
 
 /* ckpt_ctx: uflags */
-#define CHECKPOINT_USER_FLAGS		CHECKPOINT_SUBTREE
+#define CHECKPOINT_USER_FLAGS		(CHECKPOINT_SUBTREE | \
+					 CHECKPOINT_LOG_ALL)
 #define RESTART_USER_FLAGS  \
 	(RESTART_TASKSELF | \
 	 RESTART_FROZEN | \
-	 RESTART_GHOST)
+	 RESTART_GHOST | \
+	 RESTART_LOG_ALL)
 
-extern int walk_task_subtree(struct task_struct *task,
+extern int walk_task_subtree(struct ckpt_ctx *ctx, struct task_struct *task,
 			     int (*func)(struct task_struct *, void *),
 			     void *data);
 extern void exit_checkpoint(struct task_struct *tsk);
@@ -253,7 +259,8 @@ extern int ckpt_collect_mm(struct ckpt_ctx *ctx, struct task_struct *t);
 extern int checkpoint_mm(struct ckpt_ctx *ctx, void *ptr);
 extern void *restore_mm(struct ckpt_ctx *ctx);
 
-extern unsigned long generic_vma_restore(struct mm_struct *mm,
+extern unsigned long generic_vma_restore(struct ckpt_ctx *ctx,
+					 struct mm_struct *mm,
 					 struct file *file,
 					 struct ckpt_hdr_vma *h);
 
@@ -321,55 +328,51 @@ static inline int ckpt_validate_errno(int errno)
 			memcpy(LIVE, SAVE, count * sizeof(*SAVE));	\
 	} while (0)
 
-/* debugging flags */
-#define CKPT_DBASE	0x1		/* anything */
-#define CKPT_DSYS	0x2		/* generic (system) */
-#define CKPT_DRW	0x4		/* image read/write */
-#define CKPT_DOBJ	0x8		/* shared objects */
-#define CKPT_DFILE	0x10		/* files and filesystem */
-#define CKPT_DMEM	0x20		/* memory state */
-#define CKPT_DPAGE	0x40		/* memory pages */
-#define CKPT_DIPC	0x80		/* sysvipc */
-
-#define CKPT_DDEFAULT	0xffff		/* default debug level */
-
-#ifndef CKPT_DFLAG
-#define CKPT_DFLAG	0xffff		/* everything */
-#endif
+extern int do_ckpt_debug(struct ckpt_ctx *ctx, char *fmt, int p1, int p2,
+	const char *f, int l, ...);
 
 #ifdef CONFIG_CHECKPOINT_DEBUG
 
-extern void restore_debug_free(struct ckpt_ctx *ctx);
-extern unsigned long ckpt_debug_level;
-
-/* use this to select a specific debug level */
-#define _ckpt_debug(level, fmt, args...)				\
-	do {								\
-		if (ckpt_debug_level & (level))				\
-			printk(KERN_DEBUG "[%d:%d:c/r:%s:%d] " fmt,	\
-				current->pid,				\
-				current->nsproxy ?			\
-				task_pid_vnr(current) : -1,		\
-				__func__, __LINE__, ## args);		\
-	} while (0)
-
 /*
- * CKPT_DBASE is the base flags, doesn't change
- * CKPT_DFLAG is to be redfined in each source file
+ * when CONFIG_CHECKPOINT_DEBUG=y we keep some extra info on
+ * restarted tasks.  This is where we free it.
  */
-
-#define ckpt_debug(fmt, args...)  \
-	_ckpt_debug(CKPT_DBASE | CKPT_DFLAG, fmt, ## args)
+extern void restore_debug_free(struct ckpt_ctx *ctx);
 
 #else
 
 static inline void restore_debug_free(struct ckpt_ctx *ctx) {}
 
-#define _ckpt_debug(level, fmt, args...)	do { } while (0)
-#define ckpt_debug(fmt, args...)		do { } while (0)
-
 #endif /* CONFIG_CHECKPOINT_DEBUG */
 
+/* use this to select a specific debug level */
+/*
+ * XXX I think it's better to just drop the error msgs rather than
+ * default to syslog, but let's see what others think
+ */
+#define ctx_has_logfile(ctx) (ctx && ctx->logfile)
+#define all_log_info(ctx) (ctx->uflags & CKPT_LOG_ALL)
+
+#define ckpt_log(ctx, fmt, args...)				\
+	do {							\
+		do_ckpt_debug(ctx, fmt,	current->pid,		\
+			current->nsproxy ?			\
+			task_pid_vnr(current) : -1,		\
+			__func__, __LINE__, ## args);		\
+	} while (0)
+
+#define ckpt_debug(ctx, fmt, args...)				\
+	do {							\
+		if (ctx_has_logfile(ctx) && all_log_info(ctx))	\
+			ckpt_log(ctx, fmt, ## args);		\
+	} while (0)
+
+#define ckpt_err(ctx, fmt, args...)				\
+	do {							\
+		if (ctx_has_logfile(ctx))			\
+			ckpt_log(ctx, fmt, ## args);		\
+	} while (0)
+
 #endif /* CONFIG_CHECKPOINT */
 #endif /* __KERNEL__ */
 
diff --git a/include/linux/checkpoint_types.h b/include/linux/checkpoint_types.h
index fa57cdc..8182a9c 100644
--- a/include/linux/checkpoint_types.h
+++ b/include/linux/checkpoint_types.h
@@ -47,8 +47,9 @@ struct ckpt_ctx {
 	unsigned long uflags;	/* user flags */
 	unsigned long oflags;	/* restart: uflags from checkpoint */
 
-	struct file *file;	/* input/output file */
-	int total;		/* total read/written */
+	struct file *file;		/* input/output file */
+	struct file *logfile;	/* debug log file */
+	int total;			/* total read/written */
 
 	atomic_t refcount;
 
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 33bce6e..c460d87 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -754,8 +754,9 @@ asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *,
 asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int,
 			  struct timespec __user *, const sigset_t __user *,
 			  size_t);
-asmlinkage long sys_checkpoint(pid_t pid, int fd, unsigned long flags);
-asmlinkage long sys_restart(pid_t pid, int fd, unsigned long flags);
+asmlinkage long sys_checkpoint(pid_t pid, int fd, unsigned long flags,
+				int logfd);
+asmlinkage long sys_restart(pid_t pid, int fd, unsigned long flags, int logfd);
 
 int kernel_execve(const char *filename, char *const argv[], char *const envp[]);
 
diff --git a/ipc/checkpoint.c b/ipc/checkpoint.c
index 8e6e9ba..65863a8 100644
--- a/ipc/checkpoint.c
+++ b/ipc/checkpoint.c
@@ -8,9 +8,6 @@
  *  distribution for more details.
  */
 
-/* default debug level for output */
-#define CKPT_DFLAG  CKPT_DIPC
-
 #include <linux/ipc.h>
 #include <linux/msg.h>
 #include <linux/sched.h>
@@ -65,7 +62,8 @@ static int checkpoint_ipc_any(struct ckpt_ctx *ctx,
 
 	h->ipc_type = ipc_type;
 	h->ipc_count = ipc_ids->in_use;
-	ckpt_debug("ipc-%s count %d\n", ipc_ind_to_str[ipc_ind], h->ipc_count);
+	ckpt_debug(ctx, "ipc-%s count %d\n", ipc_ind_to_str[ipc_ind],
+		   h->ipc_count);
 
 	ret = ckpt_write_obj(ctx, &h->h);
 	ckpt_hdr_put(ctx, h);
@@ -73,7 +71,7 @@ static int checkpoint_ipc_any(struct ckpt_ctx *ctx,
 		goto out;
 
 	ret = idr_for_each(&ipc_ids->ipcs_idr, func, ctx);
-	ckpt_debug("ipc-%s ret %d\n", ipc_ind_to_str[ipc_ind], ret);
+	ckpt_debug(ctx, "ipc-%s ret %d\n", ipc_ind_to_str[ipc_ind], ret);
  out:
 	up_read(&ipc_ids->rw_mutex);
 	return ret;
@@ -229,7 +227,8 @@ static int restore_ipc_any(struct ckpt_ctx *ctx, struct ipc_namespace *ipc_ns,
 	if (IS_ERR(h))
 		return PTR_ERR(h);
 
-	ckpt_debug("ipc-%s: count %d\n", ipc_ind_to_str[ipc_ind], h->ipc_count);
+	ckpt_debug(ctx, "ipc-%s: count %d\n", ipc_ind_to_str[ipc_ind],
+		   h->ipc_count);
 
 	ret = -EINVAL;
 	if (h->ipc_type != ipc_type)
@@ -242,7 +241,7 @@ static int restore_ipc_any(struct ckpt_ctx *ctx, struct ipc_namespace *ipc_ns,
 			goto out;
 	}
  out:
-	ckpt_debug("ipc-%s: ret %d\n", ipc_ind_to_str[ipc_ind], ret);
+	ckpt_debug(ctx, "ipc-%s: ret %d\n", ipc_ind_to_str[ipc_ind], ret);
 	ckpt_hdr_put(ctx, h);
 	return ret;
 }
diff --git a/ipc/checkpoint_msg.c b/ipc/checkpoint_msg.c
index b933c19..8c2233e 100644
--- a/ipc/checkpoint_msg.c
+++ b/ipc/checkpoint_msg.c
@@ -8,9 +8,6 @@
  *  distribution for more details.
  */
 
-/* default debug level for output */
-#define CKPT_DFLAG  CKPT_DIPC
-
 #include <linux/mm.h>
 #include <linux/msg.h>
 #include <linux/rwsem.h>
@@ -52,7 +49,7 @@ static int fill_ipc_msg_hdr(struct ckpt_ctx *ctx,
 
  unlock:
 	ipc_unlock(&msq->q_perm);
-	ckpt_debug("msg: lspid %d rspid %d qnum %lld qbytes %lld\n",
+	ckpt_debug(ctx, "msg: lspid %d rspid %d qnum %lld qbytes %lld\n",
 		 h->q_lspid, h->q_lrpid, h->q_qnum, h->q_qbytes);
 
 	return ret;
@@ -181,7 +178,7 @@ static int load_ipc_msg_hdr(struct ckpt_ctx *ctx,
 	if (ret < 0)
 		return ret;
 
-	ckpt_debug("msq: lspid %d lrpid %d qnum %lld qbytes %lld\n",
+	ckpt_debug(ctx, "msq: lspid %d lrpid %d qnum %lld qbytes %lld\n",
 		 h->q_lspid, h->q_lrpid, h->q_qnum, h->q_qbytes);
 
 	if (h->q_lspid < 0 || h->q_lrpid < 0)
@@ -323,10 +320,10 @@ int restore_ipc_msg(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
 
 	/* restore the message queue */
 	msgflag = h->perms.mode | IPC_CREAT | IPC_EXCL;
-	ckpt_debug("msg: do_msgget key %d flag %#x id %d\n",
+	ckpt_debug(ctx, "msg: do_msgget key %d flag %#x id %d\n",
 		 h->perms.key, msgflag, h->perms.id);
 	ret = do_msgget(ns, h->perms.key, msgflag, h->perms.id);
-	ckpt_debug("msg: do_msgget ret %d\n", ret);
+	ckpt_debug(ctx, "msg: do_msgget ret %d\n", ret);
 	if (ret < 0)
 		goto out;
 
@@ -352,7 +349,7 @@ int restore_ipc_msg(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
 	ret = load_ipc_msg_hdr(ctx, h, msq);
 
 	if (ret < 0) {
-		ckpt_debug("msq: need to remove (%d)\n", ret);
+		ckpt_err(ctx, "msq: need to remove (%d)\n", ret);
 		freeque(ns, perms);
 	} else
 		ipc_unlock(perms);
diff --git a/ipc/checkpoint_sem.c b/ipc/checkpoint_sem.c
index 76eb2b9..06f59f6 100644
--- a/ipc/checkpoint_sem.c
+++ b/ipc/checkpoint_sem.c
@@ -8,9 +8,6 @@
  *  distribution for more details.
  */
 
-/* default debug level for output */
-#define CKPT_DFLAG  CKPT_DIPC
-
 #include <linux/mm.h>
 #include <linux/sem.h>
 #include <linux/rwsem.h>
@@ -47,7 +44,7 @@ static int fill_ipc_sem_hdr(struct ckpt_ctx *ctx,
 
  unlock:
 	ipc_unlock(&sem->sem_perm);
-	ckpt_debug("sem: nsems %u\n", h->sem_nsems);
+	ckpt_debug(ctx, "sem: nsems %u\n", h->sem_nsems);
 
 	return ret;
 }
@@ -117,7 +114,7 @@ static int load_ipc_sem_hdr(struct ckpt_ctx *ctx,
 	if (ret < 0)
 		return ret;
 
-	ckpt_debug("sem: nsems %u\n", h->sem_nsems);
+	ckpt_debug(ctx, "sem: nsems %u\n", h->sem_nsems);
 
 	sem->sem_otime = h->sem_otime;
 	sem->sem_ctime = h->sem_ctime;
@@ -191,10 +188,10 @@ int restore_ipc_sem(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
 
 	/* restore the message queue now */
 	semflag = h->perms.mode | IPC_CREAT | IPC_EXCL;
-	ckpt_debug("sem: do_semget key %d flag %#x id %d\n",
+	ckpt_debug(ctx, "sem: do_semget key %d flag %#x id %d\n",
 		 h->perms.key, semflag, h->perms.id);
 	ret = do_semget(ns, h->perms.key, h->sem_nsems, semflag, h->perms.id);
-	ckpt_debug("sem: do_semget ret %d\n", ret);
+	ckpt_debug(ctx, "sem: do_semget ret %d\n", ret);
 	if (ret < 0)
 		goto out;
 
@@ -209,7 +206,7 @@ int restore_ipc_sem(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
 
 	ret = load_ipc_sem_hdr(ctx, h, sem);
 	if (ret < 0) {
-		ckpt_debug("sem: need to remove (%d)\n", ret);
+		ckpt_err(ctx, "sem: need to remove (%d)\n", ret);
 		freeary(ns, perms);
 	} else
 		ipc_unlock(perms);
diff --git a/ipc/checkpoint_shm.c b/ipc/checkpoint_shm.c
index 826e430..92523f0 100644
--- a/ipc/checkpoint_shm.c
+++ b/ipc/checkpoint_shm.c
@@ -8,9 +8,6 @@
  *  distribution for more details.
  */
 
-/* default debug level for output */
-#define CKPT_DFLAG  CKPT_DIPC
-
 #include <linux/mm.h>
 #include <linux/shm.h>
 #include <linux/shmem_fs.h>
@@ -69,7 +66,7 @@ static int fill_ipc_shm_hdr(struct ckpt_ctx *ctx,
 
  unlock:
 	ipc_unlock(&shp->shm_perm);
-	ckpt_debug("shm: cprid %d lprid %d segsz %lld mlock %d\n",
+	ckpt_debug(ctx, "shm: cprid %d lprid %d segsz %lld mlock %d\n",
 		 h->shm_cprid, h->shm_lprid, h->shm_segsz, h->mlock_uid);
 
 	return ret;
@@ -106,7 +103,7 @@ int checkpoint_ipc_shm(int id, void *p, void *data)
 		goto out;
 
 	h->objref = objref;
-	ckpt_debug("shm: objref %d\n", h->objref);
+	ckpt_debug(ctx, "shm: objref %d\n", h->objref);
 
 	ret = ckpt_write_obj(ctx, &h->h);
 	if (ret < 0)
@@ -169,7 +166,7 @@ static int load_ipc_shm_hdr(struct ckpt_ctx *ctx,
 	if (ret < 0)
 		return ret;
 
-	ckpt_debug("shm: cprid %d lprid %d segsz %lld mlock %d\n",
+	ckpt_debug(ctx, "shm: cprid %d lprid %d segsz %lld mlock %d\n",
 		 h->shm_cprid, h->shm_lprid, h->shm_segsz, h->mlock_uid);
 
 	if (h->shm_cprid < 0 || h->shm_lprid < 0)
@@ -237,10 +234,10 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
 	}
 
 	shmflag = h->flags | h->perms.mode | IPC_CREAT | IPC_EXCL;
-	ckpt_debug("shm: do_shmget size %lld flag %#x id %d\n",
+	ckpt_debug(ctx, "shm: do_shmget size %lld flag %#x id %d\n",
 		 h->shm_segsz, shmflag, h->perms.id);
 	ret = do_shmget(ns, h->perms.key, h->shm_segsz, shmflag, h->perms.id);
-	ckpt_debug("shm: do_shmget ret %d\n", ret);
+	ckpt_debug(ctx, "shm: do_shmget ret %d\n", ret);
 	if (ret < 0)
 		goto out;
 
@@ -266,7 +263,7 @@ int restore_ipc_shm(struct ckpt_ctx *ctx, struct ipc_namespace *ns)
  mutex:
 	fput(file);
 	if (ret < 0) {
-		ckpt_debug("shm: need to remove (%d)\n", ret);
+		ckpt_err(ctx, "shm: need to remove (%d)\n", ret);
 		do_shm_rmid(ns, perms);
 	} else
 		ipc_unlock(perms);
diff --git a/kernel/cred.c b/kernel/cred.c
index 62d28a4..850c9c6 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -724,7 +724,7 @@ static int do_checkpoint_cred(struct ckpt_ctx *ctx, struct cred *cred)
 	if (!h)
 		return -ENOMEM;
 
-	ckpt_debug("cred uid %d fsuid %d gid %d\n", cred->uid, cred->fsuid,
+	ckpt_debug(ctx, "cred uid %d fsuid %d gid %d\n", cred->uid, cred->fsuid,
 			cred->gid);
 	h->uid = cred->uid;
 	h->suid = cred->suid;
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e1ae6e6..73698ee 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1011,9 +1011,7 @@ config CHECKPOINT_DEBUG
 	depends on CHECKPOINT
 	default y
 	help
-	  This options turns on the debugging output of checkpoint/restart.
-	  The level of verbosity is controlled by 'ckpt_debug_level' and can
-	  be set at boot time with "ckpt_debug=" option.
+	  This options turns on extra debugging output of checkpoint/restart.
 
 	  Turning this option off will reduce the size of the c/r code. If
 	  turned on, it is unlikely to incur visible overhead if the debug
diff --git a/mm/filemap.c b/mm/filemap.c
index eb7653d..ff8571b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1730,7 +1730,7 @@ int filemap_restore(struct ckpt_ctx *ctx,
 		ret = private_vma_restore(ctx, mm, file, h);
 	} else {
 		/* shared mapped file */
-		addr = generic_vma_restore(mm, file, h);
+		addr = generic_vma_restore(ctx, mm, file, h);
 		ret = (IS_ERR((void *) addr) ? PTR_ERR((void *) addr) : 0);
 	}
 	return ret;
diff --git a/mm/shmem.c b/mm/shmem.c
index 2cfff8d..b5954de 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2449,7 +2449,7 @@ int shmem_restore(struct ckpt_ctx *ctx,
 		get_file(file);
 	}
 
-	addr = generic_vma_restore(mm, file, h);
+	addr = generic_vma_restore(ctx, mm, file, h);
 	if (IS_ERR((void *) addr))
 		return PTR_ERR((void *) addr);
 
diff --git a/net/checkpoint.c b/net/checkpoint.c
index 9a72aae..9e969bf 100644
--- a/net/checkpoint.c
+++ b/net/checkpoint.c
@@ -183,12 +183,12 @@ int sock_deferred_write_buffers(void *data)
 	}
 
 	ret = sock_write_buffers(ctx, &dq->sk->sk_receive_queue, dst_objref);
-	ckpt_debug("write recv buffers: %i\n", ret);
+	ckpt_debug(ctx, "write recv buffers: %i\n", ret);
 	if (ret < 0)
 		return ret;
 
 	ret = sock_write_buffers(ctx, &dq->sk->sk_write_queue, dst_objref);
-	ckpt_debug("write send buffers: %i\n", ret);
+	ckpt_debug(ctx, "write send buffers: %i\n", ret);
 
 	return ret;
 }
@@ -270,20 +270,20 @@ static int sock_cptrst_opt(int op, struct socket *sock,
 #define CKPT_COPY_SOPT(op, sk, name, opt) \
 	sock_cptrst_opt(op, sk->sk_socket, name, (char *)opt, sizeof(*opt))
 
-static int sock_cptrst_bufopts(int op, struct sock *sk,
+static int sock_cptrst_bufopts(struct ckpt_ctx *ctx, int op, struct sock *sk,
 			       struct ckpt_hdr_socket *h)
 {
 	if (CKPT_COPY_SOPT(op, sk, SO_RCVBUF, &h->sock.rcvbuf))
 		if ((op == CKPT_RST) &&
 		    CKPT_COPY_SOPT(op, sk, SO_RCVBUFFORCE, &h->sock.rcvbuf)) {
-			ckpt_debug("Failed to set SO_RCVBUF");
+			ckpt_err(ctx, "Failed to set SO_RCVBUF");
 			return -EINVAL;
 		}
 
 	if (CKPT_COPY_SOPT(op, sk, SO_SNDBUF, &h->sock.sndbuf))
 		if ((op == CKPT_RST) &&
 		    CKPT_COPY_SOPT(op, sk, SO_SNDBUFFORCE, &h->sock.sndbuf)) {
-			ckpt_debug("Failed to set SO_SNDBUF");
+			ckpt_err(ctx, "Failed to set SO_SNDBUF");
 			return -EINVAL;
 		}
 
@@ -335,7 +335,8 @@ static int sock_restore_flag(struct socket *sock,
 }
 
 
-static int sock_restore_flags(struct socket *sock, struct ckpt_hdr_socket *h)
+static int sock_restore_flags(struct ckpt_ctx *ctx, struct socket *sock,
+			      struct ckpt_hdr_socket *h)
 {
 	unsigned long sk_flags = h->sock.flags;
 	unsigned long sock_flags = h->socket.flags;
@@ -347,7 +348,7 @@ static int sock_restore_flags(struct socket *sock, struct ckpt_hdr_socket *h)
 		int flag = sk_flag_map[i].flag;
 		ret = sock_restore_flag(sock, &sk_flags, flag, opt);
 		if (ret) {
-			ckpt_debug("Failed to set skopt %i: %i\n", opt, ret);
+			ckpt_err(ctx, "Failed to set skopt %i: %i\n", opt, ret);
 			return ret;
 		}
 	}
@@ -357,7 +358,7 @@ static int sock_restore_flags(struct socket *sock, struct ckpt_hdr_socket *h)
 		int flag = sock_flag_map[i].flag;
 		ret = sock_restore_flag(sock, &sock_flags, flag, opt);
 		if (ret) {
-			ckpt_debug("Failed to set sockopt %i: %i\n", opt, ret);
+			ckpt_err(ctx, "Failed to set sockopt %i: %i\n", opt, ret);
 			return ret;
 		}
 	}
@@ -370,7 +371,7 @@ static int sock_restore_flags(struct socket *sock, struct ckpt_hdr_socket *h)
 	    test_bit(SOCK_TIMESTAMPING_SOFTWARE, &sk_flags) ||
 	    test_bit(SOCK_TIMESTAMPING_RAW_HARDWARE, &sk_flags) ||
 	    test_bit(SOCK_TIMESTAMPING_SYS_HARDWARE, &sk_flags)) {
-		ckpt_debug("SOF_TIMESTAMPING_* flags are not supported\n");
+		ckpt_err(ctx, "SOF_TIMESTAMPING_* flags are not supported\n");
 		return -ENOSYS;
 	}
 
@@ -382,7 +383,7 @@ static int sock_restore_flags(struct socket *sock, struct ckpt_hdr_socket *h)
 	 * our protocol's default set, indicates an error
 	 */
 	if (sk_flags & ~sock->sk->sk_flags) {
-		ckpt_debug("Unhandled sock flags: %lx\n", sk_flags);
+		ckpt_err(ctx, "Unhandled sock flags: %lx\n", sk_flags);
 		return -EINVAL;
 	}
 
@@ -427,36 +428,36 @@ static int sock_cptrst(struct ckpt_ctx *ctx, struct sock *sk,
 	CKPT_COPY(op, h->sock.state, sk->sk_state);
 	CKPT_COPY(op, h->sock.backlog, sk->sk_max_ack_backlog);
 
-	if (sock_cptrst_bufopts(op, sk, h))
+	if (sock_cptrst_bufopts(ctx, op, sk, h))
 		return -EINVAL;
 
 	if (CKPT_COPY_SOPT(op, sk, SO_REUSEADDR, &h->sock_common.reuse)) {
-		ckpt_debug("Failed to set SO_REUSEADDR");
+		ckpt_err(ctx, "Failed to set SO_REUSEADDR");
 		return -EINVAL;
 	}
 
 	if (CKPT_COPY_SOPT(op, sk, SO_PRIORITY, &h->sock.priority)) {
-		ckpt_debug("Failed to set SO_PRIORITY");
+		ckpt_err(ctx, "Failed to set SO_PRIORITY");
 		return -EINVAL;
 	}
 
 	if (CKPT_COPY_SOPT(op, sk, SO_RCVLOWAT, &h->sock.rcvlowat)) {
-		ckpt_debug("Failed to set SO_RCVLOWAT");
+		ckpt_err(ctx, "Failed to set SO_RCVLOWAT");
 		return -EINVAL;
 	}
 
 	if (CKPT_COPY_SOPT(op, sk, SO_LINGER, &h->sock.linger)) {
-		ckpt_debug("Failed to set SO_LINGER");
+		ckpt_err(ctx, "Failed to set SO_LINGER");
 		return -EINVAL;
 	}
 
 	if (sock_copy_timeval(op, sk, SO_SNDTIMEO, &h->sock.sndtimeo)) {
-		ckpt_debug("Failed to set SO_SNDTIMEO");
+		ckpt_err(ctx, "Failed to set SO_SNDTIMEO");
 		return -EINVAL;
 	}
 
 	if (sock_copy_timeval(op, sk, SO_RCVTIMEO, &h->sock.rcvtimeo)) {
-		ckpt_debug("Failed to set SO_RCVTIMEO");
+		ckpt_err(ctx, "Failed to set SO_RCVTIMEO");
 		return -EINVAL;
 	}
 
@@ -469,7 +470,7 @@ static int sock_cptrst(struct ckpt_ctx *ctx, struct sock *sk,
 
 		old_fs = get_fs();
 		set_fs(KERNEL_DS);
-		ret = sock_restore_flags(sk->sk_socket, h);
+		ret = sock_restore_flags(ctx, sk->sk_socket, h);
 		set_fs(old_fs);
 		if (ret)
 			return ret;
@@ -477,16 +478,15 @@ static int sock_cptrst(struct ckpt_ctx *ctx, struct sock *sk,
 
 	if ((h->socket.state == SS_CONNECTED) &&
 	    (h->sock.state != TCP_ESTABLISHED)) {
-		ckpt_debug("socket/sock in inconsistent state: %i/%i",
+		ckpt_err(ctx, "socket/sock in inconsistent state: %i/%i",
 			   h->socket.state, h->sock.state);
 		return -EINVAL;
 	} else if ((h->sock.state < TCP_ESTABLISHED) ||
 		   (h->sock.state >= TCP_MAX_STATES)) {
-		ckpt_debug("sock in invalid state: %i", h->sock.state);
+		ckpt_err(ctx, "sock in invalid state: %i", h->sock.state);
 		return -EINVAL;
 	} else if (h->socket.state > SS_DISCONNECTING) {
-		ckpt_debug("socket in invalid state: %i",
-			   h->socket.state);
+		ckpt_err(ctx, "socket in invalid state: %i", h->socket.state);
 		return -EINVAL;
 	}
 
@@ -692,7 +692,7 @@ struct sock *do_sock_restore(struct ckpt_ctx *ctx)
 		goto err;
 
 	if (!sock->ops->restore) {
-		ckpt_debug("proto_ops lacks checkpoint: %pS\n", sock->ops);
+		ckpt_err(ctx, "proto_ops lacks checkpoint: %pS\n", sock->ops);
 		ret = -EINVAL;
 		goto err;
 	}
diff --git a/net/ipv4/checkpoint.c b/net/ipv4/checkpoint.c
index 9cbbf5e..b0fda64 100644
--- a/net/ipv4/checkpoint.c
+++ b/net/ipv4/checkpoint.c
@@ -71,7 +71,7 @@ static int inet_read_buffer(struct ckpt_ctx *ctx, struct sk_buff_head *queue)
 		ret = len;
 		goto out;
 	} else if (len > SKB_MAX_ALLOC) {
-		ckpt_debug("Socket buffer too big (%i > %lu)",
+		ckpt_err(ctx, "Socket buffer too big (%i > %lu)",
 			   len, SKB_MAX_ALLOC);
 		ret = -ENOSPC;
 		goto out;
@@ -111,12 +111,12 @@ static int inet_read_buffers(struct ckpt_ctx *ctx, struct sk_buff_head *queue)
 
 	for (i = 0; i < h->skb_count; i++) {
 		ret = inet_read_buffer(ctx, queue);
-		ckpt_debug("read inet buffer %i: %i", i, ret);
+		ckpt_debug(ctx, "read inet buffer %i: %i", i, ret);
 		if (ret < 0)
 			goto out;
 
 		if (ret > h->total_bytes) {
-			ckpt_debug("Buffers exceeded claim");
+			ckpt_err(ctx, "Buffers exceeded claim");
 			ret = -EINVAL;
 			goto out;
 		}
@@ -139,12 +139,12 @@ static int inet_deferred_restore_buffers(void *data)
 	int ret;
 
 	ret = inet_read_buffers(ctx, &sk->sk_receive_queue);
-	ckpt_debug("(R) inet_read_buffers: %i\n", ret);
+	ckpt_debug(ctx, "(R) inet_read_buffers: %i\n", ret);
 	if (ret < 0)
 		return ret;
 
 	ret = inet_read_buffers(ctx, &sk->sk_write_queue);
-	ckpt_debug("(W) inet_read_buffers: %i\n", ret);
+	ckpt_debug(ctx, "(W) inet_read_buffers: %i\n", ret);
 
 	return ret;
 }
@@ -162,15 +162,11 @@ static int inet_defer_restore_buffers(struct ckpt_ctx *ctx, struct sock *sk)
 
 static int inet_precheck(struct socket *sock, struct ckpt_hdr_socket_inet *in)
 {
-	if (in->laddr_len > sizeof(struct sockaddr_in)) {
-		ckpt_debug("laddr_len is too big\n");
+	if (in->laddr_len > sizeof(struct sockaddr_in))
 		return -EINVAL;
-	}
 
-	if (in->raddr_len > sizeof(struct sockaddr_in)) {
-		ckpt_debug("raddr_len is too big\n");
+	if (in->raddr_len > sizeof(struct sockaddr_in))
 		return -EINVAL;
-	}
 
 	return 0;
 }
@@ -187,8 +183,10 @@ int inet_restore(struct ckpt_ctx *ctx,
 		return PTR_ERR(in);
 
 	ret = inet_precheck(sock, in);
-	if (ret < 0)
+	if (ret < 0) {
+		ckpt_err(ctx, "laddr_len or raddr_len is too big\n");
 		goto out;
+	}
 
 	/* Listening sockets and those that are closed but have a local
 	 * address need to call bind()
@@ -200,13 +198,13 @@ int inet_restore(struct ckpt_ctx *ctx,
 		ret = sock->ops->bind(sock,
 				      (struct sockaddr *)&in->laddr,
 				      in->laddr_len);
-		ckpt_debug("inet bind: %i\n", ret);
+		ckpt_debug(ctx, "inet bind: %i\n", ret);
 		if (ret < 0)
 			goto out;
 
 		if (h->sock.state == TCP_LISTEN) {
 			ret = sock->ops->listen(sock, h->sock.backlog);
-			ckpt_debug("inet listen: %i\n", ret);
+			ckpt_debug(ctx, "inet listen: %i\n", ret);
 			if (ret < 0)
 				goto out;
 		}
diff --git a/net/unix/checkpoint.c b/net/unix/checkpoint.c
index 8b7cb22..f9e377d 100644
--- a/net/unix/checkpoint.c
+++ b/net/unix/checkpoint.c
@@ -49,13 +49,13 @@ static int unix_deferred_join(void *data)
 
 	src = ckpt_obj_fetch(ctx, dq->src_objref, CKPT_OBJ_SOCK);
 	if (!src) {
-		ckpt_debug("Missing src sock ref %i\n", dq->src_objref);
+		ckpt_err(ctx, "Missing src sock ref %i\n", dq->src_objref);
 		return -EINVAL;
 	}
 
 	dst = ckpt_obj_fetch(ctx, dq->dst_objref, CKPT_OBJ_SOCK);
 	if (!src) {
-		ckpt_debug("Missing dst sock ref %i\n", dq->dst_objref);
+		ckpt_err(ctx, "Missing dst sock ref %i\n", dq->dst_objref);
 		return -EINVAL;
 	}
 
@@ -110,7 +110,7 @@ static int unix_write_cwd(struct ckpt_ctx *ctx,
 
 	fqpath[offset] = '\0';
 
-	ckpt_debug("writing socket directory: %s\n", fqpath);
+	ckpt_debug(ctx, "writing socket directory: %s\n", fqpath);
 	ret = ckpt_write_string(ctx, fqpath, offset + 1);
  out:
 	kfree(buf);
@@ -214,7 +214,7 @@ static int sock_read_buffer_sendmsg(struct ckpt_ctx *ctx,
 		ret = len;
 		goto out;
 	} else if (len > SKB_MAX_ALLOC) {
-		ckpt_debug("Socket buffer too big (%i > %lu)",
+		ckpt_err(ctx, "Socket buffer too big (%i > %lu)",
 			   len, SKB_MAX_ALLOC);
 		ret = -ENOSPC;
 		goto out;
@@ -234,13 +234,13 @@ static int sock_read_buffer_sendmsg(struct ckpt_ctx *ctx,
 		struct sock *pr;
 		pr = ckpt_obj_fetch(ctx, h->pr_objref, CKPT_OBJ_SOCK);
 		if (IS_ERR(pr)) {
-			ckpt_debug("Failed to get our peer: %li\n", PTR_ERR(pr));
+			ckpt_err(ctx, "Failed to get our peer: %li\n", PTR_ERR(pr));
 			ret = PTR_ERR(pr);
 			goto out;
 		}
 		ret = unix_join(sk, pr);
 		if (ret < 0) {
-			ckpt_debug("Failed to join: %i\n", ret);
+			ckpt_err(ctx, "Failed to join: %i\n", ret);
 			goto out;
 		}
 	}
@@ -279,7 +279,7 @@ static int sock_read_buffer_sendmsg(struct ckpt_ctx *ctx,
 		sk->sk_sndbuf = sysctl_wmem_max;
 
 	ret = kernel_sendmsg(sk->sk_socket, &msg, &kvec, 1, len);
-	ckpt_debug("kernel_sendmsg(%i,%i): %i\n", h->sk_objref, len, ret);
+	ckpt_debug(ctx, "kernel_sendmsg(%i,%i): %i\n", h->sk_objref, len, ret);
 	if ((ret > 0) && (ret != len))
 		ret = -ENOMEM;
 
@@ -307,12 +307,12 @@ static int unix_read_buffers(struct ckpt_ctx *ctx,
 
 	for (i = 0; i < h->skb_count; i++) {
 		ret = sock_read_buffer_sendmsg(ctx, addr, addrlen);
-		ckpt_debug("read_buffer_sendmsg(%i): %i\n", i, ret);
+		ckpt_debug(ctx, "read_buffer_sendmsg(%i): %i\n", i, ret);
 		if (ret < 0)
 			goto out;
 
 		if (ret > h->total_bytes) {
-			ckpt_debug("Buffers exceeded claim");
+			ckpt_err(ctx, "Buffers exceeded claim");
 			ret = -EINVAL;
 			goto out;
 		}
@@ -338,7 +338,7 @@ static int unix_deferred_restore_buffers(void *data)
 
 	sk = ckpt_obj_fetch(ctx, dq->sk_objref, CKPT_OBJ_SOCK);
 	if (!sk) {
-		ckpt_debug("Missing sock ref %i\n", dq->sk_objref);
+		ckpt_err(ctx, "Missing sock ref %i\n", dq->sk_objref);
 		return -EINVAL;
 	}
 
@@ -348,12 +348,12 @@ static int unix_deferred_restore_buffers(void *data)
 	}
 
 	ret = unix_read_buffers(ctx, addr, addrlen);
-	ckpt_debug("read recv buffers: %i\n", ret);
+	ckpt_debug(ctx, "read recv buffers: %i\n", ret);
 	if (ret < 0)
 		return ret;
 
 	ret = unix_read_buffers(ctx, addr, addrlen);
-	ckpt_debug("read send buffers: %i\n", ret);
+	ckpt_debug(ctx, "read send buffers: %i\n", ret);
 	if (ret > 0)
 		ret = -EINVAL; /* No send buffers for UNIX sockets */
 
@@ -422,14 +422,14 @@ static int unix_restore_connected(struct ckpt_ctx *ctx,
 		sk->sk_peercred.uid = un->peercred_uid;
 		sk->sk_peercred.gid = un->peercred_gid;
 	} else {
-		ckpt_debug("peercred %i:%i would require setuid",
+		ckpt_err(ctx, "peercred %i:%i would require setuid",
 			   un->peercred_uid, un->peercred_gid);
 		return -EPERM;
 	}
 
 	if (!dead && (un->peer > 0)) {
 		ret = unix_defer_join(ctx, un->this, un->peer);
-		ckpt_debug("unix_defer_join: %i\n", ret);
+		ckpt_debug(ctx, "unix_defer_join: %i\n", ret);
 	}
 
 	if (!dead && !ret)
@@ -438,7 +438,7 @@ static int unix_restore_connected(struct ckpt_ctx *ctx,
 	return ret;
 }
 
-static int unix_unlink(const char *name)
+static int unix_unlink(struct ckpt_ctx *ctx, const char *name)
 {
 	struct path spath;
 	struct path ppath;
@@ -453,13 +453,13 @@ static int unix_unlink(const char *name)
 		goto out_s;
 
 	if (!spath.dentry) {
-		ckpt_debug("No dentry found for %s\n", name);
+		ckpt_err(ctx, "No dentry found for %s\n", name);
 		ret = -ENOENT;
 		goto out_p;
 	}
 
 	if (!ppath.dentry || !ppath.dentry->d_inode) {
-		ckpt_debug("No inode for parent of %s\n", name);
+		ckpt_err(ctx, "No inode for parent of %s\n", name);
 		ret = -ENOENT;
 		goto out_p;
 	}
@@ -476,7 +476,8 @@ static int unix_unlink(const char *name)
 /* Call bind() for socket, optionally changing (temporarily) to @path first
  * if non-NULL
  */
-static int unix_chdir_and_bind(struct socket *sock,
+static int unix_chdir_and_bind(struct ckpt_ctx *ctx,
+			       struct socket *sock,
 			       const char *path,
 			       struct sockaddr *addr,
 			       unsigned long addrlen)
@@ -487,7 +488,7 @@ static int unix_chdir_and_bind(struct socket *sock,
 	int ret;
 
 	if (path) {
-		ckpt_debug("switching to cwd %s for unix bind", path);
+		ckpt_debug(ctx, "switching to cwd %s for unix bind", path);
 
 		ret = kern_path(path, 0, &dir);
 		if (ret)
@@ -504,8 +505,8 @@ static int unix_chdir_and_bind(struct socket *sock,
 		write_unlock(&current->fs->lock);
 	}
 
-	ret = unix_unlink(un->sun_path);
-	ckpt_debug("unlink(%s): %i\n", un->sun_path, ret);
+	ret = unix_unlink(ctx, un->sun_path);
+	ckpt_debug(ctx, "unlink(%s): %i\n", un->sun_path, ret);
 	if ((ret == 0) || (ret == -ENOENT))
 		ret = sock_bind(sock, addr, addrlen);
 
@@ -535,7 +536,8 @@ static int unix_fakebind(struct socket *sock,
 	return 0;
 }
 
-static int unix_restore_bind(struct ckpt_hdr_socket *h,
+static int unix_restore_bind(struct ckpt_ctx *ctx,
+			     struct ckpt_hdr_socket *h,
 			     struct ckpt_hdr_socket_unix *un,
 			     struct socket *sock,
 			     const char *path)
@@ -552,11 +554,13 @@ static int unix_restore_bind(struct ckpt_hdr_socket *h,
 	else if (!(un->flags & CKPT_UNIX_LINKED))
 		return unix_fakebind(sock, &un->laddr, len);
 	else
-		return unix_chdir_and_bind(sock, path, addr, len);
+		return unix_chdir_and_bind(ctx, sock, path, addr, len);
 }
 
 /* Some easy pre-flight checks before we get underway */
-static int unix_precheck(struct socket *sock, struct ckpt_hdr_socket *h)
+static int unix_precheck(struct ckpt_ctx *ctx,
+			 struct socket *sock,
+			 struct ckpt_hdr_socket *h)
 {
 	struct net *net = sock_net(sock->sk);
 	unsigned long sk_flags = h->sock.flags;
@@ -564,7 +568,7 @@ static int unix_precheck(struct socket *sock, struct ckpt_hdr_socket *h)
 	if ((h->socket.state == SS_CONNECTING) ||
 	    (h->socket.state == SS_DISCONNECTING) ||
 	    (h->socket.state == SS_FREE)) {
-		ckpt_debug("AF_UNIX socket can't be SS_(DIS)CONNECTING");
+		ckpt_err(ctx, "AF_UNIX socket can't be SS_(DIS)CONNECTING");
 		return -EINVAL;
 	}
 
@@ -574,13 +578,13 @@ static int unix_precheck(struct socket *sock, struct ckpt_hdr_socket *h)
 	 */
 	if ((h->sock.type == SOCK_DGRAM) &&
 	    (h->sock.backlog > net->unx.sysctl_max_dgram_qlen)) {
-		ckpt_debug("DGRAM backlog of %i exceeds system max of %i\n",
+		ckpt_err(ctx, "DGRAM backlog of %i exceeds system max of %i\n",
 			   h->sock.backlog, net->unx.sysctl_max_dgram_qlen);
 		return -EINVAL;
 	}
 
 	if (test_bit(SOCK_USE_WRITE_QUEUE, &sk_flags)) {
-		ckpt_debug("AF_UNIX socket has SOCK_USE_WRITE_QUEUE set");
+		ckpt_err(ctx, "AF_UNIX socket has SOCK_USE_WRITE_QUEUE set");
 		return -EINVAL;
 	}
 
@@ -595,7 +599,7 @@ int unix_restore(struct ckpt_ctx *ctx, struct socket *sock,
 	int ret = -EINVAL;
 	char *cwd = NULL;
 
-	ret = unix_precheck(sock, h);
+	ret = unix_precheck(ctx, sock, h);
 	if (ret)
 		return ret;
 
@@ -616,7 +620,7 @@ int unix_restore(struct ckpt_ctx *ctx, struct socket *sock,
 
 	if ((h->sock.state != TCP_ESTABLISHED) &&
 	    !UNIX_ADDR_EMPTY(un->laddr_len)) {
-		ret = unix_restore_bind(h, un, sock, cwd);
+		ret = unix_restore_bind(ctx, h, un, sock, cwd);
 		if (ret)
 			goto out;
 	}
@@ -626,7 +630,8 @@ int unix_restore(struct ckpt_ctx *ctx, struct socket *sock,
 	else if (h->sock.state == TCP_LISTEN)
 		ret = sock->ops->listen(sock, h->sock.backlog);
 	else
-		ckpt_debug("unsupported UNIX socket state %i\n", h->sock.state);
+		ckpt_debug(ctx, "unsupported UNIX socket state %i\n",
+			   h->sock.state);
  out:
 	ckpt_hdr_put(ctx, un);
 	kfree(cwd);
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH user-cr] Allow for logfile for kernel debug messages (v2)
       [not found] ` <20091021210507.GA2098-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-10-21 21:05   ` Serge E. Hallyn
       [not found]     ` <20091021210533.GA2165-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-10-21 22:03   ` [PATCH RFC] Send checkpoint and restart debug info to a log file (v2) Oren Laadan
  1 sibling, 1 reply; 18+ messages in thread
From: Serge E. Hallyn @ 2009-10-21 21:05 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Linux Containers

If unspecified, -1 will be sent for logfd to the kernel, and there
will be no debug log.   If specified, then checkpoint and restart
debug msgs will be sent to the logfile.

Logfile is specified using -l LOGFILE or --logfile=LOGFILE, i.e.

	checkpoint $pid -o ckpt.out -l ckpt.debug
	restart -l rstr.debug -i ckpt.out

Changelog:
	oct 21: take a --logall flag

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 checkpoint.c |   34 ++++++++++++++++++++++++++++++----
 restart.c    |   47 +++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 71 insertions(+), 10 deletions(-)

diff --git a/checkpoint.c b/checkpoint.c
index c116daf..553132c 100644
--- a/checkpoint.c
+++ b/checkpoint.c
@@ -32,6 +32,8 @@ static char usage_str[] =
 "\tOptions:\n"
 "  -h,--help             print this help message\n"
 "  -o,--output=FILE      write data to FILE instead of standard output\n"
+"  -l,--logfile=FILE     write kernel debug data to FILE (default=nowhere)\n"
+"  -a,--logall           have kernel output all debug data, not just errors\n"
 "  -c,--container        require the PID is a container-init\n"
 "  -v,--verbose          verbose output\n"
 "";
@@ -40,11 +42,13 @@ struct args {
 	char *output;
 	int container;
 	int verbose;
+	int logall;
+	char *logfile;
 };
 
-inline static int checkpoint(pid_t pid, int fd, unsigned long flags)
+inline static int checkpoint(pid_t pid, int fd, unsigned long flags, int logfd)
 {
-	return syscall(__NR_checkpoint, pid, fd, flags);
+	return syscall(__NR_checkpoint, pid, fd, flags, logfd);
 }
 
 static void usage(char *str)
@@ -59,10 +63,12 @@ static void parse_args(struct args *args, int argc, char *argv[])
 		{ "help",	no_argument,		NULL, 'h' },
 		{ "output",	required_argument,	NULL, 'o' },
 		{ "container",	no_argument,		NULL, 'c' },
+		{ "logfile",	required_argument,	NULL, 'l' },
+		{ "logall",	required_argument,	NULL, 'a' },
 		{ "verbose",	no_argument,		NULL, 'v' },
 		{ NULL,		0,			NULL, 0 }
 	};
-	static char optc[] = "hvco:";
+	static char optc[] = "ahvco:l:";
 
 	while (1) {
 		int c = getopt_long(argc, argv, optc, opts, NULL);
@@ -73,12 +79,18 @@ static void parse_args(struct args *args, int argc, char *argv[])
 			exit(1);
 		case 'h':
 			usage(usage_str);
+		case 'a':
+			args->logall = 1;
+			break;
 		case 'o':
 			args->output = optarg;
 			break;
 		case 'c':
 			args->container = 1;
 			break;
+		case 'l':
+			args->logfile = optarg;
+			break;
 		case 'v':
 			args->verbose = 1;
 			break;
@@ -94,6 +106,7 @@ int main(int argc, char *argv[])
 	unsigned long flags = 0;
 	pid_t pid;
 	int ret;
+	int logfd;
 
 	memset(&args, 0, sizeof(args));
 	parse_args(&args, argc, argv);
@@ -125,8 +138,21 @@ int main(int argc, char *argv[])
 		if (ret != STDOUT_FILENO)
 			close(ret);
 	}
+	if (args.logfile) {
+		logfd = open(args.logfile, O_RDWR | O_CREAT, 0600);
+		if (logfd < 0) {
+			perror("open logfile");
+			exit(1);
+		}
+	}
+
+	if (args.logall) {
+		if (!args.logfile)
+			printf("Warning: -a specified but no logfile\n");
+		flags |= CHECKPOINT_LOG_ALL;
+	}
 
-	ret = checkpoint(pid, STDOUT_FILENO, flags);
+	ret = checkpoint(pid, STDOUT_FILENO, flags, logfd);
 
 	if (ret < 0) {
 		perror("checkpoint");
diff --git a/restart.c b/restart.c
index fbaab88..6ba6445 100644
--- a/restart.c
+++ b/restart.c
@@ -75,6 +75,8 @@ static char usage_str[] =
 "  -i,--input=FILE      read data from FILE instead of standard input\n"
 "     --inspect          inspect image on-the-fly for error records\n"
 "  -v,--verbose          verbose output\n"
+"  -l,--log=FILE         logfile for kernel debug info\n"
+"  -a,--logall           have kernel write all debug info, not just errors\n"
 "  -d,--debug            debugging output\n"
 "";
 
@@ -194,9 +196,9 @@ static int str2sig(char *str)
 	return -1;
 }
 
-inline static int restart(pid_t pid, int fd, unsigned long flags)
+inline static int restart(pid_t pid, int fd, unsigned long flags, int logfd)
 {
-	return syscall(__NR_restart, pid, fd, flags);
+	return syscall(__NR_restart, pid, fd, flags, logfd);
 }
 
 #define BUFSIZE  (4 * 4096)
@@ -252,6 +254,8 @@ struct ckpt_ctx {
 	int pipe_out;
 	int pids_nr;
 
+	int logfd;
+
 	int pipe_child[2];	/* for children to report status */
 	int pipe_feed[2];	/* for feeder to provide input */
 
@@ -352,6 +356,8 @@ struct args {
 	int copy_status;
 	char *freezer;
 	char *input;
+	char *logfile;
+	int logall;
 };
 
 static void usage(char *str)
@@ -390,10 +396,12 @@ static void parse_args(struct args *args, int argc, char *argv[])
 		{ "no-wait",	no_argument,		NULL, 'W' },
 		{ "freezer",	required_argument,	NULL, 'F' },
 		{ "verbose",	no_argument,		NULL, 'v' },
+		{ "log",	required_argument,	NULL, 'l' },
+		{ "logall",	required_argument,	NULL, 'a' },
 		{ "debug",	no_argument,		NULL, 'd' },
 		{ NULL,		0,			NULL, 0 }
 	};
-	static char optc[] = "hdvpPwWF:r:i:";
+	static char optc[] = "ahdvpPwWF:r:i:l:";
 
 	int sig;
 
@@ -408,6 +416,9 @@ static void parse_args(struct args *args, int argc, char *argv[])
 		switch (c) {
 		case '?':
 			exit(1);
+		case 'a':
+			args->logall = 1;
+			break;
 		case 'h':
 			usage(usage_str);
 		case 'v':
@@ -416,6 +427,9 @@ static void parse_args(struct args *args, int argc, char *argv[])
 		case 5:  /* --inspect */
 			args->inspect = 1;
 			break;
+		case 'l':
+			args->logfile = optarg;
+			break;
 		case 'i':
 			args->input = optarg;
 			break;
@@ -489,6 +503,9 @@ static void parse_args(struct args *args, int argc, char *argv[])
 	}
 #endif
 
+	if (args->logall && !args->logfile)
+		printf("Warning: -a specified but no logfile\n");
+
 	if (args->pidns)
 		args->pids = 1;
 
@@ -666,6 +683,15 @@ int main(int argc, char *argv[])
 			close(ret);
 	}
 
+	ctx.logfd = -1;
+	if (args.logfile) {
+		ctx.logfd = open(args.logfile, O_RDWR | O_CREAT, 0600);
+		if (ctx.logfd < 0) {
+			perror("open log file");
+			exit(1);
+		}
+	}
+
 	/* freezer preparation */
 	if (args.freezer && freezer_prepare(&ctx) < 0)
 		exit(1);
@@ -678,7 +704,10 @@ int main(int argc, char *argv[])
 
 	/* self-restart ends here: */
 	if (args.self) {
-		restart(getpid(), STDIN_FILENO, RESTART_TASKSELF);
+		int flags = RESTART_TASKSELF;
+		if (args.logall)
+			flags |= RESTART_LOG_ALL;
+		restart(getpid(), STDIN_FILENO, flags, ctx.logfd);
 		/* reach here if restart(2) failed ! */
 		perror("restart");
 		exit(1);
@@ -929,7 +958,10 @@ static int ckpt_coordinator(struct ckpt_ctx *ctx)
 	if (ctx->args->freezer)
 		flags |= RESTART_FROZEN;
 
-	ret = restart(root_pid, STDIN_FILENO, flags);
+	if (ctx->args->logall)
+		flags |= RESTART_LOG_ALL;
+
+	ret = restart(root_pid, STDIN_FILENO, flags, ctx->logfd);
 
 	if (ret < 0) {
 		perror("restart failed");
@@ -1588,9 +1620,12 @@ static int ckpt_make_tree(struct ckpt_ctx *ctx, struct task *task)
 	if (task->flags & (TASK_GHOST | TASK_DEAD))
 		flags |= RESTART_GHOST;
 
+	if (ctx->args->logall)
+		flags |= RESTART_LOG_ALL;
+
 	/* on success this doesn't return */
 	ckpt_dbg("about to call sys_restart(), flags %#lx\n", flags);
-	ret = restart(0, STDIN_FILENO, flags);
+	ret = restart(0, STDIN_FILENO, flags, ctx->logfd);
 	if (ret < 0)
 		perror("task restore failed");
 	return ret;
-- 
1.6.1.1

^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH user-cr] Allow for logfile for kernel debug messages (v2)
       [not found]     ` <20091021210533.GA2165-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-10-21 21:46       ` Oren Laadan
       [not found]         ` <4ADF8140.3010102-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Oren Laadan @ 2009-10-21 21:46 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Linux Containers


Serge E. Hallyn wrote:
> If unspecified, -1 will be sent for logfd to the kernel, and there
> will be no debug log.   If specified, then checkpoint and restart
> debug msgs will be sent to the logfile.

The new interface will cover error reporting, so when run without '-l'
both 'checkpoint' and 'restart' (unless asked to run extra silently)
need to provide a pipe to the syscall, and if an error occurs, display
pull the error reason from the pipe and print it.

> 
> Logfile is specified using -l LOGFILE or --logfile=LOGFILE, i.e.
> 
> 	checkpoint $pid -o ckpt.out -l ckpt.debug
> 	restart -l rstr.debug -i ckpt.out
> 
> Changelog:
> 	oct 21: take a --logall flag
> 
> Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> ---
>  checkpoint.c |   34 ++++++++++++++++++++++++++++++----
>  restart.c    |   47 +++++++++++++++++++++++++++++++++++++++++------
>  2 files changed, 71 insertions(+), 10 deletions(-)
> 
> diff --git a/checkpoint.c b/checkpoint.c
> index c116daf..553132c 100644
> --- a/checkpoint.c
> +++ b/checkpoint.c
> @@ -32,6 +32,8 @@ static char usage_str[] =
>  "\tOptions:\n"
>  "  -h,--help             print this help message\n"
>  "  -o,--output=FILE      write data to FILE instead of standard output\n"
> +"  -l,--logfile=FILE     write kernel debug data to FILE (default=nowhere)\n"

Can the user ask to append the data to a file ?

Also, I'm thinking of adding --output-fd=FD (and then --logfile-fd=FD)
to allow a user to first create suitable fds and then exec the program
(that will allow to easily redirect input/output without adding endless
options).

> +"  -a,--logall           have kernel output all debug data, not just errors\n"

Maybe use '-d'/'-D', or some --debug-log instead ?

>  "  -c,--container        require the PID is a container-init\n"
>  "  -v,--verbose          verbose output\n"
>  "";
> @@ -40,11 +42,13 @@ struct args {
>  	char *output;
>  	int container;
>  	int verbose;
> +	int logall;
> +	char *logfile;
>  };
>  
> -inline static int checkpoint(pid_t pid, int fd, unsigned long flags)
> +inline static int checkpoint(pid_t pid, int fd, unsigned long flags, int logfd)
>  {
> -	return syscall(__NR_checkpoint, pid, fd, flags);
> +	return syscall(__NR_checkpoint, pid, fd, flags, logfd);
>  }
>  
>  static void usage(char *str)
> @@ -59,10 +63,12 @@ static void parse_args(struct args *args, int argc, char *argv[])
>  		{ "help",	no_argument,		NULL, 'h' },
>  		{ "output",	required_argument,	NULL, 'o' },
>  		{ "container",	no_argument,		NULL, 'c' },
> +		{ "logfile",	required_argument,	NULL, 'l' },
> +		{ "logall",	required_argument,	NULL, 'a' },
>  		{ "verbose",	no_argument,		NULL, 'v' },
>  		{ NULL,		0,			NULL, 0 }
>  	};
> -	static char optc[] = "hvco:";
> +	static char optc[] = "ahvco:l:";
>  
>  	while (1) {
>  		int c = getopt_long(argc, argv, optc, opts, NULL);
> @@ -73,12 +79,18 @@ static void parse_args(struct args *args, int argc, char *argv[])
>  			exit(1);
>  		case 'h':
>  			usage(usage_str);
> +		case 'a':
> +			args->logall = 1;
> +			break;
>  		case 'o':
>  			args->output = optarg;
>  			break;
>  		case 'c':
>  			args->container = 1;
>  			break;
> +		case 'l':
> +			args->logfile = optarg;
> +			break;
>  		case 'v':
>  			args->verbose = 1;
>  			break;
> @@ -94,6 +106,7 @@ int main(int argc, char *argv[])
>  	unsigned long flags = 0;
>  	pid_t pid;
>  	int ret;
> +	int logfd;
>  
>  	memset(&args, 0, sizeof(args));
>  	parse_args(&args, argc, argv);
> @@ -125,8 +138,21 @@ int main(int argc, char *argv[])
>  		if (ret != STDOUT_FILENO)
>  			close(ret);
>  	}
> +	if (args.logfile) {
> +		logfd = open(args.logfile, O_RDWR | O_CREAT, 0600);

What if the file exists -- always overwrite ?

> +		if (logfd < 0) {
> +			perror("open logfile");
> +			exit(1);
> +		}
> +	}

[...]

The comments above also apply to the 'restart' part.

Thanks,

Oren.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH user-cr] Allow for logfile for kernel debug messages (v2)
       [not found]         ` <4ADF8140.3010102-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
@ 2009-10-21 22:02           ` Serge E. Hallyn
       [not found]             ` <20091021220245.GA8994-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>
  2009-10-22  5:48           ` Matt Helsley
  1 sibling, 1 reply; 18+ messages in thread
From: Serge E. Hallyn @ 2009-10-21 22:02 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Linux Containers

Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
> 
> Serge E. Hallyn wrote:
> > If unspecified, -1 will be sent for logfd to the kernel, and there
> > will be no debug log.   If specified, then checkpoint and restart
> > debug msgs will be sent to the logfile.
> 
> The new interface will cover error reporting, so when run without '-l'

Not sure what you mean by 'the new interface will cover error reporting'.

> both 'checkpoint' and 'restart' (unless asked to run extra silently)
> need to provide a pipe to the syscall, and if an error occurs, display
> pull the error reason from the pipe and print it.

Sounds like a useful feature, but to be clear note that what is now
ckpt_err() output from the kernel was debug output before, so compared
to without this patchset, the user is not losing any information.

In fact what you're saying sounds like even more justification for
changing some of the

	ret = abc();
	ckpt_debug("...", ret);
	if (!ret)
		ret = cdef();
	ckpt_debug("...", ret);

type of flow into one where ckpt_err() is used and only in the case of
a definite error.  Yes?

-serge

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC] Send checkpoint and restart debug info to a log file (v2)
       [not found] ` <20091021210507.GA2098-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-10-21 21:05   ` [PATCH user-cr] Allow for logfile for kernel debug messages (v2) Serge E. Hallyn
@ 2009-10-21 22:03   ` Oren Laadan
       [not found]     ` <4ADF853F.6080807-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  1 sibling, 1 reply; 18+ messages in thread
From: Oren Laadan @ 2009-10-21 22:03 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Linux Containers



Serge E. Hallyn wrote:
> Until now, debug data was sent to syslog.  That's not really
> right.
> 
> This patch sends ckpt_debug output to a logfile provided by
> the caller.  If no logfile is provided, then no debug output
> is needed.  So the user can pass in -1 for the logfd to say
> so.

I suggest to keep an option to write to the console/syslog too.
Otherwise we may lose important debug info if a nasty crash
occurs ...

> 
> Note that this does not address the potential (inevitable?)
> lockup of writing out a debug msg while checkpointing the
> debug file.  In practice so far it seems to work rather well.
> (with quite a bit of testing)

I believe the only concern here is that our debug code should
not write any debug info while the respective inode is locked.

> 
> This also means that we have to be more careful than we have
> been about not writing out sensitive data.
> 
> This is pure RFC, not meant to be pretty.
> 
> The split into ckpt_debug and ckpt_err (see changelog) suggests
> that we should rearrange (and be more consistent about) how and
> when we print out debug info.  Left as an exercise for later.

I hate homework...

Besides, I think we need to address it now. This patch is pretty
intrusive and will be painful when I fold to ckpt-v19. I do not
want to go through the process twice.

It's definitely time to come up with guidelines to when/where/how
to add debugging output, and when/where/how to add error output.

In the 'how' section, besides avoiding leaking sensitive data, I'd
like to come up with a canonical format that will be easy to read
and to parse automatically (improve over ckpt_write_err).

Hmm... ckpt_write_err() should change too -- be written to the log
instead.

> 
> Changelog:
> 	Oct 21: split ckpt_debug into ckpt_debug and ckpt_err.
> 		Git rid of the split by memory debug info etc.

The split is useful to control the amount of log.

> 		Since userspace actively asks for debug info, I
> 		also made it not depend on CONFIG_CHECKPOINT_DEBUG.
> 		We may want to put it back again to limit kernel
> 		size, but for now it's a distraction, and I'm not
> 		convinced it makes sense

Then add CONFIG_CHECKPOINT_LOGGING ?
(automatically implied by CONFIG_CHECKPOINT_DEBUG)

> 
> Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> ---
>  arch/s390/kernel/compat_wrapper.S |    2 +
>  arch/x86/mm/checkpoint.c          |   11 ++---
>  checkpoint/checkpoint.c           |   22 ++++----
>  checkpoint/files.c                |   29 +++++------
>  checkpoint/memory.c               |   43 ++++++++-------
>  checkpoint/namespace.c            |    3 -
>  checkpoint/objhash.c              |   33 +++++++------
>  checkpoint/process.c              |   85 +++++++++++++++----------------
>  checkpoint/restart.c              |  101 +++++++++++++++++--------------------
>  checkpoint/signal.c               |    5 +--
>  checkpoint/sys.c                  |   94 ++++++++++++++++++++++------------
>  drivers/char/tty_io.c             |   22 ++++----
>  include/linux/checkpoint.h        |   85 ++++++++++++++++---------------
>  include/linux/checkpoint_types.h  |    5 +-
>  include/linux/syscalls.h          |    5 +-
>  ipc/checkpoint.c                  |   13 ++---
>  ipc/checkpoint_msg.c              |   13 ++---
>  ipc/checkpoint_sem.c              |   13 ++---
>  ipc/checkpoint_shm.c              |   15 ++---
>  kernel/cred.c                     |    2 +-
>  lib/Kconfig.debug                 |    4 +-
>  mm/filemap.c                      |    2 +-
>  mm/shmem.c                        |    2 +-
>  net/checkpoint.c                  |   46 ++++++++--------
>  net/ipv4/checkpoint.c             |   26 ++++-----
>  net/unix/checkpoint.c             |   65 +++++++++++++-----------
>  26 files changed, 380 insertions(+), 366 deletions(-)

Uhhh... when this matures from RFC to patch-for-inclusion, make
sure to split it nicely for me ... please.

Oren.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH user-cr] Allow for logfile for kernel debug messages (v2)
       [not found]             ` <20091021220245.GA8994-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>
@ 2009-10-21 22:18               ` Oren Laadan
       [not found]                 ` <4ADF88BD.20400-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Oren Laadan @ 2009-10-21 22:18 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Linux Containers



Serge E. Hallyn wrote:
> Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
>> Serge E. Hallyn wrote:
>>> If unspecified, -1 will be sent for logfd to the kernel, and there
>>> will be no debug log.   If specified, then checkpoint and restart
>>> debug msgs will be sent to the logfile.
>> The new interface will cover error reporting, so when run without '-l'
> 
> Not sure what you mean by 'the new interface will cover error reporting'.

I mean that using an logging fd rids the need to write an error record
to the checkpoint image. So I think we may retire ckpt_write_err() in
favor of the new interface.

The downside (if any) is that the error message is cannot be recovered
from the partial image file (so what ?).

And, unlike now, it's a way to report the reason for a restart failure
back to the caller.

I'd like both 'checkpoint' and 'restart' to report the reason for a
failure to the caller (either by default, or with a new switch).

> 
>> both 'checkpoint' and 'restart' (unless asked to run extra silently)
>> need to provide a pipe to the syscall, and if an error occurs, display
>> pull the error reason from the pipe and print it.
> 
> Sounds like a useful feature, but to be clear note that what is now
> ckpt_err() output from the kernel was debug output before, so compared
> to without this patchset, the user is not losing any information.

True for checkpoint. But the motivation for the new interface (besides
debugging), is to have the same error reporting for restart, too.

> 
> In fact what you're saying sounds like even more justification for
> changing some of the
> 
> 	ret = abc();
> 	ckpt_debug("...", ret);
> 	if (!ret)
> 		ret = cdef();
> 	ckpt_debug("...", ret);
> 
> type of flow into one where ckpt_err() is used and only in the case of
> a definite error.  Yes?

Hmm... I'm not sure.

I think (...) I'm suggesting two classes of output:
- ckpt_error() to report an error to the user
- ckpt_debug() to report debugging info for developers

The value of a @ret is interesting for error reporting only when
it is negative; but for debugging it may be interesting even if
an error did not occur.

Also, I expect more places where we want to see @ret for debugging.
So @ret may be negative, but ckpt_error() will be called later (e.g.
by our caller).

Oren.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC] Send checkpoint and restart debug info to a log file (v2)
       [not found]     ` <4ADF853F.6080807-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
@ 2009-10-21 22:49       ` Serge E. Hallyn
       [not found]         ` <20091021224922.GA5827-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Serge E. Hallyn @ 2009-10-21 22:49 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Linux Containers

Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
> 
> 
> Serge E. Hallyn wrote:
> > Until now, debug data was sent to syslog.  That's not really
> > right.
> > 
> > This patch sends ckpt_debug output to a logfile provided by
> > the caller.  If no logfile is provided, then no debug output
> > is needed.  So the user can pass in -1 for the logfd to say
> > so.
> 
> I suggest to keep an option to write to the console/syslog too.
> Otherwise we may lose important debug info if a nasty crash
> occurs ...

Ok, that should be simple enough to tack on once the rest is
settled - just have ckpt_log() sent fmt+##args to both
do_ckpt_user_debug() (to write to user-provided file) and a
ckpt_syslog() which is a noop if !CONFIG_CHECKPOINT_DEBUG.

> > Note that this does not address the potential (inevitable?)
> > lockup of writing out a debug msg while checkpointing the
> > debug file.  In practice so far it seems to work rather well.
> > (with quite a bit of testing)
> 
> I believe the only concern here is that our debug code should
> not write any debug info while the respective inode is locked.

BTW it occurs to me that self-restart with a logfd must be kinda
hosed early on :)

> > This also means that we have to be more careful than we have
> > been about not writing out sensitive data.
> > 
> > This is pure RFC, not meant to be pretty.
> > 
> > The split into ckpt_debug and ckpt_err (see changelog) suggests
> > that we should rearrange (and be more consistent about) how and
> > when we print out debug info.  Left as an exercise for later.
> 
> I hate homework...
> 
> Besides, I think we need to address it now. This patch is pretty
> intrusive and will be painful when I fold to ckpt-v19. I do not
> want to go through the process twice.

Ok, why don't I take my best stab at this in my next patch.

> It's definitely time to come up with guidelines to when/where/how
> to add debugging output, and when/where/how to add error output.
> 
> In the 'how' section, besides avoiding leaking sensitive data, I'd
> like to come up with a canonical format that will be easy to read
> and to parse automatically (improve over ckpt_write_err).
> 
> Hmm... ckpt_write_err() should change too -- be written to the log
> instead.

Let's not be hasty - maybe duplicated, but an error message at the
end of the checkpoint file is still a nice sanity check for userspace
if they happened to checkpoint without a logfile.

> > Changelog:
> > 	Oct 21: split ckpt_debug into ckpt_debug and ckpt_err.
> > 		Git rid of the split by memory debug info etc.
> 
> The split is useful to control the amount of log.

It's a stupid split!  And I've never used it.  Besides, when a log is
for a single c/r, it's really not very big.

More practically, requiring userspace to pass over a flag
consisting of CKPT_DBG_MEM|CKPT_DBG|FILE|CKPT_DBG|TASK, and
handle corresponding usage flags, is not nice.

> > 		Since userspace actively asks for debug info, I
> > 		also made it not depend on CONFIG_CHECKPOINT_DEBUG.
> > 		We may want to put it back again to limit kernel
> > 		size, but for now it's a distraction, and I'm not
> > 		convinced it makes sense
> 
> Then add CONFIG_CHECKPOINT_LOGGING ?
> (automatically implied by CONFIG_CHECKPOINT_DEBUG)

Well...  for some reason I can't decide...  does that make sense?

Remember more config options means more ways for the code to
not compile.  Before adding one, I'd prefer to compile an image
with and without the ckpt_debugs (iow with the #defines under
#ifdef 0) and see how much size it really adds.  And in face right
now CONFIG_CHECKPOINT_DEBUG only directs creation of the extra restart
task debug info, which is only printed out when ctx->uflags & CKPT_DEBUG_ALL,
so that config option could go away entirely.

> > Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> > ---
> >  arch/s390/kernel/compat_wrapper.S |    2 +
> >  arch/x86/mm/checkpoint.c          |   11 ++---
> >  checkpoint/checkpoint.c           |   22 ++++----
> >  checkpoint/files.c                |   29 +++++------
> >  checkpoint/memory.c               |   43 ++++++++-------
> >  checkpoint/namespace.c            |    3 -
> >  checkpoint/objhash.c              |   33 +++++++------
> >  checkpoint/process.c              |   85 +++++++++++++++----------------
> >  checkpoint/restart.c              |  101 +++++++++++++++++--------------------
> >  checkpoint/signal.c               |    5 +--
> >  checkpoint/sys.c                  |   94 ++++++++++++++++++++++------------
> >  drivers/char/tty_io.c             |   22 ++++----
> >  include/linux/checkpoint.h        |   85 ++++++++++++++++---------------
> >  include/linux/checkpoint_types.h  |    5 +-
> >  include/linux/syscalls.h          |    5 +-
> >  ipc/checkpoint.c                  |   13 ++---
> >  ipc/checkpoint_msg.c              |   13 ++---
> >  ipc/checkpoint_sem.c              |   13 ++---
> >  ipc/checkpoint_shm.c              |   15 ++---
> >  kernel/cred.c                     |    2 +-
> >  lib/Kconfig.debug                 |    4 +-
> >  mm/filemap.c                      |    2 +-
> >  mm/shmem.c                        |    2 +-
> >  net/checkpoint.c                  |   46 ++++++++--------
> >  net/ipv4/checkpoint.c             |   26 ++++-----
> >  net/unix/checkpoint.c             |   65 +++++++++++++-----------
> >  26 files changed, 380 insertions(+), 366 deletions(-)
> 
> Uhhh... when this matures from RFC to patch-for-inclusion, make
> sure to split it nicely for me ... please.

I'm not sure how split-able it is, but i'll do my best.

-serge

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC] Send checkpoint and restart debug info to a log file (v2)
       [not found]         ` <20091021224922.GA5827-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-10-21 23:14           ` Oren Laadan
       [not found]             ` <4ADF95D0.8060806-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Oren Laadan @ 2009-10-21 23:14 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Linux Containers



Serge E. Hallyn wrote:
> Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
>>
>> Serge E. Hallyn wrote:
>>> Until now, debug data was sent to syslog.  That's not really
>>> right.
>>>
>>> This patch sends ckpt_debug output to a logfile provided by
>>> the caller.  If no logfile is provided, then no debug output
>>> is needed.  So the user can pass in -1 for the logfd to say
>>> so.
>> I suggest to keep an option to write to the console/syslog too.
>> Otherwise we may lose important debug info if a nasty crash
>> occurs ...
> 
> Ok, that should be simple enough to tack on once the rest is
> settled - just have ckpt_log() sent fmt+##args to both
> do_ckpt_user_debug() (to write to user-provided file) and a
> ckpt_syslog() which is a noop if !CONFIG_CHECKPOINT_DEBUG.
> 
>>> Note that this does not address the potential (inevitable?)
>>> lockup of writing out a debug msg while checkpointing the
>>> debug file.  In practice so far it seems to work rather well.
>>> (with quite a bit of testing)
>> I believe the only concern here is that our debug code should
>> not write any debug info while the respective inode is locked.
> 
> BTW it occurs to me that self-restart with a logfd must be kinda
> hosed early on :)

Why would that be a problem ?  I actually think it's useful for
those doing self-restart:  you open a file, the kernel takes a
reference to it, then sys_restart() will eventually close that
file descriptor - but kernel still keeps a reference - so debug
data keeps flowing. When restart completes -- data is gone; If
restart fails - user will have information in that file.

> 
>>> This also means that we have to be more careful than we have
>>> been about not writing out sensitive data.
>>>
>>> This is pure RFC, not meant to be pretty.
>>>
>>> The split into ckpt_debug and ckpt_err (see changelog) suggests
>>> that we should rearrange (and be more consistent about) how and
>>> when we print out debug info.  Left as an exercise for later.
>> I hate homework...
>>
>> Besides, I think we need to address it now. This patch is pretty
>> intrusive and will be painful when I fold to ckpt-v19. I do not
>> want to go through the process twice.
> 
> Ok, why don't I take my best stab at this in my next patch.
> 
>> It's definitely time to come up with guidelines to when/where/how
>> to add debugging output, and when/where/how to add error output.
>>
>> In the 'how' section, besides avoiding leaking sensitive data, I'd
>> like to come up with a canonical format that will be easy to read
>> and to parse automatically (improve over ckpt_write_err).
>>
>> Hmm... ckpt_write_err() should change too -- be written to the log
>> instead.
> 
> Let's not be hasty - maybe duplicated, but an error message at the
> end of the checkpoint file is still a nice sanity check for userspace
> if they happened to checkpoint without a logfile.

Ok.

> 
>>> Changelog:
>>> 	Oct 21: split ckpt_debug into ckpt_debug and ckpt_err.
>>> 		Git rid of the split by memory debug info etc.
>> The split is useful to control the amount of log.
> 
> It's a stupid split!  And I've never used it.  Besides, when a log is
> for a single c/r, it's really not very big.

It may be stupid split!, yet it did prove very useful to me.
Maybe it's because you never debugged the memory checkpoint
page by page.

A typical scenario: you hit a bug -> you enable debugging ->
the bug disappears -> you disable debugging -> you hit the bug ...

IOW, debugging output in big doses affects the execution in a
way that makes heisen-bugs hide. Control over verbosity means you
get better chances at reproducing the behavior and still have
enough meaningful data.

> 
> More practically, requiring userspace to pass over a flag
> consisting of CKPT_DBG_MEM|CKPT_DBG|FILE|CKPT_DBG|TASK, and
> handle corresponding usage flags, is not nice.

I agree with you on about this. Maybe we want a better
interface ?

Which brings me to this random thought: maybe we want to
make the fourth argument of sys_{checkpoint,restart} a
structure, to make it easier to extend it in the future
without having to go throw a clone3-like hell...

Specifically, this structure could now be:

struct ckpt_args {
	int version;
	int logfd;
	int logmask;
};

(or use union checkpoint {} and union restart {} to tell
between checkpoint- and restart-related args.

> 
>>> 		Since userspace actively asks for debug info, I
>>> 		also made it not depend on CONFIG_CHECKPOINT_DEBUG.
>>> 		We may want to put it back again to limit kernel
>>> 		size, but for now it's a distraction, and I'm not
>>> 		convinced it makes sense
>> Then add CONFIG_CHECKPOINT_LOGGING ?
>> (automatically implied by CONFIG_CHECKPOINT_DEBUG)
> 
> Well...  for some reason I can't decide...  does that make sense?
> 
> Remember more config options means more ways for the code to
> not compile.  Before adding one, I'd prefer to compile an image
> with and without the ckpt_debugs (iow with the #defines under
> #ifdef 0) and see how much size it really adds.  And in face right
> now CONFIG_CHECKPOINT_DEBUG only directs creation of the extra restart
> task debug info, which is only printed out when ctx->uflags & CKPT_DEBUG_ALL,
> so that config option could go away entirely.
> 
>>> Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
>>> ---
>>>  arch/s390/kernel/compat_wrapper.S |    2 +
>>>  arch/x86/mm/checkpoint.c          |   11 ++---
>>>  checkpoint/checkpoint.c           |   22 ++++----
>>>  checkpoint/files.c                |   29 +++++------
>>>  checkpoint/memory.c               |   43 ++++++++-------
>>>  checkpoint/namespace.c            |    3 -
>>>  checkpoint/objhash.c              |   33 +++++++------
>>>  checkpoint/process.c              |   85 +++++++++++++++----------------
>>>  checkpoint/restart.c              |  101 +++++++++++++++++--------------------
>>>  checkpoint/signal.c               |    5 +--
>>>  checkpoint/sys.c                  |   94 ++++++++++++++++++++++------------
>>>  drivers/char/tty_io.c             |   22 ++++----
>>>  include/linux/checkpoint.h        |   85 ++++++++++++++++---------------
>>>  include/linux/checkpoint_types.h  |    5 +-
>>>  include/linux/syscalls.h          |    5 +-
>>>  ipc/checkpoint.c                  |   13 ++---
>>>  ipc/checkpoint_msg.c              |   13 ++---
>>>  ipc/checkpoint_sem.c              |   13 ++---
>>>  ipc/checkpoint_shm.c              |   15 ++---
>>>  kernel/cred.c                     |    2 +-
>>>  lib/Kconfig.debug                 |    4 +-
>>>  mm/filemap.c                      |    2 +-
>>>  mm/shmem.c                        |    2 +-
>>>  net/checkpoint.c                  |   46 ++++++++--------
>>>  net/ipv4/checkpoint.c             |   26 ++++-----
>>>  net/unix/checkpoint.c             |   65 +++++++++++++-----------
>>>  26 files changed, 380 insertions(+), 366 deletions(-)
>> Uhhh... when this matures from RFC to patch-for-inclusion, make
>> sure to split it nicely for me ... please.
> 
> I'm not sure how split-able it is, but i'll do my best.

One way it to decide that we change the name ckpt_debug() to,
say, ckpt_log_debug(), then you introduce the new function in
on patch, and in following patches convert file by file.

Or, you just split it to first change ckpt_debug(), and then one
patch per file (or per subsystem). It's not bisect-able, but
that's ok because it's not going as is to the kernel - I'll
fold it anyway into ckpt-v19 clean patchset.

Oren.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC] Send checkpoint and restart debug info to a log file (v2)
       [not found]             ` <4ADF95D0.8060806-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
@ 2009-10-22  0:51               ` Serge E. Hallyn
       [not found]                 ` <20091022005157.GA11608-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  2009-10-22 18:25               ` Serge E. Hallyn
  1 sibling, 1 reply; 18+ messages in thread
From: Serge E. Hallyn @ 2009-10-22  0:51 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Linux Containers

Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
> > BTW it occurs to me that self-restart with a logfd must be kinda
> > hosed early on :)
> 
> Why would that be a problem ?  I actually think it's useful for
> those doing self-restart:  you open a file, the kernel takes a
> reference to it, then sys_restart() will eventually close that
> file descriptor - but kernel still keeps a reference - so debug
> data keeps flowing. When restart completes -- data is gone; If
> restart fails - user will have information in that file.

Hmm, good point.  No problem then.

...

> >>> Changelog:
> >>> 	Oct 21: split ckpt_debug into ckpt_debug and ckpt_err.
> >>> 		Git rid of the split by memory debug info etc.
> >> The split is useful to control the amount of log.
> > 
> > It's a stupid split!  And I've never used it.  Besides, when a log is
> > for a single c/r, it's really not very big.
> 
> It may be stupid split!, yet it did prove very useful to me.

Sorry, stupid isn't right.  Clearly it made sense.

> Maybe it's because you never debugged the memory checkpoint
> page by page.
> 
> A typical scenario: you hit a bug -> you enable debugging ->
> the bug disappears -> you disable debugging -> you hit the bug ...
> 
> IOW, debugging output in big doses affects the execution in a
> way that makes heisen-bugs hide. Control over verbosity means you
> get better chances at reproducing the behavior and still have
> enough meaningful data.

So I guess it should stay in there at least for syslog output.
Then you could debug by not passing an fd for the logfile to
sys_restart, and tweaking the syslog output.

> > More practically, requiring userspace to pass over a flag
> > consisting of CKPT_DBG_MEM|CKPT_DBG|FILE|CKPT_DBG|TASK, and
> > handle corresponding usage flags, is not nice.
> 
> I agree with you on about this. Maybe we want a better
> interface ?
> 
> Which brings me to this random thought: maybe we want to
> make the fourth argument of sys_{checkpoint,restart} a
> structure, to make it easier to extend it in the future
> without having to go throw a clone3-like hell...
> 
> Specifically, this structure could now be:
> 
> struct ckpt_args {
> 	int version;
> 	int logfd;
> 	int logmask;
> };
> 
> (or use union checkpoint {} and union restart {} to tell
> between checkpoint- and restart-related args.

Well I don't like passing structs to the kernel actually (and
don't like that in the clone3 patchset :), but can't think of
anything better offhand.  I'll think about it a bit more, but
maybe this'll be the way to go - long as a very simple program
can pass NULL to mean no debug.

-serge

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH user-cr] Allow for logfile for kernel debug messages (v2)
       [not found]         ` <4ADF8140.3010102-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  2009-10-21 22:02           ` Serge E. Hallyn
@ 2009-10-22  5:48           ` Matt Helsley
       [not found]             ` <20091022054853.GC7757-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
  1 sibling, 1 reply; 18+ messages in thread
From: Matt Helsley @ 2009-10-22  5:48 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Linux Containers

On Wed, Oct 21, 2009 at 05:46:40PM -0400, Oren Laadan wrote:
> 
> Serge E. Hallyn wrote:
> > If unspecified, -1 will be sent for logfd to the kernel, and there
> > will be no debug log.   If specified, then checkpoint and restart
> > debug msgs will be sent to the logfile.
> 
> The new interface will cover error reporting, so when run without '-l'
> both 'checkpoint' and 'restart' (unless asked to run extra silently)
> need to provide a pipe to the syscall, and if an error occurs, display
> pull the error reason from the pipe and print it.

Of course if more than a pipe buf's worth of error data needs to be 
transferred then that scheme won't work very cleanly --
checkpoint/restart would always be a multi-task operation. (I just ran
into this issue with self-checkpoint.)

> 
> > 
> > Logfile is specified using -l LOGFILE or --logfile=LOGFILE, i.e.
> > 
> > 	checkpoint $pid -o ckpt.out -l ckpt.debug
> > 	restart -l rstr.debug -i ckpt.out
> > 
> > Changelog:
> > 	oct 21: take a --logall flag
> > 
> > Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
> > ---
> >  checkpoint.c |   34 ++++++++++++++++++++++++++++++----
> >  restart.c    |   47 +++++++++++++++++++++++++++++++++++++++++------
> >  2 files changed, 71 insertions(+), 10 deletions(-)
> > 
> > diff --git a/checkpoint.c b/checkpoint.c
> > index c116daf..553132c 100644
> > --- a/checkpoint.c
> > +++ b/checkpoint.c
> > @@ -32,6 +32,8 @@ static char usage_str[] =
> >  "\tOptions:\n"
> >  "  -h,--help             print this help message\n"
> >  "  -o,--output=FILE      write data to FILE instead of standard output\n"
> > +"  -l,--logfile=FILE     write kernel debug data to FILE (default=nowhere)\n"
> 
> Can the user ask to append the data to a file ?
> 
> Also, I'm thinking of adding --output-fd=FD (and then --logfile-fd=FD)
> to allow a user to first create suitable fds and then exec the program
> (that will allow to easily redirect input/output without adding endless
> options).

I like the output-fd idea. It means you don't need to be able to ask to
append -- it's implicit in the file position of the passed file
descriptor.

Cheers,
	-Matt Helsley

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH user-cr] Allow for logfile for kernel debug messages (v2)
       [not found]                 ` <4ADF88BD.20400-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
@ 2009-10-22  5:56                   ` Matt Helsley
  0 siblings, 0 replies; 18+ messages in thread
From: Matt Helsley @ 2009-10-22  5:56 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Linux Containers

On Wed, Oct 21, 2009 at 06:18:37PM -0400, Oren Laadan wrote:
> 
> 
> Serge E. Hallyn wrote:
> > Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
> >> Serge E. Hallyn wrote:
> >>> If unspecified, -1 will be sent for logfd to the kernel, and there
> >>> will be no debug log.   If specified, then checkpoint and restart
> >>> debug msgs will be sent to the logfile.
> >> The new interface will cover error reporting, so when run without '-l'
> > 
> > Not sure what you mean by 'the new interface will cover error reporting'.
> 
> I mean that using an logging fd rids the need to write an error record
> to the checkpoint image. So I think we may retire ckpt_write_err() in
> favor of the new interface.

Having the error in the image is incredibly useful. Not only does it
tell you an error message but it establishes plenty of useful context --
the preceding portions of the checkpoint image. Removing it seems like it
would be a big mistake.

> The downside (if any) is that the error message is cannot be recovered
> from the partial image file (so what ?).

Because it's easy. The easier we make the interface to use the better.
Otherwise we'll get people trying to piece together what happened from
two separate streams -- the checkpoint image and the log -- and doing it
poorly. Better to keep them intertwined in the case of checkpoint.

Cheers,
	-Matt Helsley

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC] Send checkpoint and restart debug info to a log file (v2)
       [not found]                 ` <20091022005157.GA11608-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-10-22  6:04                   ` Matt Helsley
       [not found]                     ` <20091022060400.GE7757-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Matt Helsley @ 2009-10-22  6:04 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Linux Containers

On Wed, Oct 21, 2009 at 07:51:57PM -0500, Serge E. Hallyn wrote:
> Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):

<snip>

> > > More practically, requiring userspace to pass over a flag
> > > consisting of CKPT_DBG_MEM|CKPT_DBG|FILE|CKPT_DBG|TASK, and
> > > handle corresponding usage flags, is not nice.
> > 
> > I agree with you on about this. Maybe we want a better
> > interface ?
> > 
> > Which brings me to this random thought: maybe we want to
> > make the fourth argument of sys_{checkpoint,restart} a
> > structure, to make it easier to extend it in the future
> > without having to go throw a clone3-like hell...

Adding new kernel interfaces is supposed to be somewhat hellish.

> > 
> > Specifically, this structure could now be:
> > 
> > struct ckpt_args {
> > 	int version;
> > 	int logfd;
> > 	int logmask;
> > };
> > 
> > (or use union checkpoint {} and union restart {} to tell
> > between checkpoint- and restart-related args.
> 
> Well I don't like passing structs to the kernel actually (and

Let's not do this. I agree that passing structs, when unnecessary,
is gross. Especially if it gets used to extend the arguments
passed via the syscall interface (new flag values I don't mind).

Cheers,
	-Matt Helsley

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC] Send checkpoint and restart debug info to a log file (v2)
       [not found]             ` <4ADF95D0.8060806-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  2009-10-22  0:51               ` Serge E. Hallyn
@ 2009-10-22 18:25               ` Serge E. Hallyn
  1 sibling, 0 replies; 18+ messages in thread
From: Serge E. Hallyn @ 2009-10-22 18:25 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Linux Containers

Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
...
> > More practically, requiring userspace to pass over a flag
> > consisting of CKPT_DBG_MEM|CKPT_DBG|FILE|CKPT_DBG|TASK, and
> > handle corresponding usage flags, is not nice.
> 
> I agree with you on about this. Maybe we want a better
> interface ?

I'm looking at tracehooks right now.  So the way it would work is:

1. Error messages using ckpt_error() are always compiled in, are
sent to ctx->logfile if that exists, and are sent to syslog if
CONFIG_CHECKPOINT_DEBUG (or some sysctl is set).

2. Debug messages could be tracehooks which can be enabled per
subsystem as above - for mem, file, task, etc - using code we add
under linux-cr/samples/.  It would basically do:

	  ret = register_trace_ckpt_mem(ckpt_trace_mem, ckpt_logprobe);
	  ret = register_trace_ckpt_task(ckpt_trace_mem, ckpt_logprobe);

where ckpt_logprobe does the printing of info to ctx->logfile and
syslog.

Or,

at first of course we can just let CONFIG_CHECKPOINT_DEBUG determine
the printing of ckpt_debugs() as we do now.

I'm going to start slowly, first defining the ckpt_error() helpers
and introducing/converting some callers.

-serge

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC] Send checkpoint and restart debug info to a log file (v2)
       [not found]                     ` <20091022060400.GE7757-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
@ 2009-10-23 18:48                       ` Oren Laadan
       [not found]                         ` <4AE1FA7A.5030702-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  0 siblings, 1 reply; 18+ messages in thread
From: Oren Laadan @ 2009-10-23 18:48 UTC (permalink / raw)
  To: Matt Helsley; +Cc: Linux Containers



Matt Helsley wrote:
> On Wed, Oct 21, 2009 at 07:51:57PM -0500, Serge E. Hallyn wrote:
>> Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
> 
> <snip>
> 
>>>> More practically, requiring userspace to pass over a flag
>>>> consisting of CKPT_DBG_MEM|CKPT_DBG|FILE|CKPT_DBG|TASK, and
>>>> handle corresponding usage flags, is not nice.
>>> I agree with you on about this. Maybe we want a better
>>> interface ?
>>>
>>> Which brings me to this random thought: maybe we want to
>>> make the fourth argument of sys_{checkpoint,restart} a
>>> structure, to make it easier to extend it in the future
>>> without having to go throw a clone3-like hell...
> 
> Adding new kernel interfaces is supposed to be somewhat hellish.
> 
>>> Specifically, this structure could now be:
>>>
>>> struct ckpt_args {
>>> 	int version;
>>> 	int logfd;
>>> 	int logmask;
>>> };
>>>
>>> (or use union checkpoint {} and union restart {} to tell
>>> between checkpoint- and restart-related args.
>> Well I don't like passing structs to the kernel actually (and
> 
> Let's not do this. I agree that passing structs, when unnecessary,
> is gross. Especially if it gets used to extend the arguments
> passed via the syscall interface (new flag values I don't mind).

Ok, we already allow future extension by being strict about
which flags are taken or not.

Then what do we do with logmask ?   I prefer it to be a per-syscall
value as opposed to a system-wise setting.

Oren.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH user-cr] Allow for logfile for kernel debug messages (v2)
       [not found]             ` <20091022054853.GC7757-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
@ 2009-10-23 18:59               ` Oren Laadan
  0 siblings, 0 replies; 18+ messages in thread
From: Oren Laadan @ 2009-10-23 18:59 UTC (permalink / raw)
  To: Matt Helsley; +Cc: Linux Containers



Matt Helsley wrote:
> On Wed, Oct 21, 2009 at 05:46:40PM -0400, Oren Laadan wrote:
>> Serge E. Hallyn wrote:
>>> If unspecified, -1 will be sent for logfd to the kernel, and there
>>> will be no debug log.   If specified, then checkpoint and restart
>>> debug msgs will be sent to the logfile.
>> The new interface will cover error reporting, so when run without '-l'
>> both 'checkpoint' and 'restart' (unless asked to run extra silently)
>> need to provide a pipe to the syscall, and if an error occurs, display
>> pull the error reason from the pipe and print it.
> 
> Of course if more than a pipe buf's worth of error data needs to be 
> transferred then that scheme won't work very cleanly --
> checkpoint/restart would always be a multi-task operation. (I just ran
> into this issue with self-checkpoint.)

Of course. IIRC I commented at linux plumbers that error messages
should (as in must) fit there, as opposed to log/debug messages.

Oren.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC] Send checkpoint and restart debug info to a log file (v2)
       [not found]                         ` <4AE1FA7A.5030702-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
@ 2009-10-23 20:06                           ` Serge E. Hallyn
  2009-10-26 21:52                           ` Serge E. Hallyn
  1 sibling, 0 replies; 18+ messages in thread
From: Serge E. Hallyn @ 2009-10-23 20:06 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Linux Containers

Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
> 
> 
> Matt Helsley wrote:
> > On Wed, Oct 21, 2009 at 07:51:57PM -0500, Serge E. Hallyn wrote:
> >> Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
> > 
> > <snip>
> > 
> >>>> More practically, requiring userspace to pass over a flag
> >>>> consisting of CKPT_DBG_MEM|CKPT_DBG|FILE|CKPT_DBG|TASK, and
> >>>> handle corresponding usage flags, is not nice.
> >>> I agree with you on about this. Maybe we want a better
> >>> interface ?
> >>>
> >>> Which brings me to this random thought: maybe we want to
> >>> make the fourth argument of sys_{checkpoint,restart} a
> >>> structure, to make it easier to extend it in the future
> >>> without having to go throw a clone3-like hell...
> > 
> > Adding new kernel interfaces is supposed to be somewhat hellish.
> > 
> >>> Specifically, this structure could now be:
> >>>
> >>> struct ckpt_args {
> >>> 	int version;
> >>> 	int logfd;
> >>> 	int logmask;
> >>> };
> >>>
> >>> (or use union checkpoint {} and union restart {} to tell
> >>> between checkpoint- and restart-related args.
> >> Well I don't like passing structs to the kernel actually (and
> > 
> > Let's not do this. I agree that passing structs, when unnecessary,
> > is gross. Especially if it gets used to extend the arguments
> > passed via the syscall interface (new flag values I don't mind).
> 
> Ok, we already allow future extension by being strict about
> which flags are taken or not.
> 
> Then what do we do with logmask ?   I prefer it to be a per-syscall
> value as opposed to a system-wise setting.

Getting ugly but...

if we were to use tracepoints, then we could (or the admin could) have the
function which is loaded determine based on a task's cgroup what to
print out.

I did say it was ugly...

-serge

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC] Send checkpoint and restart debug info to a log file (v2)
       [not found]                         ` <4AE1FA7A.5030702-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
  2009-10-23 20:06                           ` Serge E. Hallyn
@ 2009-10-26 21:52                           ` Serge E. Hallyn
       [not found]                             ` <20091026215238.GA10900-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  1 sibling, 1 reply; 18+ messages in thread
From: Serge E. Hallyn @ 2009-10-26 21:52 UTC (permalink / raw)
  To: Oren Laadan; +Cc: Linux Containers

Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
> 
> 
> Matt Helsley wrote:
> > On Wed, Oct 21, 2009 at 07:51:57PM -0500, Serge E. Hallyn wrote:
> >> Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
> > 
> > <snip>
> > 
> >>>> More practically, requiring userspace to pass over a flag
> >>>> consisting of CKPT_DBG_MEM|CKPT_DBG|FILE|CKPT_DBG|TASK, and
> >>>> handle corresponding usage flags, is not nice.
> >>> I agree with you on about this. Maybe we want a better
> >>> interface ?
> >>>
> >>> Which brings me to this random thought: maybe we want to
> >>> make the fourth argument of sys_{checkpoint,restart} a
> >>> structure, to make it easier to extend it in the future
> >>> without having to go throw a clone3-like hell...
> > 
> > Adding new kernel interfaces is supposed to be somewhat hellish.
> > 
> >>> Specifically, this structure could now be:
> >>>
> >>> struct ckpt_args {
> >>> 	int version;
> >>> 	int logfd;
> >>> 	int logmask;
> >>> };
> >>>
> >>> (or use union checkpoint {} and union restart {} to tell
> >>> between checkpoint- and restart-related args.
> >> Well I don't like passing structs to the kernel actually (and
> > 
> > Let's not do this. I agree that passing structs, when unnecessary,
> > is gross. Especially if it gets used to extend the arguments
> > passed via the syscall interface (new flag values I don't mind).
> 
> Ok, we already allow future extension by being strict about
> which flags are taken or not.
> 
> Then what do we do with logmask ?   I prefer it to be a per-syscall
> value as opposed to a system-wise setting.

Ok well let's be honest with ourselves - we'd like to give end-users
reasonable debug info to figure out common problems.  But we will
always have cases where we have to hand-instrument the kernel for
our own advanced debugging.  Cases where you must print out only a
specific info to catch a race condition probably will fit into
such a case.  So I'd prefer to aim for giving the end-user just one
or a few options - nothing, errors only, or all debugging info that
we care to insert in everyday kernels (which hopefully isn't too
much, but let's a user figure out approximately where the problem
happened, i.e. recreating a socket, opening a file, etc).  Otherwise
too much debugging will (a) deter from the readability of the code
(b) bulk up the kernel and (c) make the api cumberson (too many
flags to checkpoint/restart userspace programs, and passing structs
to syscall).

In fact I'm wondering whether for the shipped c/r kernel we want
to have only ckpt_error()s and make sure we record the location in
the restart file where we errored out.  The ckpt_write_err() info
is already very helpful in tracking down checkpoint errors.  Of course
this becomes a pain whenever we introduce a subte bug and have to
hand-instrument everything.  And then I have to curse all the printks
I have to add.  But it's not reasonabe to have every other line of
c/r code be a debug statement either.

-serge

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH RFC] Send checkpoint and restart debug info to a log file (v2)
       [not found]                             ` <20091026215238.GA10900-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
@ 2009-10-26 23:39                               ` Oren Laadan
  0 siblings, 0 replies; 18+ messages in thread
From: Oren Laadan @ 2009-10-26 23:39 UTC (permalink / raw)
  To: Serge E. Hallyn; +Cc: Linux Containers



Serge E. Hallyn wrote:
> Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
>>
>> Matt Helsley wrote:
>>> On Wed, Oct 21, 2009 at 07:51:57PM -0500, Serge E. Hallyn wrote:
>>>> Quoting Oren Laadan (orenl-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org):
>>> <snip>
>>>
>>>>>> More practically, requiring userspace to pass over a flag
>>>>>> consisting of CKPT_DBG_MEM|CKPT_DBG|FILE|CKPT_DBG|TASK, and
>>>>>> handle corresponding usage flags, is not nice.
>>>>> I agree with you on about this. Maybe we want a better
>>>>> interface ?
>>>>>
>>>>> Which brings me to this random thought: maybe we want to
>>>>> make the fourth argument of sys_{checkpoint,restart} a
>>>>> structure, to make it easier to extend it in the future
>>>>> without having to go throw a clone3-like hell...
>>> Adding new kernel interfaces is supposed to be somewhat hellish.
>>>
>>>>> Specifically, this structure could now be:
>>>>>
>>>>> struct ckpt_args {
>>>>> 	int version;
>>>>> 	int logfd;
>>>>> 	int logmask;
>>>>> };
>>>>>
>>>>> (or use union checkpoint {} and union restart {} to tell
>>>>> between checkpoint- and restart-related args.
>>>> Well I don't like passing structs to the kernel actually (and
>>> Let's not do this. I agree that passing structs, when unnecessary,
>>> is gross. Especially if it gets used to extend the arguments
>>> passed via the syscall interface (new flag values I don't mind).
>> Ok, we already allow future extension by being strict about
>> which flags are taken or not.
>>
>> Then what do we do with logmask ?   I prefer it to be a per-syscall
>> value as opposed to a system-wise setting.
> 
> Ok well let's be honest with ourselves - we'd like to give end-users
> reasonable debug info to figure out common problems.  But we will
> always have cases where we have to hand-instrument the kernel for
> our own advanced debugging.  Cases where you must print out only a
> specific info to catch a race condition probably will fit into
> such a case.  So I'd prefer to aim for giving the end-user just one
> or a few options - nothing, errors only, or all debugging info that
> we care to insert in everyday kernels (which hopefully isn't too
> much, but let's a user figure out approximately where the problem
> happened, i.e. recreating a socket, opening a file, etc).  Otherwise

I'm ok with this nothing-error-all scheme.

Two questions are:

What's "reasonable debug info" ?

How does it differ than looking at the image (checkpoint/restart) at
the specific file position and error message ?

> too much debugging will (a) deter from the readability of the code
> (b) bulk up the kernel and (c) make the api cumberson (too many
> flags to checkpoint/restart userspace programs, and passing structs
> to syscall).
> 
> In fact I'm wondering whether for the shipped c/r kernel we want
> to have only ckpt_error()s and make sure we record the location in
> the restart file where we errored out.  The ckpt_write_err() info
> is already very helpful in tracking down checkpoint errors.  Of course
> this becomes a pain whenever we introduce a subte bug and have to
> hand-instrument everything.  And then I have to curse all the printks
> I have to add.  But it's not reasonabe to have every other line of
> c/r code be a debug statement either.

So you suggest ckpt_error() and ckpt_debug(). Fair enough.

Yet I'm not willing to give up my existing (too noisy for the user)
debug statements for a chance to curse at their removal.

So I suggest also _ckpt_debug(), only with CONFIG_CHECKPOINT_DEBUG.
They are there for developers, they can be used by a user who is
having an issue and works with a developer, and they prevent Serge
from cursing unnecessarily :p

(or can change the naming:  ckpt_error, ckpt_log, ckpt_debug, etc).

Oren.

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2009-10-26 23:39 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-10-21 21:05 [PATCH RFC] Send checkpoint and restart debug info to a log file (v2) Serge E. Hallyn
     [not found] ` <20091021210507.GA2098-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-21 21:05   ` [PATCH user-cr] Allow for logfile for kernel debug messages (v2) Serge E. Hallyn
     [not found]     ` <20091021210533.GA2165-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-21 21:46       ` Oren Laadan
     [not found]         ` <4ADF8140.3010102-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-10-21 22:02           ` Serge E. Hallyn
     [not found]             ` <20091021220245.GA8994-A9i7LUbDfNHQT0dZR+AlfA@public.gmane.org>
2009-10-21 22:18               ` Oren Laadan
     [not found]                 ` <4ADF88BD.20400-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-10-22  5:56                   ` Matt Helsley
2009-10-22  5:48           ` Matt Helsley
     [not found]             ` <20091022054853.GC7757-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2009-10-23 18:59               ` Oren Laadan
2009-10-21 22:03   ` [PATCH RFC] Send checkpoint and restart debug info to a log file (v2) Oren Laadan
     [not found]     ` <4ADF853F.6080807-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-10-21 22:49       ` Serge E. Hallyn
     [not found]         ` <20091021224922.GA5827-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-21 23:14           ` Oren Laadan
     [not found]             ` <4ADF95D0.8060806-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-10-22  0:51               ` Serge E. Hallyn
     [not found]                 ` <20091022005157.GA11608-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-22  6:04                   ` Matt Helsley
     [not found]                     ` <20091022060400.GE7757-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2009-10-23 18:48                       ` Oren Laadan
     [not found]                         ` <4AE1FA7A.5030702-RdfvBDnrOixBDgjK7y7TUQ@public.gmane.org>
2009-10-23 20:06                           ` Serge E. Hallyn
2009-10-26 21:52                           ` Serge E. Hallyn
     [not found]                             ` <20091026215238.GA10900-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-10-26 23:39                               ` Oren Laadan
2009-10-22 18:25               ` Serge E. Hallyn

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.