Linux Container Development
 help / color / mirror / Atom feed
From: Pavel Emelyanov <xemul-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
To: Nathan Lynch <ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org>,
	Oren Laadan <orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>,
	Daniel Lezcano <dlezcano-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org>,
	Serge Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>,
	Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: Cyrill Gorcunov
	<gorcunov-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>,
	Linux Containers
	<containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org>,
	Glauber Costa <glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
Subject: [TOOLS] To make use of the patches
Date: Fri, 15 Jul 2011 17:49:08 +0400	[thread overview]
Message-ID: <4E204554.6040901@parallels.com> (raw)
In-Reply-To: <4E204466.8010204-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 70 bytes --]

Additionally, the binfmt_img.h header from the kernel is required to build cr-restore.

[-- Attachment #2: cr-dump.c --]
[-- Type: text/plain, Size: 14228 bytes --]

#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#include <dirent.h>
#include <string.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <errno.h>
#include <linux/kdev_t.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/vfs.h>

#include <linux/types.h>
#include "img_structs.h"

/*
 * Descriptors of the per-task image files. All five are opened by
 * prep_img_files() and written by the dump_* helpers below.
 */
static int fdinfo_img;	/* fdinfo-<pid>.img: FDINFO_FD and FDINFO_MAP entries */
static int pages_img;	/* pages-<pid>.img: (vaddr, page data) records */
static int core_img;	/* core-<pid>.img: raw copy of /proc/<pid>/dump */
static int shmem_img;	/* shmem-<pid>.img: shmem_entry records */
static int pipes_img;	/* pipes-<pid>.img: pipes_entry records plus buffered data */

/*
 * Value compared against statfs f_type in dump_one_fd() to recognise
 * pipes; the bytes spell "PIPE" in ASCII.
 */
#define PIPEFS_MAGIC 0x50495045

/*
 * Write the 4-byte magic that opens an image file.
 * Returns 0 on success, 1 (after reporting) on a failed or short write.
 */
static int write_img_magic(int fd, __u32 magic, const char *what)
{
	if (write(fd, &magic, sizeof(magic)) != (ssize_t)sizeof(magic)) {
		fprintf(stderr, "Can't write %s magic\n", what);
		return 1;
	}

	return 0;
}

/*
 * Create the five image files for task @pid in the current directory and
 * store their descriptors in the *_img globals.
 *
 * Every file is created with O_EXCL, so an already existing image makes
 * the call fail. All files except core-<pid>.img start with a magic word.
 *
 * Returns 0 on success, 1 on failure. On failure the descriptors opened
 * so far are left open.
 */
static int prep_img_files(int pid)
{
	char name[64];

	snprintf(name, sizeof(name), "fdinfo-%d.img", pid);
	fdinfo_img = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
	if (fdinfo_img < 0) {
		perror("Can't open fdinfo");
		return 1;
	}
	if (write_img_magic(fdinfo_img, FDINFO_MAGIC, "fdinfo"))
		return 1;

	snprintf(name, sizeof(name), "pages-%d.img", pid);
	pages_img = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
	if (pages_img < 0) {
		/* the original reported this as "shmem", hiding which file failed */
		perror("Can't open pages");
		return 1;
	}
	if (write_img_magic(pages_img, PAGES_MAGIC, "pages"))
		return 1;

	/* the core image is a verbatim copy of /proc/<pid>/dump: no magic */
	snprintf(name, sizeof(name), "core-%d.img", pid);
	core_img = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
	if (core_img < 0) {
		perror("Can't open core");
		return 1;
	}

	snprintf(name, sizeof(name), "shmem-%d.img", pid);
	shmem_img = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
	if (shmem_img < 0) {
		perror("Can't open shmem");
		return 1;
	}
	if (write_img_magic(shmem_img, SHMEM_MAGIC, "shmem"))
		return 1;

	snprintf(name, sizeof(name), "pipes-%d.img", pid);
	pipes_img = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
	if (pipes_img < 0) {
		perror("Can't open pipes");
		return 1;
	}
	if (write_img_magic(pipes_img, PIPES_MAGIC, "pipes"))
		return 1;

	return 0;
}

/*
 * Stub called by dump_one_task() on its error paths; presumably meant to
 * remove the image files of @pid (not implemented yet).
 *
 * Because it does nothing, partially written *-<pid>.img files stay on
 * disk, and since prep_img_files() creates them with O_EXCL a later dump
 * of the same pid fails until they are removed by hand.
 */
static void kill_imgfiles(int pid)
{
	/* FIXME */
}

/*
 * Send SIGSTOP to task @pid.
 * Returns the result of kill(): 0 on success, -1 with errno set on error.
 */
static int stop_task(int pid)
{
	int rc;

	rc = kill(pid, SIGSTOP);
	return rc;
}

/*
 * Send SIGCONT to task @pid. A failure is reported on stderr but not
 * propagated to the caller.
 */
static void continue_task(int pid)
{
	int rc = kill(pid, SIGCONT);

	if (rc == 0)
		return;

	perror("Can't cont task");
}

/*
 * Shared scratch buffer used for /proc contents, link targets and lines
 * of the maps file. Several helpers overwrite it, so its contents do not
 * survive a call into another function of this file.
 */
static char big_tmp_str[PATH_MAX];

/*
 * Read position and open flags of descriptor @fd (its name inside
 * /proc/<pid>/fd) from /proc/<pid>/fdinfo/<fd>.
 *
 * @pos   receives the value of the "pos:" line
 * @flags receives the value of the "flags:" line (octal in the file)
 *
 * Returns 0 on success, 1 if the file can't be opened, read or parsed;
 * on failure @pos and @flags are not guaranteed to be set.
 */
static int read_fd_params(int pid, char *fd, unsigned long *pos, unsigned int *flags)
{
	char fd_str[128];
	char info[512];	/* only the first two lines are needed */
	ssize_t got;
	int ifd;

	snprintf(fd_str, sizeof(fd_str), "/proc/%d/fdinfo/%s", pid, fd);

	printf("\tGetting fdinfo for fd %s\n", fd);
	ifd = open(fd_str, O_RDONLY);
	if (ifd < 0) {
		perror("Can't open fdinfo");
		return 1;
	}

	got = read(ifd, info, sizeof(info) - 1);
	if (got < 0) {
		perror("Can't read fdinfo");
		close(ifd);
		return 1;
	}
	close(ifd);

	/* sscanf() needs a terminated string; read() does not provide one */
	info[got] = '\0';

	/* %lu matches the unsigned long the caller hands in */
	if (sscanf(info, "pos:\t%lu\nflags:\t%o\n", pos, flags) != 2) {
		fprintf(stderr, "Can't parse fdinfo for fd %s\n", fd);
		return 1;
	}

	return 0;
}

/*
 * Append one fdinfo_entry plus the path of the file behind @lfd to the
 * fdinfo image. The path is resolved through /proc/self/fd/<lfd>.
 *
 * @type    entry type (FDINFO_FD or FDINFO_MAP in this file)
 * @fd_name value stored in the entry's addr field: the fd number for
 *          files, the mapping start for mappings
 * @lfd     local descriptor referring to the file
 * @lclose  when non-zero, @lfd is closed before returning (on success
 *          and on failure)
 * @pos, @flags  file position and open flags stored in the entry
 *
 * Returns 0 on success, 1 on failure.
 */
static int dump_one_reg_file(int type, unsigned long fd_name, int lfd,
		int lclose, unsigned long pos, unsigned int flags)
{
	char fd_str[128];
	int len;
	int ret = 1;
	struct fdinfo_entry e;

	snprintf(fd_str, sizeof(fd_str), "/proc/self/fd/%d", lfd);
	len = readlink(fd_str, big_tmp_str, sizeof(big_tmp_str) - 1);
	if (len < 0) {
		perror("Can't readlink fd");
		goto out;
	}

	big_tmp_str[len] = '\0';
	printf("\tDumping path for %lx fd via self %d [%s]\n", fd_name, lfd, big_tmp_str);

	/* zero first so no uninitialized padding ends up in the image */
	memset(&e, 0, sizeof(e));
	e.type = type;
	e.addr = fd_name;
	e.len = len;
	e.pos = pos;
	e.flags = flags;

	if (write(fdinfo_img, &e, sizeof(e)) != (ssize_t)sizeof(e) ||
	    write(fdinfo_img, big_tmp_str, len) != len) {
		fprintf(stderr, "Can't write fdinfo entry\n");
		goto out;
	}

	ret = 0;
out:
	if (lclose)
		close(lfd);
	return ret;
}

/* Upper bound on the bytes tee() copies out of a pipe in dump_pipe_and_data() */
#define MAX_PIPE_BUF_SIZE	1024 /* FIXME - this is not so */
/*
 * Flag passed to tee() so that an empty pipe yields EAGAIN instead of
 * blocking. Defined locally, presumably because the libc headers only
 * expose it under _GNU_SOURCE -- TODO confirm.
 */
#define SPLICE_F_NONBLOCK	0x2

/*
 * Write pipe entry @e to the pipes image, followed by the data currently
 * buffered in the pipe behind @lfd.
 *
 * The data is duplicated with tee() into a temporary pipe, so the dumped
 * task's pipe keeps its contents, and is then spliced into the image.
 * e->bytes is set to the number of bytes captured (0 for an empty pipe).
 *
 * Returns 0 on success, 1 on failure. The temporary pipe is closed on
 * every path.
 */
static int dump_pipe_and_data(int lfd, struct pipes_entry *e)
{
	int steal_pipe[2];
	int bytes;
	int ret;
	int err = 1;

	printf("\tDumping data from pipe %x\n", e->pipeid);
	if (pipe(steal_pipe) < 0) {
		perror("Can't create pipe for stealing data");
		return 1;
	}

	bytes = tee(lfd, steal_pipe[1], MAX_PIPE_BUF_SIZE, SPLICE_F_NONBLOCK);
	if (bytes < 0) {
		if (errno != EAGAIN) {
			perror("Can't pick pipe data");
			goto out;
		}

		/* EAGAIN: nothing buffered in the pipe */
		bytes = 0;
	}

	e->bytes = bytes;
	if (write(pipes_img, e, sizeof(*e)) != (ssize_t)sizeof(*e)) {
		fprintf(stderr, "Can't write pipe entry\n");
		goto out;
	}

	if (bytes) {
		ret = splice(steal_pipe[0], NULL, pipes_img, NULL, bytes, 0);
		if (ret < 0) {
			perror("Can't push pipe data");
			goto out;
		}
		if (ret != bytes) {
			/* the entry already promised "bytes" of payload */
			fprintf(stderr, "Short splice of pipe data\n");
			goto out;
		}
	}

	err = 0;
out:
	close(steal_pipe[0]);
	close(steal_pipe[1]);
	return err;
}

/*
 * Dump one pipe end of the task.
 *
 * @fd    descriptor number inside the dumped task
 * @lfd   local descriptor opened on that pipe
 * @id    pipe identifier (the caller passes the inode number)
 * @flags open flags of the task's descriptor
 *
 * Descriptors with O_WRONLY set get an entry with no data; for the others
 * the buffered data is saved too via dump_pipe_and_data().
 *
 * Returns 0 on success, 1 on failure. @lfd is not closed here.
 */
static int dump_one_pipe(int fd, int lfd, unsigned int id, unsigned int flags)
{
	struct pipes_entry e;

	printf("\tDumping pipe %d/%x flags %x\n", fd, id, flags);

	/* zero first so no uninitialized padding ends up in the image */
	memset(&e, 0, sizeof(e));
	e.fd = fd;
	e.pipeid = id;
	e.flags = flags;

	if (flags & O_WRONLY) {
		e.bytes = 0;
		if (write(pipes_img, &e, sizeof(e)) != (ssize_t)sizeof(e)) {
			fprintf(stderr, "Can't write pipe entry\n");
			return 1;
		}
		return 0;
	}

	return dump_pipe_and_data(lfd, &e);
}

/*
 * Dump one descriptor of the task.
 *
 * @dir     descriptor of the task's /proc/<pid>/fd directory
 * @fd_name entry name inside that directory (the fd number as text)
 * @pos, @flags  values read from fdinfo for this descriptor
 *
 * Regular files go to the fdinfo image, pipes (FIFOs living on pipefs) to
 * the pipes image. Anything else is only tolerated on descriptors 0, 1
 * and 2, which are skipped.
 *
 * Returns 0 on success, 1 on failure. The locally opened descriptor is
 * closed on every path, except for regular files where
 * dump_one_reg_file() is asked to close it (lclose == 1).
 */
static int dump_one_fd(int dir, char *fd_name, unsigned long pos, unsigned int flags)
{
	int fd;
	int ret = 1;
	struct stat st_buf;
	struct statfs stfs_buf;

	printf("\tDumping fd %s\n", fd_name);
	fd = openat(dir, fd_name, O_RDONLY);
	if (fd == -1) {
		printf("Tried to openat %d/%d %s\n", getpid(), dir, fd_name);
		perror("Can't open fd");
		return 1;
	}

	if (fstat(fd, &st_buf) < 0) {
		perror("Can't stat one");
		goto out_close;
	}

	if (S_ISREG(st_buf.st_mode))
		return dump_one_reg_file(FDINFO_FD, atoi(fd_name), fd, 1, pos, flags);

	if (S_ISFIFO(st_buf.st_mode)) {
		if (fstatfs(fd, &stfs_buf) < 0) {
			perror("Can't statfs one");
			goto out_close;
		}

		if (stfs_buf.f_type == PIPEFS_MAGIC) {
			ret = dump_one_pipe(atoi(fd_name), fd, st_buf.st_ino, flags);
			goto out_close;
		}
	}

	if (!strcmp(fd_name, "0")) {
		printf("\tSkipping stdin\n");
		ret = 0;
		goto out_close;
	}

	if (!strcmp(fd_name, "1")) {
		printf("\tSkipping stdout\n");
		ret = 0;
		goto out_close;
	}

	if (!strcmp(fd_name, "2")) {
		printf("\tSkipping stderr\n");
		ret = 0;
		goto out_close;
	}

	fprintf(stderr, "Can't dump file %s of that type [%x]\n", fd_name,
			(unsigned int)st_buf.st_mode);

out_close:
	close(fd);
	return ret;
}

/*
 * Dump every open descriptor of task @pid by walking /proc/<pid>/fd.
 *
 * Returns 0 on success and non-zero on failure: -1 if the directory can't
 * be opened, 1 if a descriptor fails to dump. The directory stream is
 * closed on every path.
 */
static int dump_task_files(int pid)
{
	char pid_fd_dir[64];
	DIR *fd_dir;
	struct dirent *de;
	unsigned long pos;
	unsigned int flags;
	int ret = 0;

	printf("Dumping open files for %d\n", pid);

	snprintf(pid_fd_dir, sizeof(pid_fd_dir), "/proc/%d/fd", pid);
	fd_dir = opendir(pid_fd_dir);
	if (fd_dir == NULL) {
		perror("Can't open fd dir");
		return -1;
	}

	while ((de = readdir(fd_dir)) != NULL) {
		/* skip "." and ".." */
		if (de->d_name[0] == '.')
			continue;

		if (read_fd_params(pid, de->d_name, &pos, &flags) ||
		    dump_one_fd(dirfd(fd_dir), de->d_name, pos, flags)) {
			ret = 1;
			break;
		}
	}

	closedir(fd_dir);
	return ret;
}

/* Page size assumed by the dumper; hard-coded rather than queried at run time */
#define PAGE_SIZE	4096
/* Bit tested in each byte of the mincore() vector in dump_map_pages() */
#define PAGE_RSS	0x1

/*
 * Parse an unprefixed hexadecimal number from the start of @str.
 *
 * Digits of either case are accepted and parsing stops at the first
 * character that is not a hex digit. If @end is not NULL it receives a
 * pointer to that character. An input with no leading hex digit yields 0.
 * There is no overflow detection.
 */
static unsigned long rawhex(char *str, char **end)
{
	unsigned long value = 0;
	char *p;

	for (p = str; ; p++) {
		unsigned long digit;
		char c = *p;

		if (c >= '0' && c <= '9')
			digit = c - '0';
		else if (c >= 'a' && c <= 'f')
			digit = c - 'a' + 0xA;
		else if (c >= 'A' && c <= 'F')
			digit = c - 'A' + 0xA;
		else
			break;

		value = (value << 4) + digit;
	}

	if (end)
		*end = p;
	return value;
}

/*
 * Extract page offset and length from one line of /proc/<pid>/maps.
 *
 * @desc is expected to look like "<start>-<end> <perms> <pgoff> ...",
 * with the three numbers in hex. On return *@pgoff holds the third field
 * and *@len holds end - start.
 *
 * Any deviation (missing separators, start > end, length or offset not a
 * multiple of PAGE_SIZE) is treated as fatal: "BUG" is printed and the
 * process exits with status 1.
 */
static void map_desc_parm(char *desc, unsigned long *pgoff, unsigned long *len)
{
	char *s;
	unsigned long start, end;

	start = rawhex(desc, &s);
	if (*s != '-') {
		goto bug;
	}

	end = rawhex(s + 1, &s);
	if (*s != ' ') {
		goto bug;
	}

	/* skip the permissions field; a truncated line has no second space */
	s = strchr(s + 1, ' ');
	if (s == NULL) {
		goto bug;
	}

	*pgoff = rawhex(s + 1, &s);
	if (*s != ' ') {
		goto bug;
	}

	if (start > end)
		goto bug;

	*len = end - start;

	if (*len % PAGE_SIZE) {
		goto bug;
	}
	if (*pgoff % PAGE_SIZE) {
		goto bug;
	}

	return;
bug:
	fprintf(stderr, "BUG\n");
	exit(1);
}

/*
 * Save the pages of a mapping that mincore() reports as resident.
 *
 * @lfd   descriptor of the object backing the mapping
 * @start address of the mapping in the dumped task; used to compute the
 *        virtual address stored with every page
 * @pgoff offset of the mapping inside the object
 * @len   length of the mapping in bytes
 *
 * The object is mapped privately into this process. For each resident
 * page a 64-bit virtual address followed by PAGE_SIZE bytes of data is
 * appended to the pages image.
 *
 * Returns 0 on success, 1 on failure. The temporary mapping and the
 * mincore vector are released on every path.
 */
static int dump_map_pages(int lfd, unsigned long start, unsigned long pgoff, unsigned long len)
{
	unsigned long nrpages, pfn;
	unsigned char *mem;
	unsigned char *mc = NULL;
	void *map;
	int ret = 1;

	printf("\t\tDumping pages start %lx len %lx off %lx\n", start, len, pgoff);
	map = mmap(NULL, len, PROT_READ, MAP_FILE | MAP_PRIVATE, lfd, pgoff);
	if (map == MAP_FAILED) {
		perror("Can't map");
		return 1;
	}
	mem = map;

	nrpages = len / PAGE_SIZE;
	mc = malloc(nrpages);
	if (mc == NULL) {
		perror("Can't allocate mincore vector");
		goto out;
	}

	if (mincore(map, len, mc)) {
		perror("Can't mincore mapping");
		goto out;
	}

	/* pfn is unsigned long so pfn * PAGE_SIZE can't wrap at 4GB */
	for (pfn = 0; pfn < nrpages; pfn++) {
		__u64 vaddr;

		if (!(mc[pfn] & PAGE_RSS))
			continue;

		vaddr = start + pfn * PAGE_SIZE;
		if (write(pages_img, &vaddr, sizeof(vaddr)) != (ssize_t)sizeof(vaddr) ||
		    write(pages_img, mem + pfn * PAGE_SIZE, PAGE_SIZE) != PAGE_SIZE) {
			fprintf(stderr, "Can't write page data\n");
			goto out;
		}
	}

	ret = 0;
out:
	free(mc);
	munmap(map, len);
	return ret;
}

/*
 * Anonymous private mappings are not dumped here; the function only logs
 * the skip. @start is the mapping's start address as text. Returns 0.
 */
static int dump_anon_private_map(char *start)
{
	fprintf(stdout, "\tSkipping anon private mapping at %s\n", start);

	return 0;
}

/*
 * Dump an anonymous shared mapping: one shmem_entry goes to the shmem
 * image and the resident pages go to the pages image.
 *
 * @_start start address of the mapping as hex text
 * @mdesc  the mapping's line from /proc/<pid>/maps
 * @lfd    descriptor opened on the mapping; closed before returning
 * @st     stat of @lfd; its inode number becomes the shmem id
 *
 * Returns 0 on success, 1 on failure.
 */
static int dump_anon_shared_map(char *_start, char *mdesc, int lfd, struct stat *st)
{
	unsigned long pgoff, len;
	struct shmem_entry e;
	unsigned long start;
	int ret = 1;

	map_desc_parm(mdesc, &pgoff, &len);

	start = rawhex(_start, NULL);

	/* zero first so no uninitialized padding ends up in the image */
	memset(&e, 0, sizeof(e));
	e.start = start;
	e.end = start + len;
	e.shmid = st->st_ino;

	if (write(shmem_img, &e, sizeof(e)) != (ssize_t)sizeof(e)) {
		fprintf(stderr, "Can't write shmem entry\n");
		goto out;
	}

	if (dump_map_pages(lfd, start, pgoff, len))
		goto out;

	ret = 0;
out:
	close(lfd);
	return ret;
}

/*
 * File-backed shared mappings are not dumped; the skip is logged and the
 * descriptor @lfd opened on the mapping is closed. @mdesc is unused.
 * Returns 0.
 */
static int dump_file_shared_map(char *start, char *mdesc, int lfd)
{
	fprintf(stdout, "\tSkipping file shared mapping at %s\n", start);

	(void)close(lfd);

	return 0;
}

/*
 * Dump a file-backed private mapping: only the path of the backing file
 * is recorded, as an FDINFO_MAP entry keyed by the mapping start.
 *
 * @_start start address of the mapping as hex text
 * @mdesc  the mapping's maps line; parsed only to validate it
 * @lfd    descriptor opened on the mapping; closed before returning
 *
 * Returns 0 on success, 1 on failure.
 */
static int dump_file_private_map(char *_start, char *mdesc, int lfd)
{
	unsigned long pgoff, len;
	unsigned long start;
	int ret;

	/* exits the process if the line is malformed */
	map_desc_parm(mdesc, &pgoff, &len);

	start = rawhex(_start, NULL);

	/* lclose == 0: the descriptor is closed here on both outcomes */
	ret = dump_one_reg_file(FDINFO_MAP, start, lfd, 0, 0, O_RDONLY) ? 1 : 0;

	close(lfd);
	return ret;
}

/*
 * Dump one mapping described by a line of /proc/<pid>/maps.
 *
 * @mdesc   the maps line ("<start>-<end> <perms> ...")
 * @mfd_dir open /proc/<pid>/mfd directory; the mapping is opened there
 *          under its start address
 *
 * Dispatch:
 *  - no mfd entry (ENOENT): must be private, handled as anonymous private
 *  - device major 0: must be shared, handled as anonymous shared memory
 *  - otherwise: file private or file shared, by the 4th permission char
 *
 * Returns 0 on success, 1 on failure. The descriptor opened on the
 * mapping is closed here on the error paths and handed to the dump_*
 * helper otherwise.
 */
static int dump_one_mapping(char *mdesc, DIR *mfd_dir)
{
	char *flags, *dash;
	char map_start[32];
	size_t start_len;
	int lfd;
	struct stat st_buf;

	dash = strchr(mdesc, '-');
	flags = strchr(mdesc, ' ');
	/* need "<start>-" before the first space and 4 permission chars after it */
	if (dash == NULL || flags == NULL || dash > flags || strlen(flags + 1) < 4) {
		fprintf(stderr, "Bogus mapping [%s]\n", mdesc);
		return 1;
	}

	start_len = dash - mdesc;
	if (start_len >= sizeof(map_start)) {
		fprintf(stderr, "Bogus mapping [%s]\n", mdesc);
		return 1;
	}

	memcpy(map_start, mdesc, start_len);
	map_start[start_len] = '\0';
	flags++;

	printf("\tDumping %s\n", map_start);
	lfd = openat(dirfd(mfd_dir), map_start, O_RDONLY);
	if (lfd == -1) {
		if (errno != ENOENT) {
			perror("Can't open mapping");
			return 1;
		}

		if (flags[3] != 'p') {
			fprintf(stderr, "Bogus mapping [%s]\n", mdesc);
			return 1;
		}

		return dump_anon_private_map(map_start);
	}

	if (fstat(lfd, &st_buf) < 0) {
		perror("Can't stat mapping!");
		goto err_close;
	}

	if (!S_ISREG(st_buf.st_mode)) {
		/* not a syscall failure, so errno carries nothing useful */
		fprintf(stderr, "Can't handle non-regular mapping\n");
		goto err_close;
	}

	if (MAJOR(st_buf.st_dev) == 0) {
		if (flags[3] != 's') {
			fprintf(stderr, "Bogus mapping [%s]\n", mdesc);
			goto err_close;
		}

		/* FIXME - this can be tmpfs visible file mapping */
		return dump_anon_shared_map(map_start, mdesc, lfd, &st_buf);
	}

	if (flags[3] == 'p')
		return dump_file_private_map(map_start, mdesc, lfd);
	else
		return dump_file_shared_map(map_start, mdesc, lfd);

err_close:
	close(lfd);
	return 1;
}

/*
 * Dump the mappings of task @pid: every line of /proc/<pid>/maps is
 * handed to dump_one_mapping() together with the open /proc/<pid>/mfd
 * directory.
 *
 * Returns 0 on success and non-zero on failure: -1 if the mfd directory
 * can't be opened, 1 otherwise. Both streams are closed on every path.
 */
static int dump_task_ext_mm(int pid)
{
	char path[64];
	DIR *mfd_dir;
	FILE *maps;
	int ret = 0;

	printf("Dumping mappings for %d\n", pid);

	snprintf(path, sizeof(path), "/proc/%d/mfd", pid);
	mfd_dir = opendir(path);
	if (mfd_dir == NULL) {
		perror("Can't open mfd dir");
		return -1;
	}

	snprintf(path, sizeof(path), "/proc/%d/maps", pid);
	maps = fopen(path, "r");
	if (maps == NULL) {
		perror("Can't open maps file");
		closedir(mfd_dir);
		return 1;
	}

	while (fgets(big_tmp_str, sizeof(big_tmp_str), maps) != NULL) {
		if (dump_one_mapping(big_tmp_str, mfd_dir)) {
			ret = 1;
			break;
		}
	}

	fclose(maps);
	closedir(mfd_dir);
	return ret;
}

/*
 * Copy /proc/<pid>/dump verbatim into the core image.
 *
 * The file is read in 4096-byte chunks through an anonymous bounce
 * buffer, and short writes to the image are retried until the chunk is
 * fully written.
 *
 * Returns 0 on success, 1 on failure. The descriptor and the buffer are
 * released on every path.
 */
static int dump_task_state(int pid)
{
	char path[64];
	int dump_fd;
	int ret = 1;
	char *mem;
	void *map;

	printf("Dumping task image for %d\n", pid);
	snprintf(path, sizeof(path), "/proc/%d/dump", pid);
	dump_fd = open(path, O_RDONLY);
	if (dump_fd < 0) {
		perror("Can't open dump file");
		return 1;
	}

	/* fd -1 is the portable value for anonymous mappings */
	map = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
	if (map == MAP_FAILED) {
		perror("Can't get mem");
		close(dump_fd);
		return 1;
	}
	mem = map;

	while (1) {
		int r, w;

		r = read(dump_fd, mem, 4096);
		if (r == 0)
			break;
		if (r < 0) {
			perror("Can't read dump file");
			goto out;
		}

		w = 0;
		while (w < r) {
			int done;

			done = write(core_img, mem + w, r - w);
			if (done <= 0) {
				perror("Can't write core");
				goto out;
			}

			w += done;
		}
	}

	ret = 0;
out:
	munmap(map, 4096);
	close(dump_fd);
	return ret;
}

static int dump_one_task(int pid, int stop)
{
	printf("Dumping task %d\n", pid);

	if (prep_img_files(pid))
		return 1;

	if (stop && stop_task(pid))
		goto err_task;

	if (dump_task_files(pid))
		goto err;

	if (dump_task_ext_mm(pid))
		goto err;

	if (dump_task_state(pid))
		goto err;

	if (stop)
		continue_task(pid);

	printf("Dump is complete\n");
	return 0;

err:
	if (stop)
		continue_task(pid);
err_task:
	kill_imgfiles(pid);
	return 1;
}

/* Descriptor of pstree-<pid>.img, opened in dump_all_tasks() */
static int pstree_fd;
/*
 * Repeats the file-scope declaration of the scratch buffer made earlier
 * in this file (there sized with PATH_MAX).
 * NOTE(review): this only compiles if PATH_MAX is 4096 -- confirm.
 */
static char big_tmp_str[4096];
/* Pids collected by dump_pid_and_children(), in visiting order, and their count */
static int *pids, nr_pids;

/*
 * Return the children list of task @pid taken from the "Children:" line
 * of /proc/<pid>/status.
 *
 * The result is a malloc()ed string holding the text that follows the
 * tag and one separator character, with its last character (the newline
 * in a well-formed line) replaced by a space, so every pid in it is
 * followed by ' '. The caller frees it.
 *
 * Returns NULL if the file can't be opened, has no such line, or memory
 * is exhausted.
 */
static char *get_children_pids(int pid)
{
	FILE *f;
	size_t len;
	char *ret = NULL;
	char *tmp;

	snprintf(big_tmp_str, sizeof(big_tmp_str), "/proc/%d/status", pid);
	f = fopen(big_tmp_str, "r");
	if (f == NULL)
		return NULL;

	while ((fgets(big_tmp_str, sizeof(big_tmp_str), f)) != NULL) {
		if (strncmp(big_tmp_str, "Children:", 9))
			continue;

		/*
		 * Skip the tag and the one character after it, without
		 * stepping past the terminator of a line that ends right
		 * after the tag.
		 */
		tmp = big_tmp_str + 9;
		if (*tmp != '\0')
			tmp++;

		len = strlen(tmp);
		ret = malloc(len + 1);
		if (ret == NULL) {
			perror("Can't allocate children list");
			break;
		}

		memcpy(ret, tmp, len + 1);
		if (len)
			ret[len - 1] = ' ';
		break;
	}

	fclose(f);
	return ret;
}

/*
 * Record task @pid and, recursively, all of its descendants.
 *
 * For every task a pstree_entry (pid and number of children) followed by
 * the children's pids as 32-bit values is appended to the pstree image,
 * and the pid is appended to the global pids[] array.
 *
 * Returns 0 on success, 1 on failure. The children list is freed on
 * every path.
 */
static int dump_pid_and_children(int pid)
{
	struct pstree_entry e;
	char *chlist, *tmp, *tmp2;
	int *grown;
	int ret = 1;

	printf("\tReading %d children list\n", pid);
	chlist = get_children_pids(pid);
	if (chlist == NULL)
		return 1;

	printf("\t%d has children %s\n", pid, chlist);

	/* zero first so no uninitialized padding ends up in the image */
	memset(&e, 0, sizeof(e));
	e.pid = pid;
	e.nr_children = 0;

	/* keep the old array reachable if realloc() fails */
	grown = realloc(pids, (nr_pids + 1) * sizeof(int));
	if (grown == NULL) {
		perror("Can't grow pids array");
		goto out;
	}
	pids = grown;
	pids[nr_pids++] = e.pid;

	/* every pid in the list is followed by one space */
	tmp = chlist;
	while ((tmp = strchr(tmp, ' ')) != NULL) {
		tmp++;
		e.nr_children++;
	}

	if (write(pstree_fd, &e, sizeof(e)) != (ssize_t)sizeof(e)) {
		fprintf(stderr, "Can't write pstree entry\n");
		goto out;
	}

	tmp = chlist;
	while (1) {
		__u32 cpid;

		cpid = strtol(tmp, &tmp, 10);
		if (cpid == 0)
			break;
		if (*tmp != ' ') {
			fprintf(stderr, "Error in string with children!\n");
			goto out;
		}

		if (write(pstree_fd, &cpid, sizeof(cpid)) != (ssize_t)sizeof(cpid)) {
			fprintf(stderr, "Can't write child pid\n");
			goto out;
		}
		tmp++;
	}

	/* cut the list into single pids in place and descend into each */
	tmp = chlist;
	while ((tmp2 = strchr(tmp, ' ')) != NULL) {
		*tmp2 = '\0';
		if (dump_pid_and_children(atoi(tmp)))
			goto out;
		tmp = tmp2 + 1;
	}

	ret = 0;
out:
	free(chlist);
	return ret;
}

static int __dump_all_tasks(void)
{
	int i, pid;

	printf("Dumping tasks' images for");
	for (i = 0; i < nr_pids; i++)
		printf(" %d", pids[i]);
	printf("\n");

	printf("Stopping tasks\n");
	for (i = 0; i < nr_pids; i++)
		if (stop_task(pids[i]))
			goto err;

	for (i = 0; i < nr_pids; i++) {
		if (dump_one_task(pids[i], 0))
			goto err;
	}

	printf("Resuming tasks\n");
	for (i = 0; i < nr_pids; i++)
		continue_task(pids[i]);

	return 0;

err:
	for (i = 0; i < nr_pids; i++)
		continue_task(pids[i]);
	return 1;

}

/*
 * Dump the process tree rooted at @pid.
 *
 * First pstree-<pid>.img is created (O_EXCL) and filled with the tree
 * layout, which also collects all pids; then every collected task is
 * dumped via __dump_all_tasks().
 *
 * Returns 0 on success, 1 on failure. The pstree descriptor is closed on
 * every path.
 */
static int dump_all_tasks(int pid)
{
	__u32 type;

	pids = NULL;
	nr_pids = 0;

	printf("Dumping process tree, start from %d\n", pid);

	snprintf(big_tmp_str, sizeof(big_tmp_str), "pstree-%d.img", pid);
	pstree_fd = open(big_tmp_str, O_WRONLY | O_CREAT | O_EXCL, 0600);
	if (pstree_fd < 0) {
		perror("Can't create pstree");
		return 1;
	}

	type = PSTREE_MAGIC;
	if (write(pstree_fd, &type, sizeof(type)) != (ssize_t)sizeof(type)) {
		fprintf(stderr, "Can't write pstree magic\n");
		close(pstree_fd);
		return 1;
	}

	if (dump_pid_and_children(pid)) {
		close(pstree_fd);
		return 1;
	}

	close(pstree_fd);

	return __dump_all_tasks();
}

/*
 * Parse a strictly positive decimal pid.
 * Returns the pid, or -1 if @str is not a number, has trailing garbage,
 * is not positive or does not fit an int.
 */
static int parse_pid(const char *str)
{
	char *end;
	long val;

	errno = 0;
	val = strtol(str, &end, 10);
	if (errno || end == str || *end != '\0' || val <= 0 || val != (long)(int)val)
		return -1;

	return (int)val;
}

/*
 * Entry point: "-p <pid>" dumps a single task, "-t <pid>" dumps the tree
 * rooted at <pid>. Anything else prints the usage line and returns 1.
 *
 * The pid is validated because a non-numeric argument would otherwise
 * become pid 0, and kill(0, SIGSTOP) stops the caller's whole process
 * group.
 */
int main(int argc, char **argv)
{
	int pid;

	if (argc != 3)
		goto usage;
	if (argv[1][0] != '-')
		goto usage;

	pid = parse_pid(argv[2]);
	if (pid < 0)
		goto usage;

	if (argv[1][1] == 'p')
		return dump_one_task(pid, 1);
	if (argv[1][1] == 't')
		return dump_all_tasks(pid);

usage:
	printf("Usage: %s (-p|-t) <pid>\n", argv[0]);
	return 1;
}

[-- Attachment #3: cr-restore.c --]
[-- Type: text/plain, Size: 19947 bytes --]

#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#include <dirent.h>
#include <string.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <errno.h>
#include <linux/kdev_t.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/sendfile.h>

#define PAGE_SIZE	4096

#include <linux/types.h>
#include "img_structs.h"
#include "binfmt_img.h"

/*
 * One already opened file that backs a file mapping of the task being
 * restored. Entries are created by open_fmap() and consumed by
 * try_fixup_file_map().
 */
struct fmap_fd {
	unsigned long start;	/* start address of the mapping (fdinfo entry addr) */
	int fd;			/* descriptor opened on the backing file */
	struct fmap_fd *next;	/* next entry of the singly linked list */
};

/* Head of the list of pending file mappings */
static struct fmap_fd *fmap_fds;

/*
 * One shared memory region seen in any shmem-<pid>.img of the tree.
 * Filled in by try_to_add_shmem().
 */
struct shmem_info {
	unsigned long start;	/* start address, from shmem_entry.start */
	unsigned long end;	/* end address, from shmem_entry.end */
	unsigned long id;	/* region identifier, from shmem_entry.shmid */
	int pid;		/* lowest image pid that references the region */
	int real_pid;		/* initialised to 0; waited on in shmem_wait_and_open() */
};

/* Array of regions, kept in a MAP_SHARED page set up by prepare_shared(), and its length */
static struct shmem_info *shmems;
static int nr_shmems;

/*
 * One pipe seen in any pipes-<pid>.img of the tree. Filled in by
 * try_to_add_pipe() and completed by create_pipe().
 */
struct pipes_info {
	unsigned int id;	/* pipe identifier, from pipes_entry.pipeid */
	int pid;		/* lowest image pid referencing the pipe; that task creates it */
	int real_pid;		/* getpid() of the creator, 0 until the pipe exists */
	int read_fd;		/* creator's descriptor of the read end */
	int write_fd;		/* creator's descriptor of the write end */
	int users;		/* number of pipe entries that reference this pipe */
};

/* Array of pipes, kept in a MAP_SHARED page set up by prepare_shared(), and its length */
static struct pipes_info *pipes;
static int nr_pipes;

/* Print start address, id and owning pid of every collected shmem region. */
static void show_saved_shmems(void)
{
	int idx = 0;

	printf("\tSaved shmems:\n");
	while (idx < nr_shmems) {
		printf("\t\t%016lx %lx %d\n", shmems[idx].start, shmems[idx].id, shmems[idx].pid);
		idx++;
	}
}

/* Print id and owning pid of every collected pipe. */
static void show_saved_pipes(void)
{
	int idx = 0;

	printf("\tSaved pipes:\n");
	while (idx < nr_pipes) {
		printf("\t\t%x -> %d\n", pipes[idx].id, pipes[idx].pid);
		idx++;
	}
}

/*
 * Find the collected shmem region with identifier @id that contains
 * @addr. Returns NULL if there is none.
 *
 * The region is treated as [start, end): the dumper stores end as
 * start + len, so an address equal to end belongs to whatever mapping
 * follows and must not match.
 */
static struct shmem_info *search_shmem(unsigned long addr, unsigned long id)
{
	int i;

	for (i = 0; i < nr_shmems; i++) {
		struct shmem_info *si;

		si = shmems + i;
		if (si->start <= addr && si->end > addr && si->id == id)
			return si;
	}

	return NULL;
}

/* Look up the collected pipe with identifier @pipeid; NULL if unknown. */
static struct pipes_info *search_pipes(unsigned int pipeid)
{
	int idx = 0;

	while (idx < nr_pipes) {
		if (pipes[idx].id == pipeid)
			return &pipes[idx];
		idx++;
	}

	return NULL;
}

/* Set real_pid to @rpid on every shmem region whose image pid is @vpid. */
static void shmem_update_real_pid(int vpid, int rpid)
{
	int idx = 0;

	while (idx < nr_shmems) {
		if (shmems[idx].pid == vpid)
			shmems[idx].real_pid = rpid;
		idx++;
	}
}

/*
 * Wait until the owner of shmem region @si has mapped it, then open the
 * region through /proc/<real_pid>/mfd/0x<start>.
 *
 * Polls si->real_pid until it becomes non-zero, then retries open() with
 * a growing delay for as long as it fails with ENOENT.
 *
 * Returns the opened descriptor (any value >= 0), or -1 on an error
 * other than ENOENT.
 */
static int shmem_wait_and_open(struct shmem_info *si)
{
	/* FIXME - not good */
	char path[128];
	unsigned long time = 1000;

	sleep(1);

	while (si->real_pid == 0)
		usleep(time);

	sprintf(path, "/proc/%d/mfd/0x%lx", si->real_pid, si->start);
	while (1) {
		int ret;

		ret = open(path, O_RDWR);
		/* 0 is a valid descriptor too; only negative values are failures */
		if (ret >= 0)
			return ret;

		if (errno != ENOENT) {
			perror("     Can't stat shmem");
			return -1;
		}

		printf("Waiting for [%s] to appear\n", path);
		if (time < 20000000)
			time <<= 1;
		usleep(time);
	}
}

static int try_to_add_shmem(int pid, struct shmem_entry *e)
{
	int i;

	for (i = 0; i < nr_shmems; i++) {
		if (shmems[i].start != e->start || shmems[i].id != e->shmid)
			continue;

		if (shmems[i].end != e->end) {
			printf("Bogus shmem\n");
			return 1;
		}

		if (shmems[i].pid > pid)
			shmems[i].pid = pid;

		return 0;
	}

	if ((nr_shmems + 1) * sizeof(struct shmem_info) >= 4096) {
		printf("OOM storing shmems\n");
		return 1;
	}

	shmems[nr_shmems].start = e->start;
	shmems[nr_shmems].end = e->end;
	shmems[nr_shmems].id = e->shmid;
	shmems[nr_shmems].pid = pid;
	shmems[nr_shmems].real_pid = 0;
	nr_shmems++;

	return 0;
}

static int try_to_add_pipe(int pid, struct pipes_entry *e, int p_fd)
{
	int i;

	for (i = 0; i < nr_pipes; i++) {
		if (pipes[i].id != e->pipeid)
			continue;

		if (pipes[i].pid > pid)
			pipes[i].pid = pid;
		pipes[i].users++;

		return 0;
	}

	if ((nr_pipes + 1) * sizeof(struct pipes_info) >= 4096) {
		printf("OOM storing pipes\n");
		return 1;
	}

	pipes[nr_pipes].id = e->pipeid;
	pipes[nr_pipes].pid = pid;
	pipes[nr_pipes].real_pid = 0;
	pipes[nr_pipes].read_fd = 0;
	pipes[nr_pipes].write_fd = 0;
	pipes[nr_pipes].users = 1;
	nr_pipes++;

	return 0;
}

/*
 * Read shmem-<pid>.img and register each of its entries in the global
 * shmems[] table.
 *
 * Returns 0 on success, 1 on failure (missing file, bad magic, truncated
 * entry, rejected entry). The image descriptor is closed on every path.
 */
static int prepare_shmem_pid(int pid)
{
	char path[64];
	int sh_fd;
	int ret = 1;
	__u32 type = 0;

	snprintf(path, sizeof(path), "shmem-%d.img", pid);
	sh_fd = open(path, O_RDONLY);
	if (sh_fd < 0) {
		perror("Can't open shmem info");
		return 1;
	}

	if (read(sh_fd, &type, sizeof(type)) != (ssize_t)sizeof(type) ||
	    type != SHMEM_MAGIC) {
		/* not necessarily a syscall failure, so don't use perror() */
		fprintf(stderr, "Bad shmem magic\n");
		goto out;
	}

	while (1) {
		struct shmem_entry e;
		int got;

		got = read(sh_fd, &e, sizeof(e));
		if (got == 0)
			break;
		if (got != (int)sizeof(e)) {
			if (got < 0)
				perror("Can't read shmem entry");
			else
				fprintf(stderr, "Can't read shmem entry\n");
			goto out;
		}

		if (try_to_add_shmem(pid, &e))
			goto out;
	}

	ret = 0;
out:
	close(sh_fd);
	return ret;
}

/*
 * Read pipes-<pid>.img and register each of its entries in the global
 * pipes[] table. The pipe data that follows an entry is skipped.
 *
 * Returns 0 on success, 1 on failure (missing file, bad magic, truncated
 * entry, full table). The image descriptor is closed on every path.
 */
static int prepare_pipes_pid(int pid)
{
	char path[64];
	int p_fd;
	int ret = 1;
	__u32 type = 0;

	snprintf(path, sizeof(path), "pipes-%d.img", pid);
	p_fd = open(path, O_RDONLY);
	if (p_fd < 0) {
		perror("Can't open pipes image");
		return 1;
	}

	if (read(p_fd, &type, sizeof(type)) != (ssize_t)sizeof(type) ||
	    type != PIPES_MAGIC) {
		/* not necessarily a syscall failure, so don't use perror() */
		fprintf(stderr, "Bad pipes magic\n");
		goto out;
	}

	while (1) {
		struct pipes_entry e;
		int got;

		got = read(p_fd, &e, sizeof(e));
		if (got == 0)
			break;
		if (got != (int)sizeof(e)) {
			fprintf(stderr, "Read pipes for %s failed %d of %d read\n",
					path, got, (int)sizeof(e));
			if (got < 0)
				perror("Can't read pipes entry");
			goto out;
		}

		if (try_to_add_pipe(pid, &e, p_fd))
			goto out;

		/* step over the pipe data stored after the entry */
		lseek(p_fd, e.bytes, SEEK_CUR);
	}

	ret = 0;
out:
	close(p_fd);
	return ret;
}

static int prepare_shared(int ps_fd)
{
	printf("Preparing info about shared resources\n");

	nr_shmems = 0;
	shmems = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
	if (shmems == MAP_FAILED) {
		perror("Can't map shmems");
		return 1;
	}

	pipes = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
	if (pipes == MAP_FAILED) {
		perror("Can't map pipes");
		return 1;
	}

	while (1) {
		struct pstree_entry e;
		int ret;

		ret = read(ps_fd, &e, sizeof(e));
		if (ret == 0)
			break;

		if (ret != sizeof(e)) {
			perror("Can't read ps");
			return 1;
		}

		if (prepare_shmem_pid(e.pid))
			return 1;

		if (prepare_pipes_pid(e.pid))
			return 1;

		lseek(ps_fd, e.nr_children * sizeof(__u32), SEEK_CUR);
	}

	lseek(ps_fd, sizeof(__u32), SEEK_SET);

	show_saved_shmems();
	show_saved_pipes();

	return 0;
}

/*
 * Unlink and return the first pending file mapping whose start address
 * equals @start, or NULL if there is none. The caller owns the result.
 */
static struct fmap_fd *pop_fmap_fd(unsigned long start)
{
	struct fmap_fd **link = &fmap_fds;
	struct fmap_fd *cur;

	while ((cur = *link) != NULL) {
		if (cur->start == start) {
			*link = cur->next;
			return cur;
		}

		link = &cur->next;
	}

	return NULL;
}

/*
 * Open the file described by fdinfo entry @fe.
 *
 * The path (fe->len bytes, not terminated) is read from the image
 * descriptor @fd, the file is opened with fe->flags and positioned at
 * fe->pos.
 *
 * Returns the new descriptor, or -1 on failure. A path length that does
 * not fit the local buffer is rejected, because the length comes
 * straight from the image file.
 */
static int open_fe_fd(struct fdinfo_entry *fe, int fd)
{
	char path[PATH_MAX];
	ssize_t want;
	int tmp;

	if ((size_t)fe->len >= sizeof(path)) {
		fprintf(stderr, "Path in image is too long\n");
		return -1;
	}

	want = fe->len;
	if (read(fd, path, want) != want) {
		fprintf(stderr, "Error reading path\n");
		return -1;
	}

	path[want] = '\0';

	tmp = open(path, fe->flags);
	if (tmp < 0) {
		perror("Can't open file");
		return -1;
	}

	lseek(tmp, fe->pos, SEEK_SET);

	return tmp;
}

/*
 * Move descriptor @old_fd to number @new_fd.
 *
 * If they differ, @old_fd is duplicated onto @new_fd and then closed.
 * Returns @new_fd on success or the negative result of dup2() on
 * failure, in which case @old_fd is left open.
 */
static int reopen_fd(int old_fd, int new_fd)
{
	int rc;

	if (old_fd == new_fd)
		return new_fd;

	rc = dup2(old_fd, new_fd);
	if (rc < 0)
		return rc;

	close(old_fd);
	return new_fd;
}

/*
 * Restore one plain file descriptor described by @fe.
 *
 * @cfd points at the descriptor of the fdinfo image being read. If the
 * image occupies the very number the file has to get, the image is
 * first moved away with dup() and *@cfd is updated. The file is then
 * opened and moved to the number stored in fe->addr. @pid is unused.
 *
 * Returns 0 on success, 1 on failure.
 */
static int open_fd(int pid, struct fdinfo_entry *fe, int *cfd)
{
	int target = (int)fe->addr;
	int opened;

	if (target == *cfd) {
		int moved = dup(*cfd);

		if (moved < 0) {
			perror("Can't dup file");
			return 1;
		}

		*cfd = moved;
	}

	opened = open_fe_fd(fe, *cfd);
	if (opened < 0)
		return 1;

	if (reopen_fd(opened, target) < 0) {
		perror("Can't dup");
		return 1;
	}

	return 0;
}

/*
 * Open the file behind a file mapping described by @fe and queue it on
 * the fmap_fds list, keyed by the mapping start (fe->addr), for
 * try_fixup_file_map().
 *
 * @fd is the fdinfo image descriptor the path is read from.
 *
 * Returns 0 on success, 1 on failure.
 */
static int open_fmap(int pid, struct fdinfo_entry *fe, int fd)
{
	int tmp;
	struct fmap_fd *new;

	tmp = open_fe_fd(fe, fd);
	if (tmp < 0)
		return 1;

	printf("%d:\t\tWill map %lx to %d\n", pid, (unsigned long)fe->addr, tmp);
	new = malloc(sizeof(*new));
	if (new == NULL) {
		perror("Can't allocate file map entry");
		close(tmp);
		return 1;
	}

	new->start = fe->addr;
	new->fd = tmp;
	new->next = fmap_fds;
	fmap_fds = new;

	return 0;
}

/*
 * Restore the files of task @pid from fdinfo-<pid>.img.
 *
 * FDINFO_FD entries are reopened at their original descriptor number;
 * FDINFO_MAP entries are opened and queued for the later fixup of file
 * mappings. The image descriptor may be moved by open_fd() when it
 * collides with a number to restore.
 *
 * Returns 0 on success, 1 on failure. The image descriptor is closed on
 * every path.
 */
static int prepare_fds(int pid)
{
	__u32 mag = 0;
	char path[64];
	int fdinfo_fd;
	int ret = 1;

	printf("%d: Opening files\n", pid);

	snprintf(path, sizeof(path), "fdinfo-%d.img", pid);
	fdinfo_fd = open(path, O_RDONLY);
	if (fdinfo_fd < 0) {
		perror("Can't open fdinfo");
		return 1;
	}

	if (read(fdinfo_fd, &mag, sizeof(mag)) != (ssize_t)sizeof(mag) ||
	    mag != FDINFO_MAGIC) {
		fprintf(stderr, "Bad file\n");
		goto out;
	}

	while (1) {
		int got;
		struct fdinfo_entry fe;

		got = read(fdinfo_fd, &fe, sizeof(fe));
		if (got == 0) {
			ret = 0;
			break;
		}

		if (got < 0) {
			perror("Can't read file");
			break;
		}
		if (got != (int)sizeof(fe)) {
			fprintf(stderr, "Error reading\n");
			break;
		}

		printf("\t%d: Got fd for %lx type %d namelen %d\n", pid,
				(unsigned long)fe.addr, fe.type, fe.len);

		if (fe.type == FDINFO_FD) {
			if (open_fd(pid, &fe, &fdinfo_fd))
				break;
		} else if (fe.type == FDINFO_MAP) {
			if (open_fmap(pid, &fe, fdinfo_fd))
				break;
		} else {
			fprintf(stderr, "Some bullshit in a file\n");
			break;
		}
	}

out:
	close(fdinfo_fd);
	return ret;
}

/*
 * Address range to shmem identifier mapping of the task being restored.
 * Built by prepare_shmem() from the task's own shmem image.
 */
struct shmem_to_id {
	unsigned long addr;	/* start of the region, from shmem_entry.start */
	unsigned long end;	/* end of the region, from shmem_entry.end */
	unsigned long id;	/* identifier, from shmem_entry.shmid */
	struct shmem_to_id *next;	/* next element of the list */
};

/* Head of this task's region list */
static struct shmem_to_id *my_shmem_ids;

/*
 * Return the shmem identifier of the region of this task that contains
 * @addr, or 0 if @addr is not inside any shared region.
 *
 * Regions are treated as [addr, end): the dumper stores end as
 * start + len, so a mapping that begins exactly at the end of a shared
 * region must not be taken for part of it.
 */
static unsigned long find_shmem_id(unsigned long addr)
{
	struct shmem_to_id *si;

	for (si = my_shmem_ids; si != NULL; si = si->next)
		if (si->addr <= addr && si->end > addr)
			return si->id;

	return 0;
}

/*
 * Remember shmem entry @e in this task's my_shmem_ids list.
 *
 * The function has no way to report failure, so running out of memory
 * terminates the process instead of dereferencing NULL.
 */
static void save_shmem_id(struct shmem_entry *e)
{
	struct shmem_to_id *si;

	si = malloc(sizeof(*si));
	if (si == NULL) {
		perror("Can't allocate shmem id");
		exit(1);
	}

	si->addr = e->start;
	si->end = e->end;
	si->id = e->shmid;
	si->next = my_shmem_ids;
	my_shmem_ids = si;
}

/*
 * Load shmem-<pid>.img of the task being restored into its private
 * address-to-id list (see save_shmem_id()).
 *
 * Returns 0 on success, 1 on failure (missing file, bad magic, truncated
 * entry). The image descriptor is closed on every path.
 */
static int prepare_shmem(int pid)
{
	char path[64];
	int sh_fd;
	int ret = 1;
	__u32 type = 0;

	snprintf(path, sizeof(path), "shmem-%d.img", pid);
	sh_fd = open(path, O_RDONLY);
	if (sh_fd < 0) {
		perror("Can't open shmem info");
		return 1;
	}

	if (read(sh_fd, &type, sizeof(type)) != (ssize_t)sizeof(type) ||
	    type != SHMEM_MAGIC) {
		/* not necessarily a syscall failure, so don't use perror() */
		fprintf(stderr, "Bad shmem magic\n");
		goto out;
	}

	while (1) {
		struct shmem_entry e;
		int got;

		got = read(sh_fd, &e, sizeof(e));
		if (got == 0)
			break;
		if (got != (int)sizeof(e)) {
			if (got < 0)
				perror("Can't read shmem entry");
			else
				fprintf(stderr, "Can't read shmem entry\n");
			goto out;
		}

		save_shmem_id(&e);
	}

	ret = 0;
out:
	close(sh_fd);
	return ret;
}

/*
 * If a file was opened earlier for the mapping starting at vi->start,
 * store its descriptor in the vma record and rewrite that record in the
 * image.
 *
 * @fd is the image descriptor, positioned right behind the record that
 * was read into @vi; it is positioned there again after the rewrite.
 *
 * Returns 0 on success (including "nothing to fix"), 1 on failure.
 */
static int try_fixup_file_map(int pid, struct binfmt_vma_image *vi, int fd)
{
	struct fmap_fd *fmfd;
	int ret = 0;

	fmfd = pop_fmap_fd(vi->start);
	if (fmfd == NULL)
		return 0;

	printf("%d: Fixing %lx vma to %d fd\n", pid, vi->start, fmfd->fd);

	/* negate as off_t: -sizeof() is a huge unsigned value */
	if (lseek(fd, -(off_t)sizeof(*vi), SEEK_CUR) < 0) {
		perror("Can't seek img");
		ret = 1;
		goto out;
	}

	vi->fd = fmfd->fd;
	if (write(fd, vi, sizeof(*vi)) != (ssize_t)sizeof(*vi)) {
		perror("Can't write img");
		ret = 1;
	}

out:
	free(fmfd);
	return ret;
}

/*
 * If the vma in @vi is a shared memory region owned by another task,
 * wait for the owner to set it up, open it, store the descriptor in the
 * vma record and rewrite that record in the image.
 *
 * @fd is the image descriptor, positioned right behind the record that
 * was read into @vi.
 *
 * Returns 0 on success (including "not shared" and "owned by this
 * task"), 1 on failure.
 */
static int try_fixup_shared_map(int pid, struct binfmt_vma_image *vi, int fd)
{
	struct shmem_info *si;
	unsigned long id;

	id = find_shmem_id(vi->start);
	if (id == 0)
		return 0;

	si = search_shmem(vi->start, id);
	printf("%d: Search for %016lx shmem %p/%d\n", pid, vi->start, (void *)si, si ? si->pid : -1);

	if (si == NULL) {
		fprintf(stderr, "Can't find my shmem %016lx\n", vi->start);
		return 1;
	}

	if (si->pid != pid) {
		int sh_fd;

		sh_fd = shmem_wait_and_open(si);
		/* the result to check is sh_fd, not the image descriptor */
		if (sh_fd < 0) {
			fprintf(stderr, "Can't open shmem\n");
			return 1;
		}

		printf("%d: Fixing %lx vma to %lx/%d shmem -> %d\n", pid, vi->start, si->id, si->pid, sh_fd);

		/* negate as off_t: -sizeof() is a huge unsigned value */
		if (lseek(fd, -(off_t)sizeof(*vi), SEEK_CUR) < 0) {
			perror("Can't seek img");
			return 1;
		}

		vi->fd = sh_fd;
		if (write(fd, vi, sizeof(*vi)) != (ssize_t)sizeof(*vi)) {
			perror("Can't write img");
			return 1;
		}
	}

	return 0;
}

static int fixup_vma_fds(int pid, int fd)
{
	lseek(fd, sizeof(struct binfmt_img_header) +
			sizeof(struct binfmt_regs_image) +
			sizeof(struct binfmt_mm_image), SEEK_SET);

	while (1) {
		struct binfmt_vma_image vi;

		if (read(fd, &vi, sizeof(vi)) != sizeof(vi)) {
			perror("Can't read");
			return 1;
		}

		if (vi.start == 0 && vi.end == 0)
			return 0;

		printf("%d: Fixing %016lx-%016lx %016lx vma\n", pid, vi.start, vi.end, vi.pgoff);
		if (try_fixup_file_map(pid, &vi, fd))
			return 1;

		if (try_fixup_shared_map(pid, &vi, fd))
			return 1;
	}
}

/*
 * Decide whether the page at @vaddr has to be copied into the image of
 * task @pid.
 *
 * Pages outside shared memory are always restored. Pages of a shared
 * region are restored only by the task that owns the region. A region
 * known to this task but missing from the global table is restored as
 * well rather than dereferencing NULL.
 *
 * Returns non-zero if the page should be restored.
 */
static inline int should_restore_page(int pid, unsigned long vaddr)
{
	struct shmem_info *si;
	unsigned long id;

	id = find_shmem_id(vaddr);
	if (id == 0)
		return 1;

	si = search_shmem(vaddr, id);
	if (si == NULL)
		return 1;

	return si->pid == pid;
}

/*
 * Append the dumped pages of task @pid to its execution image @fd.
 *
 * The image is expected to end with one binfmt_page_image record whose
 * address is 0. That terminator is overwritten with (vaddr, page) pairs
 * taken from pages-<pid>.img, skipping pages of shared regions owned by
 * other tasks, and a new zero address is written at the end.
 *
 * Returns 0 on success, 1 on failure. The pages image is closed on every
 * path.
 */
static int fixup_pages_data(int pid, int fd)
{
	char path[128];
	int shfd;
	int ret = 1;
	__u32 mag = 0;
	__u64 vaddr = 0;
	/* negate as off_t: -sizeof() is a huge unsigned value */
	const off_t tail = -(off_t)sizeof(struct binfmt_page_image);

	snprintf(path, sizeof(path), "pages-%d.img", pid);
	shfd = open(path, O_RDONLY);
	if (shfd < 0) {
		perror("Can't open shmem image");
		return 1;
	}

	if (read(shfd, &mag, sizeof(mag)) != (ssize_t)sizeof(mag) ||
	    mag != PAGES_MAGIC) {
		fprintf(stderr, "Bad shmem image\n");
		goto out;
	}

	if (lseek(fd, tail, SEEK_END) < 0 ||
	    read(fd, &vaddr, sizeof(vaddr)) != (ssize_t)sizeof(vaddr)) {
		fprintf(stderr, "Can't read image terminator\n");
		goto out;
	}
	if (vaddr != 0) {
		printf("SHIT %lx\n", (unsigned long)vaddr);
		goto out;
	}
	if (lseek(fd, tail, SEEK_END) < 0) {
		perror("Can't seek image");
		goto out;
	}

	while (1) {
		ssize_t got;

		got = read(shfd, &vaddr, sizeof(vaddr));
		if (got == 0)
			break;

		if (got != (ssize_t)sizeof(vaddr)) {
			perror("Can't read vaddr");
			goto out;
		}

		if (vaddr == 0)
			break;

		if (!should_restore_page(pid, vaddr)) {
			lseek(shfd, PAGE_SIZE, SEEK_CUR);
			continue;
		}

		if (write(fd, &vaddr, sizeof(vaddr)) != (ssize_t)sizeof(vaddr)) {
			fprintf(stderr, "Can't write page address\n");
			goto out;
		}
		if (sendfile(fd, shfd, NULL, PAGE_SIZE) != PAGE_SIZE) {
			fprintf(stderr, "Can't copy page data\n");
			goto out;
		}
	}

	vaddr = 0;
	if (write(fd, &vaddr, sizeof(vaddr)) != (ssize_t)sizeof(vaddr)) {
		fprintf(stderr, "Can't write image terminator\n");
		goto out;
	}

	ret = 0;
out:
	close(shfd);
	return ret;
}

/*
 * Finish the execution image @fd of task @pid: patch the descriptors in
 * its vma records, append the page data and close the image.
 *
 * Returns 0 on success. On failure 1 is returned and @fd is left open.
 */
static int prepare_image_maps(int fd, int pid)
{
	printf("%d: Fixing maps before executing image\n", pid);

	if (fixup_vma_fds(pid, fd) || fixup_pages_data(pid, fd))
		return 1;

	close(fd);

	return 0;
}

/*
 * Turn core-<pid>.img into an executable image and exec it.
 *
 * The core image is copied to core-<pid>.img.out (created with O_EXCL),
 * the copy is made executable and completed by prepare_image_maps(), and
 * finally executed with execl().
 *
 * Returns only on failure, with a non-zero value.
 */
static int execute_image(int pid)
{
	char path[128];
	int fd, fd_new;
	struct stat buf;
	off_t left;

	snprintf(path, sizeof(path), "core-%d.img", pid);
	fd = open(path, O_RDONLY);
	if (fd < 0) {
		perror("Can't open exec image");
		return 1;
	}

	if (fstat(fd, &buf)) {
		perror("Can't stat");
		close(fd);
		return 1;
	}

	snprintf(path, sizeof(path), "core-%d.img.out", pid);
	fd_new = open(path, O_RDWR | O_CREAT | O_EXCL, 0700);
	if (fd_new < 0) {
		perror("Can't open new image");
		close(fd);
		return 1;
	}

	printf("%d: Preparing execution image\n", pid);

	/* sendfile() may move less than asked for; loop until all is copied */
	left = buf.st_size;
	while (left > 0) {
		ssize_t sent;

		sent = sendfile(fd_new, fd, NULL, left);
		if (sent <= 0) {
			perror("Can't copy exec image");
			close(fd);
			close(fd_new);
			return 1;
		}

		left -= sent;
	}
	close(fd);

	if (fchmod(fd_new, 0700)) {
		perror("Can't prepare exec image");
		close(fd_new);
		return 1;
	}

	/* closes fd_new on success */
	if (prepare_image_maps(fd_new, pid))
		return 1;

	printf("%d/%d EXEC IMAGE\n", pid, getpid());
	/* the sentinel of a variadic call has to be a real char pointer */
	return execl(path, path, (char *)NULL);
}

/*
 * Create the pipe described by @e on behalf of its owner task.
 *
 * The saved pipe data (e->bytes bytes at the current position of the
 * pipes image @pipes_fd) is spliced into the new pipe. Both ends are
 * then published through @pi, and the function polls pi->users until it
 * drops to 1, i.e. until all other users have attached. Finally the end
 * this task needs is moved to descriptor e->fd and the other is closed.
 *
 * Returns 0 on success, 1 on failure.
 */
static int create_pipe(int pid, struct pipes_entry *e, struct pipes_info *pi, int pipes_fd)
{
	int pfd[2], tmp;
	unsigned long time = 1000;

	printf("\t%d: Creating pipe %x\n", pid, e->pipeid);

	if (pipe(pfd) < 0) {
		perror("Can't create pipe");
		return 1;
	}

	if (e->bytes) {
		printf("\t%d: Splicing data to %d\n", pid, pfd[1]);

		tmp = splice(pipes_fd, NULL, pfd[1], NULL, e->bytes, 0);
		if (tmp != e->bytes) {
			/* casts make the arguments match %ld */
			fprintf(stderr, "Wanted to restore %ld bytes, but got %ld\n",
					(long)e->bytes, (long)tmp);
			if (tmp < 0)
				perror("Error splicing data");
			close(pfd[0]);
			close(pfd[1]);
			return 1;
		}
	}

	pi->read_fd = pfd[0];
	pi->write_fd = pfd[1];
	/* written last: a non-zero real_pid is what attach_pipe() waits for */
	pi->real_pid = getpid();

	printf("\t%d: Done, waiting for others on %d pid with r:%d w:%d\n",
			pid, pi->real_pid, pfd[0], pfd[1]);

	while (1) {
		if (pi->users == 1) /* only I left */
			break;

		printf("\t%d: Waiting for %x pipe to attach (%d users left)\n",
				pid, e->pipeid, pi->users - 1);
		if (time < 20000000)
			time <<= 1;
		usleep(time);
	}

	printf("\t%d: All is ok - reopening pipe for %d\n", pid, e->fd);
	if (e->flags & O_WRONLY) {
		close(pfd[0]);
		tmp = reopen_fd(pfd[1], e->fd);
	} else {
		close(pfd[1]);
		tmp = reopen_fd(pfd[0], e->fd);
	}

	if (tmp < 0) {
		perror("Can't dup pipe fd");
		return 1;
	}

	return 0;
}

/*
 * Attach to a pipe that another task creates.
 *
 * Polls pi->real_pid until the creator has published the pipe, opens
 * the needed end through /proc/<real_pid>/fd/<n> with the flags of @e,
 * drops one user from @pi and moves the descriptor to e->fd.
 *
 * Returns 0 on success, 1 on failure.
 */
static int attach_pipe(int pid, struct pipes_entry *e, struct pipes_info *pi)
{
	char path[128];
	int peer_fd, fd;

	printf("\t%d: Wating for pipe %x to appear\n", pid, e->pipeid);

	while (pi->real_pid == 0)
		usleep(1000);

	peer_fd = (e->flags & O_WRONLY) ? pi->write_fd : pi->read_fd;

	sprintf(path, "/proc/%d/fd/%d", pi->real_pid, peer_fd);
	printf("\t%d: Attaching pipe %s\n", pid, path);

	fd = open(path, e->flags);
	if (fd < 0) {
		perror("Can't attach pipe");
		return 1;
	}

	printf("\t%d: Done, reopening for %d\n", pid, e->fd);
	/* NOTE(review): plain decrement of a counter other tasks update too -- confirm it can't race */
	pi->users--;

	if (reopen_fd(fd, e->fd) < 0) {
		perror("Can't dup to attach pipe");
		return 1;
	}

	return 0;
}

/*
 * Restore one pipe end described by @e for task @pid.
 *
 * If the pipes image (*@pipes_fd) sits on the descriptor number the pipe
 * end has to get, the image is first moved away with dup(). The task
 * that owns the pipe creates it, every other task attaches to it.
 *
 * Returns 0 on success, 1 on failure.
 */
static int open_pipe(int pid, struct pipes_entry *e, int *pipes_fd)
{
	struct pipes_info *info;

	printf("\t%d: Opening pipe %x on fd %d\n", pid, e->pipeid, e->fd);
	if (e->fd == *pipes_fd) {
		int moved = dup(*pipes_fd);

		if (moved < 0) {
			perror("Can't dup file");
			return 1;
		}

		*pipes_fd = moved;
	}

	info = search_pipes(e->pipeid);
	if (info == NULL) {
		fprintf(stderr, "BUG: can't find my pipe %x\n", e->pipeid);
		return 1;
	}

	if (info->pid != pid)
		return attach_pipe(pid, e, info);

	return create_pipe(pid, e, info, *pipes_fd);
}

/*
 * Restore all pipe ends of task @pid from pipes-<pid>.img.
 *
 * The image descriptor may be moved by open_pipe() when it collides with
 * a descriptor number to restore.
 *
 * Returns 0 on success, 1 on failure. The image descriptor is closed on
 * every path.
 */
static int prepare_pipes(int pid)
{
	char path[64];
	int pipes_fd;
	int ret = 1;
	__u32 type = 0;

	printf("%d: Opening pipes\n", pid);

	snprintf(path, sizeof(path), "pipes-%d.img", pid);
	pipes_fd = open(path, O_RDONLY);
	if (pipes_fd < 0) {
		perror("Can't open pipes img");
		return 1;
	}

	if (read(pipes_fd, &type, sizeof(type)) != (ssize_t)sizeof(type) ||
	    type != PIPES_MAGIC) {
		/* not necessarily a syscall failure, so don't use perror() */
		fprintf(stderr, "Bad pipes file\n");
		goto out;
	}

	while (1) {
		struct pipes_entry e;
		int got;

		got = read(pipes_fd, &e, sizeof(e));
		if (got == 0) {
			ret = 0;
			break;
		}
		if (got != (int)sizeof(e)) {
			if (got < 0)
				perror("Bad pipes entry");
			else
				fprintf(stderr, "Bad pipes entry\n");
			break;
		}

		if (open_pipe(pid, &e, &pipes_fd))
			break;
	}

out:
	close(pipes_fd);
	return ret;
}

/*
 * Restore the resources of a single task and then run its image.
 *
 * The preparation steps run in a fixed order - pipes, file descriptors,
 * shared memory - and the first one that fails stops the sequence.
 *
 * Returns 1 if any preparation step fails, otherwise whatever
 * execute_image() returns.
 */
static int restore_one_task(int pid)
{
	printf("%d: Restoring resources\n", pid);

	/* || short-circuits, so a later step never runs after a failure */
	if (prepare_pipes(pid) || prepare_fds(pid) || prepare_shmem(pid))
		return 1;

	return execute_image(pid);
}

static int restore_task_with_children(int my_pid, char *pstree_path);

#if 0
static inline int fork_with_pid(int pid, char *pstree_path)
{
	/* FIXME - no such ability now */
	int ret;

	ret = fork();
	if (ret == 0) {
		ret = restore_task_with_children(pid, pstree_path);
		exit(ret);
	}

	return ret;
}
#else
/*
 * clone() flag used together with the child-tid pointer to request a
 * specific pid for the child.
 * NOTE(review): presumably provided by the CLONE_CHILD_USEPID kernel patch
 * from this series; it is not a mainline flag - confirm the value.
 */
#define CLONE_CHILD_USEPID      0x02000000

/* Size of the temporary stack the cloned child starts on */
#define CHILD_STACK_SIZE	(4 * 4096)

/* clone() entry point: restore the task whose pid we were created with */
static int do_child(void *arg)
{
	return restore_task_with_children(getpid(), arg);
}

/*
 * Create a child that runs restore_task_with_children() for @pid.
 *
 * Returns clone()'s result: the child's pid in the parent, or -1 on
 * failure (also when the stack cannot be mapped).
 */
static inline int fork_with_pid(int pid, char *pstree_path)
{
	char *stack;
	int ret;

	/* Anonymous mappings take fd -1; some systems reject anything else */
	stack = mmap(NULL, CHILD_STACK_SIZE, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN, -1, 0);
	if (stack == MAP_FAILED)
		return -1;

	/* The stack grows down, so clone() wants the top of the area */
	ret = clone(do_child, stack + CHILD_STACK_SIZE,
			SIGCHLD | CLONE_CHILD_USEPID, pstree_path,
			NULL, NULL, &pid);

	/*
	 * No CLONE_VM is passed, so the child works on its own copy of this
	 * mapping; drop the parent's copy instead of leaking it per child.
	 * NOTE(review): relies on a successful munmap() leaving errno alone
	 * so the caller's perror() still reports the clone() error.
	 */
	munmap(stack, CHILD_STACK_SIZE);

	return ret;
}
#endif

/*
 * Restore task @my_pid together with its whole subtree.
 *
 * Re-opens the pstree image, skips the 32-bit magic, scans for the entry
 * whose pid equals @my_pid, forks every child listed after that entry and
 * finally restores the task itself.
 *
 * Any failure while reading the image or forking terminates the process
 * with exit(1); otherwise the result of restore_one_task() is returned.
 */
static int restore_task_with_children(int my_pid, char *pstree_path)
{
	int *pids = NULL;
	int fd, ret, i;
	struct pstree_entry e;

	printf("%d: Starting restore\n", my_pid);

	fd = open(pstree_path, O_RDONLY);
	if (fd < 0) {
		perror("Can't reopen pstree image");
		exit(1);
	}

	/* Skip the magic, then walk entries until ours is found */
	lseek(fd, sizeof(__u32), SEEK_SET);
	while (1) {
		ret = read(fd, &e, sizeof(e));
		if (ret != sizeof(e)) {
			fprintf(stderr, "%d: Read returned %d\n", my_pid, ret);
			if (ret < 0)
				perror("Can't read pstree");
			exit(1);
		}

		if (e.pid == my_pid)
			break;

		/* Not ours: jump over this entry's list of child pids */
		lseek(fd, e.nr_children * sizeof(__u32), SEEK_CUR);
	}

	if (e.nr_children > 0) {
		i = e.nr_children * sizeof(int);
		pids = malloc(i);
		if (pids == NULL) {
			perror("Can't allocate children pids");
			exit(1);
		}

		ret = read(fd, pids, i);
		if (ret != i) {
			if (ret < 0)
				perror("Can't read children pids");
			else
				fprintf(stderr, "Can't read children pids: short read\n");
			exit(1);
		}
	}

	close(fd);

	if (e.nr_children > 0) {
		printf("%d: Restoring %d children:\n", my_pid, e.nr_children);
		for (i = 0; i < (int)e.nr_children; i++) {
			printf("\tFork %d from %d\n", pids[i], my_pid);
			ret = fork_with_pid(pids[i], pstree_path);
			if (ret < 0) {
				perror("Can't fork kid");
				exit(1);
			}
		}
	}

	/* The pid list is not needed once all children are started */
	free(pids);

	shmem_update_real_pid(my_pid, getpid());

	return restore_one_task(my_pid);
}

/*
 * Read the root entry from the already opened pstree image @fd, start the
 * root task with the recorded pid and wait for one child to terminate.
 *
 * @fd is always closed by this function. Returns 0 once wait() returns,
 * 1 if the entry cannot be read or the root task cannot be forked.
 * The child's exit status is not inspected.
 */
static int restore_root_task(char *pstree_path, int fd)
{
	struct pstree_entry e;
	int ret;

	ret = read(fd, &e, sizeof(e));
	if (ret != sizeof(e)) {
		/* errno is only meaningful when read() itself failed */
		if (ret < 0)
			perror("Can't read root pstree entry");
		else
			fprintf(stderr, "Can't read root pstree entry: short read\n");
		close(fd);
		return 1;
	}

	close(fd);

	printf("Forking root with %d pid\n", e.pid);
	ret = fork_with_pid(e.pid, pstree_path);
	if (ret < 0) {
		perror("Can't fork root");
		return 1;
	}

	wait(NULL);
	return 0;
}

/*
 * Restore a whole process tree from "pstree-<pid>.img".
 *
 * @pid is the textual id taken from the command line and is used only to
 * build the image file name. The image must start with PSTREE_MAGIC.
 *
 * Returns 0 on success, 1 on failure.
 */
static int restore_all_tasks(char *pid)
{
	char path[128];
	int pstree_fd, ret;
	__u32 type = 0;

	/* @pid is arbitrary user input; never let it overrun path[] */
	ret = snprintf(path, sizeof(path), "pstree-%s.img", pid);
	if (ret < 0 || ret >= (int)sizeof(path)) {
		fprintf(stderr, "Pstree image name is too long\n");
		return 1;
	}

	pstree_fd = open(path, O_RDONLY);
	if (pstree_fd < 0) {
		perror("Can't open pstree image");
		return 1;
	}

	ret = read(pstree_fd, &type, sizeof(type));
	if (ret < 0) {
		perror("Can't read pstree magic");
		close(pstree_fd);
		return 1;
	}
	if (ret != (int)sizeof(type) || type != PSTREE_MAGIC) {
		/* Not an errno condition, so perror() would be misleading */
		fprintf(stderr, "Bad pstree magic\n");
		close(pstree_fd);
		return 1;
	}

	/*
	 * NOTE(review): what prepare_shared() does with the descriptor on
	 * failure is not visible here, so it is left untouched on that path.
	 */
	if (prepare_shared(pstree_fd))
		return 1;

	return restore_root_task(path, pstree_fd);
}

/*
 * cr-restore entry point.
 *
 *   -p <pid>   restore a single task
 *   -t <pid>   restore the whole tree described by the pstree image
 *
 * Anything else prints the usage line and returns 1.
 */
int main(int argc, char **argv)
{
	if (argc == 3 && argv[1][0] == '-') {
		switch (argv[1][1]) {
		case 'p':
			return restore_one_task(atoi(argv[2]));
		case 't':
			return restore_all_tasks(argv[2]);
		}
	}

	printf("Usage: %s (-t|-p) <pid>\n", argv[0]);
	return 1;
}

[-- Attachment #4: img-show.c --]
[-- Type: text/plain, Size: 7004 bytes --]

#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <linux/types.h>
#include <string.h>
#include "img_structs.h"
#include "binfmt_img.h"

/*
 * Dump an fdinfo image: a sequence of struct fdinfo_entry records, each
 * followed by e.len bytes of data that is printed as a string.
 *
 * @fd must be positioned right after the image magic.
 * Returns 0 at end of file, 1 on a read error or an unknown entry type.
 */
static int show_fdinfo(int fd)
{
	char data[1024];
	struct fdinfo_entry e;

	while (1) {
		int ret;

		ret = read(fd, &e, sizeof(e));
		if (ret == 0)
			break;
		if (ret != sizeof(e)) {
			perror("Can't read");
			return 1;
		}

		ret = read(fd, data, e.len);
		if (ret != e.len) {
			perror("Can't read");
			return 1;
		}

		/* e.len is a __u8, so the terminator always fits in data[] */
		data[e.len] = '\0';

		/*
		 * pos is 32 bits and addr is 64 bits wide; the casts make the
		 * arguments match the conversion specifiers exactly.
		 */
		switch (e.type) {
		case FDINFO_FD:
			printf("fd %d [%s] pos %x flags %o\n", (int)e.addr, data,
					(unsigned int)e.pos, (unsigned int)e.flags);
			break;
		case FDINFO_MAP:
			printf("map %llx [%s] flags %o\n",
					(unsigned long long)e.addr, data,
					(unsigned int)e.flags);
			break;
		default:
			fprintf(stderr, "Unknown fdinfo entry type %d\n", e.type);
			return 1;
		}
	}

	return 0;
}

#define PAGE_SIZE	4096

/*
 * Dump a list of pages: each record is a 64-bit virtual address followed
 * by PAGE_SIZE bytes of contents. Only the first and the last word of
 * every page are printed. The list ends at end of file or at a zero
 * address.
 *
 * Returns 0 when the list ends cleanly, 1 on a read/seek failure or a
 * truncated record.
 */
static int show_mem(int fd)
{
	__u64 vaddr;
	unsigned int data[2];
	int ret;

	while (1) {
		ret = read(fd, &vaddr, sizeof(vaddr));
		if (ret == 0)
			break;
		if (ret != (int)sizeof(vaddr)) {
			/* Never go on with an unset or partial address */
			if (ret < 0)
				perror("Can't read page address");
			else
				fprintf(stderr, "Truncated page address\n");
			return 1;
		}
		if (vaddr == 0)
			break;

		/* First word, skip the middle of the page, last word */
		if (read(fd, &data[0], sizeof(data[0])) != sizeof(data[0]) ||
		    lseek(fd, PAGE_SIZE - 2 * sizeof(data[0]), SEEK_CUR) < 0 ||
		    read(fd, &data[1], sizeof(data[1])) != sizeof(data[1])) {
			fprintf(stderr, "Can't read page contents\n");
			return 1;
		}

		printf("\tpage 0x%lx [%x...%x]\n", (unsigned long)vaddr, data[0], data[1]);
	}

	return 0;
}

/* Dump a pages image; the body is a plain page list, see show_mem(). */
static int show_pages(int fd)
{
	int rc = show_mem(fd);

	return rc;
}

/*
 * Dump a shmem image: a sequence of struct shmem_entry records printed as
 * "<start>-<end> <shmid>".
 *
 * Returns 0 at end of file, 1 on a read error or a truncated record.
 */
static int show_shmem(int fd)
{
	int r;
	struct shmem_entry e;

	while (1) {
		r = read(fd, &e, sizeof(e));
		if (r == 0)
			return 0;
		if (r != sizeof(e)) {
			perror("Can't read shmem entry");
			return 1;
		}

		/* All three fields are 64 bits wide; print them as such */
		printf("%016llx-%016llx %016llx\n",
				(unsigned long long)e.start,
				(unsigned long long)e.end,
				(unsigned long long)e.shmid);
	}
}

/*
 * Map a saved segment value to a short printable name.
 *
 * Exact matches against the CKPT_X86_SEG_* constants are tried first,
 * then the TLS and LDT bits (in that order); anything else is
 * "[unknown]". The returned string is a literal and must not be modified.
 */
static char *segval(__u16 seg)
{
	if (seg == CKPT_X86_SEG_NULL)
		return "nul";
	if (seg == CKPT_X86_SEG_USER32_CS)
		return "cs32";
	if (seg == CKPT_X86_SEG_USER32_DS)
		return "ds32";
	if (seg == CKPT_X86_SEG_USER64_CS)
		return "cs64";
	if (seg == CKPT_X86_SEG_USER64_DS)
		return "ds64";

	if (seg & CKPT_X86_SEG_TLS)
		return "tls";

	return (seg & CKPT_X86_SEG_LDT) ? "ldt" : "[unknown]";
}

/*
 * Read a struct binfmt_regs_image from @fd and print every register.
 * Segment selectors go through segval(); everything else is printed as a
 * zero-padded hex value.
 *
 * Returns 0 on success, 1 if the structure cannot be read in full.
 */
static int show_regs(int fd)
{
	struct binfmt_regs_image ri;

/* Labels are padded to nine characters so the values line up */
#define SHOW_REG(label, field)	printf("\t" label "%016lx\n", ri.field)
#define SHOW_SEG(label, field)	printf("\t" label "%s\n", segval(ri.field))

	if (read(fd, &ri, sizeof(ri)) != sizeof(ri)) {
		perror("Can't read registers from image");
		return 1;
	}

	printf("Registers:\n");

	SHOW_REG("r15:     ", r15);
	SHOW_REG("r14:     ", r14);
	SHOW_REG("r13:     ", r13);
	SHOW_REG("r12:     ", r12);
	SHOW_REG("r11:     ", r11);
	SHOW_REG("r10:     ", r10);
	SHOW_REG("r9:      ", r9);
	SHOW_REG("r8:      ", r8);
	SHOW_REG("ax:      ", ax);
	SHOW_REG("orig_ax: ", orig_ax);
	SHOW_REG("bx:      ", bx);
	SHOW_REG("cx:      ", cx);
	SHOW_REG("dx:      ", dx);
	SHOW_REG("si:      ", si);
	SHOW_REG("di:      ", di);
	SHOW_REG("ip:      ", ip);
	SHOW_REG("flags:   ", flags);
	SHOW_REG("bp:      ", bp);
	SHOW_REG("sp:      ", sp);
	SHOW_REG("gs:      ", gs);
	SHOW_REG("fs:      ", fs);

	SHOW_SEG("gsindex: ", gsindex);
	SHOW_SEG("fsindex: ", fsindex);
	SHOW_SEG("cs:      ", cs);
	SHOW_SEG("ss:      ", ss);
	SHOW_SEG("ds:      ", ds);
	SHOW_SEG("es:      ", es);

	SHOW_REG("tls0     ", tls[0]);
	SHOW_REG("tls1     ", tls[1]);
	SHOW_REG("tls2     ", tls[2]);

#undef SHOW_SEG
#undef SHOW_REG

	return 0;
}

/*
 * Read a struct binfmt_mm_image from @fd and print its fields.
 *
 * On success the image's start_stack value is stored in *stack and 0 is
 * returned; 1 is returned (and *stack left alone) if the structure cannot
 * be read in full.
 */
static int show_mm(int fd, unsigned long *stack)
{
	struct binfmt_mm_image mi;

/* Labels are padded to thirteen characters so the values line up */
#define SHOW_MM(label, field)	printf("\t" label "%016lx\n", mi.field)

	if (read(fd, &mi, sizeof(mi)) != sizeof(mi)) {
		perror("Can't read mm from image");
		return 1;
	}

	printf("MM:\n");

	SHOW_MM("flags:       ", flags);
	SHOW_MM("def_flags:   ", def_flags);
	SHOW_MM("start_code:  ", start_code);
	SHOW_MM("end_code:    ", end_code);
	SHOW_MM("start_data:  ", start_data);
	SHOW_MM("end_data:    ", end_data);
	SHOW_MM("start_brk:   ", start_brk);
	SHOW_MM("brk:         ", brk);
	SHOW_MM("start_stack: ", start_stack);
	SHOW_MM("arg_start:   ", arg_start);
	SHOW_MM("arg_end:     ", arg_end);
	SHOW_MM("env_start:   ", env_start);
	SHOW_MM("env_end:     ", env_end);

#undef SHOW_MM

	/* Handed back so the caller can tag the stack VMA */
	*stack = mi.start_stack;

	return 0;
}

static int show_vmas(int fd, unsigned long stack)
{
	struct binfmt_vma_image vi;

	printf("VMAs:\n");
	while (1) {
		char *note = "";

		if (read(fd, &vi, sizeof(vi)) != sizeof(vi)) {
			perror("Can't read vma from image");
			return 1;
		}

		if (vi.start == 0 && vi.end == 0)
			return 0;

		if (vi.start <= stack && vi.end >= stack)
			note = "[stack]";

		printf("\t%016lx-%016lx file %d %016lx prot %x flags %x %s\n",
				vi.start, vi.end, vi.fd, vi.pgoff,
				vi.prot, vi.flags, note);
	}
}

/* Print the "Pages:" header and then the page list that follows in @fd. */
static int show_privmem(int fd)
{
	int rc;

	printf("Pages:\n");
	rc = show_mem(fd);

	return rc;
}

/*
 * Dump a core image: a 32-bit version word followed by the registers,
 * the mm description, the VMA list and the private pages, in that order.
 *
 * Only BINFMT_IMG_VERS_0 is understood.
 * Returns 0 on success, 1 on any failure.
 */
static int show_core(int fd)
{
	__u32 version = 0;
	unsigned long stack;

	/* Don't judge the version from a word that was never read */
	if (read(fd, &version, sizeof(version)) != sizeof(version)) {
		fprintf(stderr, "Can't read core image version\n");
		return 1;
	}

	if (version != BINFMT_IMG_VERS_0) {
		printf("Unsupported version %d\n", version);
		return 1;
	}

	printf("Showing version 0\n");

	if (show_regs(fd))
		return 1;

	/* show_mm() reports the stack address used to tag the stack VMA */
	if (show_mm(fd, &stack))
		return 1;

	if (show_vmas(fd, stack))
		return 1;

	if (show_privmem(fd))
		return 1;

	return 0;
}

/*
 * Dump a pstree image: each struct pstree_entry is followed by
 * nr_children 32-bit child pids. One line is printed per task:
 * "<pid>: <child> <child> ...".
 *
 * Returns 0 at end of file, 1 on a read or allocation failure.
 */
static int show_pstree(int fd)
{
	int ret;
	struct pstree_entry e;

	while (1) {
		int i, len;
		__u32 *ch;

		ret = read(fd, &e, sizeof(e));
		if (ret == 0)
			return 0;
		if (ret != sizeof(e)) {
			perror("Can't read processes entry");
			return 1;
		}

		printf("%d:", e.pid);
		len = e.nr_children * sizeof(__u32);
		ch = malloc(len);
		/* malloc(0) may legitimately return NULL for childless tasks */
		if (ch == NULL && len != 0) {
			perror("Can't allocate children list");
			return 1;
		}

		ret = read(fd, ch, len);
		if (ret != len) {
			perror("Can't read children list");
			free(ch);
			return 1;
		}

		for (i = 0; i < (int)e.nr_children; i++)
			printf(" %d", ch[i]);
		printf("\n");

		/* The list is per entry; release it before the next one */
		free(ch);
	}
}

/*
 * Dump a pipes image: a sequence of struct pipes_entry records.
 *
 * Entries with O_WRONLY set in flags carry no data and must have
 * bytes == 0. Every other entry is followed by e.bytes bytes of pipe
 * contents, of which at most the first 16 are printed as a string.
 *
 * Returns 0 at end of file, 1 on a read failure or a malformed entry.
 */
static int show_pipes(int fd)
{
	struct pipes_entry e;
	int ret;
	char buf[17];

	while (1) {
		__u32 shown;

		ret = read(fd, &e, sizeof(e));
		if (ret == 0)
			break;
		if (ret != sizeof(e)) {
			perror("Can't read pipe entry");
			return 1;
		}

		/* pipeid and bytes are 32-bit unsigned; match the specifiers */
		printf("%d: %x %o %u ", (int)e.fd, (unsigned int)e.pipeid,
				(unsigned int)e.flags, (unsigned int)e.bytes);
		if (e.flags & O_WRONLY) {
			printf("\n");

			if (e.bytes) {
				printf("Bogus pipe\n");
				return 1;
			}

			continue;
		}

		memset(buf, 0, sizeof(buf));

		/*
		 * Clamp in unsigned arithmetic: a byte count above INT_MAX
		 * must not turn into a negative (i.e. huge) read length.
		 */
		shown = e.bytes;
		if (shown > sizeof(buf) - 1)
			shown = sizeof(buf) - 1;

		ret = read(fd, buf, shown);
		if (ret < 0) {
			perror("Can't read pipe data");
			return 1;
		}
		if ((__u32)ret != shown) {
			fprintf(stderr, "Truncated pipe data\n");
			return 1;
		}

		printf("\t[%s", buf);
		if (shown < e.bytes)
			printf("...");
		printf("]\n");

		/* Step over the part of the contents that was not printed */
		lseek(fd, e.bytes - shown, SEEK_CUR);
	}

	return 0;
}

/*
 * img-show entry point: open the image named by argv[1], read its 32-bit
 * magic and hand the rest of the file to the matching show_*() routine.
 *
 * Returns that routine's result, or 1 when the argument is missing, the
 * file cannot be opened/read, or the magic is unknown.
 */
int main(int argc, char **argv)
{
	__u32 type = 0;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "Usage: %s <image-file>\n",
				argc > 0 ? argv[0] : "img-show");
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("Can't open");
		return 1;
	}

	/* A file shorter than the magic can't be any known image */
	if (read(fd, &type, sizeof(type)) != sizeof(type)) {
		fprintf(stderr, "Can't read image magic\n");
		return 1;
	}

	if (type == FDINFO_MAGIC)
		return show_fdinfo(fd);
	if (type == PAGES_MAGIC)
		return show_pages(fd);
	if (type == SHMEM_MAGIC)
		return show_shmem(fd);
	if (type == PSTREE_MAGIC)
		return show_pstree(fd);
	if (type == PIPES_MAGIC)
		return show_pipes(fd);
	if (type == BINFMT_IMG_MAGIC)
		return show_core(fd);

	printf("Unknown file type 0x%x\n", type);
	return 1;
}

[-- Attachment #5: img_structs.h --]
[-- Type: text/plain, Size: 494 bytes --]


/* First 32-bit word of an fdinfo image; img-show dispatches on it. */
#define FDINFO_MAGIC	0x01010101

/*
 * One record of an fdinfo image. In the file every record is followed by
 * `len` bytes of data, which img-show prints as a string.
 */
struct fdinfo_entry {
	__u8	type;	/* FDINFO_FD or FDINFO_MAP */
	__u8	len;	/* size in bytes of the data following this record */
	__u16	flags;	/* printed in octal by img-show */
	__u32	pos;	/* printed as "pos" for FDINFO_FD records */
	__u64	addr;	/* shown as the fd number (FDINFO_FD) or as the map value (FDINFO_MAP) */
};

/* Values of fdinfo_entry.type */
#define FDINFO_FD	1
#define FDINFO_MAP	2

/*
 * First 32-bit word of a pages image. img-show parses the rest as records
 * of an 8-byte address followed by 4096 bytes of page contents.
 */
#define PAGES_MAGIC	0x20202020

/* First 32-bit word of a shmem image. */
#define SHMEM_MAGIC	0x03300330

/* One record of a shmem image; img-show prints it as "start-end shmid". */
struct shmem_entry {
	__u64	start;
	__u64	end;
	__u64	shmid;
};

/* First 32-bit word of a pstree image. */
#define PSTREE_MAGIC	0x40044004

/*
 * One task in a pstree image. In the file every record is followed by
 * nr_children 32-bit values holding the pids of the task's children.
 */
struct pstree_entry {
	__u32	pid;		/* pid of this task */
	__u32	nr_children;	/* number of child pids following the record */
};

/* First 32-bit word of a pipes image. */
#define PIPES_MAGIC	0x05055050

/*
 * One pipe descriptor in a pipes image. Records without O_WRONLY in
 * `flags` are followed in the file by `bytes` bytes of pipe contents;
 * img-show rejects O_WRONLY records with a non-zero `bytes`.
 */
struct pipes_entry {
	__u32	fd;	/* descriptor number the pipe end is restored on */
	__u32	pipeid;	/* identifier used to look the pipe up on restore */
	__u32	flags;	/* open flags; O_WRONLY selects the write end */
	__u32	bytes;	/* amount of data following this record */
};

[-- Attachment #6: Makefile --]
[-- Type: text/plain, Size: 186 bytes --]

# Build the three checkpoint/restore tools, one source file each.

# `all` and `clean` name actions, not files: keep them working even if a
# file with such a name shows up in the directory.
.PHONY: all clean

all: cr-dump img-show cr-restore

img-show: img-show.c
	gcc -o $@ $<

cr-dump: cr-dump.c
	gcc -o $@ $<

cr-restore: cr-restore.c
	gcc -o $@ $<

clean:
	rm -f cr-dump img-show cr-restore

[-- Attachment #7: Type: text/plain, Size: 206 bytes --]

_______________________________________________
Containers mailing list
Containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA@public.gmane.org
https://lists.linux-foundation.org/mailman/listinfo/containers

  parent reply	other threads:[~2011-07-15 13:49 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-07-15 13:45 [RFC][PATCH 0/7 + tools] Checkpoint/restore mostly in the userspace Pavel Emelyanov
     [not found] ` <4E204466.8010204-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-15 13:45   ` [PATCH 0/1] proc: Introduce the /proc/<pid>/mfd/ directory Pavel Emelyanov
     [not found]     ` <4E20448A.5010207-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-21  7:21       ` Tejun Heo
2011-07-15 13:46   ` [PATCH 2/7] vfs: Introduce the fd closing helper Pavel Emelyanov
     [not found]     ` <4E2044A7.4030103-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-21 15:47       ` Serge E. Hallyn
2011-07-15 13:46   ` [PATCH 3/7] proc: Introduce the Children: line in /proc/<pid>/status Pavel Emelyanov
     [not found]     ` <4E2044C3.7050506-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-21  6:54       ` Tejun Heo
     [not found]         ` <20110721065436.GT3455-Gd/HAXX7CRxy/B6EtB590w@public.gmane.org>
2011-07-23  8:06           ` Pavel Emelyanov
     [not found]             ` <4E2A8116.1040309-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-23  8:41               ` Tejun Heo
     [not found]                 ` <20110723084110.GG21089-9pTldWuhBndy/B6EtB590w@public.gmane.org>
2011-07-23  8:45                   ` Pavel Emelyanov
     [not found]                     ` <4E2A8A0E.5030208-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-23  8:50                       ` Tejun Heo
     [not found]                         ` <20110723085014.GI21089-9pTldWuhBndy/B6EtB590w@public.gmane.org>
2011-07-23  8:51                           ` Pavel Emelyanov
2011-07-21 15:54       ` Serge E. Hallyn
2011-07-15 13:47   ` [PATCH 4/7] vfs: Add ->statfs callback for pipefs Pavel Emelyanov
     [not found]     ` <4E2044D6.3060205-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-21  6:59       ` Tejun Heo
2011-07-21 15:59       ` Serge E. Hallyn
2011-07-15 13:47   ` [PATCH 5/7] clone: Introduce the CLONE_CHILD_USEPID functionality Pavel Emelyanov
     [not found]     ` <4E2044EB.20001-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-21 16:04       ` Serge E. Hallyn
     [not found]         ` <20110721160459.GD19012-7LNsyQBKDXoIagZqoN9o3w@public.gmane.org>
2011-07-22 23:08           ` Matt Helsley
     [not found]             ` <20110722230848.GB16940-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2011-07-23  8:09               ` Pavel Emelyanov
2011-07-15 13:47   ` [PATCH 6/7] proc: Introduce the /proc/<pid>/dump file Pavel Emelyanov
     [not found]     ` <4E204500.6040800-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-16 22:57       ` Kirill A. Shutemov
     [not found]         ` <20110716225709.GA25606-oKw7cIdHH8eLwutG50LtGA@public.gmane.org>
2011-07-17  8:06           ` Cyrill Gorcunov
2011-07-21  6:44       ` Tejun Heo
     [not found]         ` <20110721064408.GR3455-Gd/HAXX7CRxy/B6EtB590w@public.gmane.org>
2011-07-23  8:11           ` Pavel Emelyanov
     [not found]             ` <4E2A8239.5060908-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-23  8:37               ` Tejun Heo
     [not found]                 ` <20110723083711.GF21089-9pTldWuhBndy/B6EtB590w@public.gmane.org>
2011-07-23  8:49                   ` Pavel Emelyanov
     [not found]                     ` <4E2A8B12.4010709-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-23  8:58                       ` Tejun Heo
2011-07-15 13:48   ` [PATCH 7/7] binfmt: Introduce the binfmt_img exec handler Pavel Emelyanov
     [not found]     ` <4E204519.3040804-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-21  6:51       ` Tejun Heo
     [not found]         ` <20110721065127.GS3455-Gd/HAXX7CRxy/B6EtB590w@public.gmane.org>
2011-07-22 22:46           ` Matt Helsley
     [not found]             ` <20110722224617.GA16940-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2011-07-23  8:17               ` Pavel Emelyanov
     [not found]                 ` <4E2A83AC.6090504-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-23  8:45                   ` Tejun Heo
     [not found]                     ` <20110723084529.GH21089-9pTldWuhBndy/B6EtB590w@public.gmane.org>
2011-07-23  8:51                       ` Pavel Emelyanov
     [not found]                         ` <4E2A8B7D.8010807-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-23  9:04                           ` Tejun Heo
2011-07-15 13:49   ` Pavel Emelyanov [this message]
     [not found]     ` <4E204554.6040901-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-22 23:45       ` [TOOLS] To make use of the patches Matt Helsley
     [not found]         ` <20110722234558.GD16940-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2011-07-23  8:32           ` Pavel Emelyanov
     [not found]             ` <4E2A8704.3030306-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org>
2011-07-27 23:00               ` Matt Helsley
     [not found]                 ` <20110727230003.GE15501-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2011-07-28  8:23                   ` James Bottomley
2011-07-23  0:40       ` Reply #2: " Matt Helsley
     [not found]         ` <20110723004045.GC21563-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2011-07-23  8:33           ` Pavel Emelyanov
2011-07-15 15:01   ` [RFC][PATCH 0/7 + tools] Checkpoint/restore mostly in the userspace Tejun Heo
2011-07-18 13:27   ` Serge E. Hallyn
     [not found]     ` <20110718132759.GB8127-7LNsyQBKDXoIagZqoN9o3w@public.gmane.org>
2011-07-23  8:43       ` Pavel Emelyanov
2011-07-23  0:25   ` Matt Helsley
     [not found]     ` <20110723002558.GE16940-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2011-07-23  3:29       ` Matt Helsley
     [not found]         ` <20110723032945.GD21563-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2011-07-23  4:58           ` Tejun Heo
     [not found]             ` <20110723045842.GD21089-9pTldWuhBndy/B6EtB590w@public.gmane.org>
2011-07-26 18:11               ` Matt Helsley
     [not found]                 ` <20110726181128.GD14808-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2011-07-26 22:45                   ` Tejun Heo
     [not found]                     ` <20110726224525.GC28497-9pTldWuhBndy/B6EtB590w@public.gmane.org>
2011-07-26 23:07                       ` Matt Helsley
2011-07-23  3:53       ` Tejun Heo
     [not found]         ` <CAOS58YPqLSYi2xECUk4O5GG3s6aokT=VykmkL6UnAOzyHXNAgQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2011-07-26 22:59           ` Matt Helsley
     [not found]             ` <20110726225911.GF14808-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2011-07-26 23:46               ` Tejun Heo
     [not found]                 ` <20110726234657.GD28497-9pTldWuhBndy/B6EtB590w@public.gmane.org>
2011-07-27  0:53                   ` Matt Helsley
     [not found]                     ` <20110727005341.GB15501-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2011-07-27 10:12                       ` Tejun Heo
     [not found]                         ` <20110727101228.GY2622-Gd/HAXX7CRxy/B6EtB590w@public.gmane.org>
2011-07-27 22:26                           ` Matt Helsley
2011-07-23  5:10       ` Tejun Heo
     [not found]         ` <20110723051005.GE21089-9pTldWuhBndy/B6EtB590w@public.gmane.org>
2011-07-26 22:02           ` Matt Helsley
     [not found]             ` <20110726220215.GE14808-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2011-07-26 22:21               ` Tejun Heo
     [not found]                 ` <20110726222109.GB28497-9pTldWuhBndy/B6EtB590w@public.gmane.org>
2011-07-27  0:06                   ` Matt Helsley
     [not found]                     ` <20110727000651.GA15501-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2011-07-27 12:01                       ` Tejun Heo
     [not found]                         ` <20110727120114.GZ2622-Gd/HAXX7CRxy/B6EtB590w@public.gmane.org>
2011-07-27 21:35                           ` Matt Helsley
     [not found]                             ` <20110727213510.GC15501-52DBMbEzqgQ/wnmkkaCWp/UQ3DHhIser@public.gmane.org>
2011-07-28  7:21                               ` Tejun Heo
     [not found]                                 ` <20110728072141.GB2622-Gd/HAXX7CRxy/B6EtB590w@public.gmane.org>
2011-07-28  7:23                                   ` Tejun Heo
2011-07-28  8:37                                   ` James Bottomley
2011-07-28  9:10                                     ` Tejun Heo
2011-07-23  8:39       ` Pavel Emelyanov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4E204554.6040901@parallels.com \
    --to=xemul-bzqdu9zft3wakbo8gow8eq@public.gmane.org \
    --cc=containers-qjLDD68F18O7TbgM5vRIOg@public.gmane.org \
    --cc=dlezcano-NmTC/0ZBporQT0dZR+AlfA@public.gmane.org \
    --cc=glommer-bzQdu9zFT3WakBO8gow8eQ@public.gmane.org \
    --cc=gorcunov-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org \
    --cc=ntl-e+AXbWqSrlAAvxtiuMwx3w@public.gmane.org \
    --cc=orenl-eQaUEPhvms7ENvBUuze7eA@public.gmane.org \
    --cc=serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org \
    --cc=tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox