All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH user-cr 1/2] use Suka's v11 api
@ 2009-11-10 16:58 Serge E. Hallyn
       [not found] ` <20091110165839.GA19222-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: Serge E. Hallyn @ 2009-11-10 16:58 UTC (permalink / raw)
  To: Linux Containers; +Cc: Nathan T Lynch

This patch:
	1. changes restart to pass the right values to
		clone-with-pids.
	2. updates the clone_s390x.c to work with the
		new kernel.

All tests under cr_tests/ pass with this patch.

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 clone_s390x.c |   92 +++++++++++++++++++++++++++++++++++++--------------------
 restart.c     |   14 +++++----
 2 files changed, 68 insertions(+), 38 deletions(-)

diff --git a/clone_s390x.c b/clone_s390x.c
index dada822..71cf52f 100644
--- a/clone_s390x.c
+++ b/clone_s390x.c
@@ -14,6 +14,7 @@
 
 #include <unistd.h>
 #include <errno.h>
+#include <string.h>
 #include <sys/types.h>
 #include <sys/syscall.h>
 #include <asm/unistd.h>
@@ -25,48 +26,75 @@
 #include <linux/checkpoint.h>
 #if defined(__NR_clone_with_pids)
 
-/* this really belongs to some kernel header ! */
 struct pid_set {
 	int num_pids;
 	pid_t *pids;
 };
 
-/* (see: http://lkml.indiana.edu/hypermail/linux/kernel/9604.3/0204.html) */
+typedef unsigned long long u64;
+typedef unsigned int u32;
+typedef int pid_t;
+struct clone_args {
+	u64 clone_flags_high;
 
-#define do_clone_with_pids(stack, flags, ptid, ctid, setp) ({ \
-	register unsigned long int __r2 asm ("2") = (unsigned long int)(stack);\
-	register unsigned long int __r3 asm ("3") = (unsigned long int)(flags);\
-	register unsigned long int __r4 asm ("4") = (unsigned long int)(ptid); \
-	register unsigned long int __r5 asm ("5") = (unsigned long int)(ctid); \
-	register unsigned long int __r6 asm ("6") = (unsigned long int)(NULL); \
-	register unsigned long int __r7 asm ("7") = (unsigned long int)(setp); \
-	register unsigned long int __result asm ("2"); \
-	__asm__ __volatile__( \
-		" lghi %%r1,%7\n" \
-		" svc 0\n" \
-		: "=d" (__result) \
-		: "0" (__r2), "d" (__r3), \
-		  "d" (__r4), "d" (__r5), "d" (__r6), "d" (__r7), \
-		  "i" (__NR_clone_with_pids) \
-		: "1", "cc", "memory" \
-	); \
-		__result; \
-	})
+	u64 child_stack_base;
+	u64 child_stack_size;
 
-int clone_with_pids(int (*fn)(void *), void *child_stack, int flags,
+	u64 parent_tid_ptr;
+	u64 child_tid_ptr;
+
+	u32 nr_pids;
+
+	u32 reserved0;
+	u64 reserved1;
+};
+
+#define do_cwp(flags, pids, args, sz) \
+( { \
+  register unsigned long int __r1 asm ("1") = (unsigned long int)(__NR_clone_with_pids); \
+  register unsigned long int __r2 asm ("2") = (unsigned long int)(flags); \
+  register unsigned long int __r3 asm ("3") = (unsigned long int)(args); \
+  register unsigned long int __r4 asm ("4") = (unsigned long int)(sz); \
+  register unsigned long int __r5 asm ("5") = (unsigned long int)(pids); \
+  register long int __result asm ("2"); \
+  __asm__ __volatile__( \
+	  " svc 0\n" /* do __NR_clone_with_pids syscall (number in r1) */ \
+	  " ltgr %%r2,%%r2\n" /* returned 0? (sets the condition code) */ \
+	  " jnz 1f\n" /* if not (parent or error) goto label 1 */ \
+	  " lg %%r3,0(%%r15)\n"   /* get fn off stack into r3 for basr */ \
+	  " lg %%r2,8(%%r15)\n"   /* get fnarg off stack into arg 1 */ \
+	  " lgr %%r1,%%r15\n" /* tmp store old stack pointer */ \
+	  " aghi %%r15,-160\n" /* move the stack down by 160 bytes */ \
+	  " stg %%r1,0(%%r15)\n" /* and save old stack pointer */ \
+	  " basr %%r14,%%r3\n" /* call fn(arg) */ \
+	  " svc 1\n"  /* call exit */ \
+	  " 1:\n" \
+	  : "=d" (__result) \
+	  : "d" (__r1), "0" (__r2), "d" (__r3), "d" (__r4), "d" (__r5) \
+	  : "cc", "memory"); /* ltgr changes the condition code */ \
+	__result; \
+} )
+
+int clone_with_pids(int (*fn)(void *), void *child_stack,
+			unsigned long stack_size, unsigned long flags,
 			struct pid_set *target_pids, void *arg)
 {
-	long retval;
-	retval = do_clone_with_pids(child_stack, flags, NULL, NULL,
-				    target_pids);
+	struct clone_args clone_args, *ca = &clone_args;
+	u64 *s;
+
+	memset(ca, 0, sizeof(struct clone_args));
+	ca->nr_pids = target_pids->num_pids;
+	ca->child_stack_size = stack_size - 16;
+	ca->child_stack_base = (u64) child_stack;
+	if (child_stack) {
+		s = (u64 *) (ca->child_stack_base + ca->child_stack_size);
+		*--s = (u64) arg;
+		*--s = (u64) fn;
+		ca->child_stack_size -= 16;
+	}
 
-	if (retval < 0) {
-		errno = -retval;
-		return -1;
-	} else if (retval == 0) {
-		return fn(arg);
-	} else
-		return retval;
+	return do_cwp(flags, target_pids->pids, ca,
+				    sizeof(struct clone_args));
 }
 
 #endif  /* !defined(__NR_clone_with_pids) */
diff --git a/restart.c b/restart.c
index 35c54ea..ebc7bf8 100644
--- a/restart.c
+++ b/restart.c
@@ -43,10 +43,12 @@ struct pid_set {
 
 /* (until it's supported by libc) from clone_ARCH.c */
 #if defined(__NR_clone_with_pids) && defined(ARCH_HAS_CLONE_WITH_PID)
-extern int clone_with_pids(int (*fn)(void *), void *child_stack, int flags,
+extern int clone_with_pids(int (*fn)(void *), void *child_stack,
+			   unsigned long stack_size, int flags,
 			   struct pid_set *target_pids, void *arg);
 #else
-static int clone_with_pids(int (*fn)(void *), void *child_stack, int flags,
+static int clone_with_pids(int (*fn)(void *), void *child_stack,
+			    unsigned long stack_size, int flags,
 			   struct pid_set *target_pids, void *arg)
 {
 	return clone(fn, child_stack, flags, arg);
@@ -1749,18 +1751,17 @@ static pid_t ckpt_fork_child(struct ckpt_ctx *ctx, struct task *child)
 {
 	struct pid_set pid_set;
 	char *stack_region;
-	char *stack_start;
 	unsigned long flags = SIGCHLD;
+	unsigned long stack_size = PTHREAD_STACK_MIN;
 	pid_t pid = 0;
 
 	ckpt_dbg("forking child vpid %d flags %#x\n", child->pid, child->flags);
 
-	stack_region = malloc(PTHREAD_STACK_MIN);
+	stack_region = malloc(stack_size);
 	if (!stack_region) {
 		perror("stack malloc");
 		return -1;
 	}
-	stack_start = stack_region + PTHREAD_STACK_MIN - 1;
 
 	pid_set.pids = &pid;
 	pid_set.num_pids = 1;
@@ -1788,7 +1789,8 @@ static pid_t ckpt_fork_child(struct ckpt_ctx *ctx, struct task *child)
 	else
 		child->real_parent = _getpid();
 
-	pid = clone_with_pids(ckpt_fork_stub, stack_start, flags, &pid_set, child);
+	pid = clone_with_pids(ckpt_fork_stub, stack_region, stack_size - 16,
+				flags, &pid_set, child);
 	if (pid < 0) {
 		perror("clone");
 		free(stack_region);
-- 
1.6.1.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread
* [PATCH linux-cr] implement s390 eclone syscall
@ 2009-11-13  5:24 serue-r/Jw6+rmf7HQT0dZR+AlfA
       [not found] ` <1258089886-10034-1-git-send-email-serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
  0 siblings, 1 reply; 13+ messages in thread
From: serue-r/Jw6+rmf7HQT0dZR+AlfA @ 2009-11-13  5:24 UTC (permalink / raw)
  To: containers-qjLDD68F18O7TbgM5vRIOg

From: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>

This patch implements the s390 hook for sys_eclone.

The user-space clone-with-pids glue for s390 (clone_s390x.c
from the user-cr package) is now:

/*
 * Pids requested for the task being created.  clone_with_pids() copies
 * num_pids into clone_args.nr_pids and passes pids to the eclone system
 * call as its pid-array argument.
 */
struct pid_set {
	int num_pids;	/* count handed to the kernel as nr_pids */
	pid_t *pids;	/* pid array handed to the kernel */
};

 /*
  * do_eclone() - issue the s390x eclone system call and, in the child,
  * run fn(arg).
  *
  * The syscall number is placed in r1 and flags, args, sz and pids in
  * r2..r5 before "svc 0".  A non-zero result in r2 (the parent, or a
  * failed call) jumps to label 1 and becomes the value of the macro.
  * A zero result means we are the child: fn and its argument are read
  * from the new stack, where clone_with_pids() stored them, the stack
  * pointer is moved down by 160 bytes, fn is called and then "svc 1"
  * (exit) is issued.
  *
  * "ltgr" changes the condition code, so "cc" must be listed as a
  * clobber in addition to "memory".
  */
 #define do_eclone(flags, pids, args, sz) \
( { \
  register unsigned long int __r1 asm ("1") = (unsigned long int)(__NR_eclone); \
  register unsigned long int __r2 asm ("2") = (unsigned long int)(flags); \
  register unsigned long int __r3 asm ("3") = (unsigned long int)(args); \
  register unsigned long int __r4 asm ("4") = (unsigned long int)(sz); \
  register unsigned long int __r5 asm ("5") = (unsigned long int)(pids); \
  register long int __result asm ("2"); \
  __asm__ __volatile__( \
	  " svc 0\n" /* do __NR_eclone syscall */ \
	  " ltgr %%r2,%%r2\n" /* returned 0? */ \
	  " jnz 1f\n" /* if not goto label 1 */ \
	  " lg %%r3,0(%%r15)\n"   /* get fn off stack into r3 for basr */ \
	  " lg %%r2,8(%%r15)\n"   /* get fnarg off stack into arg 1 */ \
	  " lgr %%r1,%%r15\n" /* tmp store old stack pointer */ \
	  " aghi %%r15,-160\n" /* move the stack */ \
	  " stg %%r1,0(%%r15)\n" /* and save old stack pointer */ \
	  " basr %%r14,%%r3\n" /* call fn(arg) */ \
	  " svc 1\n"  /* call exit */ \
	  " 1:\n" \
	  : "=d" (__result) \
	  : "d" (__r1), "0" (__r2), "d" (__r3), "d" (__r4), "d" (__r5) \
	  : "cc", "memory"); \
	__result; \
} )
int clone_with_pids(int (*fn)(void *), void *child_stack, int flags,
			struct pid_set *target_pids, void *arg)
{
	struct clone_args clone_args, *ca = &clone_args;
	u64 *s;

	memset(ca, 0, sizeof(struct clone_args));
	ca->nr_pids = target_pids->num_pids;
	if (!child_stack) {
		/* we could pass in null and then in eclone not
		 * call exit if child_stack was null, but we'll
		 * just malloc here */
		int sz = 4*getpagesize();
		child_stack = malloc(sz);
		if (!child_stack)
			return -ENOMEM;
		child_stack += sz; /* we'll decrement before assigning */
	}
	ca->child_stack = (u64) child_stack;
	s = (u64 *) ca->child_stack;
	*--s = (u64) arg;
	*--s = (u64) fn;
	ca->child_stack -= 16;
	return do_eclone(flags, target_pids->pids, ca,
				    sizeof(struct clone_args));
}

Changelog:
	Nov 12: switch to latest (Nov 12) eclone format
	Nov 10: use orig_gpr2, not gprs[2] for input arg 1

Signed-off-by: Serge E. Hallyn <serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
---
 arch/s390/include/asm/unistd.h  |    3 +-
 arch/s390/kernel/compat_linux.c |   47 ++++++++++++++++++++++++++++++++++++++
 arch/s390/kernel/process.c      |   48 +++++++++++++++++++++++++++++++++++++++
 arch/s390/kernel/syscalls.S     |    1 +
 4 files changed, 98 insertions(+), 1 deletions(-)

diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h
index cb5232d..cbf6c7c 100644
--- a/arch/s390/include/asm/unistd.h
+++ b/arch/s390/include/asm/unistd.h
@@ -269,7 +269,8 @@
 #define	__NR_pwritev		329
 #define __NR_rt_tgsigqueueinfo	330
 #define __NR_perf_event_open	331
-#define NR_syscalls 332
+#define __NR_eclone		332
+#define NR_syscalls 333
 
 /* 
  * There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c
index 0debcec..c3dc6bd 100644
--- a/arch/s390/kernel/compat_linux.c
+++ b/arch/s390/kernel/compat_linux.c
@@ -762,6 +762,53 @@ asmlinkage long sys32_write(unsigned int fd, char __user * buf, size_t count)
 	return sys_write(fd, buf, count);
 }
 
+asmlinkage long sys32_eclone(void)
+{
+	int rc;
+	struct pt_regs *regs = task_pt_regs(current);
+	int args_size;
+	struct clone_args kca;
+	unsigned long flags;
+	int __user *parent_tid_ptr;
+	int __user *child_tid_ptr;
+	unsigned long __user child_stack;
+	unsigned long stack_size;
+	unsigned int flags_low;
+	struct clone_args __user *uca;
+	pid_t __user *pids;
+
+	flags_low = regs->orig_gpr2 & 0xffffffffUL;
+	uca = compat_ptr(regs->gprs[3]);
+	args_size = regs->gprs[4] & 0xffffffffUL;
+	pids = compat_ptr(regs->gprs[5]);
+
+	rc = fetch_clone_args_from_user(uca, args_size, &kca);
+	if (rc)
+		return rc;
+
+	if (kca.clone_flags_high)
+		return -EINVAL;
+	flags = flags_low;
+	parent_tid_ptr = (int __user *) kca.parent_tid_ptr;
+	child_tid_ptr =  (int __user *) kca.child_tid_ptr;
+
+	stack_size = (unsigned long) kca.child_stack_size;
+	if (stack_size)
+		return -EINVAL;
+
+	child_stack = (unsigned long) kca.child_stack;
+	if (!child_stack)
+		child_stack = regs->gprs[15];
+
+	/*
+	 * TODO: On 32-bit systems, clone_flags is passed in as 32-bit value
+	 * 	 to several functions. Need to convert clone_flags to 64-bit.
+	 */
+	return do_fork_with_pids(flags, child_stack, regs, stack_size,
+				parent_tid_ptr, child_tid_ptr, kca.nr_pids,
+				pids);
+}
+
 /*
  * 31 bit emulation wrapper functions for sys_fadvise64/fadvise64_64.
  * These need to rewrite the advise values for POSIX_FADV_{DONTNEED,NOREUSE}
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 5417eb5..51f11a1 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -241,6 +241,54 @@ SYSCALL_DEFINE4(clone, unsigned long, newsp, unsigned long, clone_flags,
 		       parent_tidptr, child_tidptr);
 }
 
+SYSCALL_DEFINE0(eclone)
+{
+	int rc;
+	struct pt_regs *regs = task_pt_regs(current);
+	int args_size;
+	struct clone_args kca;
+	unsigned long flags;
+	int __user *parent_tid_ptr;
+	int __user *child_tid_ptr;
+	unsigned long __user child_stack;
+	unsigned long stack_size;
+	unsigned int flags_low;
+	struct clone_args __user *uca;
+	pid_t __user *pids;
+
+	flags_low = regs->orig_gpr2;
+	uca = (struct clone_args __user *) regs->gprs[3];
+	args_size = regs->gprs[4];
+	pids = (pid_t __user *) regs->gprs[5];
+
+	rc = fetch_clone_args_from_user(uca, args_size, &kca);
+	if (rc)
+		return rc;
+
+	if (kca.clone_flags_high)
+		return -EINVAL;
+
+	flags = flags_low;
+	parent_tid_ptr = (int __user *) kca.parent_tid_ptr;
+	child_tid_ptr =  (int __user *) kca.child_tid_ptr;
+
+	stack_size = (unsigned long) kca.child_stack_size;
+	if (stack_size)
+		return -EINVAL;
+
+	child_stack = (unsigned long) kca.child_stack;
+	if (!child_stack)
+		child_stack = regs->gprs[15];
+
+	/*
+	 * TODO: On 32-bit systems, clone_flags is passed in as 32-bit value
+	 * 	 to several functions. Need to convert clone_flags to 64-bit.
+	 */
+	return do_fork_with_pids(flags, child_stack, regs, stack_size,
+				parent_tid_ptr, child_tid_ptr, kca.nr_pids,
+				pids);
+}
+
 /*
  * This is trivial, and on the face of it looks like it
  * could equally well be done in user mode.
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 30eca07..fb8708d 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -340,3 +340,4 @@ SYSCALL(sys_preadv,sys_preadv,compat_sys_preadv_wrapper)
 SYSCALL(sys_pwritev,sys_pwritev,compat_sys_pwritev_wrapper)
 SYSCALL(sys_rt_tgsigqueueinfo,sys_rt_tgsigqueueinfo,compat_sys_rt_tgsigqueueinfo_wrapper) /* 330 */
 SYSCALL(sys_perf_event_open,sys_perf_event_open,sys_perf_event_open_wrapper)
+SYSCALL(sys_eclone,sys_eclone,sys_eclone_wrapper)
-- 
1.6.1

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2009-11-25 19:24 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-11-10 16:58 [PATCH user-cr 1/2] use Suka's v11 api Serge E. Hallyn
     [not found] ` <20091110165839.GA19222-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-11-10 16:59   ` [PATCH user-cr 2/2] add nsexeccwp to test clone-with-pids Serge E. Hallyn
     [not found]     ` <20091110165922.GA19263-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-11-25 18:46       ` Oren Laadan
     [not found]         ` <4B0D7B87.5020504-eQaUEPhvms7ENvBUuze7eA@public.gmane.org>
2009-11-25 19:24           ` Serge E. Hallyn
  -- strict thread matches above, loose matches on Subject: below --
2009-11-13  5:24 [PATCH linux-cr] implement s390 eclone syscall serue-r/Jw6+rmf7HQT0dZR+AlfA
     [not found] ` <1258089886-10034-1-git-send-email-serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-11-13  5:24   ` [PATCH user-cr 2/2] add nsexeccwp to test clone-with-pids serue-r/Jw6+rmf7HQT0dZR+AlfA
     [not found]     ` <1258089886-10034-3-git-send-email-serue-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-11-13 21:08       ` Serge E. Hallyn
2009-11-15 22:45       ` Nathan Lynch
     [not found]         ` <1258325156.4031.3.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2009-11-16 11:12           ` Serge E. Hallyn
     [not found]             ` <20091116111249.GA32340-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-11-15 23:49               ` Nathan Lynch
     [not found]                 ` <1258328984.4031.21.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2009-11-16 18:26                   ` Serge E. Hallyn
     [not found]                     ` <20091116182655.GA3777-r/Jw6+rmf7HQT0dZR+AlfA@public.gmane.org>
2009-11-16 23:18                       ` Nathan Lynch
     [not found]                         ` <1258413522.4031.1036.camel-bi+AKbBUZKY6gyzm1THtWbp2dZbC/Bob@public.gmane.org>
2009-11-17  4:05                           ` Serge E. Hallyn
2009-11-16 14:45       ` Serge E. Hallyn

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.