Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v12 10/11] seccomp: allow mode setting across threads
From: Kees Cook @ 2014-07-17 18:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1405620518-18495-1-git-send-email-keescook@chromium.org>

This changes the mode setting helper to allow threads to change the
seccomp mode from another thread. We must maintain barriers to keep
TIF_SECCOMP synchronized with the rest of the seccomp state.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 kernel/seccomp.c |   36 +++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d5543e787e4e..9065d2c79c56 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -173,21 +173,24 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
  */
 static u32 seccomp_run_filters(int syscall)
 {
-	struct seccomp_filter *f;
+	struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
 	struct seccomp_data sd;
 	u32 ret = SECCOMP_RET_ALLOW;
 
 	/* Ensure unexpected behavior doesn't result in failing open. */
-	if (WARN_ON(current->seccomp.filter == NULL))
+	if (unlikely(WARN_ON(f == NULL)))
 		return SECCOMP_RET_KILL;
 
+	/* Make sure cross-thread synced filter points somewhere sane. */
+	smp_read_barrier_depends();
+
 	populate_seccomp_data(&sd);
 
 	/*
 	 * All filters in the list are evaluated and the lowest BPF return
 	 * value always takes priority (ignoring the DATA).
 	 */
-	for (f = current->seccomp.filter; f; f = f->prev) {
+	for (; f; f = f->prev) {
 		u32 cur_ret = SK_RUN_FILTER(f->prog, (void *)&sd);
 
 		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
@@ -207,12 +210,18 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 	return true;
 }
 
-static inline void seccomp_assign_mode(unsigned long seccomp_mode)
+static inline void seccomp_assign_mode(struct task_struct *task,
+				       unsigned long seccomp_mode)
 {
-	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+	BUG_ON(!spin_is_locked(&task->sighand->siglock));
 
-	current->seccomp.mode = seccomp_mode;
-	set_tsk_thread_flag(current, TIF_SECCOMP);
+	task->seccomp.mode = seccomp_mode;
+	/*
+	 * Make sure TIF_SECCOMP cannot be set before the mode (and
+	 * filter) is set.
+	 */
+	smp_mb__before_atomic();
+	set_tsk_thread_flag(task, TIF_SECCOMP);
 }
 
 #ifdef CONFIG_SECCOMP_FILTER
@@ -435,12 +444,17 @@ static int mode1_syscalls_32[] = {
 
 int __secure_computing(int this_syscall)
 {
-	int mode = current->seccomp.mode;
 	int exit_sig = 0;
 	int *syscall;
 	u32 ret;
 
-	switch (mode) {
+	/*
+	 * Make sure that any changes to mode from another thread have
+	 * been seen after TIF_SECCOMP was seen.
+	 */
+	rmb();
+
+	switch (current->seccomp.mode) {
 	case SECCOMP_MODE_STRICT:
 		syscall = mode1_syscalls;
 #ifdef CONFIG_COMPAT
@@ -545,7 +559,7 @@ static long seccomp_set_mode_strict(void)
 #ifdef TIF_NOTSC
 	disable_TSC();
 #endif
-	seccomp_assign_mode(seccomp_mode);
+	seccomp_assign_mode(current, seccomp_mode);
 	ret = 0;
 
 out:
@@ -595,7 +609,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	/* Do not free the successfully attached filter. */
 	prepared = NULL;
 
-	seccomp_assign_mode(seccomp_mode);
+	seccomp_assign_mode(current, seccomp_mode);
 out:
 	spin_unlock_irq(&current->sighand->siglock);
 	seccomp_filter_free(prepared);
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH v12 09/11] seccomp: introduce writer locking
From: Kees Cook @ 2014-07-17 18:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1405620518-18495-1-git-send-email-keescook@chromium.org>

Normally, task_struct.seccomp.filter is only ever read or modified by
the task that owns it (current). This property aids in fast access
during system call filtering as read access is lockless.

Updating the pointer from another task, however, opens up race
conditions. To allow cross-thread filter pointer updates, writes to the
seccomp fields are now protected by the sighand spinlock (which is shared
by all threads in the thread group). Read access remains lockless because
pointer updates themselves are atomic.  However, writes (or cloning)
often entail additional checking (like maximum instruction counts),
which requires locking to perform safely.

In the case of cloning threads, the child is invisible to the system
until it enters the task list. To make sure a child can't be cloned from
a thread and left in a prior state, seccomp duplication is additionally
moved under the sighand lock. Then parent and child are certain to have
the same seccomp state when they exit the lock.

Based on patches by Will Drewry and David Drysdale.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 include/linux/seccomp.h |    6 +++---
 kernel/fork.c           |   49 ++++++++++++++++++++++++++++++++++++++++++++++-
 kernel/seccomp.c        |   16 +++++++++++++++-
 3 files changed, 66 insertions(+), 5 deletions(-)

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 4054b0994071..9ff98b4bfe2e 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -14,11 +14,11 @@ struct seccomp_filter;
  *
  * @mode:  indicates one of the valid values above for controlled
  *         system calls available to a process.
- * @filter: The metadata and ruleset for determining what system calls
- *          are allowed for a task.
+ * @filter: must always point to a valid seccomp-filter or NULL as it is
+ *          accessed without locking during system call entry.
  *
  *          @filter must only be accessed from the context of current as there
- *          is no locking.
+ *          is no read locking.
  */
 struct seccomp {
 	int mode;
diff --git a/kernel/fork.c b/kernel/fork.c
index 6a13c46cd87d..ed4bc339c9dc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -315,6 +315,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 		goto free_ti;
 
 	tsk->stack = ti;
+#ifdef CONFIG_SECCOMP
+	/*
+	 * We must handle setting up seccomp filters once we're under
+	 * the sighand lock in case orig has changed between now and
+	 * then. Until then, filter must be NULL to avoid messing up
+	 * the usage counts on the error path calling free_task.
+	 */
+	tsk->seccomp.filter = NULL;
+#endif
 
 	setup_thread_stack(tsk, orig);
 	clear_user_return_notifier(tsk);
@@ -1081,6 +1090,39 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	return 0;
 }
 
+static void copy_seccomp(struct task_struct *p)
+{
+#ifdef CONFIG_SECCOMP
+	/*
+	 * Must be called with sighand->lock held, which is common to
+	 * all threads in the group. Holding cred_guard_mutex is not
+	 * needed because this new task is not yet running and cannot
+	 * be racing exec.
+	 */
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
+	/* Ref-count the new filter user, and assign it. */
+	get_seccomp_filter(current);
+	p->seccomp = current->seccomp;
+
+	/*
+	 * Explicitly enable no_new_privs here in case it got set
+	 * between the task_struct being duplicated and holding the
+	 * sighand lock. The seccomp state and nnp must be in sync.
+	 */
+	if (task_no_new_privs(current))
+		task_set_no_new_privs(p);
+
+	/*
+	 * If the parent gained a seccomp mode after copying thread
+	 * flags and before we held the sighand lock, we have
+	 * to manually enable the seccomp thread flag here.
+	 */
+	if (p->seccomp.mode != SECCOMP_MODE_DISABLED)
+		set_tsk_thread_flag(p, TIF_SECCOMP);
+#endif
+}
+
 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 {
 	current->clear_child_tid = tidptr;
@@ -1196,7 +1238,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 		goto fork_out;
 
 	ftrace_graph_init_task(p);
-	get_seccomp_filter(p);
 
 	rt_mutex_init_task(p);
 
@@ -1437,6 +1478,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	spin_lock(&current->sighand->siglock);
 
 	/*
+	 * Copy seccomp details explicitly here, in case they were changed
+	 * before holding sighand lock.
+	 */
+	copy_seccomp(p);
+
+	/*
 	 * Process group and session signals need to be delivered to just the
 	 * parent before the fork or both the parent and the child after the
 	 * fork. Restart if a signal comes in before we add the new process to
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 58125160417c..d5543e787e4e 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -199,6 +199,8 @@ static u32 seccomp_run_filters(int syscall)
 
 static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 {
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
 	if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
 		return false;
 
@@ -207,6 +209,8 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
 
 static inline void seccomp_assign_mode(unsigned long seccomp_mode)
 {
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
 	current->seccomp.mode = seccomp_mode;
 	set_tsk_thread_flag(current, TIF_SECCOMP);
 }
@@ -332,6 +336,8 @@ out:
  * @flags:  flags to change filter behavior
  * @filter: seccomp filter to add to the current process
  *
+ * Caller must be holding current->sighand->siglock lock.
+ *
  * Returns 0 on success, -ve on error.
  */
 static long seccomp_attach_filter(unsigned int flags,
@@ -340,6 +346,8 @@ static long seccomp_attach_filter(unsigned int flags,
 	unsigned long total_insns;
 	struct seccomp_filter *walker;
 
+	BUG_ON(!spin_is_locked(&current->sighand->siglock));
+
 	/* Validate resulting filter length. */
 	total_insns = filter->prog->len;
 	for (walker = current->seccomp.filter; walker; walker = walker->prev)
@@ -529,6 +537,8 @@ static long seccomp_set_mode_strict(void)
 	const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
 	long ret = -EINVAL;
 
+	spin_lock_irq(&current->sighand->siglock);
+
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
@@ -539,6 +549,7 @@ static long seccomp_set_mode_strict(void)
 	ret = 0;
 
 out:
+	spin_unlock_irq(&current->sighand->siglock);
 
 	return ret;
 }
@@ -566,13 +577,15 @@ static long seccomp_set_mode_filter(unsigned int flags,
 
 	/* Validate flags. */
 	if (flags != 0)
-		goto out;
+		return -EINVAL;
 
 	/* Prepare the new filter before holding any locks. */
 	prepared = seccomp_prepare_user_filter(filter);
 	if (IS_ERR(prepared))
 		return PTR_ERR(prepared);
 
+	spin_lock_irq(&current->sighand->siglock);
+
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
@@ -584,6 +597,7 @@ static long seccomp_set_mode_filter(unsigned int flags,
 
 	seccomp_assign_mode(seccomp_mode);
 out:
+	spin_unlock_irq(&current->sighand->siglock);
 	seccomp_filter_free(prepared);
 	return ret;
 }
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH v12 08/11] seccomp: split filter prep from check and apply
From: Kees Cook @ 2014-07-17 18:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1405620518-18495-1-git-send-email-keescook@chromium.org>

In preparation for adding seccomp locking, move filter creation away
from where it is checked and applied. This will allow for locking where
no memory allocation is happening. The validation, filter attachment,
and seccomp mode setting can all happen under the future locks.

For extreme defensiveness, I've added a BUG_ON check for the calculated
size of the buffer allocation in case BPF_MAXINSNS ever changes, which
shouldn't ever happen. The compiler should actually optimize out this
check since the test above it makes it impossible.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 kernel/seccomp.c |   97 +++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 67 insertions(+), 30 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d2596136b0d1..58125160417c 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -18,6 +18,7 @@
 #include <linux/compat.h>
 #include <linux/sched.h>
 #include <linux/seccomp.h>
+#include <linux/slab.h>
 #include <linux/syscalls.h>
 
 /* #define SECCOMP_DEBUG 1 */
@@ -27,7 +28,6 @@
 #include <linux/filter.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
-#include <linux/slab.h>
 #include <linux/tracehook.h>
 #include <linux/uaccess.h>
 
@@ -213,27 +213,23 @@ static inline void seccomp_assign_mode(unsigned long seccomp_mode)
 
 #ifdef CONFIG_SECCOMP_FILTER
 /**
- * seccomp_attach_filter: Attaches a seccomp filter to current.
+ * seccomp_prepare_filter: Prepares a seccomp filter for use.
  * @fprog: BPF program to install
  *
- * Returns 0 on success or an errno on failure.
+ * Returns filter on success or an ERR_PTR on failure.
  */
-static long seccomp_attach_filter(struct sock_fprog *fprog)
+static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
 {
 	struct seccomp_filter *filter;
-	unsigned long fp_size = fprog->len * sizeof(struct sock_filter);
-	unsigned long total_insns = fprog->len;
+	unsigned long fp_size;
 	struct sock_filter *fp;
 	int new_len;
 	long ret;
 
 	if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
-		return -EINVAL;
-
-	for (filter = current->seccomp.filter; filter; filter = filter->prev)
-		total_insns += filter->prog->len + 4;  /* include a 4 instr penalty */
-	if (total_insns > MAX_INSNS_PER_PATH)
-		return -ENOMEM;
+		return ERR_PTR(-EINVAL);
+	BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
+	fp_size = fprog->len * sizeof(struct sock_filter);
 
 	/*
 	 * Installing a seccomp filter requires that the task has
@@ -244,11 +240,11 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	if (!task_no_new_privs(current) &&
 	    security_capable_noaudit(current_cred(), current_user_ns(),
 				     CAP_SYS_ADMIN) != 0)
-		return -EACCES;
+		return ERR_PTR(-EACCES);
 
 	fp = kzalloc(fp_size, GFP_KERNEL|__GFP_NOWARN);
 	if (!fp)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	/* Copy the instructions from fprog. */
 	ret = -EFAULT;
@@ -292,13 +288,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 
 	sk_filter_select_runtime(filter->prog);
 
-	/*
-	 * If there is an existing filter, make it the prev and don't drop its
-	 * task reference.
-	 */
-	filter->prev = current->seccomp.filter;
-	current->seccomp.filter = filter;
-	return 0;
+	return filter;
 
 free_filter_prog:
 	kfree(filter->prog);
@@ -306,19 +296,20 @@ free_filter:
 	kfree(filter);
 free_prog:
 	kfree(fp);
-	return ret;
+	return ERR_PTR(ret);
 }
 
 /**
- * seccomp_attach_user_filter - attaches a user-supplied sock_fprog
+ * seccomp_prepare_user_filter - prepares a user-supplied sock_fprog
  * @user_filter: pointer to the user data containing a sock_fprog.
  *
  * Returns 0 on success and non-zero otherwise.
  */
-static long seccomp_attach_user_filter(const char __user *user_filter)
+static struct seccomp_filter *
+seccomp_prepare_user_filter(const char __user *user_filter)
 {
 	struct sock_fprog fprog;
-	long ret = -EFAULT;
+	struct seccomp_filter *filter = ERR_PTR(-EFAULT);
 
 #ifdef CONFIG_COMPAT
 	if (is_compat_task()) {
@@ -331,9 +322,39 @@ static long seccomp_attach_user_filter(const char __user *user_filter)
 #endif
 	if (copy_from_user(&fprog, user_filter, sizeof(fprog)))
 		goto out;
-	ret = seccomp_attach_filter(&fprog);
+	filter = seccomp_prepare_filter(&fprog);
 out:
-	return ret;
+	return filter;
+}
+
+/**
+ * seccomp_attach_filter: validate and attach filter
+ * @flags:  flags to change filter behavior
+ * @filter: seccomp filter to add to the current process
+ *
+ * Returns 0 on success, -ve on error.
+ */
+static long seccomp_attach_filter(unsigned int flags,
+				  struct seccomp_filter *filter)
+{
+	unsigned long total_insns;
+	struct seccomp_filter *walker;
+
+	/* Validate resulting filter length. */
+	total_insns = filter->prog->len;
+	for (walker = current->seccomp.filter; walker; walker = walker->prev)
+		total_insns += walker->prog->len + 4;  /* 4 instr penalty */
+	if (total_insns > MAX_INSNS_PER_PATH)
+		return -ENOMEM;
+
+	/*
+	 * If there is an existing filter, make it the prev and don't drop its
+	 * task reference.
+	 */
+	filter->prev = current->seccomp.filter;
+	current->seccomp.filter = filter;
+
+	return 0;
 }
 
 /* get_seccomp_filter - increments the reference count of the filter on @tsk */
@@ -346,6 +367,14 @@ void get_seccomp_filter(struct task_struct *tsk)
 	atomic_inc(&orig->usage);
 }
 
+static inline void seccomp_filter_free(struct seccomp_filter *filter)
+{
+	if (filter) {
+		sk_filter_free(filter->prog);
+		kfree(filter);
+	}
+}
+
 /* put_seccomp_filter - decrements the ref count of tsk->seccomp.filter */
 void put_seccomp_filter(struct task_struct *tsk)
 {
@@ -354,8 +383,7 @@ void put_seccomp_filter(struct task_struct *tsk)
 	while (orig && atomic_dec_and_test(&orig->usage)) {
 		struct seccomp_filter *freeme = orig;
 		orig = orig->prev;
-		sk_filter_free(freeme->prog);
-		kfree(freeme);
+		seccomp_filter_free(freeme);
 	}
 }
 
@@ -533,21 +561,30 @@ static long seccomp_set_mode_filter(unsigned int flags,
 				    const char __user *filter)
 {
 	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
+	struct seccomp_filter *prepared = NULL;
 	long ret = -EINVAL;
 
 	/* Validate flags. */
 	if (flags != 0)
 		goto out;
 
+	/* Prepare the new filter before holding any locks. */
+	prepared = seccomp_prepare_user_filter(filter);
+	if (IS_ERR(prepared))
+		return PTR_ERR(prepared);
+
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
-	ret = seccomp_attach_user_filter(filter);
+	ret = seccomp_attach_filter(flags, prepared);
 	if (ret)
 		goto out;
+	/* Do not free the successfully attached filter. */
+	prepared = NULL;
 
 	seccomp_assign_mode(seccomp_mode);
 out:
+	seccomp_filter_free(prepared);
 	return ret;
 }
 #else
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH v12 07/11] sched: move no_new_privs into new atomic flags
From: Kees Cook @ 2014-07-17 18:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1405620518-18495-1-git-send-email-keescook@chromium.org>

Since seccomp transitions between threads require updates to the
no_new_privs flag to be atomic, the flag must be part of an atomic flag
set. This moves the nnp flag into a separate task field, and introduces
accessors.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 fs/exec.c                  |    4 ++--
 include/linux/sched.h      |   18 +++++++++++++++---
 kernel/seccomp.c           |    2 +-
 kernel/sys.c               |    4 ++--
 security/apparmor/domain.c |    4 ++--
 5 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index a3d33fe592d6..0f5c272410f6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1234,7 +1234,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
 	 * This isn't strictly necessary, but it makes it harder for LSMs to
 	 * mess up.
 	 */
-	if (current->no_new_privs)
+	if (task_no_new_privs(current))
 		bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
 
 	t = p;
@@ -1272,7 +1272,7 @@ int prepare_binprm(struct linux_binprm *bprm)
 	bprm->cred->egid = current_egid();
 
 	if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
-	    !current->no_new_privs &&
+	    !task_no_new_privs(current) &&
 	    kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
 	    kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
 		/* Set-uid? */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0376b054a0d0..45cec6b70eaf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1307,13 +1307,12 @@ struct task_struct {
 				 * execve */
 	unsigned in_iowait:1;
 
-	/* task may not gain privileges */
-	unsigned no_new_privs:1;
-
 	/* Revert to default priority/policy when forking */
 	unsigned sched_reset_on_fork:1;
 	unsigned sched_contributes_to_load:1;
 
+	unsigned long atomic_flags; /* Flags needing atomic access. */
+
 	pid_t pid;
 	pid_t tgid;
 
@@ -1967,6 +1966,19 @@ static inline void memalloc_noio_restore(unsigned int flags)
 	current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags;
 }
 
+/* Per-process atomic flags. */
+#define PFA_NO_NEW_PRIVS 0x00000001	/* May not gain new privileges. */
+
+static inline bool task_no_new_privs(struct task_struct *p)
+{
+	return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
+}
+
+static inline void task_set_no_new_privs(struct task_struct *p)
+{
+	set_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags);
+}
+
 /*
  * task->jobctl flags
  */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index f0652578af75..d2596136b0d1 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -241,7 +241,7 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	 * This avoids scenarios where unprivileged tasks can affect the
 	 * behavior of privileged children.
 	 */
-	if (!current->no_new_privs &&
+	if (!task_no_new_privs(current) &&
 	    security_capable_noaudit(current_cred(), current_user_ns(),
 				     CAP_SYS_ADMIN) != 0)
 		return -EACCES;
diff --git a/kernel/sys.c b/kernel/sys.c
index 66a751ebf9d9..ce8129192a26 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1990,12 +1990,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		if (arg2 != 1 || arg3 || arg4 || arg5)
 			return -EINVAL;
 
-		current->no_new_privs = 1;
+		task_set_no_new_privs(current);
 		break;
 	case PR_GET_NO_NEW_PRIVS:
 		if (arg2 || arg3 || arg4 || arg5)
 			return -EINVAL;
-		return current->no_new_privs ? 1 : 0;
+		return task_no_new_privs(current) ? 1 : 0;
 	case PR_GET_THP_DISABLE:
 		if (arg2 || arg3 || arg4 || arg5)
 			return -EINVAL;
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 452567d3a08e..d97cba3e3849 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -621,7 +621,7 @@ int aa_change_hat(const char *hats[], int count, u64 token, bool permtest)
 	 * There is no exception for unconfined as change_hat is not
 	 * available.
 	 */
-	if (current->no_new_privs)
+	if (task_no_new_privs(current))
 		return -EPERM;
 
 	/* released below */
@@ -776,7 +776,7 @@ int aa_change_profile(const char *ns_name, const char *hname, bool onexec,
 	 * no_new_privs is set because this aways results in a reduction
 	 * of permissions.
 	 */
-	if (current->no_new_privs && !unconfined(profile)) {
+	if (task_no_new_privs(current) && !unconfined(profile)) {
 		put_cred(cred);
 		return -EPERM;
 	}
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH v12 06/11] MIPS: add seccomp syscall
From: Kees Cook @ 2014-07-17 18:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1405620518-18495-1-git-send-email-keescook@chromium.org>

Wires up the new seccomp syscall.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/mips/include/uapi/asm/unistd.h |   15 +++++++++------
 arch/mips/kernel/scall32-o32.S      |    1 +
 arch/mips/kernel/scall64-64.S       |    1 +
 arch/mips/kernel/scall64-n32.S      |    1 +
 arch/mips/kernel/scall64-o32.S      |    1 +
 5 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/mips/include/uapi/asm/unistd.h b/arch/mips/include/uapi/asm/unistd.h
index 5805414777e0..9bc13eaf9d67 100644
--- a/arch/mips/include/uapi/asm/unistd.h
+++ b/arch/mips/include/uapi/asm/unistd.h
@@ -372,16 +372,17 @@
 #define __NR_sched_setattr		(__NR_Linux + 349)
 #define __NR_sched_getattr		(__NR_Linux + 350)
 #define __NR_renameat2			(__NR_Linux + 351)
+#define __NR_seccomp			(__NR_Linux + 352)
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls		351
+#define __NR_Linux_syscalls		352
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux			4000
-#define __NR_O32_Linux_syscalls		351
+#define __NR_O32_Linux_syscalls		352
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
@@ -701,16 +702,17 @@
 #define __NR_sched_setattr		(__NR_Linux + 309)
 #define __NR_sched_getattr		(__NR_Linux + 310)
 #define __NR_renameat2			(__NR_Linux + 311)
+#define __NR_seccomp			(__NR_Linux + 312)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls		311
+#define __NR_Linux_syscalls		312
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux			5000
-#define __NR_64_Linux_syscalls		311
+#define __NR_64_Linux_syscalls		312
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
@@ -1034,15 +1036,16 @@
 #define __NR_sched_setattr		(__NR_Linux + 313)
 #define __NR_sched_getattr		(__NR_Linux + 314)
 #define __NR_renameat2			(__NR_Linux + 315)
+#define __NR_seccomp			(__NR_Linux + 316)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls		315
+#define __NR_Linux_syscalls		316
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux			6000
-#define __NR_N32_Linux_syscalls		315
+#define __NR_N32_Linux_syscalls		316
 
 #endif /* _UAPI_ASM_UNISTD_H */
diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S
index 3245474f19d5..ab02d14f1b5c 100644
--- a/arch/mips/kernel/scall32-o32.S
+++ b/arch/mips/kernel/scall32-o32.S
@@ -578,3 +578,4 @@ EXPORT(sys_call_table)
 	PTR	sys_sched_setattr
 	PTR	sys_sched_getattr		/* 4350 */
 	PTR	sys_renameat2
+	PTR	sys_seccomp
diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S
index be2fedd4ae33..010dccf128ec 100644
--- a/arch/mips/kernel/scall64-64.S
+++ b/arch/mips/kernel/scall64-64.S
@@ -431,4 +431,5 @@ EXPORT(sys_call_table)
 	PTR	sys_sched_setattr
 	PTR	sys_sched_getattr		/* 5310 */
 	PTR	sys_renameat2
+	PTR	sys_seccomp
 	.size	sys_call_table,.-sys_call_table
diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S
index c1dbcda4b816..c3b3b6525df5 100644
--- a/arch/mips/kernel/scall64-n32.S
+++ b/arch/mips/kernel/scall64-n32.S
@@ -424,4 +424,5 @@ EXPORT(sysn32_call_table)
 	PTR	sys_sched_setattr
 	PTR	sys_sched_getattr
 	PTR	sys_renameat2			/* 6315 */
+	PTR	sys_seccomp
 	.size	sysn32_call_table,.-sysn32_call_table
diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S
index f1343ccd7ed7..bb1550b1f501 100644
--- a/arch/mips/kernel/scall64-o32.S
+++ b/arch/mips/kernel/scall64-o32.S
@@ -557,4 +557,5 @@ EXPORT(sys32_call_table)
 	PTR	sys_sched_setattr
 	PTR	sys_sched_getattr		/* 4350 */
 	PTR	sys_renameat2
+	PTR	sys_seccomp
 	.size	sys32_call_table,.-sys32_call_table
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH v12 05/11] ARM: add seccomp syscall
From: Kees Cook @ 2014-07-17 18:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1405620518-18495-1-git-send-email-keescook@chromium.org>

Wires up the new seccomp syscall.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/arm/include/uapi/asm/unistd.h |    1 +
 arch/arm/kernel/calls.S            |    1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h
index ba94446c72d9..e21b4a069701 100644
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -409,6 +409,7 @@
 #define __NR_sched_setattr		(__NR_SYSCALL_BASE+380)
 #define __NR_sched_getattr		(__NR_SYSCALL_BASE+381)
 #define __NR_renameat2			(__NR_SYSCALL_BASE+382)
+#define __NR_seccomp			(__NR_SYSCALL_BASE+383)
 
 /*
  * This may need to be greater than __NR_last_syscall+1 in order to
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S
index 8f51bdcdacbb..bea85f97f363 100644
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -392,6 +392,7 @@
 /* 380 */	CALL(sys_sched_setattr)
 		CALL(sys_sched_getattr)
 		CALL(sys_renameat2)
+		CALL(sys_seccomp)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
 #define syscalls_counted
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH v12 04/11] seccomp: add "seccomp" syscall
From: Kees Cook @ 2014-07-17 18:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1405620518-18495-1-git-send-email-keescook@chromium.org>

This adds the new "seccomp" syscall with both an "operation" and "flags"
parameter for future expansion. The third argument is a pointer value,
used with the SECCOMP_SET_MODE_FILTER operation. Currently, flags must
be 0. This is functionally equivalent to prctl(PR_SET_SECCOMP, ...).

In addition to the TSYNC flag later in this patch series, there is a
non-zero chance that this syscall could be used for configuring a fixed
argument area for seccomp-tracer-aware processes to pass syscall arguments
in the future. Hence, the use of "seccomp" not simply "seccomp_add_filter"
for this syscall. Additionally, this syscall uses operation, flags,
and user pointer for arguments because strictly passing arguments via
a user pointer would mean seccomp itself would be unable to trivially
filter the seccomp syscall itself.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 arch/Kconfig                      |    1 +
 arch/x86/syscalls/syscall_32.tbl  |    1 +
 arch/x86/syscalls/syscall_64.tbl  |    1 +
 include/linux/syscalls.h          |    2 ++
 include/uapi/asm-generic/unistd.h |    4 ++-
 include/uapi/linux/seccomp.h      |    4 +++
 kernel/seccomp.c                  |   55 +++++++++++++++++++++++++++++++++----
 kernel/sys_ni.c                   |    3 ++
 8 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 97ff872c7acc..0eae9df35b88 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -321,6 +321,7 @@ config HAVE_ARCH_SECCOMP_FILTER
 	  - secure_computing is called from a ptrace_event()-safe context
 	  - secure_computing return value is checked and a return value of -1
 	    results in the system call being skipped immediately.
+	  - seccomp syscall wired up
 
 config SECCOMP_FILTER
 	def_bool y
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index d6b867921612..7527eac24122 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -360,3 +360,4 @@
 351	i386	sched_setattr		sys_sched_setattr
 352	i386	sched_getattr		sys_sched_getattr
 353	i386	renameat2		sys_renameat2
+354	i386	seccomp			sys_seccomp
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index ec255a1646d2..16272a6c12b7 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -323,6 +323,7 @@
 314	common	sched_setattr		sys_sched_setattr
 315	common	sched_getattr		sys_sched_getattr
 316	common	renameat2		sys_renameat2
+317	common	seccomp			sys_seccomp
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index b0881a0ed322..1713977ee26f 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -866,4 +866,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
 			 unsigned long idx1, unsigned long idx2);
 asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags);
+asmlinkage long sys_seccomp(unsigned int op, unsigned int flags,
+			    const char __user *uargs);
 #endif
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 333640608087..65acbf0e2867 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -699,9 +699,11 @@ __SYSCALL(__NR_sched_setattr, sys_sched_setattr)
 __SYSCALL(__NR_sched_getattr, sys_sched_getattr)
 #define __NR_renameat2 276
 __SYSCALL(__NR_renameat2, sys_renameat2)
+#define __NR_seccomp 277
+__SYSCALL(__NR_seccomp, sys_seccomp)
 
 #undef __NR_syscalls
-#define __NR_syscalls 277
+#define __NR_syscalls 278
 
 /*
  * All syscalls below here should go away really,
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index ac2dc9f72973..b258878ba754 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -10,6 +10,10 @@
 #define SECCOMP_MODE_STRICT	1 /* uses hard-coded filter. */
 #define SECCOMP_MODE_FILTER	2 /* uses user-supplied filter. */
 
+/* Valid operations for seccomp syscall. */
+#define SECCOMP_SET_MODE_STRICT	0
+#define SECCOMP_SET_MODE_FILTER	1
+
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 05cac2c2eca1..f0652578af75 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -18,6 +18,7 @@
 #include <linux/compat.h>
 #include <linux/sched.h>
 #include <linux/seccomp.h>
+#include <linux/syscalls.h>
 
 /* #define SECCOMP_DEBUG 1 */
 
@@ -314,7 +315,7 @@ free_prog:
  *
  * Returns 0 on success and non-zero otherwise.
  */
-static long seccomp_attach_user_filter(char __user *user_filter)
+static long seccomp_attach_user_filter(const char __user *user_filter)
 {
 	struct sock_fprog fprog;
 	long ret = -EFAULT;
@@ -517,6 +518,7 @@ out:
 #ifdef CONFIG_SECCOMP_FILTER
 /**
  * seccomp_set_mode_filter: internal function for setting seccomp filter
+ * @flags:  flags to change filter behavior
  * @filter: struct sock_fprog containing filter
  *
  * This function may be called repeatedly to install additional filters.
@@ -527,11 +529,16 @@ out:
  *
  * Returns 0 on success or -EINVAL on failure.
  */
-static long seccomp_set_mode_filter(char __user *filter)
+static long seccomp_set_mode_filter(unsigned int flags,
+				    const char __user *filter)
 {
 	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
 	long ret = -EINVAL;
 
+	/* Validate flags. */
+	if (flags != 0)
+		goto out;
+
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
@@ -544,12 +551,35 @@ out:
 	return ret;
 }
 #else
-static inline long seccomp_set_mode_filter(char __user *filter)
+static inline long seccomp_set_mode_filter(unsigned int flags,
+					   const char __user *filter)
 {
 	return -EINVAL;
 }
 #endif
 
+/* Common entry point for both prctl and syscall. */
+static long do_seccomp(unsigned int op, unsigned int flags,
+		       const char __user *uargs)
+{
+	switch (op) {
+	case SECCOMP_SET_MODE_STRICT:
+		if (flags != 0 || uargs != NULL)
+			return -EINVAL;
+		return seccomp_set_mode_strict();
+	case SECCOMP_SET_MODE_FILTER:
+		return seccomp_set_mode_filter(flags, uargs);
+	default:
+		return -EINVAL;
+	}
+}
+
+SYSCALL_DEFINE3(seccomp, unsigned int, op, unsigned int, flags,
+			 const char __user *, uargs)
+{
+	return do_seccomp(op, flags, uargs);
+}
+
 /**
  * prctl_set_seccomp: configures current->seccomp.mode
  * @seccomp_mode: requested mode to use
@@ -559,12 +589,27 @@ static inline long seccomp_set_mode_filter(char __user *filter)
  */
 long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
 {
+	unsigned int op;
+	char __user *uargs;
+
 	switch (seccomp_mode) {
 	case SECCOMP_MODE_STRICT:
-		return seccomp_set_mode_strict();
+		op = SECCOMP_SET_MODE_STRICT;
+		/*
+		 * Setting strict mode through prctl always ignored filter,
+		 * so make sure it is always NULL here to pass the internal
+		 * check in do_seccomp().
+		 */
+		uargs = NULL;
+		break;
 	case SECCOMP_MODE_FILTER:
-		return seccomp_set_mode_filter(filter);
+		op = SECCOMP_SET_MODE_FILTER;
+		uargs = filter;
+		break;
 	default:
 		return -EINVAL;
 	}
+
+	/* prctl interface doesn't have flags, so they are always zero. */
+	return do_seccomp(op, 0, uargs);
 }
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 36441b51b5df..2904a2105914 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -213,3 +213,6 @@ cond_syscall(compat_sys_open_by_handle_at);
 
 /* compare kernel pointers */
 cond_syscall(sys_kcmp);
+
+/* operate on Secure Computing state */
+cond_syscall(sys_seccomp);
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH v12 03/11] seccomp: split mode setting routines
From: Kees Cook @ 2014-07-17 18:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1405620518-18495-1-git-send-email-keescook@chromium.org>

Separates the two mode setting paths to make things more readable with
fewer #ifdefs within function bodies.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 kernel/seccomp.c |   71 ++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 48 insertions(+), 23 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 9df7def86c3b..05cac2c2eca1 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -489,48 +489,66 @@ long prctl_get_seccomp(void)
 }
 
 /**
- * seccomp_set_mode: internal function for setting seccomp mode
- * @seccomp_mode: requested mode to use
- * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
- *
- * This function may be called repeatedly with a @seccomp_mode of
- * SECCOMP_MODE_FILTER to install additional filters.  Every filter
- * successfully installed will be evaluated (in reverse order) for each system
- * call the task makes.
+ * seccomp_set_mode_strict: internal function for setting strict seccomp
  *
  * Once current->seccomp.mode is non-zero, it may not be changed.
  *
  * Returns 0 on success or -EINVAL on failure.
  */
-static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter)
+static long seccomp_set_mode_strict(void)
 {
+	const unsigned long seccomp_mode = SECCOMP_MODE_STRICT;
 	long ret = -EINVAL;
 
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
-	switch (seccomp_mode) {
-	case SECCOMP_MODE_STRICT:
-		ret = 0;
 #ifdef TIF_NOTSC
-		disable_TSC();
+	disable_TSC();
 #endif
-		break;
+	seccomp_assign_mode(seccomp_mode);
+	ret = 0;
+
+out:
+
+	return ret;
+}
+
 #ifdef CONFIG_SECCOMP_FILTER
-	case SECCOMP_MODE_FILTER:
-		ret = seccomp_attach_user_filter(filter);
-		if (ret)
-			goto out;
-		break;
-#endif
-	default:
+/**
+ * seccomp_set_mode_filter: internal function for setting seccomp filter
+ * @filter: struct sock_fprog containing filter
+ *
+ * This function may be called repeatedly to install additional filters.
+ * Every filter successfully installed will be evaluated (in reverse order)
+ * for each system call the task makes.
+ *
+ * Once current->seccomp.mode is non-zero, it may not be changed.
+ *
+ * Returns 0 on success or -EINVAL on failure.
+ */
+static long seccomp_set_mode_filter(char __user *filter)
+{
+	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
+	long ret = -EINVAL;
+
+	if (!seccomp_may_assign_mode(seccomp_mode))
+		goto out;
+
+	ret = seccomp_attach_user_filter(filter);
+	if (ret)
 		goto out;
-	}
 
 	seccomp_assign_mode(seccomp_mode);
 out:
 	return ret;
 }
+#else
+static inline long seccomp_set_mode_filter(char __user *filter)
+{
+	return -EINVAL;
+}
+#endif
 
 /**
  * prctl_set_seccomp: configures current->seccomp.mode
@@ -541,5 +559,12 @@ out:
  */
 long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
 {
-	return seccomp_set_mode(seccomp_mode, filter);
+	switch (seccomp_mode) {
+	case SECCOMP_MODE_STRICT:
+		return seccomp_set_mode_strict();
+	case SECCOMP_MODE_FILTER:
+		return seccomp_set_mode_filter(filter);
+	default:
+		return -EINVAL;
+	}
 }
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH v12 02/11] seccomp: extract check/assign mode helpers
From: Kees Cook @ 2014-07-17 18:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1405620518-18495-1-git-send-email-keescook@chromium.org>

To support splitting mode 1 from mode 2, extract the mode checking and
assignment logic into common functions.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 kernel/seccomp.c |   22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index afb916c7e890..9df7def86c3b 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -194,7 +194,23 @@ static u32 seccomp_run_filters(int syscall)
 	}
 	return ret;
 }
+#endif /* CONFIG_SECCOMP_FILTER */
 
+static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
+{
+	if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
+		return false;
+
+	return true;
+}
+
+static inline void seccomp_assign_mode(unsigned long seccomp_mode)
+{
+	current->seccomp.mode = seccomp_mode;
+	set_tsk_thread_flag(current, TIF_SECCOMP);
+}
+
+#ifdef CONFIG_SECCOMP_FILTER
 /**
  * seccomp_attach_filter: Attaches a seccomp filter to current.
  * @fprog: BPF program to install
@@ -490,8 +506,7 @@ static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter)
 {
 	long ret = -EINVAL;
 
-	if (current->seccomp.mode &&
-	    current->seccomp.mode != seccomp_mode)
+	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
 	switch (seccomp_mode) {
@@ -512,8 +527,7 @@ static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter)
 		goto out;
 	}
 
-	current->seccomp.mode = seccomp_mode;
-	set_thread_flag(TIF_SECCOMP);
+	seccomp_assign_mode(seccomp_mode);
 out:
 	return ret;
 }
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH v12 01/11] seccomp: create internal mode-setting function
From: Kees Cook @ 2014-07-17 18:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1405620518-18495-1-git-send-email-keescook@chromium.org>

In preparation for having other callers of the seccomp mode setting
logic, split the prctl entry point away from the core logic that performs
seccomp mode setting.

Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Andy Lutomirski <luto@amacapital.net>
---
 kernel/seccomp.c |   16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 301bbc24739c..afb916c7e890 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -473,7 +473,7 @@ long prctl_get_seccomp(void)
 }
 
 /**
- * prctl_set_seccomp: configures current->seccomp.mode
+ * seccomp_set_mode: internal function for setting seccomp mode
  * @seccomp_mode: requested mode to use
  * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
  *
@@ -486,7 +486,7 @@ long prctl_get_seccomp(void)
  *
  * Returns 0 on success or -EINVAL on failure.
  */
-long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
+static long seccomp_set_mode(unsigned long seccomp_mode, char __user *filter)
 {
 	long ret = -EINVAL;
 
@@ -517,3 +517,15 @@ long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
 out:
 	return ret;
 }
+
+/**
+ * prctl_set_seccomp: configures current->seccomp.mode
+ * @seccomp_mode: requested mode to use
+ * @filter: optional struct sock_fprog for use with SECCOMP_MODE_FILTER
+ *
+ * Returns 0 on success or -EINVAL on failure.
+ */
+long prctl_set_seccomp(unsigned long seccomp_mode, char __user *filter)
+{
+	return seccomp_set_mode(seccomp_mode, filter);
+}
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH v12 11/11] seccomp: add thread sync ability
From: Kees Cook @ 2014-07-17 18:08 UTC (permalink / raw)
  To: linux-arm-kernel

Twelfth time's the charm! :)

This adds the ability for threads to request seccomp filter
synchronization across their thread group (at filter attach time).
For example, for Chrome to make sure graphic driver threads are fully
confined after seccomp filters have been attached.

To support this, locking on seccomp changes via thread-group-shared
sighand lock is introduced, along with refactoring of no_new_privs. Races
with thread creation are handled via delayed duplication of the seccomp
task struct field and cred_guard_mutex.

This includes a new syscall (instead of adding a new prctl option),
as suggested by Andy Lutomirski and Michael Kerrisk.

Thanks!

-Kees

v12:
 - fixed bug where initial filter wouldn't allow TSYNC flag (drysdale)
 - optimized thread loops (drysdale)
v11:
 - updated writer locking commit log for clarity (luto)
 - clarified writer lock thread flag setting comment (luto)
 - inverted SECCOMP_FILTER_FLAG_MASK (luto)
 - renamed is_ancestor parameter (luto)
 - added BUG_ON to catch currently impossible integer overflow (luto)
v10:
 - dropped pending-kill checks (oleg)
 - tweaked memory barriers (oleg)
v9:
 - rearranged/split patches to make things more reviewable
 - added use of cred_guard_mutex to solve exec race (oleg, luto)
 - added barriers for TIF_SECCOMP vs seccomp.mode race (oleg, luto)
 - fixed missed copying of nnp state after v8 refactor (oleg)
v8:
 - drop use of tasklist_lock, appears redundant against sighand (oleg)
 - reduced use of smp_load_acquire to logical minimum (oleg)
 - change nnp to a task struct held atomic flags field (oleg, luto)
 - drop needless irqflags changes in fork.c for holding sighand lock (oleg)
 - cleaned up use of thread for-each loop (oleg)
 - rearranged patch order to keep syscall changes adjacent
 - added example code to manpage (mtk)
v7:
 - rebase on Linus's tree (merged with network bpf changes)
 - wrote manpage text documenting API (follows this series)
v6:
 - switch from seccomp-specific lock to thread-group lock to gain atomicity
 - implement seccomp syscall across all architectures with seccomp filter
 - clean up sparse warnings around locking
v5:
 - move includes around (drysdale)
 - drop set_nnp return value (luto)
 - use smp_load_acquire/store_release (luto)
 - merge nnp changes to seccomp always, fewer ifdef (luto)
v4:
 - cleaned up locking further, as noticed by David Drysdale
v3:
 - added SECCOMP_EXT_ACT_FILTER for new filter install options
v2:
 - reworked to avoid clone races

^ permalink raw reply

* [PATCH v11 11/11] seccomp: implement SECCOMP_FILTER_FLAG_TSYNC
From: Kees Cook @ 2014-07-17 17:52 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <CAGXu5j+dFZdnnK8f-HRrUs2vLeyhWyHh_AY-OynDcp-Ye+dy7Q@mail.gmail.com>

On Thu, Jul 17, 2014 at 8:45 AM, Kees Cook <keescook@chromium.org> wrote:
> On Thu, Jul 17, 2014 at 8:04 AM, David Drysdale <drysdale@google.com> wrote:
>> On Wed, Jul 16, 2014 at 10:50 PM, Kees Cook <keescook@chromium.org> wrote:
>>> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
>>> index 9065d2c79c56..2125b83ccfd4 100644
>>> +/**
>>> + * seccomp_can_sync_threads: checks if all threads can be synchronized
>>> + *
>>> + * Expects sighand and cred_guard_mutex locks to be held.
>>> + *
>>> + * Returns 0 on success, -ve on error, or the pid of a thread which was
>>> + * either not in the correct seccomp mode or it did not have an ancestral
>>> + * seccomp filter.
>>> + */
>>> +static inline pid_t seccomp_can_sync_threads(void)
>>> +{
>>> +       struct task_struct *thread, *caller;
>>> +
>>> +       BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
>>> +       BUG_ON(!spin_is_locked(&current->sighand->siglock));
>>> +
>>> +       if (current->seccomp.mode != SECCOMP_MODE_FILTER)
>>> +               return -EACCES;
>>
>> Quick question -- is it possible to apply the first filter and also synchronize
>> it across threads in the same operation?  If so, does this arm also need to
>> cope with seccomp.mode being SECCOMP_MODE_DISABLED?
>>
>> [seccomp_set_mode_filter() looks to call this via seccomp_attach_filter()
>> before it does seccomp_assign_mode()]
>
> I don't entirely understand what you're asking. The threads gain the
> filter and the mode before the current thread may gain the mode (if
> it's the first time this has been called). Due to all the locks,
> though, this isn't a problem. Is there a situation you see where there
> might be a problem?

Just to follow up for posterity on lkml: the problem was that mode was
being set in "current" _after_ sync, so the mode check in can_sync
would fail if "current" was not yet in filter mode. (i.e. the first
attached filter could not have the TSYNC flag.) This check was
redundant with the attach_filter entry point checks, and protected
nothing, so it has been removed and a new test added to the seccomp
regression test suite. :)

I sent it as a new patch on top of v11, instead of respinning
everything as v12. If that's not preferred, I can send v12 with this
fix incorporated.

Thanks!

-Kees

-- 
Kees Cook
Chrome OS Security

^ permalink raw reply

* [GIT PULL 1/3] ARM: tegra: rework PCIe regulators
From: Olof Johansson @ 2014-07-17 17:52 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20140717142037.GA17494@ulmo>

On Thu, Jul 17, 2014 at 7:20 AM, Thierry Reding
<thierry.reding@gmail.com> wrote:
> On Thu, Jul 10, 2014 at 12:15:28PM +0200, Thierry Reding wrote:
>> On Mon, Jul 07, 2014 at 09:45:46PM -0700, Olof Johansson wrote:
> [...]
>> > If you have to stay compatible, then I suggest you try to fill in
>> > local driver variables with derivatives of the old properties (and
>> > directly from the newer properties where you can). I haven't looked at
>> > the specifics here so I don't know how hard it might be.
>> >
>> > If you are 100% sure that you don't have to stay compatible, then you
>> > can remove the code handling the old bindings. Still, even then I am a
>> > little worried about dependencies (and more importantly conflicts)
>> > between these dtsi changes and others done by tegra platform code for
>> > this release. I suppose that can be resolved by having this as a base
>> > of any DT changes for tegra if needed.
>>
>> To be honest, I'm very much tempted to just drop this series. Even if
>> that means keeping a totally broken DT binding. But frankly I don't have
>> any energy left to debate DT stability.
>
> So this kept bugging me and I couldn't leave it alone after all. How
> about if I squash in the attached patch. I've verified that that keeps
> compatibility with old device trees on TrimSlice and Beaver. I think the
> remainder of the series could still remain as-is (the top few commits
> that you said shouldn't be there) if I squash this into
>
>         PCI: tegra: Implement accurate power supply scheme
>
> That way the binding will be the new one so that people don't get any
> wrong ideas about taking shortcuts while still preserving compatibility
> with existing DTBs.

Taking a quick look at the patch, it looks sane to me and pretty much
exactly what I would expect the code to look like w.r.t. handling old
bindings.

> Interestingly, despite my initial disgust for having to keep around old
> code (it's in fact new code in this case) for compatibility reasons, it
> ended up making the code look more mature.

I'm glad it went that way. It shouldn't be _too_ bad to keep the
compatibility with old bindings in the code like this in most cases,
it's mostly a matter of filling in the newer structures like you did
in this patch.


-Olof

^ permalink raw reply

* [GIT PULL] Keystone DTS update for 3.17
From: Santosh Shilimkar @ 2014-07-17 17:41 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Arm-soc folks,

Please pull below keystone DTS updates for 3.17.

The following changes since commit 7171511eaec5bf23fb06078f59784a3a0626b38f:

  Linux 3.16-rc1 (2014-06-15 17:45:28 -1000)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git tags/keystone-dts

for you to fetch changes up to 6592f671a4e6ee6308ddd67c2459dd71836770df:

  ARM: dts: keystone-evm: add 1g ethernet phys nodes (2014-07-17 13:29:06 -0400)

----------------------------------------------------------------
Keystone DTS update for 3.17
 - Ethernet clock tree fix.
 - MDIO device tree node. Respective driver update is already queued.
 - Ethernet phy node. Respective driver update is already queued.

----------------------------------------------------------------
Grygorii Strashko (3):
      ARM: dts: keystone: fix netcp's clocks definitions
      ARM: dts: keystone: add mdio devices entries
      ARM: dts: keystone-evm: add 1g ethernet phys nodes

 arch/arm/boot/dts/k2e-clocks.dtsi      |    2 +-
 arch/arm/boot/dts/k2hk-clocks.dtsi     |    2 +-
 arch/arm/boot/dts/k2hk-evm.dts         |   12 ++++++++++++
 arch/arm/boot/dts/k2l-clocks.dtsi      |    2 +-
 arch/arm/boot/dts/keystone-clocks.dtsi |    2 +-
 arch/arm/boot/dts/keystone.dtsi        |   11 +++++++++++
 6 files changed, 27 insertions(+), 4 deletions(-)

^ permalink raw reply

* [PATCH v4 2/3] soc: qcom-rpm: Driver for the Qualcomm RPM
From: pramod gurav @ 2014-07-17 17:33 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1405465241-28684-3-git-send-email-bjorn.andersson@sonymobile.com>

Hi Bjorn,

On Wed, Jul 16, 2014 at 4:30 AM, Bjorn Andersson
<bjorn.andersson@sonymobile.com> wrote:
> Driver for the Resource Power Manager (RPM) found in Qualcomm 8660, 8960
> and 8064 based devices. The driver exposes resources that child drivers
> can operate on, to implement regulator, clock and bus frequency
> drivers.
>

[snip]

> +       }
> +
> +       ret = irq_set_irq_wake(irq_ack, 1);

This call fails and throws an error on my ifc6410 with 3.16-rc5.
Does this driver depend on pinctrl? It looks like the DT support for the
pinctrl driver is missing from the apq8064 dts in mainline.
Is that right?

> +       if (ret)
> +               dev_warn(&pdev->dev, "failed to mark ack irq as wakeup\n");
> +

[snip]

> +               return ret;
> +       }
> +
> +       ret = irq_set_irq_wake(irq_wakeup, 1);

This fails as well.

> +       if (ret)
> +               dev_warn(&pdev->dev, "failed to mark wakeup irq as wakeup\n");
> +
> +       return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);

[snip]


-- 
Thanks and Regards
Pramod

^ permalink raw reply

* HYP panic with 3.16-rc5, arm64 + 64k pages
From: Joel Schopp @ 2014-07-17 17:31 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20140717152808.GO21153@arm.com>


On 07/17/2014 10:28 AM, Will Deacon wrote:
> Hi all,
>
> If I try to spawn a kvm guest with kvmtool (not tried qemu) with a vanilla
> 3.16-rc5 kernel (same for host and guest) using 64k pages, I'm greeted by:
>
>   Kernel panic - not syncing: HYP panic:
>   PS:800002c5 PC:fffffe000042bd10 ESR:00000000bf000000
>   FAR:          (null) HPFAR:          (null) PAR:          (null)
>   VCPU:0000020979e20000
>
>   CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.16.0-rc5 #4
>   Call trace:
>   [<fffffe000009642c>] dump_backtrace+0x0/0x130
>
> on the host. The problem happens every time on Juno and the fastmodel.
>
> Any ideas? Is anybody else seeing this problem? The limited guest output
> is below.

I'm running a small patch series (nothing that should affect what you
are seeing) on top of 3.16-rc5 (both host and guest) with 64K pages on
an arm64 SOC.  I'm not seeing any problems.
>
> Will
>
> --->8
>
> Initializing cgroup subsys cpu
> Linux version 3.16.0-rc5 (will at edgewater-inn) (gcc version 4.9.0 20140214 (experimental) (aarch64-trunk.530) ) #1 SMP PREEMPT Thu Jul 17 16:19:36 BST 2014
> CPU: AArch64 Processor [410fd070] revision 0
> Early serial console at I/O port 0x0 (options '')
> bootconsole [uart0] enabled
> efi: Getting parameters from FDT:
> efi: Can't find System Table in device tree!
> cma: CMA: failed to reserve 512 MiB
> psci: probing for conduit method from DT.
> psci: Using PSCI v0.1 Function IDs from DT
> PERCPU: Embedded 1 pages/cpu @fffffe0023e80000 s13120 r8192 d44224 u65536
> Built 1 zonelists in Zone order, mobility grouping off.  Total pages: 9208
> Kernel command line: console=hvc0,38400 earlycon=uart8250,0x3f8 root=/dev/root rw rootflags=rw,trans=virtio,version=9p2000.L rootfstype=9p init=/virt/init  ip=dhcp
> PID hash table entries: 4096 (order: -1, 32768 bytes)
> Dentry cache hash table entries: 131072 (order: 4, 1048576 bytes)
> Inode-cache hash table entries: 65536 (order: 3, 524288 bytes)
> Memory: 579264K/589824K available (4359K kernel code, 455K rwdata, 1536K rodata, 332K init, 280K bss, 10560K reserved)
> Virtual kernel memory layout:
>     vmalloc : 0xfffffc0000000000 - 0xfffffdfbffff0000   (2080767 MB)
>     vmemmap : 0xfffffdfc001c0000 - 0xfffffdfc0023e000   (     0 MB)
>     modules : 0xfffffdfffc000000 - 0xfffffe0000000000   (    64 MB)
>     memory  : 0xfffffe0000000000 - 0xfffffe0024000000   (   576 MB)
>       .init : 0xfffffe0000660000 - 0xfffffe00006b3340   (   333 kB)
>       .text : 0xfffffe0000080000 - 0xfffffe0000651f94   (  5960 kB)
>       .data : 0xfffffe00006c0000 - 0xfffffe0000731ca8   (   456 kB)
> SLUB: HWalign=64, Order=0-3, MinObjects=0, CPUs=4, Nodes=1
> Preemptible hierarchical RCU implementation.
> 	RCU restricting CPUs from NR_CPUS=8 to nr_cpu_ids=4.
> RCU: Adjusting geometry for rcu_fanout_leaf=16, nr_cpu_ids=4
> NR_IRQS:64 nr_irqs:64 0

Not a lot to go on there.  If you get desperate you can configure FTRACE
and pass "ftrace=function ftrace_dump_on_oops" as additional
kernel command line arguments.  If you get really desperate you can do
the same on the host.  Just beware the output is really long.

 

^ permalink raw reply

* [PATCHv4 5/5] arm64: cpuinfo: print info for all CPUs
From: Will Deacon @ 2014-07-17 17:28 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20140717171058.GM18203@arm.com>

On Thu, Jul 17, 2014 at 06:10:58PM +0100, Catalin Marinas wrote:
> On Thu, Jul 17, 2014 at 02:55:37PM +0100, Peter Maydell wrote:
> > On 17 July 2014 13:35, Will Deacon <will.deacon@arm.com> wrote:
> > > We're not denying the possibility of heterogeneity, we're trying to expose a
> > > consistent view of the system to userspace. Differences between cores should
> > > be dealt with by the kernel (e.g. IKS, HMP scheduling), not blindly
> > > passed off to userspace.
> > 
> > On that basis, why report anything at all about individual cores?
> > Just have /proc/cpuinfo report "number of processors: 4" and
> > no per-CPU information at all...
> 
> We lost a lot of time on this already (given the internal threads). So
> my proposal is to go ahead with Mark's patch with per-CPU features. They
> currently just include the same elf_hwcap multiple times. If we ever
> need to present different features, the conditions would be:
> 
> 1. Never report more than elf_hwcap
> 2. elf_hwcap can only include non-symmetric features *if* Linux gets a
>    way to transparently handle migration or emulation

... making the point of a per-cpu field entirely pointless ;)

Will

^ permalink raw reply

* [PATCH RFCv3 01/14] arm64: introduce aarch64_insn_gen_comp_branch_imm()
From: Will Deacon @ 2014-07-17 17:25 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <CAMEtUux1NRqz-Bqi_H8kJe8x9u+1Q12Sg4a-92cE6OsePEBLxg@mail.gmail.com>

On Thu, Jul 17, 2014 at 04:59:10PM +0100, Alexei Starovoitov wrote:
> On Thu, Jul 17, 2014 at 2:19 AM, Will Deacon <will.deacon@arm.com> wrote:
> > On Wed, Jul 16, 2014 at 10:19:31PM +0100, Zi Shen Lim wrote:
> >> >
> >> > Is a BUG_ON justifiable here? Is there not a nicer way to fail?
> >>
> >> In general, it'd be nice if we returned something like -EINVAL and
> >> have all callers handle failures. Today all code gen functions return
> >> the u32 instruction and there's no error handling by callers.
> >> I think following the precedent (aarch64_insn_gen_branch_imm())
> >> of failing with BUG_ON is a reasonable tradeoff.
> >
> > Well, I don't necessarily agree with that BUG_ON, either :)
> > I take it eBPF doesn't have a `trap' instruction or similar? Otherwise, we
> > could generate that and avoid having to propagate errors directly to the
> > caller.
> >
> >> In this case here, when we hit the default (failure) case, that means
> >> there's a serious error of attempting to use an unsupported
> >> variant. I think we're better off failing hard here than trying to
> >> arbitrarily "fallback" on a default choice.
> >
> > It might be a serious error for BPF, but a BUG_ON brings down the entire
> > machine, which I think is unfortunate.
> 
> There is some misunderstanding here. Here BUG_ON will trigger
> only on actual bug in JIT implementation, it cannot be triggered by user.
> eBPF program is verified before it reaches JIT, so all instructions are
> valid and input to JIT is proper. Two instruction are not yet
> implemented in this JIT and they trigger pr_.._once().
> So I don't see any issue with this usage of BUG_ON
> imo living with silent bugs in JIT is more dangerous.
> 
> For the same reason there is no 'trap' instruction in eBPF.
> Static verifier checks that program is valid. If there was a 'trap'
> insn the program would be rejected. Like programs with
> 'div by zero' are rejected. There is normal 'bpf_exit' insn to
> return from the program.

Ok, so assuming that BPF doesn't have any issues, I take your point.
However, we could very easily re-use these functions for things like SMP
alternatives and kprobes, where simply failing the instruction generation
might be acceptable.

It just feels like a big hammer to me, when the machine is probably happily
scheduling user tasks, responding to interrupts, writing data to disk etc.

Will

^ permalink raw reply

* [RFC PATCH] usb: dwc3: core: allow vendor drivers to check probe status
From: Felipe Balbi @ 2014-07-17 17:20 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1405617213-27360-1-git-send-email-lee.jones@linaro.org>

Hi,

On Thu, Jul 17, 2014 at 06:13:33PM +0100, Lee Jones wrote:
> This patch provides a mechanism for subordinate devices to check
> whether the DWC3 core probed successfully or otherwise.  Useful
> if PHYs are required to configure controllers, but aren't yet
> available.  The DWC3 core driver will defer probe if PHYs are
> unavailable, however subordinate DWC3 drivers currently do not
> have any visibility or means to check status - until now.

what's a subordinate DWC3 driver ?

> Another way to do this would be to *_phy_get*(), but if every
> driver did this it would create a high level of code
> duplication.
> 
> Signed-off-by: Lee Jones <lee.jones@linaro.org>
> ---
>  drivers/usb/dwc3/core.c | 12 ++++++++++++
>  drivers/usb/dwc3/core.h |  1 +
>  2 files changed, 13 insertions(+)
> 
> diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
> index eb69eb9..171ca52 100644
> --- a/drivers/usb/dwc3/core.c
> +++ b/drivers/usb/dwc3/core.c
> @@ -47,6 +47,14 @@
>  
>  /* -------------------------------------------------------------------------- */
>  
> +static bool is_enabled = false;
> +
> +int dwc3_is_enabled(void)
> +{
> +	return is_enabled;
> +}
> +EXPORT_SYMBOL(dwc3_is_enabled);

no, no, no, no. Let me try that again, hello no! You _do_ realise there
are systems with more than one dwc3 instance, right ? And this is the
most fragile possible way of doing this.

You never explained what's a dwc3 subordinate driver, you don't show any
example of how this would be used and why/where does the PHY need to
poke into DWC3. Why isn't probe defer enough for you ? Which platform
are you working on ? what is the problem that you're trying to solve ?

From this patch, all I can do is NAK this patch with no mercy, sorry.

-- 
balbi
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: Digital signature
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20140717/7fe3e7f3/attachment.sig>

^ permalink raw reply

* ARM: mvebu: ARCH_HAS_CPUFREQ
From: mat @ 2014-07-17 17:19 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20140717114100.GE13108@titan.lakedaemon.net>

Will be following up.
cpufreq, even with AMD CPUs, has been altered. bapm is enabled by default
now.

:-) Trolling

^ permalink raw reply

* [PATCH] ARM: DMA: ensure that old section mappings are flushed from the TLB
From: Will Deacon @ 2014-07-17 17:15 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <E1X7nlB-0000EN-7e@rmk-PC.arm.linux.org.uk>

On Thu, Jul 17, 2014 at 04:38:17PM +0100, Russell King wrote:
> When setting up the CMA region, we must ensure that the old section
> mappings are flushed from the TLB before replacing them with page
> tables, otherwise we can suffer from mismatched aliases if the CPU
> speculatively prefetches from these mappings at an inopportune time.
> 
> A mismatched alias can occur when the TLB contains a section mapping,
> but a subsequent prefetch causes it to load a page table mapping,
> resulting in the possibility of the TLB containing two matching
> mappings for the same virtual address region.
> 
> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>

Acked-by: Will Deacon <will.deacon@arm.com>

Will

> ---
>  arch/arm/mm/dma-mapping.c | 11 ++++++++++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
> index 4c88935654ca..1f88db06b133 100644
> --- a/arch/arm/mm/dma-mapping.c
> +++ b/arch/arm/mm/dma-mapping.c
> @@ -461,12 +461,21 @@ void __init dma_contiguous_remap(void)
>  		map.type = MT_MEMORY_DMA_READY;
>  
>  		/*
> -		 * Clear previous low-memory mapping
> +		 * Clear previous low-memory mapping to ensure that the
> +		 * TLB does not see any conflicting entries, then flush
> +		 * the TLB of the old entries before creating new mappings.
> +		 *
> +		 * This ensures that any speculatively loaded TLB entries
> +		 * (even though they may be rare) can not cause any problems,
> +		 * and ensures that this code is architecturally compliant.
>  		 */
>  		for (addr = __phys_to_virt(start); addr < __phys_to_virt(end);
>  		     addr += PMD_SIZE)
>  			pmd_clear(pmd_off_k(addr));
>  
> +		flush_tlb_kernel_range(__phys_to_virt(start),
> +				       __phys_to_virt(end));
> +
>  		iotable_init(&map, 1);
>  	}
>  }
> -- 
> 1.8.3.1
> 
> 

^ permalink raw reply

* [RFC PATCH] usb: dwc3: core: allow vendor drivers to check probe status
From: Lee Jones @ 2014-07-17 17:13 UTC (permalink / raw)
  To: linux-arm-kernel

This patch provides a mechanism for subordinate devices to check
whether the DWC3 core probed successfully or otherwise.  Useful
if PHYs are required to configure controllers, but aren't yet
available.  The DWC3 core driver will defer probe if PHYs are
unavailable, however subordinate DWC3 drivers currently do not
have any visibility or means to check status - until now.

Another way to do this would be to *_phy_get*(), but if every
driver did this it would create a high level of code
duplication.

Signed-off-by: Lee Jones <lee.jones@linaro.org>
---
 drivers/usb/dwc3/core.c | 12 ++++++++++++
 drivers/usb/dwc3/core.h |  1 +
 2 files changed, 13 insertions(+)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index eb69eb9..171ca52 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -47,6 +47,14 @@
 
 /* -------------------------------------------------------------------------- */
 
+static bool is_enabled = false;
+
+int dwc3_is_enabled(void)
+{
+	return is_enabled;
+}
+EXPORT_SYMBOL(dwc3_is_enabled);
+
 void dwc3_set_mode(struct dwc3 *dwc, u32 mode)
 {
 	u32 reg;
@@ -757,6 +765,8 @@ static int dwc3_probe(struct platform_device *pdev)
 
 	pm_runtime_allow(dev);
 
+	is_enabled = true;
+
 	return 0;
 
 err3:
@@ -786,6 +796,8 @@ static int dwc3_remove(struct platform_device *pdev)
 {
 	struct dwc3	*dwc = platform_get_drvdata(pdev);
 
+	is_enabled = false;
+
 	usb_phy_set_suspend(dwc->usb2_phy, 1);
 	usb_phy_set_suspend(dwc->usb3_phy, 1);
 	phy_power_off(dwc->usb2_generic_phy);
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 57332e3..94dee86 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -906,6 +906,7 @@ struct dwc3_gadget_ep_cmd_params {
 /* prototypes */
 void dwc3_set_mode(struct dwc3 *dwc, u32 mode);
 int dwc3_gadget_resize_tx_fifos(struct dwc3 *dwc);
+int dwc3_is_enabled(void);
 
 #if IS_ENABLED(CONFIG_USB_DWC3_HOST) || IS_ENABLED(CONFIG_USB_DWC3_DUAL_ROLE)
 int dwc3_host_init(struct dwc3 *dwc);
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH 1/8] ARM: Add platform support for Fujitsu MB86S7X SoCs
From: Arnd Bergmann @ 2014-07-17 17:12 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <CAJe_ZhdM88x883DqxerW-JWpVeiFR9yA7s_=gpPZ7w1qPDJR=A@mail.gmail.com>

On Thursday 17 July 2014 22:24:43 Jassi Brar wrote:
> On 17 July 2014 19:18, Arnd Bergmann <arnd@arndb.de> wrote:
> > On Thursday 17 July 2014 19:02:53 Jassi Brar wrote:

> >> From a few hundred micro-sec for CPU reset, to potentially tens of
> >> milli-sec for some I2C transaction ... yes we do have for I2C over
> >> mailbox! ;)
> >>
> >> Probably bailing out of the loop and returning -ETIMEDOUT to the
> >> caller, before a WARN(), is the simplest way to die.
> >
> > If you can have multiple milliseconds here, I think the code should
> > be changed to run in non-atomic context and use msleep(1) or
> > usleep_range() as a back-off. Is that possible?
> >
> I don't think we could sleep there but that should be ok because the
> code is used only when we don't have mailbox framework ready i.e, in
> very early boot before timers are ready and also as late as during
> reboot/poweroff. Also I realize long delays like those for I2C would
> never use this code - they would always have mailbox usable during
> their lifetime.

Ok, just add a comment then, and a warning if things take too long.

> >> >> >> +struct mb86s7x_peri_clk {
> >> >> >> +     u32 payload_size;
> >> >> >> +     u32 cntrlr;
> >> >> >> +     u32 domain;
> >> >> >> +     u32 port;
> >> >> >> +     u32 en;
> >> >> >> +     u64 freqency;
> >> >> >> +} __packed;
> >> >> >
> >> >> > Just mark the last member by itself __packed. I assume you didn't
> >> >> > actually mean to change the alignment of the data structure to one
> >> >> > byte, but just want to say that the last one is misaligned.
> >> >> >
> >> >> This and others, are data packets that are passed between local and
> >> >> remote via SharedMemory. __packed is only meant to specify that these
> >> >> data structures have no holes in them.
> >> >
> >> > That would be '__packed __attribute__((aligned(4)))'. A struct of 'u32'
> >> > already has no padding on any architecture that is supported by Linux.
> >> > The only reason you need the packing here is because the u64 member is
> >> > unaligned. Note that marking the entire structure as packed means that
> >> > accesses are no longer atomic because the compiler may prefer to do them
> >> > one byte at a time, which can break the protocol on the shared memory
> >> > area.
> >> >
> >> We are not worried about the atomic access because the side sending
> >> the data doesn't touch it until the other side indicates it has
> >> consumed it.
> >
> > It's still wrong though ;-)
> >
> The remote f/w expects data to be contiguous and we can't assume how
> it reads the packet. So our foremost priority is to have no holes in
> the region... like, say, USB descriptors.

The structures being contiguous is guaranteed by the ELF ABI, unless
you have unaligned members.

You can be explicit about it, but then you should also provide a
new minimum alignment (e.g. 4 bytes) for the structure to avoid
having the compiler turn everything into bytewise accesses.

	Arnd

^ permalink raw reply

* [PATCH] ARM: rockchip: Add cpu hotplug support for RK3XXX SoCs
From: Romain Perier @ 2014-07-17 17:11 UTC (permalink / raw)
  To: linux-arm-kernel

Adds the ability to shut down all CPUs except the first one
(since it might be special for a lot of platforms).
It is now possible to use kexec which requires such a feature.

Signed-off-by: Romain Perier <romain.perier@gmail.com>
---
 arch/arm/mach-rockchip/platsmp.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/arch/arm/mach-rockchip/platsmp.c b/arch/arm/mach-rockchip/platsmp.c
index 910835d..7c1299a 100644
--- a/arch/arm/mach-rockchip/platsmp.c
+++ b/arch/arm/mach-rockchip/platsmp.c
@@ -21,6 +21,7 @@
 #include <linux/of_address.h>
 
 #include <asm/cacheflush.h>
+#include <asm/cp15.h>
 #include <asm/smp_scu.h>
 #include <asm/smp_plat.h>
 #include <asm/mach/map.h>
@@ -178,8 +179,38 @@ static void __init rockchip_smp_prepare_cpus(unsigned int max_cpus)
 		pmu_set_power_domain(0 + i, false);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+static DECLARE_COMPLETION(cpu_died);
+
+static int rockchip_cpu_kill(unsigned int cpu)
+{
+	if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(1000))) {
+		pr_err("CPU%d: didn't die correctly\n", cpu);
+		return 0;
+	}
+	pmu_set_power_domain(0 + cpu, false);
+	return 1;
+}
+
+
+static void rockchip_cpu_die(unsigned int cpu)
+{
+	complete(&cpu_died);
+	flush_cache_louis();
+
+	v7_exit_coherency_flush(louis);
+
+	while(1)
+		cpu_do_idle();
+}
+#endif
+
 static struct smp_operations rockchip_smp_ops __initdata = {
 	.smp_prepare_cpus	= rockchip_smp_prepare_cpus,
 	.smp_boot_secondary	= rockchip_boot_secondary,
+#ifdef CONFIG_HOTPLUG_CPU
+	.cpu_kill		= rockchip_cpu_kill,
+	.cpu_die		= rockchip_cpu_die,
+#endif
 };
 CPU_METHOD_OF_DECLARE(rk3066_smp, "rockchip,rk3066-smp", &rockchip_smp_ops);
-- 
1.9.1

^ permalink raw reply related

* [PATCHv4 5/5] arm64: cpuinfo: print info for all CPUs
From: Catalin Marinas @ 2014-07-17 17:10 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <CAFEAcA_ZXDDRycNVXmw1FSgRFiPgtB_2qqe-ioj91jMeBJR7PQ@mail.gmail.com>

On Thu, Jul 17, 2014 at 02:55:37PM +0100, Peter Maydell wrote:
> On 17 July 2014 13:35, Will Deacon <will.deacon@arm.com> wrote:
> > We're not denying the possibility of heterogeneity, we're trying to expose a
> > consistent view of the system to userspace. Differences between cores should
> > be dealt with by the kernel (e.g. IKS, HMP scheduling), not blindly
> > passed off to userspace.
> 
> On that basis, why report anything at all about individual cores?
> Just have /proc/cpuinfo report "number of processors: 4" and
> no per-CPU information at all...

We lost a lot of time on this already (given the internal threads). So
my proposal is to go ahead with Mark's patch with per-CPU features. They
currently just include the same elf_hwcap multiple times. If we ever
need to present different features, the conditions would be:

1. Never report more than elf_hwcap
2. elf_hwcap can only include non-symmetric features *if* Linux gets a
   way to transparently handle migration or emulation

It basically means that Linux would not rely on user space to make
informed decisions on where to run a thread and avoid SIGILL.

-- 
Catalin

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox