public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] seccomp: protect seccomp.filter pointer (w) with the task_lock
@ 2014-01-13 20:30 Will Drewry
  2014-01-13 20:30 ` [PATCH 2/2] sys, seccomp: add PR_SECCOMP_EXT and SECCOMP_EXT_ACT_TSYNC Will Drewry
  0 siblings, 1 reply; 18+ messages in thread
From: Will Drewry @ 2014-01-13 20:30 UTC (permalink / raw)
  To: linux-kernel; +Cc: Will Drewry, nschichan, keescook, james.l.morris

Normally, task_struck.seccomp.filter is only ever read or modified by
the task that owns it (current).  This property aids in fast access
during system call filtering as read access is lockless.

Updating the pointer from another task, however, opens up race
conditions.  To allow cross-task filter pointer updates, writes to
the pointer are now protected by the task_lock.  Read access remains
lockless because pointer updates themselves are atomic.  However,
writes often entail additional checking (like maximum instruction
counts) which require locking to perform safely.

Signed-off-by: Will Drewry <wad@chromium.org>
---
 include/linux/seccomp.h |    4 ++--
 kernel/seccomp.c        |   22 +++++++++++++---------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 6f19cfd..85c0895 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -17,8 +17,8 @@ struct seccomp_filter;
  * @filter: The metadata and ruleset for determining what system calls
  *          are allowed for a task.
  *
- *          @filter must only be accessed from the context of current as there
- *          is no locking.
+ *          @filter must always point to a valid seccomp_filter or NULL as it is
+ *          accessed without locking during system call entry.
  */
 struct seccomp {
 	int mode;
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index b7a1004..71512e4 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -201,18 +201,18 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
  */
 static u32 seccomp_run_filters(int syscall)
 {
-	struct seccomp_filter *f;
+	struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter);
 	u32 ret = SECCOMP_RET_ALLOW;
 
 	/* Ensure unexpected behavior doesn't result in failing open. */
-	if (WARN_ON(current->seccomp.filter == NULL))
+	if (WARN_ON(f == NULL))
 		return SECCOMP_RET_KILL;
 
 	/*
 	 * All filters in the list are evaluated and the lowest BPF return
 	 * value always takes priority (ignoring the DATA).
 	 */
-	for (f = current->seccomp.filter; f; f = f->prev) {
+	for (; f; f = ACCESS_ONCE(f->prev)) {
 		u32 cur_ret = sk_run_filter(NULL, f->insns);
 		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION))
 			ret = cur_ret;
@@ -228,7 +228,7 @@ static u32 seccomp_run_filters(int syscall)
  */
 static long seccomp_attach_filter(struct sock_fprog *fprog)
 {
-	struct seccomp_filter *filter;
+	struct seccomp_filter *filter, *f;
 	unsigned long fp_size = fprog->len * sizeof(struct sock_filter);
 	unsigned long total_insns = fprog->len;
 	long ret;
@@ -236,11 +236,6 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	if (fprog->len == 0 || fprog->len > BPF_MAXINSNS)
 		return -EINVAL;
 
-	for (filter = current->seccomp.filter; filter; filter = filter->prev)
-		total_insns += filter->len + 4;  /* include a 4 instr penalty */
-	if (total_insns > MAX_INSNS_PER_PATH)
-		return -ENOMEM;
-
 	/*
 	 * Installing a seccomp filter requires that the task have
 	 * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
@@ -260,6 +255,13 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	atomic_set(&filter->usage, 1);
 	filter->len = fprog->len;
 
+	task_lock(current);  /* protect current->seccomp.filter from updates */
+	for (f = current->seccomp.filter; f; f = f->prev)
+		total_insns += f->len + 4;  /* include a 4 instr penalty */
+	ret = -ENOMEM;
+	if (total_insns > MAX_INSNS_PER_PATH)
+		goto fail;
+
 	/* Copy the instructions from fprog. */
 	ret = -EFAULT;
 	if (copy_from_user(filter->insns, fprog->filter, fp_size))
@@ -281,8 +283,10 @@ static long seccomp_attach_filter(struct sock_fprog *fprog)
 	 */
 	filter->prev = current->seccomp.filter;
 	current->seccomp.filter = filter;
+	task_unlock(current);
 	return 0;
 fail:
+	task_unlock(current);
 	kfree(filter);
 	return ret;
 }
-- 
1.7.9.5


^ permalink raw reply related	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2014-01-15 19:33 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-01-13 20:30 [PATCH 1/2] seccomp: protect seccomp.filter pointer (w) with the task_lock Will Drewry
2014-01-13 20:30 ` [PATCH 2/2] sys, seccomp: add PR_SECCOMP_EXT and SECCOMP_EXT_ACT_TSYNC Will Drewry
2014-01-13 22:42   ` [PATCH 3/3] Documentation/prctl/seccomp_filter.txt: document extensions Will Drewry
2014-01-13 23:36   ` [PATCH 2/2] sys, seccomp: add PR_SECCOMP_EXT and SECCOMP_EXT_ACT_TSYNC Andy Lutomirski
2014-01-14 18:59     ` Will Drewry
2014-01-14 20:24       ` Andy Lutomirski
2014-01-14 20:59         ` Will Drewry
2014-01-14 21:09           ` Andy Lutomirski
2014-01-14 21:19             ` Will Drewry
2014-01-14 19:07   ` Oleg Nesterov
2014-01-14 19:21   ` Oleg Nesterov
2014-01-14 20:02     ` Oleg Nesterov
2014-01-14 20:13       ` Oleg Nesterov
2014-01-14 20:53         ` Will Drewry
2014-01-14 21:06     ` Will Drewry
2014-01-15 19:04       ` Oleg Nesterov
2014-01-15 19:28         ` Oleg Nesterov
2014-01-15 19:33         ` Will Drewry

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox