All of lore.kernel.org
 help / color / mirror / Atom feed
From: Daniel Borkmann <daniel@iogearbox.net>
To: Tycho Andersen <tycho.andersen@canonical.com>,
	Kees Cook <keescook@chromium.org>,
	Alexei Starovoitov <ast@kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>,
	Will Drewry <wad@chromium.org>, Oleg Nesterov <oleg@redhat.com>,
	Andy Lutomirski <luto@amacapital.net>,
	Pavel Emelyanov <xemul@parallels.com>,
	"Serge E. Hallyn" <serge.hallyn@ubuntu.com>,
	linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	linux-api@vger.kernel.org
Subject: Re: [PATCH v2 5/5] seccomp: add a way to attach a filter via eBPF fd
Date: Fri, 11 Sep 2015 14:37:59 +0200	[thread overview]
Message-ID: <55F2CB27.7030804@iogearbox.net> (raw)
In-Reply-To: <1441930862-14347-6-git-send-email-tycho.andersen@canonical.com>

On 09/11/2015 02:21 AM, Tycho Andersen wrote:
> This is the final bit needed to support seccomp filters created via the bpf
> syscall. The patch adds a new seccomp operation SECCOMP_MODE_FILTER_EBPF,
> which takes exactly one command (presumably to be expanded upon later when
> seccomp EBPFs support more interesting things) and an argument struct
> similar to that of bpf(), although the size is explicit in the struct to
> avoid changing the signature of seccomp().
>
> v2: Don't abuse seccomp's third argument; use a separate command and a
>      pointer to a structure instead.

Comments below ...

> Signed-off-by: Tycho Andersen <tycho.andersen@canonical.com>
> CC: Kees Cook <keescook@chromium.org>
> CC: Will Drewry <wad@chromium.org>
> CC: Oleg Nesterov <oleg@redhat.com>
> CC: Andy Lutomirski <luto@amacapital.net>
> CC: Pavel Emelyanov <xemul@parallels.com>
> CC: Serge E. Hallyn <serge.hallyn@ubuntu.com>
> CC: Alexei Starovoitov <ast@kernel.org>
> CC: Daniel Borkmann <daniel@iogearbox.net>
> ---
>   include/uapi/linux/seccomp.h |  16 +++++
>   kernel/seccomp.c             | 135 ++++++++++++++++++++++++++++++++++++++-----
>   2 files changed, 138 insertions(+), 13 deletions(-)
>
> diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
> index 0f238a4..a8694e2 100644
> --- a/include/uapi/linux/seccomp.h
> +++ b/include/uapi/linux/seccomp.h
> @@ -13,10 +13,14 @@
>   /* Valid operations for seccomp syscall. */
>   #define SECCOMP_SET_MODE_STRICT	0
>   #define SECCOMP_SET_MODE_FILTER	1
> +#define SECCOMP_MODE_FILTER_EBPF	2

Should this be SECCOMP_SET_MODE_FILTER_EBPF or just SECCOMP_SET_MODE_EBPF?

>   /* Valid flags for SECCOMP_SET_MODE_FILTER */
>   #define SECCOMP_FILTER_FLAG_TSYNC	1
>
> +/* Valid cmds for SECCOMP_MODE_FILTER_EBPF */
> +#define SECCOMP_EBPF_ADD_FD	0
> +
>   /*
>    * All BPF programs must return a 32-bit value.
>    * The bottom 16-bits are for optional return data.
> @@ -51,4 +55,16 @@ struct seccomp_data {
>   	__u64 args[6];
>   };
>
> +struct seccomp_ebpf {
> +	unsigned int size;
> +
> +	union {
> +		/* SECCOMP_EBPF_ADD_FD */
> +		struct {
> +			unsigned int	add_flags;
> +			__u32		add_fd;
> +		};
> +	};
> +};
> +
>   #endif /* _UAPI_LINUX_SECCOMP_H */
> diff --git a/kernel/seccomp.c b/kernel/seccomp.c
> index 1856f69..e78175a 100644
> --- a/kernel/seccomp.c
> +++ b/kernel/seccomp.c
> @@ -65,6 +65,9 @@ struct seccomp_filter {
>   /* Limit any path through the tree to 256KB worth of instructions. */
>   #define MAX_INSNS_PER_PATH ((1 << 18) / sizeof(struct sock_filter))
>
> +static long seccomp_install_filter(unsigned int flags,
> +				   struct seccomp_filter *prepared);
> +
>   /*
>    * Endianness is explicitly ignored and left for BPF program authors to manage
>    * as per the specific architecture.
> @@ -356,17 +359,6 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog)
>
>   	BUG_ON(INT_MAX / fprog->len < sizeof(struct sock_filter));
>
> -	/*
> -	 * Installing a seccomp filter requires that the task has
> -	 * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
> -	 * This avoids scenarios where unprivileged tasks can affect the
> -	 * behavior of privileged children.
> -	 */
> -	if (!task_no_new_privs(current) &&
> -	    security_capable_noaudit(current_cred(), current_user_ns(),
> -				     CAP_SYS_ADMIN) != 0)
> -		return ERR_PTR(-EACCES);
> -
>   	/* Allocate a new seccomp_filter */
>   	sfilter = kzalloc(sizeof(*sfilter), GFP_KERNEL | __GFP_NOWARN);
>   	if (!sfilter)
> @@ -510,8 +502,105 @@ static void seccomp_send_sigsys(int syscall, int reason)
>   	info.si_syscall = syscall;
>   	force_sig_info(SIGSYS, &info, current);
>   }
> +
>   #endif	/* CONFIG_SECCOMP_FILTER */
>
> +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SECCOMP_FILTER)
> +static struct seccomp_filter *seccomp_prepare_ebpf(int fd)
> +{
> +	struct seccomp_filter *ret;
> +	struct bpf_prog *prog;
> +
> +	prog = bpf_prog_get(fd);
> +	if (IS_ERR(prog))
> +		return (struct seccomp_filter *) prog;

Please use ERR_CAST(prog) here instead of the open-coded cast.

> +
> +	if (prog->type != BPF_PROG_TYPE_SECCOMP) {
> +		bpf_prog_put(prog);
> +		return ERR_PTR(-EINVAL);
> +	}
> +
> +	ret = kzalloc(sizeof(*ret), GFP_KERNEL | __GFP_NOWARN);
> +	if (!ret) {
> +		bpf_prog_put(prog);
> +		return ERR_PTR(-ENOMEM);
> +	}
> +
> +	ret->prog = prog;
> +	atomic_set(&ret->usage, 1);
> +
> +	/* Intentionally don't bpf_prog_put() here, because the underlying prog
> +	 * is refcounted too and we're holding a reference from the struct
> +	 * seccomp_filter object.
> +	 */
> +	return ret;
> +}
> +
> +static long seccomp_ebpf_add_fd(struct seccomp_ebpf *ebpf)
> +{
> +	struct seccomp_filter *prepared;
> +
> +	prepared = seccomp_prepare_ebpf(ebpf->add_fd);
> +	if (IS_ERR(prepared))
> +		return PTR_ERR(prepared);
> +
> +	return seccomp_install_filter(ebpf->add_flags, prepared);
> +}
> +
> +static long seccomp_mode_filter_ebpf(unsigned int cmd, const char __user *uargs)
> +{
> +	const struct seccomp_ebpf __user *uebpf;
> +	struct seccomp_ebpf ebpf;
> +	unsigned int size;
> +	long ret = -EFAULT;
> +
> +	uebpf = (const struct seccomp_ebpf __user *) uargs;
> +
> +	if (get_user(size, &uebpf->size) != 0)
> +		return -EFAULT;
> +
> +	/* If we're handed a bigger struct than we know of,
> +	 * ensure all the unknown bits are 0 - i.e. new
> +	 * user-space does not rely on any kernel feature
> +	 * extensions we dont know about yet.
> +	 */
> +	if (size > sizeof(ebpf)) {
> +		unsigned char __user *addr;
> +		unsigned char __user *end;
> +		unsigned char val;
> +
> +		addr = (void __user *)uebpf + sizeof(ebpf);
> +		end  = (void __user *)uebpf + size;
> +
> +		for (; addr < end; addr++) {
> +			int err = get_user(val, addr);
> +
> +			if (err)
> +				return err;
> +			if (val)
> +				return -E2BIG;
> +		}
> +		size = sizeof(ebpf);
> +	}
> +
> +	if (copy_from_user(&ebpf, uebpf, size) != 0)
> +		return -EFAULT;

Not sure it's worth adding all this bpf(2)-like interface complexity here,
but fair enough, I guess there are some very good reasons and bigger
additions coming ...

> +	switch (cmd) {
> +	case SECCOMP_EBPF_ADD_FD:
> +		ret = seccomp_ebpf_add_fd(&ebpf);
> +		break;
> +	}
> +
> +	return ret;
> +}
> +#else
> +static long seccomp_mode_filter_ebpf(unsigned int cmd, const char __user *uargs)
> +{
> +	return -EINVAL;
> +}
> +#endif
> +
>   /*
>    * Secure computing mode 1 allows only read/write/exit/sigreturn.
>    * To be fully secure this must be combined with rlimit
> @@ -760,9 +849,7 @@ out:
>   static long seccomp_set_mode_filter(unsigned int flags,
>   				    const char __user *filter)
>   {
> -	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
>   	struct seccomp_filter *prepared = NULL;
> -	long ret = -EINVAL;
>
>   	/* Validate flags. */
>   	if (flags & ~SECCOMP_FILTER_FLAG_MASK)
> @@ -773,6 +860,26 @@ static long seccomp_set_mode_filter(unsigned int flags,
>   	if (IS_ERR(prepared))
>   		return PTR_ERR(prepared);
>
> +	return seccomp_install_filter(flags, prepared);

I (truly) hope I'm overlooking something ;) ...

... but why do all the (classic) seccomp-BPF preparation work (which is rather
a lot) up to this point, where you have it ready, only to *then* find out we don't
have the actual permissions?!

Plus, when seccomp_install_filter() fails with -EACCES, who releases everything
that was allocated and drops the program refs that were taken!?

I see the same in seccomp_ebpf_add_fd().

So, an unprivileged child could increase the parent's bpf_prog's reference count
without having the actual permissions to do so, and thus control it to the point
where the next bpf_prog_put() would unintentionally release it?

(So yeah, I'm hoping I misread something ... ;))

> +}
> +
> +static long seccomp_install_filter(unsigned int flags,
> +				   struct seccomp_filter *prepared)
> +{
> +	const unsigned long seccomp_mode = SECCOMP_MODE_FILTER;
> +	long ret = -EINVAL;
> +
> +	/*
> +	 * Installing a seccomp filter requires that the task has
> +	 * CAP_SYS_ADMIN in its namespace or be running with no_new_privs.
> +	 * This avoids scenarios where unprivileged tasks can affect the
> +	 * behavior of privileged children.
> +	 */
> +	if (!task_no_new_privs(current) &&
> +	    security_capable_noaudit(current_cred(), current_user_ns(),
> +				     CAP_SYS_ADMIN) != 0)
> +		return -EACCES;
> +
>   	/*
>   	 * Make sure we cannot change seccomp or nnp state via TSYNC
>   	 * while another thread is in the middle of calling exec.
> @@ -875,6 +982,8 @@ static long do_seccomp(unsigned int op, unsigned int flags,
>   		return seccomp_set_mode_strict();
>   	case SECCOMP_SET_MODE_FILTER:
>   		return seccomp_set_mode_filter(flags, uargs);
> +	case SECCOMP_MODE_FILTER_EBPF:
> +		return seccomp_mode_filter_ebpf(flags, uargs);
>   	default:
>   		return -EINVAL;
>   	}
>

  parent reply	other threads:[~2015-09-11 12:37 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-11  0:20 v2 of seccomp filter c/r patches Tycho Andersen
2015-09-11  0:20 ` Tycho Andersen
2015-09-11  0:20 ` [PATCH v2 2/5] seccomp: make underlying bpf ref counted as well Tycho Andersen
     [not found]   ` <1441930862-14347-3-git-send-email-tycho.andersen-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
2015-09-11 13:02     ` Daniel Borkmann
2015-09-11 13:02       ` Daniel Borkmann
     [not found]       ` <55F2D0EC.9090004-FeC+5ew28dpmcu3hnIyYJQ@public.gmane.org>
2015-09-11 14:44         ` Tycho Andersen
2015-09-11 14:44           ` Tycho Andersen
2015-09-11 16:03           ` Daniel Borkmann
2015-09-11 16:03             ` Daniel Borkmann
     [not found]             ` <55F2FB6F.7050708-FeC+5ew28dpmcu3hnIyYJQ@public.gmane.org>
2015-09-11 17:33               ` Tycho Andersen
2015-09-11 17:33                 ` Tycho Andersen
2015-09-11 18:28                 ` Daniel Borkmann
2015-09-14 16:00                   ` Tycho Andersen
2015-09-14 16:48                     ` Daniel Borkmann
     [not found]                       ` <55F6FA6B.1060108-FeC+5ew28dpmcu3hnIyYJQ@public.gmane.org>
2015-09-14 17:30                         ` Tycho Andersen
2015-09-14 17:30                           ` Tycho Andersen
2015-09-11  0:21 ` [PATCH v2 4/5] seccomp: add a way to access filters via bpf fds Tycho Andersen
2015-09-11 11:47   ` Daniel Borkmann
     [not found]     ` <55F2BF5A.8010006-FeC+5ew28dpmcu3hnIyYJQ@public.gmane.org>
2015-09-11 14:29       ` Tycho Andersen
2015-09-11 14:29         ` Tycho Andersen
     [not found]   ` <1441930862-14347-5-git-send-email-tycho.andersen-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
2015-09-11 12:08     ` Michael Kerrisk (man-pages)
2015-09-11 12:08       ` Michael Kerrisk (man-pages)
     [not found]       ` <CAKgNAki99ZFgLPE5mWWjj1nvdNyke1w0ttqmiG+Uk0rVfqutZw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-09-11 14:31         ` Tycho Andersen
2015-09-11 14:31           ` Tycho Andersen
2015-09-11 16:20   ` Andy Lutomirski
2015-09-11 16:44     ` Tycho Andersen
2015-09-14 17:52       ` Andy Lutomirski
     [not found] ` <1441930862-14347-1-git-send-email-tycho.andersen-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
2015-09-11  0:20   ` [PATCH v2 1/5] ebpf: add a seccomp program type Tycho Andersen
2015-09-11  0:20     ` Tycho Andersen
2015-09-11 12:09     ` Michael Kerrisk (man-pages)
2015-09-11  0:21   ` [PATCH v2 3/5] ebpf: add a way to dump an eBPF program Tycho Andersen
2015-09-11  0:21     ` Tycho Andersen
     [not found]     ` <1441930862-14347-4-git-send-email-tycho.andersen-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
2015-09-11  2:29       ` Alexei Starovoitov
2015-09-11  2:29         ` Alexei Starovoitov
     [not found]         ` <20150911022940.GA4903-2RGepAHry06MXrjNfwE7T/6muRTtt8+awzqs5ZKRSiY@public.gmane.org>
2015-09-11 14:59           ` Tycho Andersen
2015-09-11 14:59             ` Tycho Andersen
2015-09-11 13:39       ` Daniel Borkmann
2015-09-11 13:39         ` Daniel Borkmann
2015-09-11 14:44         ` Tycho Andersen
2015-09-11 12:11     ` Michael Kerrisk (man-pages)
2015-09-11  0:21   ` [PATCH v2 5/5] seccomp: add a way to attach a filter via eBPF fd Tycho Andersen
2015-09-11  0:21     ` Tycho Andersen
2015-09-11 12:10     ` Michael Kerrisk (man-pages)
2015-09-11 12:37     ` Daniel Borkmann [this message]
     [not found]       ` <55F2CB27.7030804-FeC+5ew28dpmcu3hnIyYJQ@public.gmane.org>
2015-09-11 14:40         ` Tycho Andersen
2015-09-11 14:40           ` Tycho Andersen
2015-09-11  2:50   ` v2 of seccomp filter c/r patches Alexei Starovoitov
2015-09-11  2:50     ` Alexei Starovoitov
2015-09-11 16:30   ` Andy Lutomirski
2015-09-11 16:30     ` Andy Lutomirski
     [not found]     ` <CALCETrVYtv1=g-xPjQ-LiX+5GK3xtB6a2hYbat0TuU-Bd4QA6Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-09-11 17:00       ` Andy Lutomirski
2015-09-11 17:00         ` Andy Lutomirski
     [not found]         ` <CALCETrWxLMSgdsdT9gTL80LSovONmCcTYjzqrHqF-WdJ4BN1Uw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-09-11 17:28           ` Tycho Andersen
2015-09-11 17:28             ` Tycho Andersen
2015-09-14 17:52             ` Andy Lutomirski
2015-09-14 17:52               ` Andy Lutomirski
2015-09-15 16:07               ` Tycho Andersen
2015-09-15 18:13                 ` Andy Lutomirski
     [not found]                   ` <CALCETrVxhNvmEdMq0XRy1YZ+oJLDwcmE1y6prs7FGGhsS-Y5gg-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2015-09-15 18:26                     ` Tycho Andersen
2015-09-15 18:26                       ` Tycho Andersen
2015-09-15 20:01                       ` Andy Lutomirski
2015-09-15 20:01                         ` Andy Lutomirski
2015-09-15 21:38                         ` Tycho Andersen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=55F2CB27.7030804@iogearbox.net \
    --to=daniel@iogearbox.net \
    --cc=ast@kernel.org \
    --cc=davem@davemloft.net \
    --cc=keescook@chromium.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=netdev@vger.kernel.org \
    --cc=oleg@redhat.com \
    --cc=serge.hallyn@ubuntu.com \
    --cc=tycho.andersen@canonical.com \
    --cc=wad@chromium.org \
    --cc=xemul@parallels.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.