From: Tycho Andersen <tycho@tycho.ws>
To: Kees Cook <keescook@chromium.org>, Andy Lutomirski <luto@amacapital.net>
Cc: Oleg Nesterov <oleg@redhat.com>,
"Eric W . Biederman" <ebiederm@xmission.com>,
"Serge E . Hallyn" <serge@hallyn.com>,
Christian Brauner <christian@brauner.io>,
Tyler Hicks <tyhicks@canonical.com>,
Akihiro Suda <suda.akihiro@lab.ntt.co.jp>,
Aleksa Sarai <asarai@suse.de>,
linux-kernel@vger.kernel.org,
containers@lists.linux-foundation.org, linux-api@vger.kernel.org
Subject: Re: [PATCH v8 1/2] seccomp: add a return code to trap to userspace
Date: Thu, 29 Nov 2018 16:08:26 -0700 [thread overview]
Message-ID: <20181129230826.GB4676@cisco> (raw)
In-Reply-To: <20181029224031.29809-2-tycho@tycho.ws>
On Mon, Oct 29, 2018 at 04:40:30PM -0600, Tycho Andersen wrote:
> + resp.id = req.id;
> + resp.error = -512; /* -ERESTARTSYS */
> + resp.val = 0;
> +
> + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
So, it turns out this *doesn't* work, and the reason this test was
passing is because of poor hygiene on my part.
Per the documentation in include/linux/errno.h,
/*
* These should never be seen by user programs. To return one of ERESTART*
* codes, signal_pending() MUST be set. Note that ptrace can observe these
* at syscall exit tracing, but they will never be left for the debugged user
* process to see.
*/
#define ERESTARTSYS 512
So basically, if you respond with -ERESTARTSYS with no signal pending, you'll
leak it to userspace. It turns out this is already possible with
SECCOMP_RET_TRAP (and probably ptrace alone, although I didn't try it out),
see the program below.
The question is: do we care? If so, it seems like we may need to handle the
-ERESTARTSYS-style cases even when there is no signal pending. If we don't,
there's precedent for us to just do the same thing as what happens for
SECCOMP_RET_TRACE, but we should probably at least fix the docs.
Tycho
#include <stdio.h>
#include <stddef.h>
#include <stdbool.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <signal.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <sys/ptrace.h>
#include <sys/ioctl.h>
#include <inttypes.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <linux/ptrace.h>
#include <linux/elf.h>
#include <pthread.h>
static int filter_syscall(int syscall_nr)
{
struct sock_filter filter[] = {
BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)),
BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, syscall_nr, 0, 1),
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRACE),
BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog bpf_prog = {
.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
.filter = filter,
};
int ret;
ret = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, 0, &bpf_prog);
if (ret < 0) {
perror("prctl failed");
return -1;
}
return ret;
}
typedef struct {
uint64_t r15;
uint64_t r14;
uint64_t r13;
uint64_t r12;
uint64_t bp;
uint64_t bx;
uint64_t r11;
uint64_t r10;
uint64_t r9;
uint64_t r8;
uint64_t ax;
uint64_t cx;
uint64_t dx;
uint64_t si;
uint64_t di;
uint64_t orig_ax;
uint64_t ip;
uint64_t cs;
uint64_t flags;
uint64_t sp;
uint64_t ss;
uint64_t fs_base;
uint64_t gs_base;
uint64_t ds;
uint64_t es;
uint64_t fs;
uint64_t gs;
} user_regs_struct64;
int main(int argc, char **argv)
{
pid_t pid;
user_regs_struct64 regs;
struct iovec iov = {.iov_base = ®s, .iov_len = sizeof(regs)};
int status;
pid = fork();
if (pid == 0) {
if (signal(SIGUSR1, signal_handler) == SIG_ERR) {
perror("signal");
exit(1);
}
if (filter_syscall(__NR_getpid) < 0)
exit(1);
/* i'm lazy, so sue me :) */
sleep(1);
errno = 0;
pid = syscall(__NR_getpid);
/*
* we get:
* getpid(): -1, errno: 512
* probably should get
* getpid(): <pid> errno: 0
*/
printf("getpid(): %d, errno: %d\n", pid, errno);
exit(errno);
}
if (ptrace(PTRACE_ATTACH, pid, NULL, 0) < 0) {
perror("ptrace attach");
goto out;
}
if (waitpid(pid, NULL, 0) != pid) {
perror("waitpid");
goto out;
}
if (ptrace(PTRACE_SETOPTIONS, pid, NULL, PTRACE_O_TRACESECCOMP) < 0) {
perror("ptrace setoptions");
goto out;
}
if (ptrace(PTRACE_CONT, pid, NULL, 0) != 0) {
perror("ptrace cont");
goto out;
}
if (waitpid(pid, &status, 0) != pid) {
perror("wait for trace");
goto out;
}
if (status >> 8 != (SIGTRAP | (PTRACE_EVENT_SECCOMP<<8))) {
printf("got bad trap event?\n");
goto out;
}
if (ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov) < 0) {
perror("getregset");
goto out;
}
/*
* Tell the syscall to restart. Per include/linux/errno.h this should
* only be set when signal_pending() is set. But we just won't send
* any signals to the process, and we'll see this in userspace.
*/
regs.ax = -512; /* -ERESTARTSYS */
/*
* This makes the this_syscall < 0 check in __seccomp_filter()
* trigger, so we skip the syscall and return whatever is in ax
*/
regs.orig_ax = -512; /* -ERESTARTSYS */
if (ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov) < 0) {
perror("setregset");
goto out;
}
if (ptrace(PTRACE_CONT, pid, NULL, 0) < 0) {
perror("cont after setregset");
goto out;
}
while (1) {
if (waitpid(pid, &status, 0) != pid) {
perror("wait for death");
goto out;
}
if (!WIFSTOPPED(status)) {
break;
}
if (ptrace(PTRACE_CONT, pid, NULL, 0) < 0) {
perror("cont after setregset");
goto out;
}
}
if (WIFSIGNALED(status)) {
printf("didn't exit: %d\n", WTERMSIG(status));
return 1;
}
if (WEXITSTATUS(status)) {
printf("exited: %d\n", WEXITSTATUS(status));
return 1;
}
return 0;
out:
kill(pid, SIGKILL);
return 1;
}
next prev parent reply other threads:[~2018-11-29 23:08 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-10-29 22:40 [PATCH v8 0/2] seccomp trap to userspace Tycho Andersen
2018-10-29 22:40 ` [PATCH v8 1/2] seccomp: add a return code to " Tycho Andersen
2018-10-30 14:32 ` Oleg Nesterov
2018-10-30 15:32 ` Tycho Andersen
2018-11-01 14:48 ` Oleg Nesterov
2018-11-01 20:33 ` Tycho Andersen
2018-11-02 11:29 ` Oleg Nesterov
2018-11-02 13:50 ` Tycho Andersen
2018-10-30 15:02 ` Oleg Nesterov
2018-10-30 15:54 ` Tycho Andersen
2018-10-30 15:54 ` Tycho Andersen
2018-10-30 16:27 ` Oleg Nesterov
2018-10-30 16:39 ` Oleg Nesterov
2018-10-30 17:21 ` Tycho Andersen
2018-10-30 21:32 ` Kees Cook
2018-10-31 13:04 ` Oleg Nesterov
2018-10-30 21:38 ` Kees Cook
2018-10-30 21:49 ` Kees Cook
2018-10-30 21:54 ` Tycho Andersen
2018-10-30 22:00 ` Kees Cook
2018-10-30 22:32 ` Tycho Andersen
2018-10-30 22:34 ` Kees Cook
2018-10-31 0:29 ` Tycho Andersen
2018-10-31 1:29 ` Kees Cook
2018-11-01 13:40 ` Oleg Nesterov
2018-11-01 19:56 ` Tycho Andersen
2018-11-02 10:02 ` Oleg Nesterov
2018-11-02 13:38 ` Tycho Andersen
2018-11-01 13:56 ` Oleg Nesterov
2018-11-01 19:58 ` Tycho Andersen
2018-11-29 23:08 ` Tycho Andersen [this message]
2018-11-30 10:17 ` Oleg Nesterov
2018-10-29 22:40 ` [PATCH v8 2/2] samples: add an example of seccomp user trap Tycho Andersen
2018-10-29 23:31 ` Serge E. Hallyn
2018-10-30 2:05 ` Tycho Andersen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20181129230826.GB4676@cisco \
--to=tycho@tycho.ws \
--cc=asarai@suse.de \
--cc=christian@brauner.io \
--cc=containers@lists.linux-foundation.org \
--cc=ebiederm@xmission.com \
--cc=keescook@chromium.org \
--cc=linux-api@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@amacapital.net \
--cc=oleg@redhat.com \
--cc=serge@hallyn.com \
--cc=suda.akihiro@lab.ntt.co.jp \
--cc=tyhicks@canonical.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.