[PATCH bpf-next 1/6] bpf: introduce BPF_PROG_TYPE_FILE_FILTER

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Alexei Starovoitov <ast@kernel.org>
To: "David S . Miller" <davem@davemloft.net>
Cc: <daniel@iogearbox.net>, <luto@amacapital.net>,
	<viro@zeniv.linux.org.uk>, <netdev@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>, <kernel-team@fb.com>
Subject: [PATCH bpf-next 1/6] bpf: introduce BPF_PROG_TYPE_FILE_FILTER
Date: Wed, 3 Oct 2018 19:57:45 -0700	[thread overview]
Message-ID: <20181004025750.498303-2-ast@kernel.org> (raw)
In-Reply-To: <20181004025750.498303-1-ast@kernel.org>

Similar to networking sandboxing programs and cgroup-v2 based hooks
(BPF_CGROUP_INET_[INGRESS|EGRESS], BPF_CGROUP_INET[4|6]_[BIND|CONNECT], etc.),
introduce basic per-container sandboxing for file access via
a new BPF_PROG_TYPE_FILE_FILTER program type that attaches after
the security_file_open() LSM hook and works as an additional file_open filter.
The new cgroup bpf hook is called BPF_CGROUP_FILE_OPEN.

Just like other cgroup-bpf programs, the new BPF_PROG_TYPE_FILE_FILTER type
is only available to root.

This program type has access to a single argument, 'struct bpf_file_info',
that contains standard sys_stat fields:
struct bpf_file_info {
        __u64 inode;
        __u32 dev_major;
        __u32 dev_minor;
        __u32 fs_magic;
        __u32 mnt_id;
        __u32 nlink;
        __u32 mode;     /* file mode S_ISDIR, S_ISLNK, 0755, etc */
        __u32 flags;    /* open flags O_RDWR, O_CREAT, etc */
};
Other file attributes can be added in the future to the end of this struct
without breaking bpf programs.

For debugging, introduce a bpf_get_file_path() helper that returns the
NUL-terminated full path of the file. It should never be used for sandboxing.

Use cases:
- disallow certain FS types within containers (fs_magic == CGROUP2_SUPER_MAGIC)
- restrict permissions in particular mount (mnt_id == X && (flags & O_RDWR))
- disallow access to hard linked sensitive files (nlink > 1 && mode == 0700)
- disallow access to world writeable files (mode & 0002)
- disallow access to given set of files (dev_major == X && dev_minor == Y && inode == Z)

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf-cgroup.h |  10 +++
 include/linux/bpf_types.h  |   1 +
 include/uapi/linux/bpf.h   |  28 +++++-
 kernel/bpf/cgroup.c        | 171 +++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c       |   7 ++
 5 files changed, 216 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 588dd5f0bd85..766f0223c222 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -109,6 +109,8 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 				      short access, enum bpf_attach_type type);
 
+int __cgroup_bpf_file_filter(struct file *file, enum bpf_attach_type type);
+
 static inline enum bpf_cgroup_storage_type cgroup_storage_type(
 	struct bpf_map *map)
 {
@@ -253,6 +255,13 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 									      \
 	__ret;								      \
 })
+/*
+ * Evaluates to the result of __cgroup_bpf_file_filter(file,
+ * BPF_CGROUP_FILE_OPEN) when cgroup_bpf_enabled is set, and to 0 otherwise.
+ */
+#define BPF_CGROUP_RUN_PROG_FILE_FILTER(file)				     \
+({									      \
+	int __ret = 0;							      \
+	if (cgroup_bpf_enabled)						      \
+		__ret = __cgroup_bpf_file_filter(file, BPF_CGROUP_FILE_OPEN); \
+	__ret;								      \
+})
 int cgroup_bpf_prog_attach(const union bpf_attr *attr,
 			   enum bpf_prog_type ptype, struct bpf_prog *prog);
 int cgroup_bpf_prog_detach(const union bpf_attr *attr,
@@ -321,6 +330,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 #define BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk, uaddr, t_ctx) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_FILE_FILTER(file) ({ 0; })
 
 #define for_each_cgroup_storage_type(stype) for (; false; )
 
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 5432f4c9f50e..f182b2e37b94 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -33,6 +33,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
 #ifdef CONFIG_INET
 BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
 #endif
+BPF_PROG_TYPE(BPF_PROG_TYPE_FILE_FILTER, file_filter)
 
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f9187b41dff6..c0df8dd99edc 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -154,6 +154,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_LIRC_MODE2,
 	BPF_PROG_TYPE_SK_REUSEPORT,
 	BPF_PROG_TYPE_FLOW_DISSECTOR,
+	BPF_PROG_TYPE_FILE_FILTER,
 };
 
 enum bpf_attach_type {
@@ -175,6 +176,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_UDP6_SENDMSG,
 	BPF_LIRC_MODE2,
 	BPF_FLOW_DISSECTOR,
+	BPF_CGROUP_FILE_OPEN,
 	__MAX_BPF_ATTACH_TYPE
 };
 
@@ -2215,6 +2217,18 @@ union bpf_attr {
  *		pointer that was returned from bpf_sk_lookup_xxx\ ().
  *	Return
  *		0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_file_path(struct bpf_file_info *file, char *buf, u32 size_of_buf)
+ * 	Description
+ * 		Reconstruct the full path of *file* and store it into *buf* of
+ * 		*size_of_buf*. The *size_of_buf* must be strictly positive.
+ * 		On success, the helper makes sure that the *buf* is NUL-terminated.
+ * 		On failure, it is filled with string "(error)".
+ * 		This helper should only be used for debugging.
+ * 		The path stored in *buf* should never be used for permission checks.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2303,7 +2317,8 @@ union bpf_attr {
 	FN(skb_ancestor_cgroup_id),	\
 	FN(sk_lookup_tcp),		\
 	FN(sk_lookup_udp),		\
-	FN(sk_release),
+	FN(sk_release),			\
+	FN(get_file_path),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2896,4 +2911,15 @@ struct bpf_flow_keys {
 	};
 };
 
+/*
+ * Context of BPF_PROG_TYPE_FILE_FILTER programs.  All fields are read-only;
+ * loads from them are rewritten into loads from the struct file that is
+ * being checked (see cgroup_file_filter_ctx_access()).
+ */
+struct bpf_file_info {
+	__u64 inode;	/* file->f_inode->i_ino */
+	__u32 dev_major;	/* file->f_inode->i_sb->s_dev >> MINORBITS */
+	__u32 dev_minor;	/* file->f_inode->i_sb->s_dev & MINORMASK */
+	__u32 fs_magic;	/* file->f_inode->i_sb->s_magic */
+	__u32 mnt_id;	/* mnt_id of the mount behind file->f_path.mnt */
+	__u32 nlink;	/* file->f_inode->i_nlink */
+	__u32 mode;	/* file mode S_ISDIR, S_ISLNK, 0755, etc */
+	__u32 flags;	/* open flags O_RDWR, O_CREAT, etc */
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 00f6ed2e4f9a..38d0b4aa83ea 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -15,6 +15,7 @@
 #include <linux/bpf.h>
 #include <linux/bpf-cgroup.h>
 #include <net/sock.h>
+#include <../fs/mount.h>
 
 DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
 EXPORT_SYMBOL(cgroup_bpf_enabled_key);
@@ -754,3 +755,173 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
 	.get_func_proto		= cgroup_dev_func_proto,
 	.is_valid_access	= cgroup_dev_is_valid_access,
 };
+
+/*
+ * Run the programs in the effective[@type] array of the current task's
+ * cgroup (as returned by task_dfl_cgroup()) with @file as their context.
+ *
+ * Returns 0 when BPF_PROG_RUN_ARRAY() yields 1, -EPERM for any other result.
+ */
+int __cgroup_bpf_file_filter(struct file *file, enum bpf_attach_type type)
+{
+	struct cgroup *cg;
+	int verdict;
+
+	/* the cgroup lookup and the program run share one RCU read section */
+	rcu_read_lock();
+	cg = task_dfl_cgroup(current);
+	verdict = BPF_PROG_RUN_ARRAY(cg->bpf.effective[type], file, BPF_PROG_RUN);
+	rcu_read_unlock();
+
+	if (verdict == 1)
+		return 0;
+	return -EPERM;
+}
+EXPORT_SYMBOL(__cgroup_bpf_file_filter);
+
+/*
+ * bpf_get_file_path() helper (debugging aid only).
+ *
+ * Stores the path that file_path() produces for @file at the start of @buf
+ * and zero-fills the rest of the buffer, then returns 0.  If file_path()
+ * fails, @buf is filled with a (possibly truncated) "(error)" and the
+ * negative error code is returned.  In both cases every byte of @buf is
+ * written and the result is NUL-terminated.
+ *
+ * @size must be non-zero, as the helper documentation requires.
+ */
+BPF_CALL_3(bpf_get_file_path, struct file *, file, char *, buf, u64, size)
+{
+	char *p = file_path(file, buf, size);
+	int len;
+
+	if (IS_ERR(p)) {
+		/*
+		 * strncpy() zero-pads the tail, so the whole buffer is
+		 * initialized, but it does not terminate the string when
+		 * size is smaller than sizeof("(error)").  Terminate it
+		 * explicitly so callers never see an unterminated buffer.
+		 */
+		strncpy(buf, "(error)", size);
+		buf[size - 1] = '\0';
+		return PTR_ERR(p);
+	}
+	/*
+	 * The arithmetic below relies on file_path() having placed the
+	 * NUL-terminated path at the very end of @buf: len counts from p to
+	 * buf + size.  Move the string to the front and clear what is left.
+	 */
+	len = buf + size - p;
+	memmove(buf, p, len);
+	memset(buf + len, 0, size - len);
+	return 0;
+}
+
+/*
+ * Prototype of the bpf_get_file_path() helper, handed out by
+ * cgroup_file_filter_proto().  It declares an integer return value and three
+ * arguments: the program context, a memory buffer (ARG_PTR_TO_UNINIT_MEM) and
+ * a constant size (ARG_CONST_SIZE).  The helper is not restricted to GPL
+ * programs.
+ */
+const struct bpf_func_proto bpf_get_file_path_proto = {
+	.func		= bpf_get_file_path,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+};
+
+/*
+ * Helper lookup used by file_filter_verifier_ops: bpf_get_file_path() is
+ * resolved here, every other helper id is passed on to
+ * cgroup_dev_func_proto().
+ */
+static const struct bpf_func_proto *
+cgroup_file_filter_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	if (func_id == BPF_FUNC_get_file_path)
+		return &bpf_get_file_path_proto;
+
+	return cgroup_dev_func_proto(func_id, prog);
+}
+
+/*
+ * Decide whether a file filter program may access its context at @off with
+ * the given @size.
+ *
+ * The context is read-only.  A load is accepted only when it covers exactly
+ * one field of struct bpf_file_info: sizeof(__u64) bytes at 'inode' or
+ * sizeof(__u32) bytes at the start of any other field.  Every other offset
+ * is rejected, e.g. a 4-byte load from the upper half of 'inode' or from any
+ * padding that follows 'flags'.  cgroup_file_filter_ctx_access() has no case
+ * for such offsets and would emit no instructions for them.
+ *
+ * @prog and @info are not used.
+ */
+static bool cgroup_file_filter_is_valid_access(int off, int size,
+					       enum bpf_access_type type,
+					       const struct bpf_prog *prog,
+					       struct bpf_insn_access_aux *info)
+{
+	if (type != BPF_READ)
+		return false;
+
+	/*
+	 * Matching the exact field offset and the exact field width also
+	 * guarantees that the access is aligned and inside the struct.
+	 */
+	switch (off) {
+	case offsetof(struct bpf_file_info, inode):
+		return size == sizeof(__u64);
+
+	case offsetof(struct bpf_file_info, fs_magic):
+	case offsetof(struct bpf_file_info, mnt_id):
+	case offsetof(struct bpf_file_info, dev_major):
+	case offsetof(struct bpf_file_info, dev_minor):
+	case offsetof(struct bpf_file_info, nlink):
+	case offsetof(struct bpf_file_info, mode):
+	case offsetof(struct bpf_file_info, flags):
+		return size == sizeof(__u32);
+
+	default:
+		/* not the start of a field: there is no rewrite for it */
+		return false;
+	}
+}
+
+/*
+ * LD_1(F): append one load instruction to *insn that reads member F of the
+ * context, using si->src_reg as base address and si->dst_reg as destination.
+ * F is written as a member access through a NULL pointer, so (size_t)&F is
+ * the member's offset; the load width is taken from typeof(F).  sizeof(F)
+ * is stored in *target_size.  Evaluates to a zero of F's type, which lets
+ * the caller assign it to the next placeholder pointer in a chain.
+ */
+#define LD_1(F) ({					\
+	typeof(F) val = 0;				\
+	*insn++ = BPF_LDX_MEM(BPF_SIZEOF(val),		\
+			      si->dst_reg, si->src_reg,	\
+			      ((size_t)&F));		\
+	*target_size = sizeof(val);			\
+	val;						\
+	})
+
+/*
+ * LD_n(F): same as LD_1(), but the base address is si->dst_reg, i.e. the
+ * value loaded by the previously emitted instruction.
+ */
+#define LD_n(F) ({					\
+	typeof(F) val = 0;				\
+	*insn++ = BPF_LDX_MEM(BPF_SIZEOF(val),		\
+			      si->dst_reg, si->dst_reg,	\
+			      ((size_t)&F));		\
+	*target_size = sizeof(val);			\
+	val;						\
+	})
+
+/*
+ * Rewrite a load from struct bpf_file_info at si->off into a sequence of
+ * loads that starts at the struct file passed to the program as context.
+ *
+ * The local pointers below are never dereferenced.  'file' is NULL and the
+ * others receive the zero value that LD_1()/LD_n() evaluate to; they exist
+ * only so that the macros can derive member offsets and types from
+ * expressions such as file->f_inode.
+ *
+ * Instructions are written to @insn_buf and the size of the last load is
+ * stored in *@target_size.  Returns the number of instructions emitted,
+ * which is 0 when si->off matches none of the cases.  @type and @prog are
+ * not used.
+ */
+static u32 cgroup_file_filter_ctx_access(enum bpf_access_type type,
+					 const struct bpf_insn *si,
+					 struct bpf_insn *insn_buf,
+					 struct bpf_prog *prog,
+					 u32 *target_size)
+{
+	struct bpf_insn *insn = insn_buf;
+	struct file *file = NULL;
+	struct inode *inode;
+	struct super_block *sb;
+	struct mount *mnt;
+
+	switch (si->off) {
+	case offsetof(struct bpf_file_info, fs_magic):
+		/* dst = file->f_inode->i_sb->s_magic */
+		inode = LD_1(file->f_inode);
+		sb = LD_n(inode->i_sb);
+		LD_n(sb->s_magic);
+		break;
+	case offsetof(struct bpf_file_info, dev_major):
+		/* dst = file->f_inode->i_sb->s_dev >> MINORBITS */
+		inode = LD_1(file->f_inode);
+		sb = LD_n(inode->i_sb);
+		LD_n(sb->s_dev);
+		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, MINORBITS);
+		break;
+	case offsetof(struct bpf_file_info, dev_minor):
+		/* dst = file->f_inode->i_sb->s_dev & MINORMASK */
+		inode = LD_1(file->f_inode);
+		sb = LD_n(inode->i_sb);
+		LD_n(sb->s_dev);
+		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, MINORMASK);
+		break;
+	case offsetof(struct bpf_file_info, inode):
+		/* dst = file->f_inode->i_ino */
+		inode = LD_1(file->f_inode);
+		LD_n(inode->i_ino);
+		break;
+	case offsetof(struct bpf_file_info, mode):
+		/* dst = file->f_inode->i_mode */
+		inode = LD_1(file->f_inode);
+		LD_n(inode->i_mode);
+		break;
+	case offsetof(struct bpf_file_info, nlink):
+		/* dst = file->f_inode->i_nlink */
+		inode = LD_1(file->f_inode);
+		LD_n(inode->i_nlink);
+		break;
+	case offsetof(struct bpf_file_info, flags):
+		/* dst = file->f_flags */
+		LD_1(file->f_flags);
+		break;
+	case offsetof(struct bpf_file_info, mnt_id):
+		/* dst = real_mount(file->f_path.mnt)->mnt_id
+		 *
+		 * LD_1() evaluates to a NULL vfsmount pointer here, so
+		 * &mnt->mnt_id below is the offset of mnt_id relative to the
+		 * loaded pointer.  NOTE(review): this presumes real_mount()
+		 * is a plain container_of()-style adjustment; confirm against
+		 * fs/mount.h.
+		 */
+		mnt = real_mount(LD_1(file->f_path.mnt));
+		LD_n(mnt->mnt_id);
+		break;
+	}
+	return insn - insn_buf;
+}
+#undef LD_1
+#undef LD_n
+
+/* Program ops of the file_filter program type; no callbacks are set. */
+const struct bpf_prog_ops file_filter_prog_ops = {
+};
+
+/*
+ * Verifier ops of the file_filter program type: helper lookup, validation
+ * of context accesses and their rewrite into loads from struct file.
+ */
+const struct bpf_verifier_ops file_filter_verifier_ops = {
+	.get_func_proto		= cgroup_file_filter_proto,
+	.is_valid_access	= cgroup_file_filter_is_valid_access,
+	.convert_ctx_access	= cgroup_file_filter_ctx_access
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5742df21598c..7b0ffb8d7063 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1630,6 +1630,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_FLOW_DISSECTOR:
 		ptype = BPF_PROG_TYPE_FLOW_DISSECTOR;
 		break;
+	case BPF_CGROUP_FILE_OPEN:
+		ptype = BPF_PROG_TYPE_FILE_FILTER;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -1699,6 +1702,9 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 	case BPF_CGROUP_DEVICE:
 		ptype = BPF_PROG_TYPE_CGROUP_DEVICE;
 		break;
+	case BPF_CGROUP_FILE_OPEN:
+		ptype = BPF_PROG_TYPE_FILE_FILTER;
+		break;
 	case BPF_SK_MSG_VERDICT:
 		return sockmap_get_from_fd(attr, BPF_PROG_TYPE_SK_MSG, NULL);
 	case BPF_SK_SKB_STREAM_PARSER:
@@ -1741,6 +1747,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	case BPF_CGROUP_UDP6_SENDMSG:
 	case BPF_CGROUP_SOCK_OPS:
 	case BPF_CGROUP_DEVICE:
+	case BPF_CGROUP_FILE_OPEN:
 		break;
 	case BPF_LIRC_MODE2:
 		return lirc_prog_query(attr, uattr);
-- 
2.17.1

next prev parent reply	other threads:[~2018-10-04  2:58 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-10-04  2:57 [PATCH bpf-next 0/6] bpf: introduce BPF_CGROUP_FILE_OPEN Alexei Starovoitov
2018-10-04  2:57 ` Alexei Starovoitov [this message]
2018-10-04 19:41   ` [PATCH bpf-next 1/6] bpf: introduce BPF_PROG_TYPE_FILE_FILTER Roman Gushchin
2018-10-04 19:51     ` Andy Lutomirski
2018-10-04 22:23       ` Alexei Starovoitov
2018-10-05  4:46   ` Al Viro
2018-10-05 22:05     ` Alexei Starovoitov
2018-10-05 22:09       ` Andy Lutomirski
2018-10-05 22:27         ` Alexei Starovoitov
2018-10-05 23:47           ` Al Viro
2018-10-06  0:22             ` Alexei Starovoitov
2018-10-08  0:56   ` Jann Horn
2018-10-08  2:22     ` Alexei Starovoitov
2018-10-08  9:06       ` Mickaël Salaün
2018-10-04  2:57 ` [PATCH bpf-next 2/6] fs: wire in BPF_CGROUP_FILE_OPEN hook Alexei Starovoitov
2018-10-04  2:57 ` [PATCH bpf-next 3/6] tools/bpf: sync uapi/bpf.h Alexei Starovoitov
2018-10-04  2:57 ` [PATCH bpf-next 4/6] trace/bpf: allow %o modifier in bpf_trace_printk Alexei Starovoitov
2018-10-04  2:57 ` [PATCH bpf-next 5/6] libbpf: support BPF_CGROUP_FILE_OPEN in libbpf Alexei Starovoitov
2018-10-04  2:57 ` [PATCH bpf-next 6/6] selftests/bpf: add a test for BPF_CGROUP_FILE_OPEN Alexei Starovoitov

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:588dd5f0bd8 dfblob:766f0223c22 dfblob:5432f4c9f50
dfblob:f182b2e37b9 dfblob:f9187b41dff dfblob:c0df8dd99ed
dfblob:00f6ed2e4f9 dfblob:38d0b4aa83e dfblob:5742df21598
dfblob:7b0ffb8d706 )
 OR (
bs:"[PATCH bpf-next 1/6] bpf: introduce BPF_PROG_TYPE_FILE_FILTER" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181004025750.498303-2-ast@kernel.org \
    --to=ast@kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=netdev@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.