* [PATCH v2 1/6] bpf: add new prog type for cgroup socket filtering
2016-08-24 20:24 [PATCH v2 0/6] Add eBPF hooks for cgroups Daniel Mack
@ 2016-08-24 20:24 ` Daniel Mack
2016-08-24 20:24 ` [PATCH v2 2/6] cgroup: add support for eBPF programs Daniel Mack
` (4 subsequent siblings)
5 siblings, 0 replies; 11+ messages in thread
From: Daniel Mack @ 2016-08-24 20:24 UTC (permalink / raw)
To: htejun, daniel, ast
Cc: davem, kafai, fw, pablo, harald, netdev, sargun, Daniel Mack
For now, this program type is equivalent to BPF_PROG_TYPE_SOCKET_FILTER in
terms of checks during the verification process. It may access the skb as
well.
Programs of this type will be attached to cgroups for network filtering
and accounting.
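For illustration, a program of this type could be written in restricted C
roughly as follows (a hypothetical sketch; the section name, the
bpf_helpers.h header, and the clang -target bpf build are assumptions
borrowed from samples/bpf, not part of this patch):

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"	/* SEC() macro, as used in samples/bpf */

/* Returning 1 lets the packet pass; any other value drops it
 * (see the ingress/egress hook patches later in this series).
 */
SEC("cgroup_filter")
int cgroup_sock_filter(struct __sk_buff *skb)
{
	/* The skb is accessible, as with BPF_PROG_TYPE_SOCKET_FILTER. */
	if (skb->len > 1500)
		return 0;	/* drop oversized packets */

	return 1;		/* pass everything else */
}

char _license[] SEC("license") = "GPL";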
Signed-off-by: Daniel Mack <daniel@zonque.org>
---
include/uapi/linux/bpf.h | 7 +++++++
kernel/bpf/verifier.c | 1 +
net/core/filter.c | 6 ++++++
3 files changed, 14 insertions(+)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e4c5a1b..1d5db42 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -95,6 +95,13 @@ enum bpf_prog_type {
BPF_PROG_TYPE_SCHED_ACT,
BPF_PROG_TYPE_TRACEPOINT,
BPF_PROG_TYPE_XDP,
+ BPF_PROG_TYPE_CGROUP_SOCKET_FILTER,
+};
+
+enum bpf_attach_type {
+ BPF_ATTACH_TYPE_CGROUP_INET_INGRESS,
+ BPF_ATTACH_TYPE_CGROUP_INET_EGRESS,
+ __MAX_BPF_ATTACH_TYPE
};
#define BPF_PSEUDO_MAP_FD 1
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index abb61f3..12ca880 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1805,6 +1805,7 @@ static bool may_access_skb(enum bpf_prog_type type)
case BPF_PROG_TYPE_SOCKET_FILTER:
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
+ case BPF_PROG_TYPE_CGROUP_SOCKET_FILTER:
return true;
default:
return false;
diff --git a/net/core/filter.c b/net/core/filter.c
index a83766b..bc04e5c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2848,12 +2848,18 @@ static struct bpf_prog_type_list xdp_type __read_mostly = {
.type = BPF_PROG_TYPE_XDP,
};
+static struct bpf_prog_type_list cg_sk_filter_type __read_mostly = {
+ .ops = &sk_filter_ops,
+ .type = BPF_PROG_TYPE_CGROUP_SOCKET_FILTER,
+};
+
static int __init register_sk_filter_ops(void)
{
bpf_register_prog_type(&sk_filter_type);
bpf_register_prog_type(&sched_cls_type);
bpf_register_prog_type(&sched_act_type);
bpf_register_prog_type(&xdp_type);
+ bpf_register_prog_type(&cg_sk_filter_type);
return 0;
}
--
2.5.5
* [PATCH v2 2/6] cgroup: add support for eBPF programs
2016-08-24 20:24 [PATCH v2 0/6] Add eBPF hooks for cgroups Daniel Mack
2016-08-24 20:24 ` [PATCH v2 1/6] bpf: add new prog type for cgroup socket filtering Daniel Mack
@ 2016-08-24 20:24 ` Daniel Mack
2016-08-24 21:54 ` Tejun Heo
2016-08-25 6:56 ` kbuild test robot
2016-08-24 20:24 ` [PATCH v2 3/6] bpf: add BPF_PROG_ATTACH and BPF_PROG_DETACH commands Daniel Mack
` (3 subsequent siblings)
5 siblings, 2 replies; 11+ messages in thread
From: Daniel Mack @ 2016-08-24 20:24 UTC (permalink / raw)
To: htejun, daniel, ast
Cc: davem, kafai, fw, pablo, harald, netdev, sargun, Daniel Mack
This patch adds two sets of eBPF program pointers to struct cgroup.
One set for programs that are directly pinned to a cgroup, and one for
programs that are effective for it.
To illustrate the logic behind that, assume the following example
cgroup hierarchy.
A - B - C
     \ D - E
If only B has a program attached, it will be effective for B, C, D
and E. If D then attaches a program itself, that will be effective for
both D and E, and the program in B will only affect B and C. Only one
program of a given type is effective for a cgroup.
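Put differently, the effective program of a cgroup is the one attached
to its nearest ancestor, including itself. A sketch of that rule
(illustrative only - the code below precomputes the effective pointer
on attach rather than walking up the hierarchy per packet; the names
mirror the cgroup_bpf struct introduced in this patch):

static struct bpf_prog *effective_prog(struct cgroup *cgrp,
				       enum bpf_attach_type type)
{
	/* Walk towards the root; the closest attached program wins. */
	for (; cgrp; cgrp = cgroup_parent(cgrp))
		if (cgrp->bpf.prog[type])
			return cgrp->bpf.prog[type];

	return NULL;	/* no program attached anywhere up the path */
}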
Attaching and detaching programs will be done through the bpf(2)
syscall. For now, ingress and egress inet socket filtering are the
only supported use-cases.
Signed-off-by: Daniel Mack <daniel@zonque.org>
---
include/linux/bpf-cgroup.h | 70 +++++++++++++++++++
include/linux/cgroup-defs.h | 4 ++
init/Kconfig | 12 ++++
kernel/bpf/Makefile | 1 +
kernel/bpf/cgroup.c | 159 ++++++++++++++++++++++++++++++++++++++++++++
kernel/cgroup.c | 18 +++++
6 files changed, 264 insertions(+)
create mode 100644 include/linux/bpf-cgroup.h
create mode 100644 kernel/bpf/cgroup.c
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
new file mode 100644
index 0000000..d85d50f
--- /dev/null
+++ b/include/linux/bpf-cgroup.h
@@ -0,0 +1,70 @@
+#ifndef _BPF_CGROUP_H
+#define _BPF_CGROUP_H
+
+#include <linux/bpf.h>
+#include <uapi/linux/bpf.h>
+
+struct sock;
+struct cgroup;
+struct sk_buff;
+
+#ifdef CONFIG_CGROUP_BPF
+
+extern struct static_key_false cgroup_bpf_enabled_key;
+#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
+
+struct cgroup_bpf {
+ /*
+ * Store two sets of bpf_prog pointers, one for programs that are
+ * pinned directly to this cgroup, and one for those that are effective
+ * when this cgroup is accessed.
+ */
+ struct bpf_prog *prog[__MAX_BPF_ATTACH_TYPE];
+ struct bpf_prog *prog_effective[__MAX_BPF_ATTACH_TYPE];
+};
+
+void cgroup_bpf_free(struct cgroup *cgrp);
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent);
+
+void __cgroup_bpf_update(struct cgroup *cgrp,
+ struct cgroup *parent,
+ struct bpf_prog *prog,
+ enum bpf_attach_type type);
+
+/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */
+void cgroup_bpf_update(struct cgroup *cgrp,
+ struct bpf_prog *prog,
+ enum bpf_attach_type type);
+
+int __cgroup_bpf_run_filter(struct sock *sk,
+ struct sk_buff *skb,
+ enum bpf_attach_type type);
+
+/* Wrapper for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled */
+static inline int cgroup_bpf_run_filter(struct sock *sk,
+ struct sk_buff *skb,
+ enum bpf_attach_type type)
+{
+ if (cgroup_bpf_enabled)
+ return __cgroup_bpf_run_filter(sk, skb, type);
+
+ return 0;
+}
+
+#else
+
+struct cgroup_bpf {};
+static inline void cgroup_bpf_free(struct cgroup *cgrp) {}
+static inline void cgroup_bpf_inherit(struct cgroup *cgrp,
+ struct cgroup *parent) {}
+
+static inline int cgroup_bpf_run_filter(struct sock *sk,
+ struct sk_buff *skb,
+ enum bpf_attach_type type)
+{
+ return 0;
+}
+
+#endif /* CONFIG_CGROUP_BPF */
+
+#endif /* _BPF_CGROUP_H */
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 5b17de6..861b467 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -16,6 +16,7 @@
#include <linux/percpu-refcount.h>
#include <linux/percpu-rwsem.h>
#include <linux/workqueue.h>
+#include <linux/bpf-cgroup.h>
#ifdef CONFIG_CGROUPS
@@ -300,6 +301,9 @@ struct cgroup {
/* used to schedule release agent */
struct work_struct release_agent_work;
+ /* used to store eBPF programs */
+ struct cgroup_bpf bpf;
+
/* ids of the ancestors at each level including self */
int ancestor_ids[];
};
diff --git a/init/Kconfig b/init/Kconfig
index cac3f09..5a89c83 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1144,6 +1144,18 @@ config CGROUP_PERF
Say N if unsure.
+config CGROUP_BPF
+ bool "Support for eBPF programs attached to cgroups"
+ depends on BPF_SYSCALL && SOCK_CGROUP_DATA
+ help
+ Allow attaching eBPF programs to a cgroup using the bpf(2)
+ syscall command BPF_PROG_ATTACH.
+
+ The context in which these programs are executed depends on the type
+ of attachment. For instance, programs that are attached using
+ BPF_ATTACH_TYPE_CGROUP_INET_INGRESS will be executed on the
+ ingress path of inet sockets.
+
config CGROUP_DEBUG
bool "Example controller"
default n
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index eed911d..b22256b 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -5,3 +5,4 @@ obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
endif
+obj-$(CONFIG_CGROUP_BPF) += cgroup.o
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
new file mode 100644
index 0000000..eacd0e5a3
--- /dev/null
+++ b/kernel/bpf/cgroup.c
@@ -0,0 +1,159 @@
+/*
+ * Functions to manage eBPF programs attached to cgroups
+ *
+ * Copyright (C) 2016 Daniel Mack
+ *
+ * This file is subject to the terms and conditions of version 2 of the GNU
+ * General Public License. See the file COPYING in the main directory of the
+ * Linux distribution for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/atomic.h>
+#include <linux/cgroup.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#include <linux/bpf-cgroup.h>
+#include <net/sock.h>
+
+DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
+EXPORT_SYMBOL(cgroup_bpf_enabled_key);
+
+void cgroup_bpf_free(struct cgroup *cgrp)
+{
+ unsigned int type;
+
+ rcu_read_lock();
+
+ for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+ if (!cgrp->bpf.prog[type])
+ continue;
+
+ bpf_prog_put(cgrp->bpf.prog[type]);
+ static_branch_dec(&cgroup_bpf_enabled_key);
+ }
+
+ rcu_read_unlock();
+}
+
+void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
+{
+ unsigned int type;
+
+ rcu_read_lock();
+
+ for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
+ rcu_assign_pointer(cgrp->bpf.prog_effective[type],
+ rcu_dereference(parent->bpf.prog_effective[type]));
+
+ rcu_read_unlock();
+}
+
+/**
+ * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
+ * propagate the change to descendants
+ * @cgrp: The cgroup whose descendants to traverse
+ * @parent: The parent of @cgrp, or %NULL if @cgrp is the root
+ * @prog: A new program to pin
+ * @type: Type of pinning operation (ingress/egress)
+ *
+ * Each cgroup has a set of two pointers for bpf programs; one for eBPF
+ * programs it owns, and one for those that are effective when the cgroup
+ * is accessed.
+ *
+ * If @prog is not %NULL, this function attaches a new program to the
+ * cgroup and releases the one that is currently attached, if any. @prog
+ * is then made the effective program of type @type in that cgroup.
+ *
+ * If @prog is %NULL, the currently attached program of type @type is
+ * released, and the effective program of the parent cgroup (if any) is
+ * inherited by @cgrp.
+ *
+ * Then, the descendants of @cgrp are walked and the effective program for
+ * each of them is set to the effective program of @cgrp, unless the
+ * descendant has its own program attached, in which case the subbranch is
+ * skipped. This ensures that delegated sub-cgroups with their own
+ * programs are left untouched.
+ *
+ * Must be called with cgroup_mutex held.
+ */
+void __cgroup_bpf_update(struct cgroup *cgrp,
+ struct cgroup *parent,
+ struct bpf_prog *prog,
+ enum bpf_attach_type type)
+{
+ struct bpf_prog *old_prog, *effective;
+ struct cgroup_subsys_state *pos;
+
+ rcu_read_lock();
+
+ old_prog = xchg(cgrp->bpf.prog + type, prog);
+ if (old_prog) {
+ bpf_prog_put(old_prog);
+ static_branch_dec(&cgroup_bpf_enabled_key);
+ }
+
+ if (prog)
+ static_branch_inc(&cgroup_bpf_enabled_key);
+
+ effective = (!prog && parent) ?
+ rcu_dereference(parent->bpf.prog_effective[type]) : prog;
+
+ rcu_read_unlock();
+
+ css_for_each_descendant_pre(pos, &cgrp->self) {
+ struct cgroup *desc = container_of(pos, struct cgroup, self);
+
+ /* skip the subtree if the descendant has its own program */
+ if (desc->bpf.prog[type] && desc != cgrp)
+ pos = css_rightmost_descendant(pos);
+ else
+ rcu_assign_pointer(desc->bpf.prog_effective[type],
+ effective);
+ }
+}
+
+/**
+ * __cgroup_bpf_run_filter() - Run a program for packet filtering
+ * @sk: The socket sending or receiving traffic
+ * @skb: The skb that is being sent or received
+ * @type: The type of program to be executed
+ *
+ * If no socket is passed, or the socket is not of type INET or INET6,
+ * this function does nothing and returns 0.
+ *
+ * The program type passed in via @type must be suitable for network
+ * filtering. No further check is performed to assert that.
+ *
+ * This function will return %-EPERM if an attached program was found
+ * and it returned != 1 during execution. In all other cases, 0 is
+ * returned.
+ */
+int __cgroup_bpf_run_filter(struct sock *sk,
+ struct sk_buff *skb,
+ enum bpf_attach_type type)
+{
+ struct bpf_prog *prog;
+ struct cgroup *cgrp;
+ int ret = 0;
+
+ if (!sk)
+ return 0;
+
+ if (sk->sk_family != AF_INET &&
+ sk->sk_family != AF_INET6)
+ return 0;
+
+ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+
+ rcu_read_lock();
+
+ prog = rcu_dereference(cgrp->bpf.prog_effective[type]);
+ if (prog) {
+ unsigned int offset = skb->data - skb_mac_header(skb);
+
+ __skb_push(skb, offset);
+ ret = bpf_prog_run_clear_cb(prog, skb) == 1 ? 0 : -EPERM;
+ __skb_pull(skb, offset);
+ }
+
+ rcu_read_unlock();
+
+ return ret;
+}
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index d1c51b7..d53d4b5 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5038,6 +5038,8 @@ static void css_release_work_fn(struct work_struct *work)
if (cgrp->kn)
RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
NULL);
+
+ cgroup_bpf_free(cgrp);
}
mutex_unlock(&cgroup_mutex);
@@ -5245,6 +5247,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
if (!cgroup_on_dfl(cgrp))
cgrp->subtree_control = cgroup_control(cgrp);
+ if (parent)
+ cgroup_bpf_inherit(cgrp, parent);
+
cgroup_propagate_control(cgrp);
/* @cgrp doesn't have dir yet so the following will only create csses */
@@ -6417,6 +6422,19 @@ static __init int cgroup_namespaces_init(void)
}
subsys_initcall(cgroup_namespaces_init);
+#ifdef CONFIG_CGROUP_BPF
+void cgroup_bpf_update(struct cgroup *cgrp,
+ struct bpf_prog *prog,
+ enum bpf_attach_type type)
+{
+ struct cgroup *parent = cgroup_parent(cgrp);
+
+ mutex_lock(&cgroup_mutex);
+ __cgroup_bpf_update(cgrp, parent, prog, type);
+ mutex_unlock(&cgroup_mutex);
+}
+#endif /* CONFIG_CGROUP_BPF */
+
#ifdef CONFIG_CGROUP_DEBUG
static struct cgroup_subsys_state *
debug_css_alloc(struct cgroup_subsys_state *parent_css)
--
2.5.5
* Re: [PATCH v2 2/6] cgroup: add support for eBPF programs
2016-08-24 20:24 ` [PATCH v2 2/6] cgroup: add support for eBPF programs Daniel Mack
@ 2016-08-24 21:54 ` Tejun Heo
2016-08-24 22:30 ` Daniel Mack
2016-08-25 6:56 ` kbuild test robot
1 sibling, 1 reply; 11+ messages in thread
From: Tejun Heo @ 2016-08-24 21:54 UTC (permalink / raw)
To: Daniel Mack; +Cc: daniel, ast, davem, kafai, fw, pablo, harald, netdev, sargun
Hello, Daniel.
On Wed, Aug 24, 2016 at 10:24:19PM +0200, Daniel Mack wrote:
> +void cgroup_bpf_free(struct cgroup *cgrp)
> +{
> + unsigned int type;
> +
> + rcu_read_lock();
> +
> + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
> + if (!cgrp->bpf.prog[type])
> + continue;
> +
> + bpf_prog_put(cgrp->bpf.prog[type]);
> + static_branch_dec(&cgroup_bpf_enabled_key);
> + }
> +
> + rcu_read_unlock();
This RCU locking seems suspicious to me. RCU locking on the writer side
is usually bogus. We sometimes do it to work around locking
assertions in accessors but it's a better idea to make the assertions
better in those cases - e.g. sth like assert_mylock_or_rcu_locked().
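Such an assertion could be modeled on the existing
cgroup_assert_mutex_or_rcu_locked() in kernel/cgroup.c, roughly (a
sketch, using a hypothetical name):

#define assert_cgroup_mutex_or_rcu_locked()				\
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&			\
			 !lockdep_is_held(&cgroup_mutex),		\
			 "cgroup_mutex or RCU read lock required")

That accepts either the writer-side mutex or an RCU read-side critical
section, so the accessors need no spurious rcu_read_lock() on the write
path.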
> +void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
> +{
> + unsigned int type;
> +
> + rcu_read_lock();
Ditto.
> + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
> + rcu_assign_pointer(cgrp->bpf.prog_effective[type],
> + rcu_dereference(parent->bpf.prog_effective[type]));
> +
> + rcu_read_unlock();
> +}
...
> +void __cgroup_bpf_update(struct cgroup *cgrp,
> + struct cgroup *parent,
> + struct bpf_prog *prog,
> + enum bpf_attach_type type)
> +{
> + struct bpf_prog *old_prog, *effective;
> + struct cgroup_subsys_state *pos;
> +
> + rcu_read_lock();
Ditto.
> + old_prog = xchg(cgrp->bpf.prog + type, prog);
> + if (old_prog) {
> + bpf_prog_put(old_prog);
> + static_branch_dec(&cgroup_bpf_enabled_key);
> + }
> +
> + if (prog)
> + static_branch_inc(&cgroup_bpf_enabled_key);
Minor but probably better to inc first and then dec so that you can
avoid unnecessary enabled -> disabled -> enabled sequence.
> + effective = (!prog && parent) ?
> + rcu_dereference(parent->bpf.prog_effective[type]) : prog;
If this is what's triggering rcu warnings, there's an accessor to use
in these situations.
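The accessor meant here is presumably rcu_dereference_protected(),
which lets the updater state (and lockdep-check) the lock that protects
the pointer instead of taking the RCU read lock, e.g.:

	effective = rcu_dereference_protected(parent->bpf.prog_effective[type],
					      lockdep_is_held(&cgroup_mutex));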
> + rcu_read_unlock();
> +
> + css_for_each_descendant_pre(pos, &cgrp->self) {
On the other hand, this walk actually requires rcu read locking unless
you're holding cgroup_mutex.
Thanks.
--
tejun
* Re: [PATCH v2 2/6] cgroup: add support for eBPF programs
2016-08-24 21:54 ` Tejun Heo
@ 2016-08-24 22:30 ` Daniel Mack
0 siblings, 0 replies; 11+ messages in thread
From: Daniel Mack @ 2016-08-24 22:30 UTC (permalink / raw)
To: Tejun Heo; +Cc: daniel, ast, davem, kafai, fw, pablo, harald, netdev, sargun
Hi Tejun,
On 08/24/2016 11:54 PM, Tejun Heo wrote:
> On Wed, Aug 24, 2016 at 10:24:19PM +0200, Daniel Mack wrote:
>> +void cgroup_bpf_free(struct cgroup *cgrp)
>> +{
>> + unsigned int type;
>> +
>> + rcu_read_lock();
>> +
>> + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
>> + if (!cgrp->bpf.prog[type])
>> + continue;
>> +
>> + bpf_prog_put(cgrp->bpf.prog[type]);
>> + static_branch_dec(&cgroup_bpf_enabled_key);
>> + }
>> +
>> + rcu_read_unlock();
>
> These rcu locking seem suspicious to me. RCU locking on writer side
> is usually bogus. We sometimes do it to work around locking
> assertions in accessors but it's a better idea to make the assertions
> better in those cases - e.g. sth like assert_mylock_or_rcu_locked().
Right, in this case, it is unnecessary, as the bpf.prog[] is not under RCU.
>> +void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
>> +{
>> + unsigned int type;
>> +
>> + rcu_read_lock();
>
> Ditto.
>
>> + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
>> + rcu_assign_pointer(cgrp->bpf.prog_effective[type],
>> + rcu_dereference(parent->bpf.prog_effective[type]));
Okay, yes. We're under cgroup_mutex write-path protection here, so
that's unnecessary too.
>> +void __cgroup_bpf_update(struct cgroup *cgrp,
>> + struct cgroup *parent,
>> + struct bpf_prog *prog,
>> + enum bpf_attach_type type)
>> +{
>> + struct bpf_prog *old_prog, *effective;
>> + struct cgroup_subsys_state *pos;
>> +
>> + rcu_read_lock();
>
> Ditto.
Yes, agreed, as above.
>> + old_prog = xchg(cgrp->bpf.prog + type, prog);
>> + if (old_prog) {
>> + bpf_prog_put(old_prog);
>> + static_branch_dec(&cgroup_bpf_enabled_key);
>> + }
>> +
>> + if (prog)
>> + static_branch_inc(&cgroup_bpf_enabled_key);
>
> Minor but probably better to inc first and then dec so that you can
> avoid unnecessary enabled -> disabled -> enabled sequence.
Good point. Will fix.
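Reordered, that part of __cgroup_bpf_update() would look roughly like
this (a sketch of the planned v3 change, not committed code):

	/* Bump the key for the new program before dropping it for the
	 * old one, so swapping programs never transiently disables the
	 * static branch.
	 */
	if (prog)
		static_branch_inc(&cgroup_bpf_enabled_key);

	old_prog = xchg(cgrp->bpf.prog + type, prog);
	if (old_prog) {
		bpf_prog_put(old_prog);
		static_branch_dec(&cgroup_bpf_enabled_key);
	}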
>> + rcu_read_unlock();
>> +
>> + css_for_each_descendant_pre(pos, &cgrp->self) {
>
> On the other hand, this walk actually requires rcu read locking unless
> you're holding cgroup_mutex.
I am - this function is always called with cgroup_mutex held through the
wrapper in kernel/cgroup.c.
Thanks a lot - will put all those changes into v3.
Daniel
* Re: [PATCH v2 2/6] cgroup: add support for eBPF programs
2016-08-24 20:24 ` [PATCH v2 2/6] cgroup: add support for eBPF programs Daniel Mack
2016-08-24 21:54 ` Tejun Heo
@ 2016-08-25 6:56 ` kbuild test robot
1 sibling, 0 replies; 11+ messages in thread
From: kbuild test robot @ 2016-08-25 6:56 UTC (permalink / raw)
To: Daniel Mack
Cc: kbuild-all, htejun, daniel, ast, davem, kafai, fw, pablo, harald,
netdev, sargun, Daniel Mack
Hi Daniel,
[auto build test WARNING on net-next/master]
[also build test WARNING on v4.8-rc3 next-20160824]
[cannot apply to linus/master linux/master]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]
[Suggest to use git(>=2.9.0) format-patch --base=<commit> (or --base=auto for convenience) to record what (public, well-known) commit your patch series was built on]
[Check https://git-scm.com/docs/git-format-patch for more information]
url: https://github.com/0day-ci/linux/commits/Daniel-Mack/Add-eBPF-hooks-for-cgroups/20160825-042759
reproduce:
# apt-get install sparse
make ARCH=x86_64 allmodconfig
make C=1 CF=-D__CHECK_ENDIAN__
sparse warnings: (new ones prefixed by >>)
include/linux/compiler.h:230:8: sparse: attribute 'no_sanitize_address': unknown attribute
>> kernel/bpf/cgroup.c:46:17: sparse: incompatible types in comparison expression (different address spaces)
>> kernel/bpf/cgroup.c:46:17: sparse: incompatible types in comparison expression (different address spaces)
kernel/bpf/cgroup.c:97:17: sparse: incompatible types in comparison expression (different address spaces)
kernel/bpf/cgroup.c:147:16: sparse: incompatible types in comparison expression (different address spaces)
vim +46 kernel/bpf/cgroup.c
30 continue;
31
32 bpf_prog_put(cgrp->bpf.prog[type]);
33 static_branch_dec(&cgroup_bpf_enabled_key);
34 }
35
36 rcu_read_unlock();
37 }
38
39 void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent)
40 {
41 unsigned int type;
42
43 rcu_read_lock();
44
45 for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
> 46 rcu_assign_pointer(cgrp->bpf.prog_effective[type],
47 rcu_dereference(parent->bpf.prog_effective[type]));
48
49 rcu_read_unlock();
50 }
51
52 /**
53 * __cgroup_bpf_update() - Update the pinned program of a cgroup, and
54 * propagate the change to descendants
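The warnings stem from using rcu_assign_pointer()/rcu_dereference() on
pointers that carry no __rcu annotation. One way to satisfy sparse (an
assumption about a possible fix, not something posted in this thread)
would be to annotate the effective pointers:

struct cgroup_bpf {
	struct bpf_prog *prog[__MAX_BPF_ATTACH_TYPE];
	struct bpf_prog __rcu *prog_effective[__MAX_BPF_ATTACH_TYPE];
};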
---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation
* [PATCH v2 3/6] bpf: add BPF_PROG_ATTACH and BPF_PROG_DETACH commands
2016-08-24 20:24 [PATCH v2 0/6] Add eBPF hooks for cgroups Daniel Mack
2016-08-24 20:24 ` [PATCH v2 1/6] bpf: add new prog type for cgroup socket filtering Daniel Mack
2016-08-24 20:24 ` [PATCH v2 2/6] cgroup: add support for eBPF programs Daniel Mack
@ 2016-08-24 20:24 ` Daniel Mack
2016-08-24 22:14 ` Tejun Heo
2016-08-24 20:24 ` [PATCH v2 4/6] net: filter: run cgroup eBPF ingress programs Daniel Mack
` (2 subsequent siblings)
5 siblings, 1 reply; 11+ messages in thread
From: Daniel Mack @ 2016-08-24 20:24 UTC (permalink / raw)
To: htejun, daniel, ast
Cc: davem, kafai, fw, pablo, harald, netdev, sargun, Daniel Mack
Extend the bpf(2) syscall by two new commands, BPF_PROG_ATTACH and
BPF_PROG_DETACH which allow attaching and detaching eBPF programs
to a target.
On the API level, the target could be anything that has an fd in
userspace; hence, the field in union bpf_attr is called 'target_fd'.
When called with BPF_ATTACH_TYPE_CGROUP_INET_{E,IN}GRESS, the target is
expected to be a valid file descriptor of a cgroup v2 directory which
has the bpf controller enabled. These are the only use-cases
implemented by this patch at this point, but more can be added.
If a program of the given type already exists in the given cgroup,
the program is swapped atomically, so userspace does not have to drop
an existing program before installing a new one, which would
otherwise leave a gap in which no program is attached.
For more information on the propagation logic to subcgroups, please
refer to the bpf cgroup controller implementation.
The API is guarded by CAP_NET_ADMIN.
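For illustration, attaching a loaded program to a cgroup then boils
down to the following from userspace (a sketch mirroring the bpf_attr
fields added below; error handling omitted):

#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Attach an already-loaded program (prog_fd) to an open cgroup v2
 * directory (cg_fd) for the inet ingress hook.
 */
static int attach_to_cgroup(int prog_fd, int cg_fd)
{
	union bpf_attr attr = {
		.target_fd	= cg_fd,
		.attach_bpf_fd	= prog_fd,
		.attach_type	= BPF_ATTACH_TYPE_CGROUP_INET_INGRESS,
		.attach_flags	= 0,
	};

	return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
}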
Signed-off-by: Daniel Mack <daniel@zonque.org>
---
include/uapi/linux/bpf.h | 9 ++++++
kernel/bpf/syscall.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 88 insertions(+)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1d5db42..4cc2dcf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -73,6 +73,8 @@ enum bpf_cmd {
BPF_PROG_LOAD,
BPF_OBJ_PIN,
BPF_OBJ_GET,
+ BPF_PROG_ATTACH,
+ BPF_PROG_DETACH,
};
enum bpf_map_type {
@@ -147,6 +149,13 @@ union bpf_attr {
__aligned_u64 pathname;
__u32 bpf_fd;
};
+
+ struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+ __u32 target_fd; /* container object to attach to */
+ __u32 attach_bpf_fd; /* eBPF program to attach */
+ __u32 attach_type; /* BPF_ATTACH_TYPE_* */
+ __u64 attach_flags;
+ };
} __attribute__((aligned(8)));
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 228f962..208cba2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -822,6 +822,75 @@ static int bpf_obj_get(const union bpf_attr *attr)
return bpf_obj_get_user(u64_to_ptr(attr->pathname));
}
+#ifdef CONFIG_CGROUP_BPF
+static int bpf_prog_attach(const union bpf_attr *attr)
+{
+ struct bpf_prog *prog;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ /* Flags are unused for now */
+ if (attr->attach_flags != 0)
+ return -EINVAL;
+
+ switch (attr->attach_type) {
+ case BPF_ATTACH_TYPE_CGROUP_INET_INGRESS:
+ case BPF_ATTACH_TYPE_CGROUP_INET_EGRESS: {
+ struct cgroup *cgrp;
+
+ prog = bpf_prog_get_type(attr->attach_bpf_fd,
+ BPF_PROG_TYPE_CGROUP_SOCKET_FILTER);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+
+ cgrp = cgroup_get_from_fd(attr->target_fd);
+ if (IS_ERR(cgrp)) {
+ bpf_prog_put(prog);
+ return PTR_ERR(cgrp);
+ }
+
+ cgroup_bpf_update(cgrp, prog, attr->attach_type);
+ cgroup_put(cgrp);
+
+ break;
+ }
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int bpf_prog_detach(const union bpf_attr *attr)
+{
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (attr->attach_type) {
+ case BPF_ATTACH_TYPE_CGROUP_INET_INGRESS:
+ case BPF_ATTACH_TYPE_CGROUP_INET_EGRESS: {
+ struct cgroup *cgrp;
+
+ cgrp = cgroup_get_from_fd(attr->target_fd);
+ if (IS_ERR(cgrp))
+ return PTR_ERR(cgrp);
+
+ cgroup_bpf_update(cgrp, NULL, attr->attach_type);
+ cgroup_put(cgrp);
+
+ break;
+ }
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+#endif /* CONFIG_CGROUP_BPF */
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
@@ -888,6 +957,16 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_OBJ_GET:
err = bpf_obj_get(&attr);
break;
+
+#ifdef CONFIG_CGROUP_BPF
+ case BPF_PROG_ATTACH:
+ err = bpf_prog_attach(&attr);
+ break;
+ case BPF_PROG_DETACH:
+ err = bpf_prog_detach(&attr);
+ break;
+#endif
+
default:
err = -EINVAL;
break;
--
2.5.5
* Re: [PATCH v2 3/6] bpf: add BPF_PROG_ATTACH and BPF_PROG_DETACH commands
2016-08-24 20:24 ` [PATCH v2 3/6] bpf: add BPF_PROG_ATTACH and BPF_PROG_DETACH commands Daniel Mack
@ 2016-08-24 22:14 ` Tejun Heo
0 siblings, 0 replies; 11+ messages in thread
From: Tejun Heo @ 2016-08-24 22:14 UTC (permalink / raw)
To: Daniel Mack; +Cc: daniel, ast, davem, kafai, fw, pablo, harald, netdev, sargun
Hello,
On Wed, Aug 24, 2016 at 10:24:20PM +0200, Daniel Mack wrote:
> SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
> {
> union bpf_attr attr = {};
> @@ -888,6 +957,16 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
> case BPF_OBJ_GET:
> err = bpf_obj_get(&attr);
> break;
> +
> +#ifdef CONFIG_CGROUP_BPF
> + case BPF_PROG_ATTACH:
> + err = bpf_prog_attach(&attr);
> + break;
> + case BPF_PROG_DETACH:
> + err = bpf_prog_detach(&attr);
> + break;
> +#endif
So, this is one thing I hadn't realized while pushing for "just embed
it in cgroup". Breaking it out to a separate controller allows using
its own locking instead of having to piggyback on cgroup_mutex. That
said, as long as cgroup_mutex is not nested inside some inner mutex,
this shouldn't be a problem. I still think the embedding is fine and
whether we make it an implicit controller or not doesn't affect
userland API at all, so it's an implementation detail that we can
change later if necessary.
Thanks.
--
tejun
* [PATCH v2 4/6] net: filter: run cgroup eBPF ingress programs
2016-08-24 20:24 [PATCH v2 0/6] Add eBPF hooks for cgroups Daniel Mack
` (2 preceding siblings ...)
2016-08-24 20:24 ` [PATCH v2 3/6] bpf: add BPF_PROG_ATTACH and BPF_PROG_DETACH commands Daniel Mack
@ 2016-08-24 20:24 ` Daniel Mack
2016-08-24 20:24 ` [PATCH v2 5/6] net: core: run cgroup eBPF egress programs Daniel Mack
2016-08-24 20:24 ` [PATCH v2 6/6] samples: bpf: add userspace example for attaching eBPF programs to cgroups Daniel Mack
5 siblings, 0 replies; 11+ messages in thread
From: Daniel Mack @ 2016-08-24 20:24 UTC (permalink / raw)
To: htejun, daniel, ast
Cc: davem, kafai, fw, pablo, harald, netdev, sargun, Daniel Mack
If the cgroup associated with the receiving socket has an eBPF
program installed, run it from sk_filter_trim_cap().
eBPF programs used in this context are expected to either return 1 to
let the packet pass, or != 1 to drop it. The programs have access to
the full skb, including the MAC headers.
Note that cgroup_bpf_run_filter() is stubbed out as static inline nop
for !CONFIG_CGROUP_BPF, and is otherwise guarded by a static key that
stays disabled while the feature is unused.
Signed-off-by: Daniel Mack <daniel@zonque.org>
---
net/core/filter.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/net/core/filter.c b/net/core/filter.c
index bc04e5c..163f75b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -78,6 +78,11 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC))
return -ENOMEM;
+ err = cgroup_bpf_run_filter(sk, skb,
+ BPF_ATTACH_TYPE_CGROUP_INET_INGRESS);
+ if (err)
+ return err;
+
err = security_sock_rcv_skb(sk, skb);
if (err)
return err;
--
2.5.5
* [PATCH v2 5/6] net: core: run cgroup eBPF egress programs
2016-08-24 20:24 [PATCH v2 0/6] Add eBPF hooks for cgroups Daniel Mack
` (3 preceding siblings ...)
2016-08-24 20:24 ` [PATCH v2 4/6] net: filter: run cgroup eBPF ingress programs Daniel Mack
@ 2016-08-24 20:24 ` Daniel Mack
2016-08-24 20:24 ` [PATCH v2 6/6] samples: bpf: add userspace example for attaching eBPF programs to cgroups Daniel Mack
5 siblings, 0 replies; 11+ messages in thread
From: Daniel Mack @ 2016-08-24 20:24 UTC (permalink / raw)
To: htejun, daniel, ast
Cc: davem, kafai, fw, pablo, harald, netdev, sargun, Daniel Mack
If the cgroup associated with the sending socket has an eBPF
program installed, run it from __dev_queue_xmit().
eBPF programs used in this context are expected to either return 1 to
let the packet pass, or != 1 to drop it. The programs have access to
the full skb, including the MAC headers.
Note that cgroup_bpf_run_filter() is stubbed out as static inline nop
for !CONFIG_CGROUP_BPF, and is otherwise guarded by a static key that
stays disabled while the feature is unused.
Signed-off-by: Daniel Mack <daniel@zonque.org>
---
net/core/dev.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/net/core/dev.c b/net/core/dev.c
index a75df86..17484e6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -141,6 +141,7 @@
#include <linux/netfilter_ingress.h>
#include <linux/sctp.h>
#include <linux/crash_dump.h>
+#include <linux/bpf-cgroup.h>
#include "net-sysfs.h"
@@ -3329,6 +3330,11 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_SCHED_TSTAMP))
__skb_tstamp_tx(skb, NULL, skb->sk, SCM_TSTAMP_SCHED);
+ rc = cgroup_bpf_run_filter(skb->sk, skb,
+ BPF_ATTACH_TYPE_CGROUP_INET_EGRESS);
+ if (rc)
+ return rc;
+
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
--
2.5.5
* [PATCH v2 6/6] samples: bpf: add userspace example for attaching eBPF programs to cgroups
2016-08-24 20:24 [PATCH v2 0/6] Add eBPF hooks for cgroups Daniel Mack
` (4 preceding siblings ...)
2016-08-24 20:24 ` [PATCH v2 5/6] net: core: run cgroup eBPF egress programs Daniel Mack
@ 2016-08-24 20:24 ` Daniel Mack
5 siblings, 0 replies; 11+ messages in thread
From: Daniel Mack @ 2016-08-24 20:24 UTC (permalink / raw)
To: htejun, daniel, ast
Cc: davem, kafai, fw, pablo, harald, netdev, sargun, Daniel Mack
Add a simple userspace program to demonstrate the new API to attach eBPF
programs to cgroups. This is what it does:
* Create arraymap in kernel with 4 byte keys and 8 byte values
* Load eBPF program
The eBPF program accesses the map passed in to store two pieces of
information. The number of invocations of the program, which maps
to the number of packets received, is stored to key 0. Key 1 is
incremented on each iteration by the number of bytes stored in
the skb.
* Detach any eBPF program previously attached to the cgroup
* Attach the new program to the cgroup using BPF_PROG_ATTACH
* Once a second, read map[0] and map[1] to see how many bytes and
packets were seen on any socket of tasks in the given cgroup.
The program takes a cgroup path as 1st argument, and either "ingress"
or "egress" as 2nd. Optionally, "drop" can be passed as 3rd argument,
which will make the generated eBPF program return 0 instead of 1, so
the kernel will drop the packet.
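For example, with cgroup v2 mounted at /sys/fs/cgroup (the mount point
is just an assumption):

	./test_cgrp2_attach /sys/fs/cgroup/foo ingress
	./test_cgrp2_attach /sys/fs/cgroup/foo egress drop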
libbpf gained two new wrappers for the new syscall commands.
Signed-off-by: Daniel Mack <daniel@zonque.org>
---
samples/bpf/Makefile | 2 +
samples/bpf/libbpf.c | 23 +++++++
samples/bpf/libbpf.h | 3 +
samples/bpf/test_cgrp2_attach.c | 147 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 175 insertions(+)
create mode 100644 samples/bpf/test_cgrp2_attach.c
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index db3cb06..5c752f5 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -22,6 +22,7 @@ hostprogs-y += spintest
hostprogs-y += map_perf_test
hostprogs-y += test_overhead
hostprogs-y += test_cgrp2_array_pin
+hostprogs-y += test_cgrp2_attach
hostprogs-y += xdp1
hostprogs-y += xdp2
hostprogs-y += test_current_task_under_cgroup
@@ -47,6 +48,7 @@ spintest-objs := bpf_load.o libbpf.o spintest_user.o
map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o
test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o
test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
+test_cgrp2_attach-objs := libbpf.o test_cgrp2_attach.o
xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
# reuse xdp1 source intentionally
xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
diff --git a/samples/bpf/libbpf.c b/samples/bpf/libbpf.c
index 9969e35..95e196e 100644
--- a/samples/bpf/libbpf.c
+++ b/samples/bpf/libbpf.c
@@ -104,6 +104,29 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
}
+int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type)
+{
+ union bpf_attr attr = {
+ .target_fd = target_fd,
+ .attach_bpf_fd = prog_fd,
+ .attach_type = type,
+ .attach_flags = 0,
+ };
+
+ return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
+}
+
+int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
+{
+ union bpf_attr attr = {
+ .target_fd = target_fd,
+ .attach_type = type,
+ .attach_flags = 0,
+ };
+
+ return syscall(__NR_bpf, BPF_PROG_DETACH, &attr, sizeof(attr));
+}
+
int bpf_obj_pin(int fd, const char *pathname)
{
union bpf_attr attr = {
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
index 364582b..f973241 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/libbpf.h
@@ -15,6 +15,9 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, int insn_len,
const char *license, int kern_version);
+int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type);
+int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
+
int bpf_obj_pin(int fd, const char *pathname);
int bpf_obj_get(const char *pathname);
diff --git a/samples/bpf/test_cgrp2_attach.c b/samples/bpf/test_cgrp2_attach.c
new file mode 100644
index 0000000..0a44c3d
--- /dev/null
+++ b/samples/bpf/test_cgrp2_attach.c
@@ -0,0 +1,147 @@
+/* eBPF example program:
+ *
+ * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
+ *
+ * - Loads eBPF program
+ *
+ * The eBPF program accesses the map passed in to store two pieces of
+ * information. The number of invocations of the program, which maps
+ * to the number of packets received, is stored to key 0. Key 1 is
+ * incremented on each iteration by the number of bytes stored in
+ * the skb.
+ *
+ * - Detaches any eBPF program previously attached to the cgroup
+ *
+ * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
+ *
+ * - Every second, reads map[0] and map[1] to see how many bytes and
+ * packets were seen on any socket of tasks in the given cgroup.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include <unistd.h>
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <linux/bpf.h>
+
+#include "libbpf.h"
+
+enum {
+ MAP_KEY_PACKETS,
+ MAP_KEY_BYTES,
+};
+
+static int prog_load(int map_fd, int verdict)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), /* save r6 so it's not clobbered by BPF_CALL */
+
+ /* Count packets */
+ BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_PACKETS), /* r0 = 0 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd), /* load map fd to r1 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 1), /* r1 = 1 */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ /* Count bytes */
+ BPF_MOV64_IMM(BPF_REG_0, MAP_KEY_BYTES), /* r0 = 1 */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, offsetof(struct __sk_buff, len)), /* r1 = skb->len */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCKET_FILTER,
+ prog, sizeof(prog), "GPL", 0);
+}
+
+static int usage(const char *argv0)
+{
+ printf("Usage: %s <cg-path> <egress|ingress> [drop]\n", argv0);
+ return EXIT_FAILURE;
+}
+
+int main(int argc, char **argv)
+{
+ int cg_fd, map_fd, prog_fd, key, ret;
+ long long pkt_cnt, byte_cnt;
+ enum bpf_attach_type type;
+ int verdict = 1;
+
+ if (argc < 3)
+ return usage(argv[0]);
+
+ if (strcmp(argv[2], "ingress") == 0)
+ type = BPF_ATTACH_TYPE_CGROUP_INET_INGRESS;
+ else if (strcmp(argv[2], "egress") == 0)
+ type = BPF_ATTACH_TYPE_CGROUP_INET_EGRESS;
+ else
+ return usage(argv[0]);
+
+ if (argc > 3 && strcmp(argv[3], "drop") == 0)
+ verdict = 0;
+
+ cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY);
+ if (cg_fd < 0) {
+ printf("Failed to open cgroup path: '%s'\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY,
+ sizeof(key), sizeof(byte_cnt),
+ 256, 0);
+ if (map_fd < 0) {
+ printf("Failed to create map: '%s'\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ prog_fd = prog_load(map_fd, verdict);
+ printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf);
+
+ if (prog_fd < 0) {
+ printf("Failed to load prog: '%s'\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ ret = bpf_prog_detach(cg_fd, type);
+ printf("bpf_prog_detach() returned '%s' (%d)\n", strerror(errno), errno);
+
+ ret = bpf_prog_attach(prog_fd, cg_fd, type);
+ if (ret < 0) {
+ printf("Failed to attach prog to cgroup: '%s'\n",
+ strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ while (1) {
+ key = MAP_KEY_PACKETS;
+ assert(bpf_lookup_elem(map_fd, &key, &pkt_cnt) == 0);
+
+ key = MAP_KEY_BYTES;
+ assert(bpf_lookup_elem(map_fd, &key, &byte_cnt) == 0);
+
+ printf("cgroup received %lld packets, %lld bytes\n",
+ pkt_cnt, byte_cnt);
+ sleep(1);
+ }
+
+ return EXIT_SUCCESS;
+}
--
2.5.5