[net-next v2 v2 1/2] bpf: Add bpf_current_task_in

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [net-next v2 v2 1/2] bpf: Add bpf_current_task_in_cgroup helper
@ 2016-08-10  0:00 Sargun Dhillon
  2016-08-10  0:23 ` Alexei Starovoitov
  0 siblings, 1 reply; 10+ messages in thread
From: Sargun Dhillon @ 2016-08-10  0:00 UTC (permalink / raw)
  To: netdev; +Cc: alexei.starovoitov, daniel

This adds a bpf helper that's similar to the skb_in_cgroup helper to check
whether the probe is currently executing in the context of a specific
subset of the cgroupsv2 hierarchy. It does this based on membership test
for a cgroup arraymap. It is invalid to call this in an interrupt, and
it'll return an error. The helper is primarily to be used in debugging
activities for containers, where you may have multiple programs running in
a given top-level "container".

This patch also genericizes some of the arraymap fetching logic between the
skb_in_cgroup helper and this new helper.

Signed-off-by: Sargun Dhillon <sargun@sargun.me>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
---
 include/linux/bpf.h      | 24 ++++++++++++++++++++++++
 include/uapi/linux/bpf.h | 11 +++++++++++
 kernel/bpf/arraymap.c    |  2 +-
 kernel/bpf/verifier.c    |  4 +++-
 kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++++++++++++++++
 net/core/filter.c        | 11 ++++-------
 6 files changed, 77 insertions(+), 9 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1113423..9adf712 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -319,4 +319,28 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
 void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
+#ifdef CONFIG_CGROUPS
+/* Helper to fetch a cgroup pointer based on index.
+ * @map: a cgroup arraymap
+ * @idx: index of the item you want to fetch
+ *
+ * Returns pointer on success,
+ * Error code if item not found, or out-of-bounds access
+ */
+static inline struct cgroup *fetch_arraymap_ptr(struct bpf_map *map, int idx)
+{
+	struct cgroup *cgrp;
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+
+	if (unlikely(idx >= array->map.max_entries))
+		return ERR_PTR(-E2BIG);
+
+	cgrp = READ_ONCE(array->ptrs[idx]);
+	if (unlikely(!cgrp))
+		return ERR_PTR(-EAGAIN);
+
+	return cgrp;
+}
+#endif /* CONFIG_CGROUPS */
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index da218fe..64b1a07 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -375,6 +375,17 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_probe_write_user,
 
+	/**
+	 * bpf_current_task_in_cgroup(map, index) - Check cgroup2 membership of current task
+	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
+	 * @index: index of the cgroup in the bpf_map
+	 * Return:
+	 *   == 0 current failed the cgroup2 descendant test
+	 *   == 1 current succeeded the cgroup2 descendant test
+	 *    < 0 error
+	 */
+	BPF_FUNC_current_task_in_cgroup,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 633a650..a2ac051 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
 }
 late_initcall(register_perf_event_array_map);
 
-#ifdef CONFIG_SOCK_CGROUP_DATA
+#ifdef CONFIG_CGROUPS
 static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
 				     struct file *map_file /* not used */,
 				     int fd)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7094c69..80efab8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_CGROUP_ARRAY:
-		if (func_id != BPF_FUNC_skb_in_cgroup)
+		if (func_id != BPF_FUNC_skb_in_cgroup &&
+		    func_id != BPF_FUNC_current_task_in_cgroup)
 			goto error;
 		break;
 	default:
@@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
 			goto error;
 		break;
+	case BPF_FUNC_current_task_in_cgroup:
 	case BPF_FUNC_skb_in_cgroup:
 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
 			goto error;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b20438f..39f0290 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -376,6 +376,36 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+#ifdef CONFIG_CGROUPS
+static u64 bpf_current_task_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct bpf_map *map = (struct bpf_map *)(long)r1;
+	struct css_set *cset;
+	struct cgroup *cgrp;
+	u32 idx = (u32)r2;
+
+	if (unlikely(in_interrupt()))
+		return -EINVAL;
+
+	cgrp = fetch_arraymap_ptr(map, idx);
+
+	if (unlikely(IS_ERR(cgrp)))
+		return PTR_ERR(cgrp);
+
+	cset = task_css_set(current);
+
+	return cgroup_is_descendant(cset->dfl_cgrp, cgrp);
+}
+
+static const struct bpf_func_proto bpf_current_task_in_cgroup_proto = {
+	.func           = bpf_current_task_in_cgroup,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_CONST_MAP_PTR,
+	.arg2_type      = ARG_ANYTHING,
+};
+#endif /* CONFIG_CGROUPS */
+
 static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -407,6 +437,10 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
 		return &bpf_perf_event_read_proto;
 	case BPF_FUNC_probe_write_user:
 		return bpf_get_probe_write_proto();
+#ifdef CONFIG_CGROUPS
+	case BPF_FUNC_current_task_in_cgroup:
+		return &bpf_current_task_in_cgroup_proto;
+#endif
 	default:
 		return NULL;
 	}
diff --git a/net/core/filter.c b/net/core/filter.c
index b5add4e..7fda861 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2321,21 +2321,18 @@ static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 {
 	struct sk_buff *skb = (struct sk_buff *)(long)r1;
 	struct bpf_map *map = (struct bpf_map *)(long)r2;
-	struct bpf_array *array = container_of(map, struct bpf_array, map);
 	struct cgroup *cgrp;
 	struct sock *sk;
-	u32 i = (u32)r3;
+	u32 idx = (u32)r3;
 
 	sk = skb->sk;
 	if (!sk || !sk_fullsock(sk))
 		return -ENOENT;
 
-	if (unlikely(i >= array->map.max_entries))
-		return -E2BIG;
+	cgrp = fetch_arraymap_ptr(map, idx);
 
-	cgrp = READ_ONCE(array->ptrs[i]);
-	if (unlikely(!cgrp))
-		return -EAGAIN;
+	if (unlikely(IS_ERR(cgrp)))
+		return PTR_ERR(cgrp);
 
 	return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
 }
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [net-next v2 v2 1/2] bpf: Add bpf_current_task_in_cgroup helper
  2016-08-10  0:00 [net-next v2 v2 1/2] bpf: Add bpf_current_task_in_cgroup helper Sargun Dhillon
@ 2016-08-10  0:23 ` Alexei Starovoitov
  2016-08-10  0:55   ` Sargun Dhillon
  0 siblings, 1 reply; 10+ messages in thread
From: Alexei Starovoitov @ 2016-08-10  0:23 UTC (permalink / raw)
  To: Sargun Dhillon; +Cc: netdev, daniel

On Tue, Aug 09, 2016 at 05:00:12PM -0700, Sargun Dhillon wrote:
> This adds a bpf helper that's similar to the skb_in_cgroup helper to check
> whether the probe is currently executing in the context of a specific
> subset of the cgroupsv2 hierarchy. It does this based on membership test
> for a cgroup arraymap. It is invalid to call this in an interrupt, and
> it'll return an error. The helper is primarily to be used in debugging
> activities for containers, where you may have multiple programs running in
> a given top-level "container".
> 
> This patch also genericizes some of the arraymap fetching logic between the
> skb_in_cgroup helper and this new helper.
> 
> Signed-off-by: Sargun Dhillon <sargun@sargun.me>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> ---
>  include/linux/bpf.h      | 24 ++++++++++++++++++++++++
>  include/uapi/linux/bpf.h | 11 +++++++++++
>  kernel/bpf/arraymap.c    |  2 +-
>  kernel/bpf/verifier.c    |  4 +++-
>  kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++++++++++++++++
>  net/core/filter.c        | 11 ++++-------
>  6 files changed, 77 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 1113423..9adf712 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -319,4 +319,28 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
>  void bpf_user_rnd_init_once(void);
>  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
>  
> +#ifdef CONFIG_CGROUPS
> +/* Helper to fetch a cgroup pointer based on index.
> + * @map: a cgroup arraymap
> + * @idx: index of the item you want to fetch
> + *
> + * Returns pointer on success,
> + * Error code if item not found, or out-of-bounds access
> + */
> +static inline struct cgroup *fetch_arraymap_ptr(struct bpf_map *map, int idx)
> +{
> +	struct cgroup *cgrp;
> +	struct bpf_array *array = container_of(map, struct bpf_array, map);
> +
> +	if (unlikely(idx >= array->map.max_entries))
> +		return ERR_PTR(-E2BIG);
> +
> +	cgrp = READ_ONCE(array->ptrs[idx]);
> +	if (unlikely(!cgrp))
> +		return ERR_PTR(-EAGAIN);
> +
> +	return cgrp;
> +}
> +#endif /* CONFIG_CGROUPS */
> +
>  #endif /* _LINUX_BPF_H */
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index da218fe..64b1a07 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -375,6 +375,17 @@ enum bpf_func_id {
>  	 */
>  	BPF_FUNC_probe_write_user,
>  
> +	/**
> +	 * bpf_current_task_in_cgroup(map, index) - Check cgroup2 membership of current task
> +	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
> +	 * @index: index of the cgroup in the bpf_map
> +	 * Return:
> +	 *   == 0 current failed the cgroup2 descendant test
> +	 *   == 1 current succeeded the cgroup2 descendant test
> +	 *    < 0 error
> +	 */
> +	BPF_FUNC_current_task_in_cgroup,
> +
>  	__BPF_FUNC_MAX_ID,
>  };
>  
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index 633a650..a2ac051 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
>  }
>  late_initcall(register_perf_event_array_map);
>  
> -#ifdef CONFIG_SOCK_CGROUP_DATA
> +#ifdef CONFIG_CGROUPS
>  static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
>  				     struct file *map_file /* not used */,
>  				     int fd)
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 7094c69..80efab8 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
>  			goto error;
>  		break;
>  	case BPF_MAP_TYPE_CGROUP_ARRAY:
> -		if (func_id != BPF_FUNC_skb_in_cgroup)
> +		if (func_id != BPF_FUNC_skb_in_cgroup &&
> +		    func_id != BPF_FUNC_current_task_in_cgroup)
>  			goto error;
>  		break;
>  	default:
> @@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
>  		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
>  			goto error;
>  		break;
> +	case BPF_FUNC_current_task_in_cgroup:
>  	case BPF_FUNC_skb_in_cgroup:
>  		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
>  			goto error;
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index b20438f..39f0290 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -376,6 +376,36 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
>  	.ret_type	= RET_INTEGER,
>  };
>  
> +#ifdef CONFIG_CGROUPS
> +static u64 bpf_current_task_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)

please don't introduce #ifdef into .c code.
In this case add #else in .h to fetch_arraymap_ptr() that returns -EOPNOTSUPP.
Also why guard it with CONFIG_CGROUPS in .h at all?
I think it should compile fine even when cgroups are not defined.
The helper won't be functional anyway, since no cgroup_fd can be added
to cgroup map.

> +{
> +	struct bpf_map *map = (struct bpf_map *)(long)r1;
> +	struct css_set *cset;
> +	struct cgroup *cgrp;
> +	u32 idx = (u32)r2;
> +
> +	if (unlikely(in_interrupt()))
> +		return -EINVAL;
> +
> +	cgrp = fetch_arraymap_ptr(map, idx);
> +
> +	if (unlikely(IS_ERR(cgrp)))
> +		return PTR_ERR(cgrp);
> +
> +	cset = task_css_set(current);
> +
> +	return cgroup_is_descendant(cset->dfl_cgrp, cgrp);
> +}
> +
> +static const struct bpf_func_proto bpf_current_task_in_cgroup_proto = {
> +	.func           = bpf_current_task_in_cgroup,
> +	.gpl_only       = false,
> +	.ret_type       = RET_INTEGER,
> +	.arg1_type      = ARG_CONST_MAP_PTR,
> +	.arg2_type      = ARG_ANYTHING,
> +};
> +#endif /* CONFIG_CGROUPS */
> +
>  static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
>  {
>  	switch (func_id) {
> @@ -407,6 +437,10 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
>  		return &bpf_perf_event_read_proto;
>  	case BPF_FUNC_probe_write_user:
>  		return bpf_get_probe_write_proto();
> +#ifdef CONFIG_CGROUPS
> +	case BPF_FUNC_current_task_in_cgroup:
> +		return &bpf_current_task_in_cgroup_proto;
> +#endif

same here. looks unnecessary and #ifdef in .c are frowned upon.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [net-next v2 v2 1/2] bpf: Add bpf_current_task_in_cgroup helper
  2016-08-10  0:23 ` Alexei Starovoitov
@ 2016-08-10  0:55   ` Sargun Dhillon
  2016-08-10  1:02     ` Alexei Starovoitov
  0 siblings, 1 reply; 10+ messages in thread
From: Sargun Dhillon @ 2016-08-10  0:55 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: netdev, daniel

On Tue, Aug 09, 2016 at 05:23:50PM -0700, Alexei Starovoitov wrote:
> On Tue, Aug 09, 2016 at 05:00:12PM -0700, Sargun Dhillon wrote:
> > This adds a bpf helper that's similar to the skb_in_cgroup helper to check
> > whether the probe is currently executing in the context of a specific
> > subset of the cgroupsv2 hierarchy. It does this based on membership test
> > for a cgroup arraymap. It is invalid to call this in an interrupt, and
> > it'll return an error. The helper is primarily to be used in debugging
> > activities for containers, where you may have multiple programs running in
> > a given top-level "container".
> > 
> > This patch also genericizes some of the arraymap fetching logic between the
> > skb_in_cgroup helper and this new helper.
> > 
> > Signed-off-by: Sargun Dhillon <sargun@sargun.me>
> > Cc: Alexei Starovoitov <ast@kernel.org>
> > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > ---
> >  include/linux/bpf.h      | 24 ++++++++++++++++++++++++
> >  include/uapi/linux/bpf.h | 11 +++++++++++
> >  kernel/bpf/arraymap.c    |  2 +-
> >  kernel/bpf/verifier.c    |  4 +++-
> >  kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++++++++++++++++
> >  net/core/filter.c        | 11 ++++-------
> >  6 files changed, 77 insertions(+), 9 deletions(-)
> > 
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 1113423..9adf712 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -319,4 +319,28 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
> >  void bpf_user_rnd_init_once(void);
> >  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
> >  
> > +#ifdef CONFIG_CGROUPS
> > +/* Helper to fetch a cgroup pointer based on index.
> > + * @map: a cgroup arraymap
> > + * @idx: index of the item you want to fetch
> > + *
> > + * Returns pointer on success,
> > + * Error code if item not found, or out-of-bounds access
> > + */
> > +static inline struct cgroup *fetch_arraymap_ptr(struct bpf_map *map, int idx)
> > +{
> > +	struct cgroup *cgrp;
> > +	struct bpf_array *array = container_of(map, struct bpf_array, map);
> > +
> > +	if (unlikely(idx >= array->map.max_entries))
> > +		return ERR_PTR(-E2BIG);
> > +
> > +	cgrp = READ_ONCE(array->ptrs[idx]);
> > +	if (unlikely(!cgrp))
> > +		return ERR_PTR(-EAGAIN);
> > +
> > +	return cgrp;
> > +}
> > +#endif /* CONFIG_CGROUPS */
> > +
> >  #endif /* _LINUX_BPF_H */
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index da218fe..64b1a07 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -375,6 +375,17 @@ enum bpf_func_id {
> >  	 */
> >  	BPF_FUNC_probe_write_user,
> >  
> > +	/**
> > +	 * bpf_current_task_in_cgroup(map, index) - Check cgroup2 membership of current task
> > +	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
> > +	 * @index: index of the cgroup in the bpf_map
> > +	 * Return:
> > +	 *   == 0 current failed the cgroup2 descendant test
> > +	 *   == 1 current succeeded the cgroup2 descendant test
> > +	 *    < 0 error
> > +	 */
> > +	BPF_FUNC_current_task_in_cgroup,
> > +
> >  	__BPF_FUNC_MAX_ID,
> >  };
> >  
> > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> > index 633a650..a2ac051 100644
> > --- a/kernel/bpf/arraymap.c
> > +++ b/kernel/bpf/arraymap.c
> > @@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
> >  }
> >  late_initcall(register_perf_event_array_map);
> >  
> > -#ifdef CONFIG_SOCK_CGROUP_DATA
> > +#ifdef CONFIG_CGROUPS
> >  static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
> >  				     struct file *map_file /* not used */,
> >  				     int fd)
> > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > index 7094c69..80efab8 100644
> > --- a/kernel/bpf/verifier.c
> > +++ b/kernel/bpf/verifier.c
> > @@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
> >  			goto error;
> >  		break;
> >  	case BPF_MAP_TYPE_CGROUP_ARRAY:
> > -		if (func_id != BPF_FUNC_skb_in_cgroup)
> > +		if (func_id != BPF_FUNC_skb_in_cgroup &&
> > +		    func_id != BPF_FUNC_current_task_in_cgroup)
> >  			goto error;
> >  		break;
> >  	default:
> > @@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
> >  		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
> >  			goto error;
> >  		break;
> > +	case BPF_FUNC_current_task_in_cgroup:
> >  	case BPF_FUNC_skb_in_cgroup:
> >  		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
> >  			goto error;
> > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > index b20438f..39f0290 100644
> > --- a/kernel/trace/bpf_trace.c
> > +++ b/kernel/trace/bpf_trace.c
> > @@ -376,6 +376,36 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
> >  	.ret_type	= RET_INTEGER,
> >  };
> >  
> > +#ifdef CONFIG_CGROUPS
> > +static u64 bpf_current_task_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
> 
> please don't introduce #ifdef into .c code.
> In this case add #else in .h to fetch_arraymap_ptr() that returns -EOPNOTSUPP.
> Also why guard it with CONFIG_CGROUPS in .h at all?
> I think it should compile fine even when cgroups are not defined.
> The helper won't be functional anyway, since no cgroup_fd can be added
> to cgroup map.
> 
Pardon my ignorance, but if I don't have an ifdef wont I be referencing a bunch 
of non-existent code (like cgroup_is_descendant)? Unless your suggestion is to 
move this to the .h file as well.

> > +{
> > +	struct bpf_map *map = (struct bpf_map *)(long)r1;
> > +	struct css_set *cset;
> > +	struct cgroup *cgrp;
> > +	u32 idx = (u32)r2;
> > +
> > +	if (unlikely(in_interrupt()))
> > +		return -EINVAL;
> > +
> > +	cgrp = fetch_arraymap_ptr(map, idx);
> > +
> > +	if (unlikely(IS_ERR(cgrp)))
> > +		return PTR_ERR(cgrp);
> > +
> > +	cset = task_css_set(current);
> > +
> > +	return cgroup_is_descendant(cset->dfl_cgrp, cgrp);
> > +}
> > +
> > +static const struct bpf_func_proto bpf_current_task_in_cgroup_proto = {
> > +	.func           = bpf_current_task_in_cgroup,
> > +	.gpl_only       = false,
> > +	.ret_type       = RET_INTEGER,
> > +	.arg1_type      = ARG_CONST_MAP_PTR,
> > +	.arg2_type      = ARG_ANYTHING,
> > +};
> > +#endif /* CONFIG_CGROUPS */
> > +
> >  static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
> >  {
> >  	switch (func_id) {
> > @@ -407,6 +437,10 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
> >  		return &bpf_perf_event_read_proto;
> >  	case BPF_FUNC_probe_write_user:
> >  		return bpf_get_probe_write_proto();
> > +#ifdef CONFIG_CGROUPS
> > +	case BPF_FUNC_current_task_in_cgroup:
> > +		return &bpf_current_task_in_cgroup_proto;
> > +#endif
> 
> same here. looks unnecessary and #ifdef in .c are frowned upon.
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [net-next v2 v2 1/2] bpf: Add bpf_current_task_in_cgroup helper
  2016-08-10  0:55   ` Sargun Dhillon
@ 2016-08-10  1:02     ` Alexei Starovoitov
  2016-08-10  1:26       ` Sargun Dhillon
  0 siblings, 1 reply; 10+ messages in thread
From: Alexei Starovoitov @ 2016-08-10  1:02 UTC (permalink / raw)
  To: Sargun Dhillon; +Cc: netdev, daniel

On Tue, Aug 09, 2016 at 05:55:26PM -0700, Sargun Dhillon wrote:
> On Tue, Aug 09, 2016 at 05:23:50PM -0700, Alexei Starovoitov wrote:
> > On Tue, Aug 09, 2016 at 05:00:12PM -0700, Sargun Dhillon wrote:
> > > This adds a bpf helper that's similar to the skb_in_cgroup helper to check
> > > whether the probe is currently executing in the context of a specific
> > > subset of the cgroupsv2 hierarchy. It does this based on membership test
> > > for a cgroup arraymap. It is invalid to call this in an interrupt, and
> > > it'll return an error. The helper is primarily to be used in debugging
> > > activities for containers, where you may have multiple programs running in
> > > a given top-level "container".
> > > 
> > > This patch also genericizes some of the arraymap fetching logic between the
> > > skb_in_cgroup helper and this new helper.
> > > 
> > > Signed-off-by: Sargun Dhillon <sargun@sargun.me>
> > > Cc: Alexei Starovoitov <ast@kernel.org>
> > > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > > ---
> > >  include/linux/bpf.h      | 24 ++++++++++++++++++++++++
> > >  include/uapi/linux/bpf.h | 11 +++++++++++
> > >  kernel/bpf/arraymap.c    |  2 +-
> > >  kernel/bpf/verifier.c    |  4 +++-
> > >  kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++++++++++++++++
> > >  net/core/filter.c        | 11 ++++-------
> > >  6 files changed, 77 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > index 1113423..9adf712 100644
> > > --- a/include/linux/bpf.h
> > > +++ b/include/linux/bpf.h
> > > @@ -319,4 +319,28 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
> > >  void bpf_user_rnd_init_once(void);
> > >  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
> > >  
> > > +#ifdef CONFIG_CGROUPS
> > > +/* Helper to fetch a cgroup pointer based on index.
> > > + * @map: a cgroup arraymap
> > > + * @idx: index of the item you want to fetch
> > > + *
> > > + * Returns pointer on success,
> > > + * Error code if item not found, or out-of-bounds access
> > > + */
> > > +static inline struct cgroup *fetch_arraymap_ptr(struct bpf_map *map, int idx)
> > > +{
> > > +	struct cgroup *cgrp;
> > > +	struct bpf_array *array = container_of(map, struct bpf_array, map);
> > > +
> > > +	if (unlikely(idx >= array->map.max_entries))
> > > +		return ERR_PTR(-E2BIG);
> > > +
> > > +	cgrp = READ_ONCE(array->ptrs[idx]);
> > > +	if (unlikely(!cgrp))
> > > +		return ERR_PTR(-EAGAIN);
> > > +
> > > +	return cgrp;
> > > +}
> > > +#endif /* CONFIG_CGROUPS */
> > > +
> > >  #endif /* _LINUX_BPF_H */
> > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > index da218fe..64b1a07 100644
> > > --- a/include/uapi/linux/bpf.h
> > > +++ b/include/uapi/linux/bpf.h
> > > @@ -375,6 +375,17 @@ enum bpf_func_id {
> > >  	 */
> > >  	BPF_FUNC_probe_write_user,
> > >  
> > > +	/**
> > > +	 * bpf_current_task_in_cgroup(map, index) - Check cgroup2 membership of current task
> > > +	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
> > > +	 * @index: index of the cgroup in the bpf_map
> > > +	 * Return:
> > > +	 *   == 0 current failed the cgroup2 descendant test
> > > +	 *   == 1 current succeeded the cgroup2 descendant test
> > > +	 *    < 0 error
> > > +	 */
> > > +	BPF_FUNC_current_task_in_cgroup,
> > > +
> > >  	__BPF_FUNC_MAX_ID,
> > >  };
> > >  
> > > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> > > index 633a650..a2ac051 100644
> > > --- a/kernel/bpf/arraymap.c
> > > +++ b/kernel/bpf/arraymap.c
> > > @@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
> > >  }
> > >  late_initcall(register_perf_event_array_map);
> > >  
> > > -#ifdef CONFIG_SOCK_CGROUP_DATA
> > > +#ifdef CONFIG_CGROUPS
> > >  static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
> > >  				     struct file *map_file /* not used */,
> > >  				     int fd)
> > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > > index 7094c69..80efab8 100644
> > > --- a/kernel/bpf/verifier.c
> > > +++ b/kernel/bpf/verifier.c
> > > @@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
> > >  			goto error;
> > >  		break;
> > >  	case BPF_MAP_TYPE_CGROUP_ARRAY:
> > > -		if (func_id != BPF_FUNC_skb_in_cgroup)
> > > +		if (func_id != BPF_FUNC_skb_in_cgroup &&
> > > +		    func_id != BPF_FUNC_current_task_in_cgroup)
> > >  			goto error;
> > >  		break;
> > >  	default:
> > > @@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
> > >  		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
> > >  			goto error;
> > >  		break;
> > > +	case BPF_FUNC_current_task_in_cgroup:
> > >  	case BPF_FUNC_skb_in_cgroup:
> > >  		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
> > >  			goto error;
> > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > > index b20438f..39f0290 100644
> > > --- a/kernel/trace/bpf_trace.c
> > > +++ b/kernel/trace/bpf_trace.c
> > > @@ -376,6 +376,36 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
> > >  	.ret_type	= RET_INTEGER,
> > >  };
> > >  
> > > +#ifdef CONFIG_CGROUPS
> > > +static u64 bpf_current_task_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
> > 
> > please don't introduce #ifdef into .c code.
> > In this case add #else in .h to fetch_arraymap_ptr() that returns -EOPNOTSUPP.
> > Also why guard it with CONFIG_CGROUPS in .h at all?
> > I think it should compile fine even when cgroups are not defined.
> > The helper won't be functional anyway, since no cgroup_fd can be added
> > to cgroup map.
> > 
> Pardon my ignorance, but if I don't have an ifdef wont I be referencing a bunch 
> of non-existent code (like cgroup_is_descendant)? Unless your suggestion is to 
> move this to the .h file as well.

I see. If cgroup_is_descendant is the only function we use, how about
adding it as 'return false' in #else part of linux/cgroup.h ?
There are a bunch of them there like cgroup_exit/cgroup_free/...

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [net-next v2 v2 1/2] bpf: Add bpf_current_task_in_cgroup helper
  2016-08-10  1:02     ` Alexei Starovoitov
@ 2016-08-10  1:26       ` Sargun Dhillon
  2016-08-10  3:27         ` Alexei Starovoitov
  0 siblings, 1 reply; 10+ messages in thread
From: Sargun Dhillon @ 2016-08-10  1:26 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: netdev, daniel

On Tue, Aug 09, 2016 at 06:02:34PM -0700, Alexei Starovoitov wrote:
> On Tue, Aug 09, 2016 at 05:55:26PM -0700, Sargun Dhillon wrote:
> > On Tue, Aug 09, 2016 at 05:23:50PM -0700, Alexei Starovoitov wrote:
> > > On Tue, Aug 09, 2016 at 05:00:12PM -0700, Sargun Dhillon wrote:
> > > > This adds a bpf helper that's similar to the skb_in_cgroup helper to check
> > > > whether the probe is currently executing in the context of a specific
> > > > subset of the cgroupsv2 hierarchy. It does this based on membership test
> > > > for a cgroup arraymap. It is invalid to call this in an interrupt, and
> > > > it'll return an error. The helper is primarily to be used in debugging
> > > > activities for containers, where you may have multiple programs running in
> > > > a given top-level "container".
> > > > 
> > > > This patch also genericizes some of the arraymap fetching logic between the
> > > > skb_in_cgroup helper and this new helper.
> > > > 
> > > > Signed-off-by: Sargun Dhillon <sargun@sargun.me>
> > > > Cc: Alexei Starovoitov <ast@kernel.org>
> > > > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > > > ---
> > > >  include/linux/bpf.h      | 24 ++++++++++++++++++++++++
> > > >  include/uapi/linux/bpf.h | 11 +++++++++++
> > > >  kernel/bpf/arraymap.c    |  2 +-
> > > >  kernel/bpf/verifier.c    |  4 +++-
> > > >  kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++++++++++++++++
> > > >  net/core/filter.c        | 11 ++++-------
> > > >  6 files changed, 77 insertions(+), 9 deletions(-)
> > > > 
> > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > index 1113423..9adf712 100644
> > > > --- a/include/linux/bpf.h
> > > > +++ b/include/linux/bpf.h
> > > > @@ -319,4 +319,28 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
> > > >  void bpf_user_rnd_init_once(void);
> > > >  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
> > > >  
> > > > +#ifdef CONFIG_CGROUPS
> > > > +/* Helper to fetch a cgroup pointer based on index.
> > > > + * @map: a cgroup arraymap
> > > > + * @idx: index of the item you want to fetch
> > > > + *
> > > > + * Returns pointer on success,
> > > > + * Error code if item not found, or out-of-bounds access
> > > > + */
> > > > +static inline struct cgroup *fetch_arraymap_ptr(struct bpf_map *map, int idx)
> > > > +{
> > > > +	struct cgroup *cgrp;
> > > > +	struct bpf_array *array = container_of(map, struct bpf_array, map);
> > > > +
> > > > +	if (unlikely(idx >= array->map.max_entries))
> > > > +		return ERR_PTR(-E2BIG);
> > > > +
> > > > +	cgrp = READ_ONCE(array->ptrs[idx]);
> > > > +	if (unlikely(!cgrp))
> > > > +		return ERR_PTR(-EAGAIN);
> > > > +
> > > > +	return cgrp;
> > > > +}
> > > > +#endif /* CONFIG_CGROUPS */
> > > > +
> > > >  #endif /* _LINUX_BPF_H */
> > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > > index da218fe..64b1a07 100644
> > > > --- a/include/uapi/linux/bpf.h
> > > > +++ b/include/uapi/linux/bpf.h
> > > > @@ -375,6 +375,17 @@ enum bpf_func_id {
> > > >  	 */
> > > >  	BPF_FUNC_probe_write_user,
> > > >  
> > > > +	/**
> > > > +	 * bpf_current_task_in_cgroup(map, index) - Check cgroup2 membership of current task
> > > > +	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
> > > > +	 * @index: index of the cgroup in the bpf_map
> > > > +	 * Return:
> > > > +	 *   == 0 current failed the cgroup2 descendant test
> > > > +	 *   == 1 current succeeded the cgroup2 descendant test
> > > > +	 *    < 0 error
> > > > +	 */
> > > > +	BPF_FUNC_current_task_in_cgroup,
> > > > +
> > > >  	__BPF_FUNC_MAX_ID,
> > > >  };
> > > >  
> > > > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> > > > index 633a650..a2ac051 100644
> > > > --- a/kernel/bpf/arraymap.c
> > > > +++ b/kernel/bpf/arraymap.c
> > > > @@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
> > > >  }
> > > >  late_initcall(register_perf_event_array_map);
> > > >  
> > > > -#ifdef CONFIG_SOCK_CGROUP_DATA
> > > > +#ifdef CONFIG_CGROUPS
> > > >  static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
> > > >  				     struct file *map_file /* not used */,
> > > >  				     int fd)
> > > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > > > index 7094c69..80efab8 100644
> > > > --- a/kernel/bpf/verifier.c
> > > > +++ b/kernel/bpf/verifier.c
> > > > @@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
> > > >  			goto error;
> > > >  		break;
> > > >  	case BPF_MAP_TYPE_CGROUP_ARRAY:
> > > > -		if (func_id != BPF_FUNC_skb_in_cgroup)
> > > > +		if (func_id != BPF_FUNC_skb_in_cgroup &&
> > > > +		    func_id != BPF_FUNC_current_task_in_cgroup)
> > > >  			goto error;
> > > >  		break;
> > > >  	default:
> > > > @@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
> > > >  		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
> > > >  			goto error;
> > > >  		break;
> > > > +	case BPF_FUNC_current_task_in_cgroup:
> > > >  	case BPF_FUNC_skb_in_cgroup:
> > > >  		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
> > > >  			goto error;
> > > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > > > index b20438f..39f0290 100644
> > > > --- a/kernel/trace/bpf_trace.c
> > > > +++ b/kernel/trace/bpf_trace.c
> > > > @@ -376,6 +376,36 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
> > > >  	.ret_type	= RET_INTEGER,
> > > >  };
> > > >  
> > > > +#ifdef CONFIG_CGROUPS
> > > > +static u64 bpf_current_task_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
> > > 
> > > please don't introduce #ifdef into .c code.
> > > In this case add #else in .h to fetch_arraymap_ptr() that returns -EOPNOTSUPP.
> > > Also why guard it with CONFIG_CGROUPS in .h at all?
> > > I think it should compile fine even when cgroups are not defined.
> > > The helper won't be functional anyway, since no cgroup_fd can be added
> > > to cgroup map.
> > > 
> > Pardon my ignorance, but if I don't have an ifdef wont I be referencing a bunch 
> > of non-existent code (like cgroup_is_descendant)? Unless your suggestion is to 
> > move this to the .h file as well.
> 
> I see. If cgroup_is_descendant is the only function we use, how about
> adding it as 'return false' in #else part of linux/cgroup.h ?
> There are a bunch of them there like cgroup_exit/cgroup_free/...
> 
It appears messier than that:
kernel/trace/bpf_trace.c: In function ‘bpf_current_task_in_cgroup’:
kernel/trace/bpf_trace.c:394:9: error: implicit declaration of function ‘task_css_set’ [-Werror=implicit-function-declaration]
  cset = task_css_set(current);
         ^
kernel/trace/bpf_trace.c:394:7: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
  cset = task_css_set(current);
       ^
kernel/trace/bpf_trace.c:396:9: error: implicit declaration of function ‘cgroup_is_descendant’ [-Werror=implicit-function-declaration]
  return cgroup_is_descendant(cset->dfl_cgrp, cgrp);
         ^
kernel/trace/bpf_trace.c:396:34: error: dereferencing pointer to incomplete type ‘struct css_set’
  return cgroup_is_descendant(cset->dfl_cgrp, cgrp);
                                  ^
kernel/trace/bpf_trace.c:397:1: warning: control reaches end of non-void function [-Wreturn-type]
 }

----

I'd have to define the two functions, and work around the fact I'm referring to a member of css_set.
Also, this means that folks wouldn't be warned at load time that the verifier couldn't support their code. 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [net-next v2 v2 1/2] bpf: Add bpf_current_task_in_cgroup helper
  2016-08-10  1:26       ` Sargun Dhillon
@ 2016-08-10  3:27         ` Alexei Starovoitov
  2016-08-10  3:40           ` Sargun Dhillon
  0 siblings, 1 reply; 10+ messages in thread
From: Alexei Starovoitov @ 2016-08-10  3:27 UTC (permalink / raw)
  To: Sargun Dhillon; +Cc: netdev, daniel

On Tue, Aug 09, 2016 at 06:26:37PM -0700, Sargun Dhillon wrote:
> On Tue, Aug 09, 2016 at 06:02:34PM -0700, Alexei Starovoitov wrote:
> > On Tue, Aug 09, 2016 at 05:55:26PM -0700, Sargun Dhillon wrote:
> > > On Tue, Aug 09, 2016 at 05:23:50PM -0700, Alexei Starovoitov wrote:
> > > > On Tue, Aug 09, 2016 at 05:00:12PM -0700, Sargun Dhillon wrote:
> > > > > This adds a bpf helper that's similar to the skb_in_cgroup helper to check
> > > > > whether the probe is currently executing in the context of a specific
> > > > > subset of the cgroupsv2 hierarchy. It does this based on membership test
> > > > > for a cgroup arraymap. It is invalid to call this in an interrupt, and
> > > > > it'll return an error. The helper is primarily to be used in debugging
> > > > > activities for containers, where you may have multiple programs running in
> > > > > a given top-level "container".
> > > > > 
> > > > > This patch also genericizes some of the arraymap fetching logic between the
> > > > > skb_in_cgroup helper and this new helper.
> > > > > 
> > > > > Signed-off-by: Sargun Dhillon <sargun@sargun.me>
> > > > > Cc: Alexei Starovoitov <ast@kernel.org>
> > > > > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > > > > ---
> > > > >  include/linux/bpf.h      | 24 ++++++++++++++++++++++++
> > > > >  include/uapi/linux/bpf.h | 11 +++++++++++
> > > > >  kernel/bpf/arraymap.c    |  2 +-
> > > > >  kernel/bpf/verifier.c    |  4 +++-
> > > > >  kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++++++++++++++++
> > > > >  net/core/filter.c        | 11 ++++-------
> > > > >  6 files changed, 77 insertions(+), 9 deletions(-)
> > > > > 
> > > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > > index 1113423..9adf712 100644
> > > > > --- a/include/linux/bpf.h
> > > > > +++ b/include/linux/bpf.h
> > > > > @@ -319,4 +319,28 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
> > > > >  void bpf_user_rnd_init_once(void);
> > > > >  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
> > > > >  
> > > > > +#ifdef CONFIG_CGROUPS
> > > > > +/* Helper to fetch a cgroup pointer based on index.
> > > > > + * @map: a cgroup arraymap
> > > > > + * @idx: index of the item you want to fetch
> > > > > + *
> > > > > + * Returns pointer on success,
> > > > > + * Error code if item not found, or out-of-bounds access
> > > > > + */
> > > > > +static inline struct cgroup *fetch_arraymap_ptr(struct bpf_map *map, int idx)
> > > > > +{
> > > > > +	struct cgroup *cgrp;
> > > > > +	struct bpf_array *array = container_of(map, struct bpf_array, map);
> > > > > +
> > > > > +	if (unlikely(idx >= array->map.max_entries))
> > > > > +		return ERR_PTR(-E2BIG);
> > > > > +
> > > > > +	cgrp = READ_ONCE(array->ptrs[idx]);
> > > > > +	if (unlikely(!cgrp))
> > > > > +		return ERR_PTR(-EAGAIN);
> > > > > +
> > > > > +	return cgrp;
> > > > > +}
> > > > > +#endif /* CONFIG_CGROUPS */
> > > > > +
> > > > >  #endif /* _LINUX_BPF_H */
> > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > > > index da218fe..64b1a07 100644
> > > > > --- a/include/uapi/linux/bpf.h
> > > > > +++ b/include/uapi/linux/bpf.h
> > > > > @@ -375,6 +375,17 @@ enum bpf_func_id {
> > > > >  	 */
> > > > >  	BPF_FUNC_probe_write_user,
> > > > >  
> > > > > +	/**
> > > > > +	 * bpf_current_task_in_cgroup(map, index) - Check cgroup2 membership of current task
> > > > > +	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
> > > > > +	 * @index: index of the cgroup in the bpf_map
> > > > > +	 * Return:
> > > > > +	 *   == 0 current failed the cgroup2 descendant test
> > > > > +	 *   == 1 current succeeded the cgroup2 descendant test
> > > > > +	 *    < 0 error
> > > > > +	 */
> > > > > +	BPF_FUNC_current_task_in_cgroup,
> > > > > +
> > > > >  	__BPF_FUNC_MAX_ID,
> > > > >  };
> > > > >  
> > > > > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> > > > > index 633a650..a2ac051 100644
> > > > > --- a/kernel/bpf/arraymap.c
> > > > > +++ b/kernel/bpf/arraymap.c
> > > > > @@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
> > > > >  }
> > > > >  late_initcall(register_perf_event_array_map);
> > > > >  
> > > > > -#ifdef CONFIG_SOCK_CGROUP_DATA
> > > > > +#ifdef CONFIG_CGROUPS
> > > > >  static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
> > > > >  				     struct file *map_file /* not used */,
> > > > >  				     int fd)
> > > > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > > > > index 7094c69..80efab8 100644
> > > > > --- a/kernel/bpf/verifier.c
> > > > > +++ b/kernel/bpf/verifier.c
> > > > > @@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
> > > > >  			goto error;
> > > > >  		break;
> > > > >  	case BPF_MAP_TYPE_CGROUP_ARRAY:
> > > > > -		if (func_id != BPF_FUNC_skb_in_cgroup)
> > > > > +		if (func_id != BPF_FUNC_skb_in_cgroup &&
> > > > > +		    func_id != BPF_FUNC_current_task_in_cgroup)
> > > > >  			goto error;
> > > > >  		break;
> > > > >  	default:
> > > > > @@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
> > > > >  		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
> > > > >  			goto error;
> > > > >  		break;
> > > > > +	case BPF_FUNC_current_task_in_cgroup:
> > > > >  	case BPF_FUNC_skb_in_cgroup:
> > > > >  		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
> > > > >  			goto error;
> > > > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > > > > index b20438f..39f0290 100644
> > > > > --- a/kernel/trace/bpf_trace.c
> > > > > +++ b/kernel/trace/bpf_trace.c
> > > > > @@ -376,6 +376,36 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
> > > > >  	.ret_type	= RET_INTEGER,
> > > > >  };
> > > > >  
> > > > > +#ifdef CONFIG_CGROUPS
> > > > > +static u64 bpf_current_task_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
> > > > 
> > > > please don't introduce #ifdef into .c code.
> > > > In this case add #else in .h to fetch_arraymap_ptr() that returns -EOPNOTSUPP.
> > > > Also why guard it with CONFIG_CGROUPS in .h at all?
> > > > I think it should compile fine even when cgroups are not defined.
> > > > The helper won't be functional anyway, since no cgroup_fd can be added
> > > > to cgroup map.
> > > > 
> > > Pardon my ignorance, but if I don't have an ifdef wont I be referencing a bunch 
> > > of non-existent code (like cgroup_is_descendant)? Unless your suggestion is to 
> > > move this to the .h file as well.
> > 
> > I see. If cgroup_is_descendant is the only function we use, how about
> > adding it as 'return false' in #else part of linux/cgroup.h ?
> > There are a bunch of them there like cgroup_exit/cgroup_free/...
> > 
> It appears messier than that:
> kernel/trace/bpf_trace.c: In function ‘bpf_current_task_in_cgroup’:
> kernel/trace/bpf_trace.c:394:9: error: implicit declaration of function ‘task_css_set’ [-Werror=implicit-function-declaration]
>   cset = task_css_set(current);
>          ^
> kernel/trace/bpf_trace.c:394:7: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
>   cset = task_css_set(current);
>        ^
> kernel/trace/bpf_trace.c:396:9: error: implicit declaration of function ‘cgroup_is_descendant’ [-Werror=implicit-function-declaration]
>   return cgroup_is_descendant(cset->dfl_cgrp, cgrp);
>          ^
> kernel/trace/bpf_trace.c:396:34: error: dereferencing pointer to incomplete type ‘struct css_set’
>   return cgroup_is_descendant(cset->dfl_cgrp, cgrp);

yeah. exporting struct css_set is too much. 
No other ideas how to reduce so many #ifdef CONFIG_CGROUPS.
At least the one in .h can be dropped, right?
Please cc Tejun in the next spin.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [net-next v2 v2 1/2] bpf: Add bpf_current_task_in_cgroup helper
  2016-08-10  3:27         ` Alexei Starovoitov
@ 2016-08-10  3:40           ` Sargun Dhillon
  2016-08-10  3:52             ` Alexei Starovoitov
  0 siblings, 1 reply; 10+ messages in thread
From: Sargun Dhillon @ 2016-08-10  3:40 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: netdev, daniel

On Tue, Aug 09, 2016 at 08:27:32PM -0700, Alexei Starovoitov wrote:
> On Tue, Aug 09, 2016 at 06:26:37PM -0700, Sargun Dhillon wrote:
> > On Tue, Aug 09, 2016 at 06:02:34PM -0700, Alexei Starovoitov wrote:
> > > On Tue, Aug 09, 2016 at 05:55:26PM -0700, Sargun Dhillon wrote:
> > > > On Tue, Aug 09, 2016 at 05:23:50PM -0700, Alexei Starovoitov wrote:
> > > > > On Tue, Aug 09, 2016 at 05:00:12PM -0700, Sargun Dhillon wrote:
> > > > > > This adds a bpf helper that's similar to the skb_in_cgroup helper to check
> > > > > > whether the probe is currently executing in the context of a specific
> > > > > > subset of the cgroupsv2 hierarchy. It does this based on membership test
> > > > > > for a cgroup arraymap. It is invalid to call this in an interrupt, and
> > > > > > it'll return an error. The helper is primarily to be used in debugging
> > > > > > activities for containers, where you may have multiple programs running in
> > > > > > a given top-level "container".
> > > > > > 
> > > > > > This patch also genericizes some of the arraymap fetching logic between the
> > > > > > skb_in_cgroup helper and this new helper.
> > > > > > 
> > > > > > Signed-off-by: Sargun Dhillon <sargun@sargun.me>
> > > > > > Cc: Alexei Starovoitov <ast@kernel.org>
> > > > > > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > > > > > ---
> > > > > >  include/linux/bpf.h      | 24 ++++++++++++++++++++++++
> > > > > >  include/uapi/linux/bpf.h | 11 +++++++++++
> > > > > >  kernel/bpf/arraymap.c    |  2 +-
> > > > > >  kernel/bpf/verifier.c    |  4 +++-
> > > > > >  kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++++++++++++++++
> > > > > >  net/core/filter.c        | 11 ++++-------
> > > > > >  6 files changed, 77 insertions(+), 9 deletions(-)
> > > > > > 
> > > > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > > > index 1113423..9adf712 100644
> > > > > > --- a/include/linux/bpf.h
> > > > > > +++ b/include/linux/bpf.h
> > > > > > @@ -319,4 +319,28 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
> > > > > >  void bpf_user_rnd_init_once(void);
> > > > > >  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
> > > > > >  
> > > > > > +#ifdef CONFIG_CGROUPS
> > > > > > +/* Helper to fetch a cgroup pointer based on index.
> > > > > > + * @map: a cgroup arraymap
> > > > > > + * @idx: index of the item you want to fetch
> > > > > > + *
> > > > > > + * Returns pointer on success,
> > > > > > + * Error code if item not found, or out-of-bounds access
> > > > > > + */
> > > > > > +static inline struct cgroup *fetch_arraymap_ptr(struct bpf_map *map, int idx)
> > > > > > +{
> > > > > > +	struct cgroup *cgrp;
> > > > > > +	struct bpf_array *array = container_of(map, struct bpf_array, map);
> > > > > > +
> > > > > > +	if (unlikely(idx >= array->map.max_entries))
> > > > > > +		return ERR_PTR(-E2BIG);
> > > > > > +
> > > > > > +	cgrp = READ_ONCE(array->ptrs[idx]);
> > > > > > +	if (unlikely(!cgrp))
> > > > > > +		return ERR_PTR(-EAGAIN);
> > > > > > +
> > > > > > +	return cgrp;
> > > > > > +}
> > > > > > +#endif /* CONFIG_CGROUPS */
> > > > > > +
> > > > > >  #endif /* _LINUX_BPF_H */
> > > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > > > > index da218fe..64b1a07 100644
> > > > > > --- a/include/uapi/linux/bpf.h
> > > > > > +++ b/include/uapi/linux/bpf.h
> > > > > > @@ -375,6 +375,17 @@ enum bpf_func_id {
> > > > > >  	 */
> > > > > >  	BPF_FUNC_probe_write_user,
> > > > > >  
> > > > > > +	/**
> > > > > > +	 * bpf_current_task_in_cgroup(map, index) - Check cgroup2 membership of current task
> > > > > > +	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
> > > > > > +	 * @index: index of the cgroup in the bpf_map
> > > > > > +	 * Return:
> > > > > > +	 *   == 0 current failed the cgroup2 descendant test
> > > > > > +	 *   == 1 current succeeded the cgroup2 descendant test
> > > > > > +	 *    < 0 error
> > > > > > +	 */
> > > > > > +	BPF_FUNC_current_task_in_cgroup,
> > > > > > +
> > > > > >  	__BPF_FUNC_MAX_ID,
> > > > > >  };
> > > > > >  
> > > > > > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> > > > > > index 633a650..a2ac051 100644
> > > > > > --- a/kernel/bpf/arraymap.c
> > > > > > +++ b/kernel/bpf/arraymap.c
> > > > > > @@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
> > > > > >  }
> > > > > >  late_initcall(register_perf_event_array_map);
> > > > > >  
> > > > > > -#ifdef CONFIG_SOCK_CGROUP_DATA
> > > > > > +#ifdef CONFIG_CGROUPS
> > > > > >  static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
> > > > > >  				     struct file *map_file /* not used */,
> > > > > >  				     int fd)
> > > > > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > > > > > index 7094c69..80efab8 100644
> > > > > > --- a/kernel/bpf/verifier.c
> > > > > > +++ b/kernel/bpf/verifier.c
> > > > > > @@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
> > > > > >  			goto error;
> > > > > >  		break;
> > > > > >  	case BPF_MAP_TYPE_CGROUP_ARRAY:
> > > > > > -		if (func_id != BPF_FUNC_skb_in_cgroup)
> > > > > > +		if (func_id != BPF_FUNC_skb_in_cgroup &&
> > > > > > +		    func_id != BPF_FUNC_current_task_in_cgroup)
> > > > > >  			goto error;
> > > > > >  		break;
> > > > > >  	default:
> > > > > > @@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
> > > > > >  		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
> > > > > >  			goto error;
> > > > > >  		break;
> > > > > > +	case BPF_FUNC_current_task_in_cgroup:
> > > > > >  	case BPF_FUNC_skb_in_cgroup:
> > > > > >  		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
> > > > > >  			goto error;
> > > > > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > > > > > index b20438f..39f0290 100644
> > > > > > --- a/kernel/trace/bpf_trace.c
> > > > > > +++ b/kernel/trace/bpf_trace.c
> > > > > > @@ -376,6 +376,36 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
> > > > > >  	.ret_type	= RET_INTEGER,
> > > > > >  };
> > > > > >  
> > > > > > +#ifdef CONFIG_CGROUPS
> > > > > > +static u64 bpf_current_task_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
> > > > > 
> > > > > please don't introduce #ifdef into .c code.
> > > > > In this case add #else in .h to fetch_arraymap_ptr() that returns -EOPNOTSUPP.
> > > > > Also why guard it with CONFIG_CGROUPS in .h at all?
> > > > > I think it should compile fine even when cgroups are not defined.
> > > > > The helper won't be functional anyway, since no cgroup_fd can be added
> > > > > to cgroup map.
> > > > > 
> > > > Pardon my ignorance, but if I don't have an ifdef wont I be referencing a bunch 
> > > > of non-existent code (like cgroup_is_descendant)? Unless your suggestion is to 
> > > > move this to the .h file as well.
> > > 
> > > I see. If cgroup_is_descendant is the only function we use, how about
> > > adding it as 'return false' in #else part of linux/cgroup.h ?
> > > There are a bunch of them there like cgroup_exit/cgroup_free/...
> > > 
> > It appears messier than that:
> > kernel/trace/bpf_trace.c: In function ‘bpf_current_task_in_cgroup’:
> > kernel/trace/bpf_trace.c:394:9: error: implicit declaration of function ‘task_css_set’ [-Werror=implicit-function-declaration]
> >   cset = task_css_set(current);
> >          ^
> > kernel/trace/bpf_trace.c:394:7: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
> >   cset = task_css_set(current);
> >        ^
> > kernel/trace/bpf_trace.c:396:9: error: implicit declaration of function ‘cgroup_is_descendant’ [-Werror=implicit-function-declaration]
> >   return cgroup_is_descendant(cset->dfl_cgrp, cgrp);
> >          ^
> > kernel/trace/bpf_trace.c:396:34: error: dereferencing pointer to incomplete type ‘struct css_set’
> >   return cgroup_is_descendant(cset->dfl_cgrp, cgrp);
> 
> yeah. exporting struct css_set is too much. 
> No other ideas how to reduce so many #ifdef CONFIG_CGROUPS.
> At least the one in .h can be dropped, right?
> Please cc Tejun in the next spin.
> 
The path I'm going down is to add this helper to cgroup.h:

#ifdef CONFIG_CGROUPS
/**
 * task_in_cgroup_hierarchy - test task's membership of cgroup ancestry
 * @task: the task to be tested
 * @ancestor: possible ancestor of @task's cgroup
 *
 * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
 * It follows all the same rules as cgroup_is_descendant, and only applies
 * to the default hierarchy.
 */
static inline bool task_in_cgroup_hierarchy(struct task_struct *task,
					    struct cgroup *ancestor)
{
	struct css_set *cset = task_css_set(task);
	return cgroup_is_descendant(cset->dfl_cgrp, ancestor);
}

#else

struct cgroup;
static inline bool task_in_cgroup_hierarchy(struct task_struct *task,
				  struct cgroup *ancestor) { return false; }
#endif


It gets rid of all of the CONFIG_CGROUPS, and I think it's a useful helper for 
others. I can't be the only one, esp. as we move towards cgroupv2 more that
depends on default cgroups hierarchy checks.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [net-next v2 v2 1/2] bpf: Add bpf_current_task_in_cgroup helper
  2016-08-10  3:40           ` Sargun Dhillon
@ 2016-08-10  3:52             ` Alexei Starovoitov
  2016-08-10  4:40               ` Sargun Dhillon
  0 siblings, 1 reply; 10+ messages in thread
From: Alexei Starovoitov @ 2016-08-10  3:52 UTC (permalink / raw)
  To: Sargun Dhillon; +Cc: netdev, daniel

On Tue, Aug 09, 2016 at 08:40:05PM -0700, Sargun Dhillon wrote:
> On Tue, Aug 09, 2016 at 08:27:32PM -0700, Alexei Starovoitov wrote:
> > On Tue, Aug 09, 2016 at 06:26:37PM -0700, Sargun Dhillon wrote:
> > > On Tue, Aug 09, 2016 at 06:02:34PM -0700, Alexei Starovoitov wrote:
> > > > On Tue, Aug 09, 2016 at 05:55:26PM -0700, Sargun Dhillon wrote:
> > > > > On Tue, Aug 09, 2016 at 05:23:50PM -0700, Alexei Starovoitov wrote:
> > > > > > On Tue, Aug 09, 2016 at 05:00:12PM -0700, Sargun Dhillon wrote:
> > > > > > > This adds a bpf helper that's similar to the skb_in_cgroup helper to check
> > > > > > > whether the probe is currently executing in the context of a specific
> > > > > > > subset of the cgroupsv2 hierarchy. It does this based on membership test
> > > > > > > for a cgroup arraymap. It is invalid to call this in an interrupt, and
> > > > > > > it'll return an error. The helper is primarily to be used in debugging
> > > > > > > activities for containers, where you may have multiple programs running in
> > > > > > > a given top-level "container".
> > > > > > > 
> > > > > > > This patch also genericizes some of the arraymap fetching logic between the
> > > > > > > skb_in_cgroup helper and this new helper.
> > > > > > > 
> > > > > > > Signed-off-by: Sargun Dhillon <sargun@sargun.me>
> > > > > > > Cc: Alexei Starovoitov <ast@kernel.org>
> > > > > > > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > > > > > > ---
> > > > > > >  include/linux/bpf.h      | 24 ++++++++++++++++++++++++
> > > > > > >  include/uapi/linux/bpf.h | 11 +++++++++++
> > > > > > >  kernel/bpf/arraymap.c    |  2 +-
> > > > > > >  kernel/bpf/verifier.c    |  4 +++-
> > > > > > >  kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++++++++++++++++
> > > > > > >  net/core/filter.c        | 11 ++++-------
> > > > > > >  6 files changed, 77 insertions(+), 9 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > > > > index 1113423..9adf712 100644
> > > > > > > --- a/include/linux/bpf.h
> > > > > > > +++ b/include/linux/bpf.h
> > > > > > > @@ -319,4 +319,28 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
> > > > > > >  void bpf_user_rnd_init_once(void);
> > > > > > >  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
> > > > > > >  
> > > > > > > +#ifdef CONFIG_CGROUPS
> > > > > > > +/* Helper to fetch a cgroup pointer based on index.
> > > > > > > + * @map: a cgroup arraymap
> > > > > > > + * @idx: index of the item you want to fetch
> > > > > > > + *
> > > > > > > + * Returns pointer on success,
> > > > > > > + * Error code if item not found, or out-of-bounds access
> > > > > > > + */
> > > > > > > +static inline struct cgroup *fetch_arraymap_ptr(struct bpf_map *map, int idx)
> > > > > > > +{
> > > > > > > +	struct cgroup *cgrp;
> > > > > > > +	struct bpf_array *array = container_of(map, struct bpf_array, map);
> > > > > > > +
> > > > > > > +	if (unlikely(idx >= array->map.max_entries))
> > > > > > > +		return ERR_PTR(-E2BIG);
> > > > > > > +
> > > > > > > +	cgrp = READ_ONCE(array->ptrs[idx]);
> > > > > > > +	if (unlikely(!cgrp))
> > > > > > > +		return ERR_PTR(-EAGAIN);
> > > > > > > +
> > > > > > > +	return cgrp;
> > > > > > > +}
> > > > > > > +#endif /* CONFIG_CGROUPS */
> > > > > > > +
> > > > > > >  #endif /* _LINUX_BPF_H */
> > > > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > > > > > index da218fe..64b1a07 100644
> > > > > > > --- a/include/uapi/linux/bpf.h
> > > > > > > +++ b/include/uapi/linux/bpf.h
> > > > > > > @@ -375,6 +375,17 @@ enum bpf_func_id {
> > > > > > >  	 */
> > > > > > >  	BPF_FUNC_probe_write_user,
> > > > > > >  
> > > > > > > +	/**
> > > > > > > +	 * bpf_current_task_in_cgroup(map, index) - Check cgroup2 membership of current task
> > > > > > > +	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
> > > > > > > +	 * @index: index of the cgroup in the bpf_map
> > > > > > > +	 * Return:
> > > > > > > +	 *   == 0 current failed the cgroup2 descendant test
> > > > > > > +	 *   == 1 current succeeded the cgroup2 descendant test
> > > > > > > +	 *    < 0 error
> > > > > > > +	 */
> > > > > > > +	BPF_FUNC_current_task_in_cgroup,
> > > > > > > +
> > > > > > >  	__BPF_FUNC_MAX_ID,
> > > > > > >  };
> > > > > > >  
> > > > > > > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> > > > > > > index 633a650..a2ac051 100644
> > > > > > > --- a/kernel/bpf/arraymap.c
> > > > > > > +++ b/kernel/bpf/arraymap.c
> > > > > > > @@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
> > > > > > >  }
> > > > > > >  late_initcall(register_perf_event_array_map);
> > > > > > >  
> > > > > > > -#ifdef CONFIG_SOCK_CGROUP_DATA
> > > > > > > +#ifdef CONFIG_CGROUPS
> > > > > > >  static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
> > > > > > >  				     struct file *map_file /* not used */,
> > > > > > >  				     int fd)
> > > > > > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > > > > > > index 7094c69..80efab8 100644
> > > > > > > --- a/kernel/bpf/verifier.c
> > > > > > > +++ b/kernel/bpf/verifier.c
> > > > > > > @@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
> > > > > > >  			goto error;
> > > > > > >  		break;
> > > > > > >  	case BPF_MAP_TYPE_CGROUP_ARRAY:
> > > > > > > -		if (func_id != BPF_FUNC_skb_in_cgroup)
> > > > > > > +		if (func_id != BPF_FUNC_skb_in_cgroup &&
> > > > > > > +		    func_id != BPF_FUNC_current_task_in_cgroup)
> > > > > > >  			goto error;
> > > > > > >  		break;
> > > > > > >  	default:
> > > > > > > @@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
> > > > > > >  		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
> > > > > > >  			goto error;
> > > > > > >  		break;
> > > > > > > +	case BPF_FUNC_current_task_in_cgroup:
> > > > > > >  	case BPF_FUNC_skb_in_cgroup:
> > > > > > >  		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
> > > > > > >  			goto error;
> > > > > > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > > > > > > index b20438f..39f0290 100644
> > > > > > > --- a/kernel/trace/bpf_trace.c
> > > > > > > +++ b/kernel/trace/bpf_trace.c
> > > > > > > @@ -376,6 +376,36 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
> > > > > > >  	.ret_type	= RET_INTEGER,
> > > > > > >  };
> > > > > > >  
> > > > > > > +#ifdef CONFIG_CGROUPS
> > > > > > > +static u64 bpf_current_task_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
> > > > > > 
> > > > > > please don't introduce #ifdef into .c code.
> > > > > > In this case add #else in .h to fetch_arraymap_ptr() that returns -EOPNOTSUPP.
> > > > > > Also why guard it with CONFIG_CGROUPS in .h at all?
> > > > > > I think it should compile fine even when cgroups are not defined.
> > > > > > The helper won't be functional anyway, since no cgroup_fd can be added
> > > > > > to cgroup map.
> > > > > > 
> > > > > Pardon my ignorance, but if I don't have an ifdef wont I be referencing a bunch 
> > > > > of non-existent code (like cgroup_is_descendant)? Unless your suggestion is to 
> > > > > move this to the .h file as well.
> > > > 
> > > > I see. If cgroup_is_descendant is the only function we use, how about
> > > > adding it as 'return false' in #else part of linux/cgroup.h ?
> > > > There are a bunch of them there like cgroup_exit/cgroup_free/...
> > > > 
> > > It appears messier than that:
> > > kernel/trace/bpf_trace.c: In function ‘bpf_current_task_in_cgroup’:
> > > kernel/trace/bpf_trace.c:394:9: error: implicit declaration of function ‘task_css_set’ [-Werror=implicit-function-declaration]
> > >   cset = task_css_set(current);
> > >          ^
> > > kernel/trace/bpf_trace.c:394:7: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
> > >   cset = task_css_set(current);
> > >        ^
> > > kernel/trace/bpf_trace.c:396:9: error: implicit declaration of function ‘cgroup_is_descendant’ [-Werror=implicit-function-declaration]
> > >   return cgroup_is_descendant(cset->dfl_cgrp, cgrp);
> > >          ^
> > > kernel/trace/bpf_trace.c:396:34: error: dereferencing pointer to incomplete type ‘struct css_set’
> > >   return cgroup_is_descendant(cset->dfl_cgrp, cgrp);
> > 
> > yeah. exporting struct css_set is too much. 
> > No other ideas how to reduce so many #ifdef CONFIG_CGROUPS.
> > At least the one in .h can be dropped, right?
> > Please cc Tejun in the next spin.
> > 
> The path I'm going down is to add this helper to cgroup.h:
> 
> #ifdef CONFIG_CGROUPS
> /**
>  * task_in_cgroup_hierarchy - test task's membership of cgroup ancestry
>  * @task: the task to be tested
>  * @ancestor: possible ancestor of @task's cgroup
>  *
>  * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
>  * It follows all the same rules as cgroup_is_descendant, and only applies
>  * to the default hierarchy.
>  */
> static inline bool task_in_cgroup_hierarchy(struct task_struct *task,
> 					    struct cgroup *ancestor)
> {
> 	struct css_set *cset = task_css_set(task);
> 	return cgroup_is_descendant(cset->dfl_cgrp, ancestor);
> }
> 
> #else
> 
> struct cgroup;
> static inline bool task_in_cgroup_hierarchy(struct task_struct *task,
> 				  struct cgroup *ancestor) { return false; }
> #endif
> 
> 
> It gets rid of all of the CONFIG_CGROUPS, and I think it's a useful helper for 
> others. I can't be the only one, esp. as we move towards cgroupv2 more that
> depends on default cgroups hierarchy checks.

good idea. I think that's the best option so far.
If we can get rid of ifdef in net/core/filter.c at the same time
that would be even better. Looks like it needs dummy sock_cgroup_ptr().

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [net-next v2 v2 1/2] bpf: Add bpf_current_task_in_cgroup helper
  2016-08-10  3:52             ` Alexei Starovoitov
@ 2016-08-10  4:40               ` Sargun Dhillon
  2016-08-10  9:03                 ` Daniel Borkmann
  0 siblings, 1 reply; 10+ messages in thread
From: Sargun Dhillon @ 2016-08-10  4:40 UTC (permalink / raw)
  To: Alexei Starovoitov; +Cc: netdev, daniel

On Tue, Aug 09, 2016 at 08:52:01PM -0700, Alexei Starovoitov wrote:
> On Tue, Aug 09, 2016 at 08:40:05PM -0700, Sargun Dhillon wrote:
> > On Tue, Aug 09, 2016 at 08:27:32PM -0700, Alexei Starovoitov wrote:
> > > On Tue, Aug 09, 2016 at 06:26:37PM -0700, Sargun Dhillon wrote:
> > > > On Tue, Aug 09, 2016 at 06:02:34PM -0700, Alexei Starovoitov wrote:
> > > > > On Tue, Aug 09, 2016 at 05:55:26PM -0700, Sargun Dhillon wrote:
> > > > > > On Tue, Aug 09, 2016 at 05:23:50PM -0700, Alexei Starovoitov wrote:
> > > > > > > On Tue, Aug 09, 2016 at 05:00:12PM -0700, Sargun Dhillon wrote:
> > > > > > > > This adds a bpf helper that's similar to the skb_in_cgroup helper to check
> > > > > > > > whether the probe is currently executing in the context of a specific
> > > > > > > > subset of the cgroupsv2 hierarchy. It does this based on membership test
> > > > > > > > for a cgroup arraymap. It is invalid to call this in an interrupt, and
> > > > > > > > it'll return an error. The helper is primarily to be used in debugging
> > > > > > > > activities for containers, where you may have multiple programs running in
> > > > > > > > a given top-level "container".
> > > > > > > > 
> > > > > > > > This patch also genericizes some of the arraymap fetching logic between the
> > > > > > > > skb_in_cgroup helper and this new helper.
> > > > > > > > 
> > > > > > > > Signed-off-by: Sargun Dhillon <sargun@sargun.me>
> > > > > > > > Cc: Alexei Starovoitov <ast@kernel.org>
> > > > > > > > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > > > > > > > ---
> > > > > > > >  include/linux/bpf.h      | 24 ++++++++++++++++++++++++
> > > > > > > >  include/uapi/linux/bpf.h | 11 +++++++++++
> > > > > > > >  kernel/bpf/arraymap.c    |  2 +-
> > > > > > > >  kernel/bpf/verifier.c    |  4 +++-
> > > > > > > >  kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++++++++++++++++
> > > > > > > >  net/core/filter.c        | 11 ++++-------
> > > > > > > >  6 files changed, 77 insertions(+), 9 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > > > > > > > index 1113423..9adf712 100644
> > > > > > > > --- a/include/linux/bpf.h
> > > > > > > > +++ b/include/linux/bpf.h
> > > > > > > > @@ -319,4 +319,28 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
> > > > > > > >  void bpf_user_rnd_init_once(void);
> > > > > > > >  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
> > > > > > > >  
> > > > > > > > +#ifdef CONFIG_CGROUPS
> > > > > > > > +/* Helper to fetch a cgroup pointer based on index.
> > > > > > > > + * @map: a cgroup arraymap
> > > > > > > > + * @idx: index of the item you want to fetch
> > > > > > > > + *
> > > > > > > > + * Returns pointer on success,
> > > > > > > > + * Error code if item not found, or out-of-bounds access
> > > > > > > > + */
> > > > > > > > +static inline struct cgroup *fetch_arraymap_ptr(struct bpf_map *map, int idx)
> > > > > > > > +{
> > > > > > > > +	struct cgroup *cgrp;
> > > > > > > > +	struct bpf_array *array = container_of(map, struct bpf_array, map);
> > > > > > > > +
> > > > > > > > +	if (unlikely(idx >= array->map.max_entries))
> > > > > > > > +		return ERR_PTR(-E2BIG);
> > > > > > > > +
> > > > > > > > +	cgrp = READ_ONCE(array->ptrs[idx]);
> > > > > > > > +	if (unlikely(!cgrp))
> > > > > > > > +		return ERR_PTR(-EAGAIN);
> > > > > > > > +
> > > > > > > > +	return cgrp;
> > > > > > > > +}
> > > > > > > > +#endif /* CONFIG_CGROUPS */
> > > > > > > > +
> > > > > > > >  #endif /* _LINUX_BPF_H */
> > > > > > > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > > > > > > index da218fe..64b1a07 100644
> > > > > > > > --- a/include/uapi/linux/bpf.h
> > > > > > > > +++ b/include/uapi/linux/bpf.h
> > > > > > > > @@ -375,6 +375,17 @@ enum bpf_func_id {
> > > > > > > >  	 */
> > > > > > > >  	BPF_FUNC_probe_write_user,
> > > > > > > >  
> > > > > > > > +	/**
> > > > > > > > +	 * bpf_current_task_in_cgroup(map, index) - Check cgroup2 membership of current task
> > > > > > > > +	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
> > > > > > > > +	 * @index: index of the cgroup in the bpf_map
> > > > > > > > +	 * Return:
> > > > > > > > +	 *   == 0 current failed the cgroup2 descendant test
> > > > > > > > +	 *   == 1 current succeeded the cgroup2 descendant test
> > > > > > > > +	 *    < 0 error
> > > > > > > > +	 */
> > > > > > > > +	BPF_FUNC_current_task_in_cgroup,
> > > > > > > > +
> > > > > > > >  	__BPF_FUNC_MAX_ID,
> > > > > > > >  };
> > > > > > > >  
> > > > > > > > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> > > > > > > > index 633a650..a2ac051 100644
> > > > > > > > --- a/kernel/bpf/arraymap.c
> > > > > > > > +++ b/kernel/bpf/arraymap.c
> > > > > > > > @@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
> > > > > > > >  }
> > > > > > > >  late_initcall(register_perf_event_array_map);
> > > > > > > >  
> > > > > > > > -#ifdef CONFIG_SOCK_CGROUP_DATA
> > > > > > > > +#ifdef CONFIG_CGROUPS
> > > > > > > >  static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
> > > > > > > >  				     struct file *map_file /* not used */,
> > > > > > > >  				     int fd)
> > > > > > > > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> > > > > > > > index 7094c69..80efab8 100644
> > > > > > > > --- a/kernel/bpf/verifier.c
> > > > > > > > +++ b/kernel/bpf/verifier.c
> > > > > > > > @@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
> > > > > > > >  			goto error;
> > > > > > > >  		break;
> > > > > > > >  	case BPF_MAP_TYPE_CGROUP_ARRAY:
> > > > > > > > -		if (func_id != BPF_FUNC_skb_in_cgroup)
> > > > > > > > +		if (func_id != BPF_FUNC_skb_in_cgroup &&
> > > > > > > > +		    func_id != BPF_FUNC_current_task_in_cgroup)
> > > > > > > >  			goto error;
> > > > > > > >  		break;
> > > > > > > >  	default:
> > > > > > > > @@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
> > > > > > > >  		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
> > > > > > > >  			goto error;
> > > > > > > >  		break;
> > > > > > > > +	case BPF_FUNC_current_task_in_cgroup:
> > > > > > > >  	case BPF_FUNC_skb_in_cgroup:
> > > > > > > >  		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
> > > > > > > >  			goto error;
> > > > > > > > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > > > > > > > index b20438f..39f0290 100644
> > > > > > > > --- a/kernel/trace/bpf_trace.c
> > > > > > > > +++ b/kernel/trace/bpf_trace.c
> > > > > > > > @@ -376,6 +376,36 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
> > > > > > > >  	.ret_type	= RET_INTEGER,
> > > > > > > >  };
> > > > > > > >  
> > > > > > > > +#ifdef CONFIG_CGROUPS
> > > > > > > > +static u64 bpf_current_task_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
> > > > > > > 
> > > > > > > please don't introduce #ifdef into .c code.
> > > > > > > In this case add #else in .h to fetch_arraymap_ptr() that returns -EOPNOTSUPP.
> > > > > > > Also why guard it with CONFIG_CGROUPS in .h at all?
> > > > > > > I think it should compile fine even when cgroups are not defined.
> > > > > > > The helper won't be functional anyway, since no cgroup_fd can be added
> > > > > > > to cgroup map.
> > > > > > > 
> > > > > > Pardon my ignorance, but if I don't have an ifdef wont I be referencing a bunch 
> > > > > > of non-existent code (like cgroup_is_descendant)? Unless your suggestion is to 
> > > > > > move this to the .h file as well.
> > > > > 
> > > > > I see. If cgroup_is_descendant is the only function we use, how about
> > > > > adding it as 'return false' in #else part of linux/cgroup.h ?
> > > > > There are a bunch of them there like cgroup_exit/cgroup_free/...
> > > > > 
> > > > It appears messier than that:
> > > > kernel/trace/bpf_trace.c: In function ‘bpf_current_task_in_cgroup’:
> > > > kernel/trace/bpf_trace.c:394:9: error: implicit declaration of function ‘task_css_set’ [-Werror=implicit-function-declaration]
> > > >   cset = task_css_set(current);
> > > >          ^
> > > > kernel/trace/bpf_trace.c:394:7: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
> > > >   cset = task_css_set(current);
> > > >        ^
> > > > kernel/trace/bpf_trace.c:396:9: error: implicit declaration of function ‘cgroup_is_descendant’ [-Werror=implicit-function-declaration]
> > > >   return cgroup_is_descendant(cset->dfl_cgrp, cgrp);
> > > >          ^
> > > > kernel/trace/bpf_trace.c:396:34: error: dereferencing pointer to incomplete type ‘struct css_set’
> > > >   return cgroup_is_descendant(cset->dfl_cgrp, cgrp);
> > > 
> > > yeah. exporting struct css_set is too much. 
> > > No other ideas how to reduce so many #ifdef CONFIG_CGROUPS.
> > > At least the one in .h can be dropped, right?
> > > Please cc Tejun in the next spin.
> > > 
> > The path I'm going down is to add this helper to cgroup.h:
> > 
> > #ifdef CONFIG_CGROUPS
> > /**
> >  * task_in_cgroup_hierarchy - test task's membership of cgroup ancestry
> >  * @task: the task to be tested
> >  * @ancestor: possible ancestor of @task's cgroup
> >  *
> >  * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
> >  * It follows all the same rules as cgroup_is_descendant, and only applies
> >  * to the default hierarchy.
> >  */
> > static inline bool task_in_cgroup_hierarchy(struct task_struct *task,
> > 					    struct cgroup *ancestor)
> > {
> > 	struct css_set *cset = task_css_set(task);
> > 	return cgroup_is_descendant(cset->dfl_cgrp, ancestor);
> > }
> > 
> > #else
> > 
> > struct cgroup;
> > static inline bool task_in_cgroup_hierarchy(struct task_struct *task,
> > 				  struct cgroup *ancestor) { return false; }
> > #endif
> > 
> > 
> > It gets rid of all of the CONFIG_CGROUPS, and I think it's a useful helper for 
> > others. I can't be the only one, esp. as we move towards cgroupv2 more that
> > depends on default cgroups hierarchy checks.
> 
> good idea. I think that's the best option so far.
> If we can get rid of ifdef in net/core/filter.c at the same time
> that would be even better. Looks like it needs dummy sock_cgroup_ptr().
> 
The net/core/filter.c one is more complicated. If I follow the same pattern, 
we'd have a sock-specific sock_in_cgroup_hierarchy function. Is that general 
purpose enough, or checked elsewhere in the kernel?

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [net-next v2 v2 1/2] bpf: Add bpf_current_task_in_cgroup helper
  2016-08-10  4:40               ` Sargun Dhillon
@ 2016-08-10  9:03                 ` Daniel Borkmann
  0 siblings, 0 replies; 10+ messages in thread
From: Daniel Borkmann @ 2016-08-10  9:03 UTC (permalink / raw)
  To: Sargun Dhillon, Alexei Starovoitov; +Cc: netdev

On 08/10/2016 06:40 AM, Sargun Dhillon wrote:
> On Tue, Aug 09, 2016 at 08:52:01PM -0700, Alexei Starovoitov wrote:
>> On Tue, Aug 09, 2016 at 08:40:05PM -0700, Sargun Dhillon wrote:
>>> On Tue, Aug 09, 2016 at 08:27:32PM -0700, Alexei Starovoitov wrote:
>>>> On Tue, Aug 09, 2016 at 06:26:37PM -0700, Sargun Dhillon wrote:
>>>>> On Tue, Aug 09, 2016 at 06:02:34PM -0700, Alexei Starovoitov wrote:
>>>>>> On Tue, Aug 09, 2016 at 05:55:26PM -0700, Sargun Dhillon wrote:
>>>>>>> On Tue, Aug 09, 2016 at 05:23:50PM -0700, Alexei Starovoitov wrote:
>>>>>>>> On Tue, Aug 09, 2016 at 05:00:12PM -0700, Sargun Dhillon wrote:
>>>>>>>>> This adds a bpf helper that's similar to the skb_in_cgroup helper to check
>>>>>>>>> whether the probe is currently executing in the context of a specific
>>>>>>>>> subset of the cgroupsv2 hierarchy. It does this based on membership test
>>>>>>>>> for a cgroup arraymap. It is invalid to call this in an interrupt, and
>>>>>>>>> it'll return an error. The helper is primarily to be used in debugging
>>>>>>>>> activities for containers, where you may have multiple programs running in
>>>>>>>>> a given top-level "container".
>>>>>>>>>
>>>>>>>>> This patch also genericizes some of the arraymap fetching logic between the
>>>>>>>>> skb_in_cgroup helper and this new helper.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Sargun Dhillon <sargun@sargun.me>
>>>>>>>>> Cc: Alexei Starovoitov <ast@kernel.org>
>>>>>>>>> Cc: Daniel Borkmann <daniel@iogearbox.net>
>>>>>>>>> ---
>>>>>>>>>   include/linux/bpf.h      | 24 ++++++++++++++++++++++++
>>>>>>>>>   include/uapi/linux/bpf.h | 11 +++++++++++
>>>>>>>>>   kernel/bpf/arraymap.c    |  2 +-
>>>>>>>>>   kernel/bpf/verifier.c    |  4 +++-
>>>>>>>>>   kernel/trace/bpf_trace.c | 34 ++++++++++++++++++++++++++++++++++
>>>>>>>>>   net/core/filter.c        | 11 ++++-------
>>>>>>>>>   6 files changed, 77 insertions(+), 9 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>>>>>>>>> index 1113423..9adf712 100644
>>>>>>>>> --- a/include/linux/bpf.h
>>>>>>>>> +++ b/include/linux/bpf.h
>>>>>>>>> @@ -319,4 +319,28 @@ extern const struct bpf_func_proto bpf_get_stackid_proto;
>>>>>>>>>   void bpf_user_rnd_init_once(void);
>>>>>>>>>   u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
>>>>>>>>>
>>>>>>>>> +#ifdef CONFIG_CGROUPS
>>>>>>>>> +/* Helper to fetch a cgroup pointer based on index.
>>>>>>>>> + * @map: a cgroup arraymap
>>>>>>>>> + * @idx: index of the item you want to fetch
>>>>>>>>> + *
>>>>>>>>> + * Returns pointer on success,
>>>>>>>>> + * Error code if item not found, or out-of-bounds access
>>>>>>>>> + */
>>>>>>>>> +static inline struct cgroup *fetch_arraymap_ptr(struct bpf_map *map, int idx)
>>>>>>>>> +{
>>>>>>>>> +	struct cgroup *cgrp;
>>>>>>>>> +	struct bpf_array *array = container_of(map, struct bpf_array, map);
>>>>>>>>> +
>>>>>>>>> +	if (unlikely(idx >= array->map.max_entries))
>>>>>>>>> +		return ERR_PTR(-E2BIG);
>>>>>>>>> +
>>>>>>>>> +	cgrp = READ_ONCE(array->ptrs[idx]);
>>>>>>>>> +	if (unlikely(!cgrp))
>>>>>>>>> +		return ERR_PTR(-EAGAIN);
>>>>>>>>> +
>>>>>>>>> +	return cgrp;
>>>>>>>>> +}
>>>>>>>>> +#endif /* CONFIG_CGROUPS */
>>>>>>>>> +
>>>>>>>>>   #endif /* _LINUX_BPF_H */
>>>>>>>>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>>>>>>>>> index da218fe..64b1a07 100644
>>>>>>>>> --- a/include/uapi/linux/bpf.h
>>>>>>>>> +++ b/include/uapi/linux/bpf.h
>>>>>>>>> @@ -375,6 +375,17 @@ enum bpf_func_id {
>>>>>>>>>   	 */
>>>>>>>>>   	BPF_FUNC_probe_write_user,
>>>>>>>>>
>>>>>>>>> +	/**
>>>>>>>>> +	 * bpf_current_task_in_cgroup(map, index) - Check cgroup2 membership of current task
>>>>>>>>> +	 * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
>>>>>>>>> +	 * @index: index of the cgroup in the bpf_map
>>>>>>>>> +	 * Return:
>>>>>>>>> +	 *   == 0 current failed the cgroup2 descendant test
>>>>>>>>> +	 *   == 1 current succeeded the cgroup2 descendant test
>>>>>>>>> +	 *    < 0 error
>>>>>>>>> +	 */
>>>>>>>>> +	BPF_FUNC_current_task_in_cgroup,
>>>>>>>>> +
>>>>>>>>>   	__BPF_FUNC_MAX_ID,
>>>>>>>>>   };
>>>>>>>>>
>>>>>>>>> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
>>>>>>>>> index 633a650..a2ac051 100644
>>>>>>>>> --- a/kernel/bpf/arraymap.c
>>>>>>>>> +++ b/kernel/bpf/arraymap.c
>>>>>>>>> @@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
>>>>>>>>>   }
>>>>>>>>>   late_initcall(register_perf_event_array_map);
>>>>>>>>>
>>>>>>>>> -#ifdef CONFIG_SOCK_CGROUP_DATA
>>>>>>>>> +#ifdef CONFIG_CGROUPS
>>>>>>>>>   static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
>>>>>>>>>   				     struct file *map_file /* not used */,
>>>>>>>>>   				     int fd)
>>>>>>>>> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
>>>>>>>>> index 7094c69..80efab8 100644
>>>>>>>>> --- a/kernel/bpf/verifier.c
>>>>>>>>> +++ b/kernel/bpf/verifier.c
>>>>>>>>> @@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
>>>>>>>>>   			goto error;
>>>>>>>>>   		break;
>>>>>>>>>   	case BPF_MAP_TYPE_CGROUP_ARRAY:
>>>>>>>>> -		if (func_id != BPF_FUNC_skb_in_cgroup)
>>>>>>>>> +		if (func_id != BPF_FUNC_skb_in_cgroup &&
>>>>>>>>> +		    func_id != BPF_FUNC_current_task_in_cgroup)
>>>>>>>>>   			goto error;
>>>>>>>>>   		break;
>>>>>>>>>   	default:
>>>>>>>>> @@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
>>>>>>>>>   		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
>>>>>>>>>   			goto error;
>>>>>>>>>   		break;
>>>>>>>>> +	case BPF_FUNC_current_task_in_cgroup:
>>>>>>>>>   	case BPF_FUNC_skb_in_cgroup:
>>>>>>>>>   		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
>>>>>>>>>   			goto error;
>>>>>>>>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
>>>>>>>>> index b20438f..39f0290 100644
>>>>>>>>> --- a/kernel/trace/bpf_trace.c
>>>>>>>>> +++ b/kernel/trace/bpf_trace.c
>>>>>>>>> @@ -376,6 +376,36 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
>>>>>>>>>   	.ret_type	= RET_INTEGER,
>>>>>>>>>   };
>>>>>>>>>
>>>>>>>>> +#ifdef CONFIG_CGROUPS
>>>>>>>>> +static u64 bpf_current_task_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
>>>>>>>>
>>>>>>>> please don't introduce #ifdef into .c code.
>>>>>>>> In this case add #else in .h to fetch_arraymap_ptr() that returns -EOPNOTSUPP.
>>>>>>>> Also why guard it with CONFIG_CGROUPS in .h at all?
>>>>>>>> I think it should compile fine even when cgroups are not defined.
>>>>>>>> The helper won't be functional anyway, since no cgroup_fd can be added
>>>>>>>> to cgroup map.
>>>>>>>>
>>>>>>> Pardon my ignorance, but if I don't have an ifdef wont I be referencing a bunch
>>>>>>> of non-existent code (like cgroup_is_descendant)? Unless your suggestion is to
>>>>>>> move this to the .h file as well.
>>>>>>
>>>>>> I see. If cgroup_is_descendant is the only function we use, how about
>>>>>> adding it as 'return false' in #else part of linux/cgroup.h ?
>>>>>> There are a bunch of them there like cgroup_exit/cgroup_free/...
>>>>>>
>>>>> It appears messier than that:
>>>>> kernel/trace/bpf_trace.c: In function ‘bpf_current_task_in_cgroup’:
>>>>> kernel/trace/bpf_trace.c:394:9: error: implicit declaration of function ‘task_css_set’ [-Werror=implicit-function-declaration]
>>>>>    cset = task_css_set(current);
>>>>>           ^
>>>>> kernel/trace/bpf_trace.c:394:7: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
>>>>>    cset = task_css_set(current);
>>>>>         ^
>>>>> kernel/trace/bpf_trace.c:396:9: error: implicit declaration of function ‘cgroup_is_descendant’ [-Werror=implicit-function-declaration]
>>>>>    return cgroup_is_descendant(cset->dfl_cgrp, cgrp);
>>>>>           ^
>>>>> kernel/trace/bpf_trace.c:396:34: error: dereferencing pointer to incomplete type ‘struct css_set’
>>>>>    return cgroup_is_descendant(cset->dfl_cgrp, cgrp);
>>>>
>>>> yeah. exporting struct css_set is too much.
>>>> No other ideas how to reduce so many #ifdef CONFIG_CGROUPS.
>>>> At least the one in .h can be dropped, right?
>>>> Please cc Tejun in the next spin.
>>>>
>>> The path I'm going down is to add this helper to cgroup.h:
>>>
>>> #ifdef CONFIG_CGROUPS
>>> /**
>>>   * task_in_cgroup_hierarchy - test task's membership of cgroup ancestry
>>>   * @task: the task to be tested
>>>   * @ancestor: possible ancestor of @task's cgroup
>>>   *
>>>   * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
>>>   * It follows all the same rules as cgroup_is_descendant, and only applies
>>>   * to the default hierarchy.
>>>   */
>>> static inline bool task_in_cgroup_hierarchy(struct task_struct *task,
>>> 					    struct cgroup *ancestor)
>>> {
>>> 	struct css_set *cset = task_css_set(task);
>>> 	return cgroup_is_descendant(cset->dfl_cgrp, ancestor);
>>> }
>>>
>>> #else
>>>
>>> struct cgroup;
>>> static inline bool task_in_cgroup_hierarchy(struct task_struct *task,
>>> 				  struct cgroup *ancestor) { return false; }
>>> #endif
>>>
>>>
>>> It gets rid of all of the CONFIG_CGROUPS, and I think it's a useful helper for
>>> others. I can't be the only one, esp. as we move towards cgroupv2 more that
>>> depends on default cgroups hierarchy checks.
>>
>> good idea. I think that's the best option so far.
>> If we can get rid of ifdef in net/core/filter.c at the same time
>> that would be even better. Looks like it needs dummy sock_cgroup_ptr().
>>
> The net/core/filter.c one is more complicated. If I follow the same pattern,
> we'd have a sock-specific sock_in_cgroup_hierarchy function. Is that general
> purpose enough, or checked elsewhere in the kernel?

If I'm not missing something, I think what it would need is the following,
uncompiled:

For include/net/sock.h:

static inline int sk_cgroup_is_descendant(struct sock *sk,
					  struct cgroup *cgrp)
{
#ifdef CONFIG_SOCK_CGROUP_DATA
         return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp);
#else
	return -EOPNOTSUPP;
#endif
}

And then in net/core/filter.c,:

static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
[...]
	cgrp = READ_ONCE(array->ptrs[i]);
	if (unlikely(!cgrp))
		return -EAGAIN;

	return sk_cgroup_is_descendant(sk, cgrp);
}

This should be enough to get rid of the ugly CONFIG_SOCK_CGROUP_DATA
there. And it would also allow for only returning a membership (0 or 1)
when there's *actually* CONFIG_SOCK_CGROUP_DATA enabled, but otherwise
indicates -EOPNOTSUPP as error.

Similar approach should be done for the kprobes.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2016-08-10 18:20 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-08-10  0:00 [net-next v2 v2 1/2] bpf: Add bpf_current_task_in_cgroup helper Sargun Dhillon
2016-08-10  0:23 ` Alexei Starovoitov
2016-08-10  0:55   ` Sargun Dhillon
2016-08-10  1:02     ` Alexei Starovoitov
2016-08-10  1:26       ` Sargun Dhillon
2016-08-10  3:27         ` Alexei Starovoitov
2016-08-10  3:40           ` Sargun Dhillon
2016-08-10  3:52             ` Alexei Starovoitov
2016-08-10  4:40               ` Sargun Dhillon
2016-08-10  9:03                 ` Daniel Borkmann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).