netdev.vger.kernel.org archive mirror
* [PATCH net-next 0/2] BPF inline improvements
@ 2017-08-18 23:51 Daniel Borkmann
  2017-08-18 23:51 ` [PATCH net-next 1/2] bpf: improve htab inlining for future 32 bit jits Daniel Borkmann
  2017-08-18 23:51 ` [PATCH net-next 2/2] bpf: inline map in map lookup functions for array and htab Daniel Borkmann
  0 siblings, 2 replies; 7+ messages in thread
From: Daniel Borkmann @ 2017-08-18 23:51 UTC (permalink / raw)
  To: davem; +Cc: ast, netdev, Daniel Borkmann

The first patch makes htab inlining more robust with regard to future
JITs, and the second inlines map-in-map lookups through the
map_gen_lookup() callback.
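
For context, the map_gen_lookup() callback lets a map type hand the
verifier a short BPF instruction sequence that replaces the generic
helper call. A rough sketch of how the verifier consumes the callback
(simplified from fixup_bpf_calls() in kernel/bpf/verifier.c, details
and error handling omitted):

  struct bpf_insn insn_buf[16];
  struct bpf_prog *new_prog;
  u32 cnt;

  if (ebpf_jit_enabled() && insn->imm == BPF_FUNC_map_lookup_elem &&
      map_ptr->ops->map_gen_lookup) {
          /* map type emits its own lookup sequence into insn_buf */
          cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
          /* the sequence is patched in place of the helper call */
          new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
  }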

Thanks!

Daniel Borkmann (2):
  bpf: improve htab inlining for future 32 bit jits
  bpf: inline map in map lookup functions for array and htab

 kernel/bpf/arraymap.c | 26 ++++++++++++++++++++++++++
 kernel/bpf/hashtab.c  | 27 +++++++++++++++++++++++++--
 2 files changed, 51 insertions(+), 2 deletions(-)

-- 
1.9.3

* [PATCH net-next 1/2] bpf: improve htab inlining for future 32 bit jits
  2017-08-18 23:51 [PATCH net-next 0/2] BPF inline improvements Daniel Borkmann
@ 2017-08-18 23:51 ` Daniel Borkmann
  2017-08-19  0:00   ` Alexei Starovoitov
  2017-08-18 23:51 ` [PATCH net-next 2/2] bpf: inline map in map lookup functions for array and htab Daniel Borkmann
  1 sibling, 1 reply; 7+ messages in thread
From: Daniel Borkmann @ 2017-08-18 23:51 UTC (permalink / raw)
  To: davem; +Cc: ast, netdev, Daniel Borkmann

Let's future-proof htab lookup inlining: commit 9015d2f59535 ("bpf:
inline htab_map_lookup_elem()") made the assumption that a direct
call emission to __htab_map_lookup_elem() will always work out for
JITs. This is currently true since all JITs we have are for 64 bit
archs, but for 32 bit JITs like the upcoming arm32 one, we get a
NULL pointer dereference when executing the call to
__htab_map_lookup_elem(), since the arguments are passed with a
different size (unsigned long vs. u64 for pointers) than what the
BPF side passes. Thus, let's add a proper BPF_CALL_2() declaration
so that we don't need to make any such assumptions.
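
For illustration, BPF_CALL_2() places a small wrapper in front of the
native function: the wrapper's arguments are u64 BPF register values
which get cast down to the declared parameter types. A conceptual,
hand-written equivalent (not the literal macro expansion) would be:

  /* All arguments arrive as u64 BPF registers and are cast down to
   * the native pointer width before __htab_map_lookup_elem() runs,
   * so 32 bit JITs can emit the call without extra fixups.
   */
  u64 bpf_htab_lookup_helper(u64 r1, u64 r2)
  {
          struct bpf_map *map = (struct bpf_map *)(unsigned long) r1;
          void *key = (void *)(unsigned long) r2;

          return (unsigned long) __htab_map_lookup_elem(map, key);
  }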

Reported-by: Shubham Bansal <illusionist.neo@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/hashtab.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 4fb4631..cabf37b 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -437,7 +437,8 @@ static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
  * The return value is adjusted by BPF instructions
  * in htab_map_gen_lookup().
  */
-static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
+static __always_inline void *__htab_map_lookup_elem(struct bpf_map *map,
+						    void *key)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 	struct hlist_nulls_head *head;
@@ -479,12 +480,17 @@ static void *htab_map_lookup_elem(struct bpf_map *map, void *key)
  * bpf_prog
  *   __htab_map_lookup_elem
  */
+BPF_CALL_2(bpf_htab_lookup_helper, struct bpf_map *, map, void *, key)
+{
+	return (unsigned long) __htab_map_lookup_elem(map, key);
+}
+
 static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 {
 	struct bpf_insn *insn = insn_buf;
 	const int ret = BPF_REG_0;
 
-	*insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
+	*insn++ = BPF_EMIT_CALL(bpf_htab_lookup_helper);
 	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
 				offsetof(struct htab_elem, key) +
-- 
1.9.3

* [PATCH net-next 2/2] bpf: inline map in map lookup functions for array and htab
  2017-08-18 23:51 [PATCH net-next 0/2] BPF inline improvements Daniel Borkmann
  2017-08-18 23:51 ` [PATCH net-next 1/2] bpf: improve htab inlining for future 32 bit jits Daniel Borkmann
@ 2017-08-18 23:51 ` Daniel Borkmann
  2017-08-19  0:05   ` Alexei Starovoitov
  1 sibling, 1 reply; 7+ messages in thread
From: Daniel Borkmann @ 2017-08-18 23:51 UTC (permalink / raw)
  To: davem; +Cc: ast, netdev, Daniel Borkmann

Avoid two successive function calls for the map-in-map lookup: the
first is the bpf_map_lookup_elem() helper call, and the second is
the callback via map->ops->map_lookup_elem() to get to the map-in-map
implementation. This patch inlines the array and htab flavors of the
map-in-map lookup.
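
For reference, map-in-map usage from the program side looks roughly
like the following (hypothetical BPF C snippet, names invented;
outer_map would be a BPF_MAP_TYPE_ARRAY_OF_MAPS or
BPF_MAP_TYPE_HASH_OF_MAPS set up elsewhere). The lookup on the outer
map is the one the new map_gen_lookup handlers below turn into an
inline instruction sequence:

  void *inner_map, *value;

  /* 1st lookup: outer map returns a pointer to the inner map */
  inner_map = bpf_map_lookup_elem(&outer_map, &outer_key);
  if (inner_map) {
          /* 2nd lookup: regular lookup inside the inner map */
          value = bpf_map_lookup_elem(inner_map, &inner_key);
          if (value)
                  process(value); /* placeholder for real use */
  }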

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/arraymap.c | 26 ++++++++++++++++++++++++++
 kernel/bpf/hashtab.c  | 17 +++++++++++++++++
 2 files changed, 43 insertions(+)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index d771a38..b25d6ce 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -603,6 +603,31 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
 	return READ_ONCE(*inner_map);
 }
 
+static u32 array_of_map_gen_lookup(struct bpf_map *map,
+				   struct bpf_insn *insn_buf)
+{
+	u32 elem_size = round_up(map->value_size, 8);
+	struct bpf_insn *insn = insn_buf;
+	const int ret = BPF_REG_0;
+	const int map_ptr = BPF_REG_1;
+	const int index = BPF_REG_2;
+
+	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
+	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
+	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+	if (is_power_of_2(elem_size))
+		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
+	else
+		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
+	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
+	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
+	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
+	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+	*insn++ = BPF_MOV64_IMM(ret, 0);
+
+	return insn - insn_buf;
+}
+
 const struct bpf_map_ops array_of_maps_map_ops = {
 	.map_alloc = array_of_map_alloc,
 	.map_free = array_of_map_free,
@@ -612,4 +637,5 @@ static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
 	.map_fd_get_ptr = bpf_map_fd_get_ptr,
 	.map_fd_put_ptr = bpf_map_fd_put_ptr,
 	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
+	.map_gen_lookup = array_of_map_gen_lookup,
 };
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index cabf37b..750261d 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1317,6 +1317,22 @@ static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key)
 	return READ_ONCE(*inner_map);
 }
 
+static u32 htab_of_map_gen_lookup(struct bpf_map *map,
+				  struct bpf_insn *insn_buf)
+{
+	struct bpf_insn *insn = insn_buf;
+	const int ret = BPF_REG_0;
+
+	*insn++ = BPF_EMIT_CALL(bpf_htab_lookup_helper);
+	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
+	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
+				offsetof(struct htab_elem, key) +
+				round_up(map->key_size, 8));
+	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
+
+	return insn - insn_buf;
+}
+
 static void htab_of_map_free(struct bpf_map *map)
 {
 	bpf_map_meta_free(map->inner_map_meta);
@@ -1332,4 +1348,5 @@ static void htab_of_map_free(struct bpf_map *map)
 	.map_fd_get_ptr = bpf_map_fd_get_ptr,
 	.map_fd_put_ptr = bpf_map_fd_put_ptr,
 	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
+	.map_gen_lookup = htab_of_map_gen_lookup,
 };
-- 
1.9.3

* Re: [PATCH net-next 1/2] bpf: improve htab inlining for future 32 bit jits
  2017-08-18 23:51 ` [PATCH net-next 1/2] bpf: improve htab inlining for future 32 bit jits Daniel Borkmann
@ 2017-08-19  0:00   ` Alexei Starovoitov
  2017-08-19  0:21     ` Daniel Borkmann
  0 siblings, 1 reply; 7+ messages in thread
From: Alexei Starovoitov @ 2017-08-19  0:00 UTC (permalink / raw)
  To: Daniel Borkmann, davem; +Cc: netdev

On 8/18/17 4:51 PM, Daniel Borkmann wrote:
> Lets future proof htab lookup inlining, commit 9015d2f59535 ("bpf:
> inline htab_map_lookup_elem()") was making the assumption that a
> direct call emission to __htab_map_lookup_elem() will always work
> out for JITs. This is currently true since all JITs we have are
> for 64 bit archs, but in case of 32 bit JITs like upcoming arm32,
> we get a NULL pointer dereference when executing the call to
> __htab_map_lookup_elem() since passed arguments are of a different
> size (unsigned long vs. u64 for pointers) than what we do out of
> BPF. Thus, lets do a proper BPF_CALL_2() declaration such that we
> don't need to make any such assumptions.
>
> Reported-by: Shubham Bansal <illusionist.neo@gmail.com>
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

Assuming on 64-bit archs there should be no perf difference
and only an increase in .text, since __htab_map_lookup_elem
is now force-inlined into a bunch of places?
I guess that's ok, but it kinda sucks for 64-bit archs to pay
such a penalty because of 32-bit archs.
Maybe drop always_inline and do such a thing conditionally
on 32-bit archs only?
What's the increase in .text?
Any difference seen in map_perf_test?

* Re: [PATCH net-next 2/2] bpf: inline map in map lookup functions for array and htab
  2017-08-18 23:51 ` [PATCH net-next 2/2] bpf: inline map in map lookup functions for array and htab Daniel Borkmann
@ 2017-08-19  0:05   ` Alexei Starovoitov
  0 siblings, 0 replies; 7+ messages in thread
From: Alexei Starovoitov @ 2017-08-19  0:05 UTC (permalink / raw)
  To: Daniel Borkmann, davem; +Cc: netdev

On 8/18/17 4:51 PM, Daniel Borkmann wrote:
> Avoid two successive functions calls for the map in map lookup, first
> is the bpf_map_lookup_elem() helper call, and second the callback via
> map->ops->map_lookup_elem() to get to the map in map implementation.
> Implementation inlines array and htab flavor for map in map lookups.
>
> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>

lgtm
thanks for adding this optimization.
Acked-by: Alexei Starovoitov <ast@kernel.org>

* Re: [PATCH net-next 1/2] bpf: improve htab inlining for future 32 bit jits
  2017-08-19  0:00   ` Alexei Starovoitov
@ 2017-08-19  0:21     ` Daniel Borkmann
  2017-08-19  0:24       ` Alexei Starovoitov
  0 siblings, 1 reply; 7+ messages in thread
From: Daniel Borkmann @ 2017-08-19  0:21 UTC (permalink / raw)
  To: Alexei Starovoitov, davem; +Cc: netdev

On 08/19/2017 02:00 AM, Alexei Starovoitov wrote:
> On 8/18/17 4:51 PM, Daniel Borkmann wrote:
>> Lets future proof htab lookup inlining, commit 9015d2f59535 ("bpf:
>> inline htab_map_lookup_elem()") was making the assumption that a
>> direct call emission to __htab_map_lookup_elem() will always work
>> out for JITs. This is currently true since all JITs we have are
>> for 64 bit archs, but in case of 32 bit JITs like upcoming arm32,
>> we get a NULL pointer dereference when executing the call to
>> __htab_map_lookup_elem() since passed arguments are of a different
>> size (unsigned long vs. u64 for pointers) than what we do out of
>> BPF. Thus, lets do a proper BPF_CALL_2() declaration such that we
>> don't need to make any such assumptions.
>>
>> Reported-by: Shubham Bansal <illusionist.neo@gmail.com>
>> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
>
> assuming on 64-bit archs the should be no perf difference
> and only increase in .text, since __htab_map_lookup_elem
> is now force inlined into a bunch of places?
> I guess that's ok, but kinda sux for 64-bit archs to pay
> such penalty because of 32-bit archs.

Yeah true, text bumps from 11k to 13k, doesn't pay off.

> May be drop always_inline and do such thing conditionally
> on 32-bit archs only?

I will guard with this instead:

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 4f6e7eb..e42c096 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4160,7 +4160,11 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
                         continue;
                 }

-               if (ebpf_jit_enabled() && insn->imm == BPF_FUNC_map_lookup_elem) {
+               /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
+                * handlers are currently limited to 64 bit only.
+                */
+               if (ebpf_jit_enabled() && BITS_PER_LONG == 64 &&
+                   insn->imm == BPF_FUNC_map_lookup_elem) {
                         map_ptr = env->insn_aux_data[i + delta].map_ptr;
                         if (map_ptr == BPF_MAP_PTR_POISON ||
                             !map_ptr->ops->map_gen_lookup)

* Re: [PATCH net-next 1/2] bpf: improve htab inlining for future 32 bit jits
  2017-08-19  0:21     ` Daniel Borkmann
@ 2017-08-19  0:24       ` Alexei Starovoitov
  0 siblings, 0 replies; 7+ messages in thread
From: Alexei Starovoitov @ 2017-08-19  0:24 UTC (permalink / raw)
  To: Daniel Borkmann, davem; +Cc: netdev

On 8/18/17 5:21 PM, Daniel Borkmann wrote:
> On 08/19/2017 02:00 AM, Alexei Starovoitov wrote:
>> On 8/18/17 4:51 PM, Daniel Borkmann wrote:
>>> Lets future proof htab lookup inlining, commit 9015d2f59535 ("bpf:
>>> inline htab_map_lookup_elem()") was making the assumption that a
>>> direct call emission to __htab_map_lookup_elem() will always work
>>> out for JITs. This is currently true since all JITs we have are
>>> for 64 bit archs, but in case of 32 bit JITs like upcoming arm32,
>>> we get a NULL pointer dereference when executing the call to
>>> __htab_map_lookup_elem() since passed arguments are of a different
>>> size (unsigned long vs. u64 for pointers) than what we do out of
>>> BPF. Thus, lets do a proper BPF_CALL_2() declaration such that we
>>> don't need to make any such assumptions.
>>>
>>> Reported-by: Shubham Bansal <illusionist.neo@gmail.com>
>>> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
>>
>> assuming on 64-bit archs the should be no perf difference
>> and only increase in .text, since __htab_map_lookup_elem
>> is now force inlined into a bunch of places?
>> I guess that's ok, but kinda sux for 64-bit archs to pay
>> such penalty because of 32-bit archs.
>
> Yeah true, text bumps from 11k to 13k, doesn't pay off.
>
>> May be drop always_inline and do such thing conditionally
>> on 32-bit archs only?
>
> I will guard with this instead:
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 4f6e7eb..e42c096 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -4160,7 +4160,11 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
>                         continue;
>                 }
>
> -               if (ebpf_jit_enabled() && insn->imm == BPF_FUNC_map_lookup_elem) {
> +               /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
> +                * handlers are currently limited to 64 bit only.
> +                */
> +               if (ebpf_jit_enabled() && BITS_PER_LONG == 64 &&
> +                   insn->imm == BPF_FUNC_map_lookup_elem) {
>                         map_ptr = env->insn_aux_data[i + delta].map_ptr;
>                         if (map_ptr == BPF_MAP_PTR_POISON ||
>                             !map_ptr->ops->map_gen_lookup)

Sure, looks good to me for now. We probably need to first measure
the perf gains from inlining on 32-bit arm before taking the next step.
Thanks!

