* [PATCH net-next 0/6] bpf: LRU performance and test-program improvements
From: Martin KaFai Lau @ 2017-04-14 17:30 UTC
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann

The first 4 patches make a few improvements to the LRU tests.

Patch 5/6 is to improve the performance of BPF_F_NO_COMMON_LRU map.

Patch 6/6 adds an example in using LRU map with map-in-map.

Martin KaFai Lau (6):
  bpf: lru: Add test_lru_sanity6 for BPF_F_NO_COMMON_LRU
  bpf: lru: Cleanup test_lru_map.c
  bpf: lru: Refactor LRU map tests in map_perf_test
  bpf: Allow bpf sample programs (*_user.c) to change bpf_map_def
  bpf: lru: Lower the PERCPU_NR_SCANS from 16 to 4
  bpf: lru: Add map-in-map LRU example

 kernel/bpf/bpf_lru_list.c                  |   2 +-
 samples/bpf/bpf_load.c                     | 114 ++++++++++---
 samples/bpf/bpf_load.h                     |  13 ++
 samples/bpf/map_perf_test_kern.c           |  75 +++++++--
 samples/bpf/map_perf_test_user.c           | 247 +++++++++++++++++++++--------
 tools/testing/selftests/bpf/test_lru_map.c | 104 ++++++++----
 6 files changed, 428 insertions(+), 127 deletions(-)

-- 
2.9.3

* [PATCH net-next 1/6] bpf: lru: Add test_lru_sanity6 for BPF_F_NO_COMMON_LRU
From: Martin KaFai Lau @ 2017-04-14 17:30 UTC
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann

test_lru_sanity3 is not applicable to BPF_F_NO_COMMON_LRU.
It only happens to pass when PERCPU_FREE_TARGET == 16.

This patch:
1) Disables test_lru_sanity3 for BPF_F_NO_COMMON_LRU
2) Adds test_lru_sanity6 to test list rotation for
   the BPF_F_NO_COMMON_LRU map
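
For background, a BPF_F_NO_COMMON_LRU map keeps one LRU list per cpu
instead of a single shared list, so the new test scales max_entries
by nr_cpus to give every cpu its own tgt_free * 2 elements.  A
minimal sketch, assuming the bpf_create_map() wrapper these selftests
use:

	/* one LRU list per cpu: give each cpu map_size elements */
	int lru_map_fd = bpf_create_map(BPF_MAP_TYPE_LRU_HASH,
					sizeof(unsigned long long), /* key */
					sizeof(unsigned long long), /* value */
					map_size * nr_cpus,
					BPF_F_NO_COMMON_LRU);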

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_lru_map.c | 70 +++++++++++++++++++++++++++---
 1 file changed, 65 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 00b0aff56e2e..04b2242937cc 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -387,6 +387,10 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 	unsigned int map_size;
 	int next_cpu = 0;
 
+	if (map_flags & BPF_F_NO_COMMON_LRU)
+		/* This test is only applicable to common LRU list */
+		return;
+
 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
 	       map_flags);
 
@@ -396,11 +400,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 	assert(batch_size * 2 == tgt_free);
 
 	map_size = tgt_free * 2;
-	if (map_flags & BPF_F_NO_COMMON_LRU)
-		lru_map_fd = create_map(map_type, map_flags,
-					map_size * nr_cpus);
-	else
-		lru_map_fd = create_map(map_type, map_flags, map_size);
+	lru_map_fd = create_map(map_type, map_flags, map_size);
 	assert(lru_map_fd != -1);
 
 	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
@@ -566,6 +566,65 @@ static void test_lru_sanity5(int map_type, int map_flags)
 	printf("Pass\n");
 }
 
+/* Test list rotation for BPF_F_NO_COMMON_LRU map */
+static void test_lru_sanity6(int map_type, int map_flags, int tgt_free)
+{
+	int lru_map_fd, expected_map_fd;
+	unsigned long long key, value[nr_cpus];
+	unsigned int map_size = tgt_free * 2;
+	int next_cpu = 0;
+
+	if (!(map_flags & BPF_F_NO_COMMON_LRU))
+		return;
+
+	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+	       map_flags);
+
+	assert(sched_next_online(0, &next_cpu) != -1);
+
+	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+	assert(expected_map_fd != -1);
+
+	lru_map_fd = create_map(map_type, map_flags, map_size * nr_cpus);
+	assert(lru_map_fd != -1);
+
+	value[0] = 1234;
+
+	for (key = 1; key <= tgt_free; key++) {
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	for (; key <= tgt_free * 2; key++) {
+		unsigned long long stable_key;
+
+		/* Make ref bit sticky for key: [1, tgt_free] */
+		for (stable_key = 1; stable_key <= tgt_free; stable_key++) {
+			/* Mark the ref bit */
+			assert(!bpf_map_lookup_elem(lru_map_fd, &stable_key,
+						    value));
+		}
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	for (; key <= tgt_free * 3; key++) {
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	assert(map_equal(lru_map_fd, expected_map_fd));
+
+	close(expected_map_fd);
+	close(lru_map_fd);
+
+	printf("Pass\n");
+}
+
 int main(int argc, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
@@ -593,6 +652,7 @@ int main(int argc, char **argv)
 			test_lru_sanity3(map_types[t], map_flags[f], tgt_free);
 			test_lru_sanity4(map_types[t], map_flags[f], tgt_free);
 			test_lru_sanity5(map_types[t], map_flags[f]);
+			test_lru_sanity6(map_types[t], map_flags[f], tgt_free);
 
 			printf("\n");
 		}
-- 
2.9.3

* [PATCH net-next 2/6] bpf: lru: Cleanup test_lru_map.c
From: Martin KaFai Lau @ 2017-04-14 17:30 UTC
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann

This patch does the following cleanups on test_lru_map.c:
1) Fixes indentation (replaces spaces with tabs)
2) Removes a redundant BPF_F_NO_COMMON_LRU check (the function
   already returns early for that flag)
3) Simplifies some comments

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/testing/selftests/bpf/test_lru_map.c | 32 +++++++++---------------------
 1 file changed, 9 insertions(+), 23 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 04b2242937cc..87c05e5bdfdd 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -191,12 +191,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
 	int next_cpu = 0;
 
 	if (map_flags & BPF_F_NO_COMMON_LRU)
-		/* Ther percpu lru list (i.e each cpu has its own LRU
-		 * list) does not have a local free list.  Hence,
-		 * it will only free old nodes till there is no free
-		 * from the LRU list.  Hence, this test does not apply
-		 * to BPF_F_NO_COMMON_LRU
-		 */
+		/* This test is only applicable to common LRU list */
 		return;
 
 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
@@ -227,7 +222,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
 	for (key = 1; key < end_key; key++) {
 		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	/* Insert 1+tgt_free to 2*tgt_free
@@ -273,12 +268,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 	int next_cpu = 0;
 
 	if (map_flags & BPF_F_NO_COMMON_LRU)
-		/* Ther percpu lru list (i.e each cpu has its own LRU
-		 * list) does not have a local free list.  Hence,
-		 * it will only free old nodes till there is no free
-		 * from the LRU list.  Hence, this test does not apply
-		 * to BPF_F_NO_COMMON_LRU
-		 */
+		/* This test is only applicable to common LRU list */
 		return;
 
 	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
@@ -290,11 +280,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 	assert(batch_size * 2 == tgt_free);
 
 	map_size = tgt_free + batch_size;
-	if (map_flags & BPF_F_NO_COMMON_LRU)
-		lru_map_fd = create_map(map_type, map_flags,
-					map_size * nr_cpus);
-	else
-		lru_map_fd = create_map(map_type, map_flags, map_size);
+	lru_map_fd = create_map(map_type, map_flags, map_size);
 	assert(lru_map_fd != -1);
 
 	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
@@ -341,7 +327,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
 		assert(value[0] == 4321);
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	value[0] = 1234;
@@ -361,7 +347,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
 					    BPF_NOEXIST));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
@@ -419,7 +405,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 	for (key = 1; key < end_key; key++) {
 		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	/* Add 1+2*tgt_free to tgt_free*5/2
@@ -431,7 +417,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
 		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
 					    BPF_NOEXIST));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
@@ -491,7 +477,7 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
 		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
 					    BPF_NOEXIST));
 		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
-				            BPF_NOEXIST));
+					    BPF_NOEXIST));
 	}
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
-- 
2.9.3

* [PATCH net-next 3/6] bpf: lru: Refactor LRU map tests in map_perf_test
From: Martin KaFai Lau @ 2017-04-14 17:30 UTC
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann

One more LRU test will be added later in this patch series.
In this patch, we first move all of the existing LRU map tests
onto a single syscall (connect) so that the future LRU test can
be added without hunting for yet another syscall to attach to.

One of the map names is also changed from percpu_lru_hash_map
to nocommon_lru_hash_map to avoid confusion with percpu_hash_map.
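
The test case is selected by the user program through the destination
address it passes to connect().  A minimal sketch of the encoding,
condensed from the diff below:

	struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 };

	in6.sin6_addr.s6_addr16[0] = 0xdead;	/* magic the kprobe checks */
	in6.sin6_addr.s6_addr16[1] = 0xbeef;
	in6.sin6_addr.s6_addr16[7] = 1;	/* 0: lru_hash_map, 1: nocommon_lru */
	/* fd -1 fails fast with EBADF, but the sys_connect kprobe still runs */
	assert(connect(-1, (const struct sockaddr *)&in6, sizeof(in6)) == -1 &&
	       errno == EBADF);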

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/map_perf_test_kern.c | 43 +++++++++++++++++++++++---------
 samples/bpf/map_perf_test_user.c | 53 +++++++++++++++++++++++++++-------------
 2 files changed, 67 insertions(+), 29 deletions(-)

diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
index 9da2a3441b0a..404ed53b8a53 100644
--- a/samples/bpf/map_perf_test_kern.c
+++ b/samples/bpf/map_perf_test_kern.c
@@ -26,7 +26,7 @@ struct bpf_map_def SEC("maps") lru_hash_map = {
 	.max_entries = 10000,
 };
 
-struct bpf_map_def SEC("maps") percpu_lru_hash_map = {
+struct bpf_map_def SEC("maps") nocommon_lru_hash_map = {
 	.type = BPF_MAP_TYPE_LRU_HASH,
 	.key_size = sizeof(u32),
 	.value_size = sizeof(long),
@@ -100,6 +100,7 @@ int stress_percpu_hmap(struct pt_regs *ctx)
 		bpf_map_delete_elem(&percpu_hash_map, &key);
 	return 0;
 }
+
 SEC("kprobe/sys_getgid")
 int stress_hmap_alloc(struct pt_regs *ctx)
 {
@@ -128,24 +129,42 @@ int stress_percpu_hmap_alloc(struct pt_regs *ctx)
 	return 0;
 }
 
-SEC("kprobe/sys_getpid")
+SEC("kprobe/sys_connect")
 int stress_lru_hmap_alloc(struct pt_regs *ctx)
 {
-	u32 key = bpf_get_prandom_u32();
+	struct sockaddr_in6 *in6;
+	u16 test_case, dst6[8];
+	int addrlen, ret;
+	char fmt[] = "Failed at stress_lru_hmap_alloc. ret:%d\n";
 	long val = 1;
+	u32 key = bpf_get_prandom_u32();
 
-	bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY);
+	in6 = (struct sockaddr_in6 *)PT_REGS_PARM2(ctx);
+	addrlen = (int)PT_REGS_PARM3(ctx);
 
-	return 0;
-}
+	if (addrlen != sizeof(*in6))
+		return 0;
 
-SEC("kprobe/sys_getppid")
-int stress_percpu_lru_hmap_alloc(struct pt_regs *ctx)
-{
-	u32 key = bpf_get_prandom_u32();
-	long val = 1;
+	ret = bpf_probe_read(dst6, sizeof(dst6), &in6->sin6_addr);
+	if (ret)
+		goto done;
+
+	if (dst6[0] != 0xdead || dst6[1] != 0xbeef)
+		return 0;
+
+	test_case = dst6[7];
+
+	if (test_case == 0)
+		ret = bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY);
+	else if (test_case == 1)
+		ret = bpf_map_update_elem(&nocommon_lru_hash_map, &key, &val,
+					  BPF_ANY);
+	else
+		ret = -EINVAL;
 
-	bpf_map_update_elem(&percpu_lru_hash_map, &key, &val, BPF_ANY);
+done:
+	if (ret)
+		bpf_trace_printk(fmt, sizeof(fmt), ret);
 
 	return 0;
 }
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index e29ff318a793..51cb8f238aa2 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -18,6 +18,9 @@
 #include <string.h>
 #include <time.h>
 #include <sys/resource.h>
+#include <arpa/inet.h>
+#include <errno.h>
+
 #include "libbpf.h"
 #include "bpf_load.h"
 
@@ -36,7 +39,7 @@ static __u64 time_get_ns(void)
 #define HASH_KMALLOC		(1 << 2)
 #define PERCPU_HASH_KMALLOC	(1 << 3)
 #define LRU_HASH_PREALLOC	(1 << 4)
-#define PERCPU_LRU_HASH_PREALLOC	(1 << 5)
+#define NOCOMMON_LRU_HASH_PREALLOC	(1 << 5)
 #define LPM_KMALLOC		(1 << 6)
 #define HASH_LOOKUP		(1 << 7)
 #define ARRAY_LOOKUP		(1 << 8)
@@ -55,28 +58,44 @@ static void test_hash_prealloc(int cpu)
 	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
 }
 
-static void test_lru_hash_prealloc(int cpu)
+static void do_test_lru(int lru_test_flag, int cpu)
 {
+	struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 };
+	const char *test_name;
 	__u64 start_time;
-	int i;
+	int i, ret;
+
+	in6.sin6_addr.s6_addr16[0] = 0xdead;
+	in6.sin6_addr.s6_addr16[1] = 0xbeef;
+
+	if (lru_test_flag & LRU_HASH_PREALLOC) {
+		test_name = "lru_hash_map_perf";
+		in6.sin6_addr.s6_addr16[7] = 0;
+	} else if (lru_test_flag & NOCOMMON_LRU_HASH_PREALLOC) {
+		test_name = "nocommon_lru_hash_map_perf";
+		in6.sin6_addr.s6_addr16[7] = 1;
+	} else {
+		assert(0);
+	}
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
-		syscall(__NR_getpid);
-	printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n",
-	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+	for (i = 0; i < MAX_CNT; i++) {
+		ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6));
+		assert(ret == -1 && errno == EBADF);
+	}
+	printf("%d:%s pre-alloc %lld events per sec\n",
+	       cpu, test_name,
+	       MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
 }
 
-static void test_percpu_lru_hash_prealloc(int cpu)
+static void test_lru_hash_prealloc(int cpu)
 {
-	__u64 start_time;
-	int i;
+	do_test_lru(LRU_HASH_PREALLOC, cpu);
+}
 
-	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
-		syscall(__NR_getppid);
-	printf("%d:lru_hash_map_perf pre-alloc %lld events per sec\n",
-	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+static void test_nocommon_lru_hash_prealloc(int cpu)
+{
+	do_test_lru(NOCOMMON_LRU_HASH_PREALLOC, cpu);
 }
 
 static void test_percpu_hash_prealloc(int cpu)
@@ -174,8 +193,8 @@ static void loop(int cpu)
 	if (test_flags & LRU_HASH_PREALLOC)
 		test_lru_hash_prealloc(cpu);
 
-	if (test_flags & PERCPU_LRU_HASH_PREALLOC)
-		test_percpu_lru_hash_prealloc(cpu);
+	if (test_flags & NOCOMMON_LRU_HASH_PREALLOC)
+		test_nocommon_lru_hash_prealloc(cpu);
 
 	if (test_flags & LPM_KMALLOC)
 		test_lpm_kmalloc(cpu);
-- 
2.9.3

* [PATCH net-next 4/6] bpf: Allow bpf sample programs (*_user.c) to change bpf_map_def
From: Martin KaFai Lau @ 2017-04-14 17:30 UTC
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann

The current bpf_map_def is statically defined at compile
time.  This patch allows the *_user.c program to change it at
runtime.  It is done by adding load_bpf_file_fixup_map(), which
takes a callback.  The callback will be called before creating
each map so that it has a chance to modify the bpf_map_def.

The current usecase is to change max_entries in map_perf_test.
It is interesting to test with a much bigger map size in
some cases (e.g. the following patch on bpf_lru_map.c).
However, it is hard to find one size that fits all testing
environments.  Hence, it is handy to take the max_entries
as a cmdline arg and then configure the bpf_map_def at
runtime.

This patch adds two cmdline args.  One configures the
map's max_entries.  The other configures max_cnt, which
controls how many times a syscall is called.
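
A minimal sketch of the resulting usage in a *_user.c program; the
"lru_hash_map" name and the num_map_entries variable here mirror the
map_perf_test changes below:

	static uint32_t num_map_entries;	/* parsed from argv */

	/* called by bpf_load.c once per map, before the map is created */
	static void fixup_map(struct bpf_map_def *map, const char *map_name,
			      int idx)
	{
		if (num_map_entries && !strcmp(map_name, "lru_hash_map"))
			map->max_entries = num_map_entries;
	}

	/* ... then in main(), instead of load_bpf_file(filename): */
	if (load_bpf_file_fixup_map(filename, fixup_map))
		return 1;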

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 samples/bpf/bpf_load.c           | 114 +++++++++++++++++++++++++-----
 samples/bpf/bpf_load.h           |  13 ++++
 samples/bpf/map_perf_test_user.c | 148 ++++++++++++++++++++++++---------------
 3 files changed, 201 insertions(+), 74 deletions(-)

diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index dcdce1270d38..0d449d8032d1 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -21,6 +21,7 @@
 #include <sys/mman.h>
 #include <poll.h>
 #include <ctype.h>
+#include <assert.h>
 #include "libbpf.h"
 #include "bpf_load.h"
 #include "perf-sys.h"
@@ -37,15 +38,6 @@ int event_fd[MAX_PROGS];
 int prog_cnt;
 int prog_array_fd = -1;
 
-struct bpf_map_def {
-	unsigned int type;
-	unsigned int key_size;
-	unsigned int value_size;
-	unsigned int max_entries;
-	unsigned int map_flags;
-	unsigned int inner_map_idx;
-};
-
 static int populate_prog_array(const char *event, int prog_fd)
 {
 	int ind = atoi(event), err;
@@ -193,11 +185,14 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
 	return 0;
 }
 
-static int load_maps(struct bpf_map_def *maps, int len)
+static int load_maps(struct bpf_map_def *maps, int len,
+		     const char **map_names, fixup_map_cb fixup_map)
 {
 	int i;
 
 	for (i = 0; i < len / sizeof(struct bpf_map_def); i++) {
+		if (fixup_map)
+			fixup_map(&maps[i], map_names[i], i);
 
 		if (maps[i].type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
 		    maps[i].type == BPF_MAP_TYPE_HASH_OF_MAPS) {
@@ -280,14 +275,64 @@ static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
 	return 0;
 }
 
-int load_bpf_file(char *path)
+static int cmp_symbols(const void *l, const void *r)
+{
+	const GElf_Sym *lsym = (const GElf_Sym *)l;
+	const GElf_Sym *rsym = (const GElf_Sym *)r;
+
+	if (lsym->st_value < rsym->st_value)
+		return -1;
+	else if (lsym->st_value > rsym->st_value)
+		return 1;
+	else
+		return 0;
+}
+
+static int get_sorted_map_names(Elf *elf, Elf_Data *symbols, int maps_shndx,
+				int strtabidx, char **map_names)
+{
+	GElf_Sym map_symbols[MAX_MAPS];
+	int i, nr_maps = 0;
+
+	for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+		assert(nr_maps < MAX_MAPS);
+		if (!gelf_getsym(symbols, i, &map_symbols[nr_maps]))
+			continue;
+		if (map_symbols[nr_maps].st_shndx != maps_shndx)
+			continue;
+		nr_maps++;
+	}
+
+	qsort(map_symbols, nr_maps, sizeof(GElf_Sym), cmp_symbols);
+
+	for (i = 0; i < nr_maps; i++) {
+		char *map_name;
+
+		map_name = elf_strptr(elf, strtabidx, map_symbols[i].st_name);
+		if (!map_name) {
+			printf("cannot get map symbol\n");
+			return 1;
+		}
+
+		map_names[i] = strdup(map_name);
+		if (!map_names[i]) {
+			printf("strdup(%s): %s(%d)\n", map_name,
+			       strerror(errno), errno);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
 {
-	int fd, i;
+	int fd, i, ret, maps_shndx = -1, strtabidx = -1;
 	Elf *elf;
 	GElf_Ehdr ehdr;
 	GElf_Shdr shdr, shdr_prog;
-	Elf_Data *data, *data_prog, *symbols = NULL;
-	char *shname, *shname_prog;
+	Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL;
+	char *shname, *shname_prog, *map_names[MAX_MAPS] = { NULL };
 
 	/* reset global variables */
 	kern_version = 0;
@@ -335,14 +380,33 @@ int load_bpf_file(char *path)
 			}
 			memcpy(&kern_version, data->d_buf, sizeof(int));
 		} else if (strcmp(shname, "maps") == 0) {
-			processed_sec[i] = true;
-			if (load_maps(data->d_buf, data->d_size))
-				return 1;
+			maps_shndx = i;
+			data_maps = data;
 		} else if (shdr.sh_type == SHT_SYMTAB) {
+			strtabidx = shdr.sh_link;
 			symbols = data;
 		}
 	}
 
+	ret = 1;
+
+	if (!symbols) {
+		printf("missing SHT_SYMTAB section\n");
+		goto done;
+	}
+
+	if (data_maps) {
+		if (get_sorted_map_names(elf, symbols, maps_shndx, strtabidx,
+					 map_names))
+			goto done;
+
+		if (load_maps(data_maps->d_buf, data_maps->d_size,
+			      (const char **)map_names, fixup_map))
+			goto done;
+
+		processed_sec[maps_shndx] = true;
+	}
+
 	/* load programs that need map fixup (relocations) */
 	for (i = 1; i < ehdr.e_shnum; i++) {
 		if (processed_sec[i])
@@ -399,8 +463,22 @@ int load_bpf_file(char *path)
 			load_and_attach(shname, data->d_buf, data->d_size);
 	}
 
+	ret = 0;
+done:
+	for (i = 0; i < MAX_MAPS; i++)
+		free(map_names[i]);
 	close(fd);
-	return 0;
+	return ret;
+}
+
+int load_bpf_file(char *path)
+{
+	return do_load_bpf_file(path, NULL);
+}
+
+int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map)
+{
+	return do_load_bpf_file(path, fixup_map);
 }
 
 void read_trace_pipe(void)
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index c827827299b3..68f6b2d22507 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -6,6 +6,18 @@
 #define MAX_MAPS 32
 #define MAX_PROGS 32
 
+struct bpf_map_def {
+	unsigned int type;
+	unsigned int key_size;
+	unsigned int value_size;
+	unsigned int max_entries;
+	unsigned int map_flags;
+	unsigned int inner_map_idx;
+};
+
+typedef void (*fixup_map_cb)(struct bpf_map_def *map, const char *map_name,
+			     int idx);
+
 extern int map_fd[MAX_MAPS];
 extern int prog_fd[MAX_PROGS];
 extern int event_fd[MAX_PROGS];
@@ -25,6 +37,7 @@ extern int prog_cnt;
  * returns zero on success
  */
 int load_bpf_file(char *path);
+int load_bpf_file_fixup_map(const char *path, fixup_map_cb fixup_map);
 
 void read_trace_pipe(void);
 struct ksym {
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 51cb8f238aa2..2a12f48b5c6d 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -24,7 +24,7 @@
 #include "libbpf.h"
 #include "bpf_load.h"
 
-#define MAX_CNT 1000000
+#define TEST_BIT(t) (1U << (t))
 
 static __u64 time_get_ns(void)
 {
@@ -34,17 +34,39 @@ static __u64 time_get_ns(void)
 	return ts.tv_sec * 1000000000ull + ts.tv_nsec;
 }
 
-#define HASH_PREALLOC		(1 << 0)
-#define PERCPU_HASH_PREALLOC	(1 << 1)
-#define HASH_KMALLOC		(1 << 2)
-#define PERCPU_HASH_KMALLOC	(1 << 3)
-#define LRU_HASH_PREALLOC	(1 << 4)
-#define NOCOMMON_LRU_HASH_PREALLOC	(1 << 5)
-#define LPM_KMALLOC		(1 << 6)
-#define HASH_LOOKUP		(1 << 7)
-#define ARRAY_LOOKUP		(1 << 8)
+enum test_type {
+	HASH_PREALLOC,
+	PERCPU_HASH_PREALLOC,
+	HASH_KMALLOC,
+	PERCPU_HASH_KMALLOC,
+	LRU_HASH_PREALLOC,
+	NOCOMMON_LRU_HASH_PREALLOC,
+	LPM_KMALLOC,
+	HASH_LOOKUP,
+	ARRAY_LOOKUP,
+	NR_TESTS,
+};
+
+const char *test_map_names[NR_TESTS] = {
+	[HASH_PREALLOC] = "hash_map",
+	[PERCPU_HASH_PREALLOC] = "percpu_hash_map",
+	[HASH_KMALLOC] = "hash_map_alloc",
+	[PERCPU_HASH_KMALLOC] = "percpu_hash_map_alloc",
+	[LRU_HASH_PREALLOC] = "lru_hash_map",
+	[NOCOMMON_LRU_HASH_PREALLOC] = "nocommon_lru_hash_map",
+	[LPM_KMALLOC] = "lpm_trie_map_alloc",
+	[HASH_LOOKUP] = "hash_map",
+	[ARRAY_LOOKUP] = "array_map",
+};
 
 static int test_flags = ~0;
+static uint32_t num_map_entries;
+static uint32_t max_cnt = 1000000;
+
+static int check_test_flags(enum test_type t)
+{
+	return test_flags & TEST_BIT(t);
+}
 
 static void test_hash_prealloc(int cpu)
 {
@@ -52,13 +74,13 @@ static void test_hash_prealloc(int cpu)
 	int i;
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
+	for (i = 0; i < max_cnt; i++)
 		syscall(__NR_getuid);
 	printf("%d:hash_map_perf pre-alloc %lld events per sec\n",
-	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+	       cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
 }
 
-static void do_test_lru(int lru_test_flag, int cpu)
+static void do_test_lru(enum test_type test, int cpu)
 {
 	struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 };
 	const char *test_name;
@@ -68,10 +90,10 @@ static void do_test_lru(int lru_test_flag, int cpu)
 	in6.sin6_addr.s6_addr16[0] = 0xdead;
 	in6.sin6_addr.s6_addr16[1] = 0xbeef;
 
-	if (lru_test_flag & LRU_HASH_PREALLOC) {
+	if (test == LRU_HASH_PREALLOC) {
 		test_name = "lru_hash_map_perf";
 		in6.sin6_addr.s6_addr16[7] = 0;
-	} else if (lru_test_flag & NOCOMMON_LRU_HASH_PREALLOC) {
+	} else if (test == NOCOMMON_LRU_HASH_PREALLOC) {
 		test_name = "nocommon_lru_hash_map_perf";
 		in6.sin6_addr.s6_addr16[7] = 1;
 	} else {
@@ -79,13 +101,13 @@ static void do_test_lru(int lru_test_flag, int cpu)
 	}
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++) {
+	for (i = 0; i < max_cnt; i++) {
 		ret = connect(-1, (const struct sockaddr *)&in6, sizeof(in6));
 		assert(ret == -1 && errno == EBADF);
 	}
 	printf("%d:%s pre-alloc %lld events per sec\n",
 	       cpu, test_name,
-	       MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+	       max_cnt * 1000000000ll / (time_get_ns() - start_time));
 }
 
 static void test_lru_hash_prealloc(int cpu)
@@ -104,10 +126,10 @@ static void test_percpu_hash_prealloc(int cpu)
 	int i;
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
+	for (i = 0; i < max_cnt; i++)
 		syscall(__NR_geteuid);
 	printf("%d:percpu_hash_map_perf pre-alloc %lld events per sec\n",
-	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+	       cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
 }
 
 static void test_hash_kmalloc(int cpu)
@@ -116,10 +138,10 @@ static void test_hash_kmalloc(int cpu)
 	int i;
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
+	for (i = 0; i < max_cnt; i++)
 		syscall(__NR_getgid);
 	printf("%d:hash_map_perf kmalloc %lld events per sec\n",
-	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+	       cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
 }
 
 static void test_percpu_hash_kmalloc(int cpu)
@@ -128,10 +150,10 @@ static void test_percpu_hash_kmalloc(int cpu)
 	int i;
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
+	for (i = 0; i < max_cnt; i++)
 		syscall(__NR_getegid);
 	printf("%d:percpu_hash_map_perf kmalloc %lld events per sec\n",
-	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+	       cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
 }
 
 static void test_lpm_kmalloc(int cpu)
@@ -140,10 +162,10 @@ static void test_lpm_kmalloc(int cpu)
 	int i;
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
+	for (i = 0; i < max_cnt; i++)
 		syscall(__NR_gettid);
 	printf("%d:lpm_perf kmalloc %lld events per sec\n",
-	       cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
+	       cpu, max_cnt * 1000000000ll / (time_get_ns() - start_time));
 }
 
 static void test_hash_lookup(int cpu)
@@ -152,10 +174,10 @@ static void test_hash_lookup(int cpu)
 	int i;
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
+	for (i = 0; i < max_cnt; i++)
 		syscall(__NR_getpgid, 0);
 	printf("%d:hash_lookup %lld lookups per sec\n",
-	       cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time));
+	       cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time));
 }
 
 static void test_array_lookup(int cpu)
@@ -164,46 +186,38 @@ static void test_array_lookup(int cpu)
 	int i;
 
 	start_time = time_get_ns();
-	for (i = 0; i < MAX_CNT; i++)
+	for (i = 0; i < max_cnt; i++)
 		syscall(__NR_getpgrp, 0);
 	printf("%d:array_lookup %lld lookups per sec\n",
-	       cpu, MAX_CNT * 1000000000ll * 64 / (time_get_ns() - start_time));
+	       cpu, max_cnt * 1000000000ll * 64 / (time_get_ns() - start_time));
 }
 
+typedef void (*test_func)(int cpu);
+const test_func test_funcs[] = {
+	[HASH_PREALLOC] = test_hash_prealloc,
+	[PERCPU_HASH_PREALLOC] = test_percpu_hash_prealloc,
+	[HASH_KMALLOC] = test_hash_kmalloc,
+	[PERCPU_HASH_KMALLOC] = test_percpu_hash_kmalloc,
+	[LRU_HASH_PREALLOC] = test_lru_hash_prealloc,
+	[NOCOMMON_LRU_HASH_PREALLOC] = test_nocommon_lru_hash_prealloc,
+	[LPM_KMALLOC] = test_lpm_kmalloc,
+	[HASH_LOOKUP] = test_hash_lookup,
+	[ARRAY_LOOKUP] = test_array_lookup,
+};
+
 static void loop(int cpu)
 {
 	cpu_set_t cpuset;
+	int i;
 
 	CPU_ZERO(&cpuset);
 	CPU_SET(cpu, &cpuset);
 	sched_setaffinity(0, sizeof(cpuset), &cpuset);
 
-	if (test_flags & HASH_PREALLOC)
-		test_hash_prealloc(cpu);
-
-	if (test_flags & PERCPU_HASH_PREALLOC)
-		test_percpu_hash_prealloc(cpu);
-
-	if (test_flags & HASH_KMALLOC)
-		test_hash_kmalloc(cpu);
-
-	if (test_flags & PERCPU_HASH_KMALLOC)
-		test_percpu_hash_kmalloc(cpu);
-
-	if (test_flags & LRU_HASH_PREALLOC)
-		test_lru_hash_prealloc(cpu);
-
-	if (test_flags & NOCOMMON_LRU_HASH_PREALLOC)
-		test_nocommon_lru_hash_prealloc(cpu);
-
-	if (test_flags & LPM_KMALLOC)
-		test_lpm_kmalloc(cpu);
-
-	if (test_flags & HASH_LOOKUP)
-		test_hash_lookup(cpu);
-
-	if (test_flags & ARRAY_LOOKUP)
-		test_array_lookup(cpu);
+	for (i = 0; i < NR_TESTS; i++) {
+		if (check_test_flags(i))
+			test_funcs[i](cpu);
+	}
 }
 
 static void run_perf_test(int tasks)
@@ -260,6 +274,22 @@ static void fill_lpm_trie(void)
 	assert(!r);
 }
 
+static void fixup_map(struct bpf_map_def *map, const char *name, int idx)
+{
+	int i;
+
+	if (num_map_entries <= 0)
+		return;
+
+	/* Only change the max_entries for the enabled test(s) */
+	for (i = 0; i < NR_TESTS; i++) {
+		if (!strcmp(test_map_names[i], name) &&
+		    (check_test_flags(i))) {
+			map->max_entries = num_map_entries;
+		}
+	}
+}
+
 int main(int argc, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
@@ -275,7 +305,13 @@ int main(int argc, char **argv)
 	if (argc > 2)
 		num_cpu = atoi(argv[2]) ? : num_cpu;
 
-	if (load_bpf_file(filename)) {
+	if (argc > 3)
+		num_map_entries = atoi(argv[3]);
+
+	if (argc > 4)
+		max_cnt = atoi(argv[4]);
+
+	if (load_bpf_file_fixup_map(filename, fixup_map)) {
 		printf("%s", bpf_log_buf);
 		return 1;
 	}
-- 
2.9.3

* [PATCH net-next 5/6] bpf: lru: Lower the PERCPU_NR_SCANS from 16 to 4
From: Martin KaFai Lau @ 2017-04-14 17:30 UTC
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann

After doing map_perf_test with a much bigger
BPF_F_NO_COMMON_LRU map, the perf report shows a
lot of time spent in rotating the inactive list (i.e.
__bpf_lru_list_rotate_inactive):
> map_perf_test 32 8 10000 1000000 | awk '{sum += $3}END{print sum}'
19644783 (19M/s)
> map_perf_test 32 8 10000000 10000000 |  awk '{sum += $3}END{print sum}'
6283930 (6.28M/s)

Elements on the inactive list are usually not in the CPU cache, so
scanning many of them per rotation is expensive.  Hence, there is a
need to tune the PERCPU_NR_SCANS value.

This patch finds a better number of elements to
scan during each list rotation.  The PERCPU_NR_SCANS (which
is defined the same as PERCPU_FREE_TARGET) decreases
from 16 elements to 4 elements.  This change only
affects the BPF_F_NO_COMMON_LRU map.

The test_lru_dist benchmark does not show a meaningful difference
between 16 and 4.  Our production L4 load balancer, which uses
the LRU map for conntrack-ing, also shows little change in cache
hit rate.  Since both the benchmark and the production data show no
cache-hit difference, PERCPU_NR_SCANS is lowered from 16 to 4.
We can consider making it configurable if we find a usecase
later that shows another value works better and/or use
a different rotation strategy.
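
As a toy model (not the kernel code; all names here are made up) of
why the constant matters: one rotation pass touches at most
PERCPU_NR_SCANS cold nodes, promoting the recently referenced ones
back to the active list, so lowering it bounds the cache misses paid
per map update:

	#include <stdbool.h>
	#include <stddef.h>

	#define PERCPU_NR_SCANS	4

	struct toy_node {
		struct toy_node *next;
		bool ref;		/* set on lookup */
	};

	/* Pop up to PERCPU_NR_SCANS nodes off the inactive list: push
	 * referenced ones onto the active list and return the rest for
	 * a later shrink/reclaim pass.
	 */
	static struct toy_node *rotate_once(struct toy_node **inactive,
					    struct toy_node **active)
	{
		struct toy_node *cold = NULL;
		int i;

		for (i = 0; i < PERCPU_NR_SCANS && *inactive; i++) {
			struct toy_node *n = *inactive;

			*inactive = n->next;
			if (n->ref) {
				n->ref = false;
				n->next = *active;	/* promote */
				*active = n;
			} else {
				n->next = cold;
				cold = n;
			}
		}
		return cold;
	}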

After this change:
> map_perf_test 32 8 10000000 10000000 |  awk '{sum += $3}END{print sum}'
9240324 (9.2M/s)

i.e. 6.28M/s -> 9.2M/s

The test_lru_dist has not shown a meaningful difference:
> test_lru_dist zipf.100k.a1_01.out 4000 1
nr_misses: 31575 (Before) vs 31566 (After)

> test_lru_dist zipf.100k.a0_01.out 40000 1
nr_misses: 67036 (Before) vs 67031 (After)

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/bpf_lru_list.c                  | 2 +-
 tools/testing/selftests/bpf/test_lru_map.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
index f62d1d56f41d..e6ef4401a138 100644
--- a/kernel/bpf/bpf_lru_list.c
+++ b/kernel/bpf/bpf_lru_list.c
@@ -13,7 +13,7 @@
 #define LOCAL_FREE_TARGET		(128)
 #define LOCAL_NR_SCANS			LOCAL_FREE_TARGET
 
-#define PERCPU_FREE_TARGET		(16)
+#define PERCPU_FREE_TARGET		(4)
 #define PERCPU_NR_SCANS			PERCPU_FREE_TARGET
 
 /* Helpers to get the local list index */
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 87c05e5bdfdd..8c10c9180c1a 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -22,7 +22,7 @@
 #include "bpf_util.h"
 
 #define LOCAL_FREE_TARGET	(128)
-#define PERCPU_FREE_TARGET	(16)
+#define PERCPU_FREE_TARGET	(4)
 
 static int nr_cpus;
 
-- 
2.9.3

* [PATCH net-next 6/6] bpf: lru: Add map-in-map LRU example
From: Martin KaFai Lau @ 2017-04-14 17:30 UTC
  To: netdev; +Cc: Alexei Starovoitov, Daniel Borkmann

This patch adds a map-in-map LRU example.
If we know only a subset of cores will use the
LRU, we can allocate a common LRU list per target core
and store them in an array-of-maps (one inner LRU hash per core).

It allows using the common LRU map with map-update performance
comparable to the BPF_F_NO_COMMON_LRU map, but without wasting memory
on the unused cores that we know will never access the LRU map.

BPF_F_NO_COMMON_LRU:
> map_perf_test 32 8 10000000 10000000 | awk '{sum += $3}END{print sum}'
9234314 (9.23M/s)

map-in-map LRU:
> map_perf_test 512 8 1260000 80000000 | awk '{sum += $3}END{print sum}'
9962743 (9.96M/s)

Note that the max_entries for the map-in-map LRU test is 1260000, which
is the max_entries for each inner LRU map.  8 processes are
started, so 8 * 1260000 = 10080000 (~10M), which is close to what is
used in the BPF_F_NO_COMMON_LRU test.
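
The per-cpu selection of the inner map in the BPF program boils down
to the following fragment, condensed from the map_perf_test_kern.c
diff below:

	int cpu = bpf_get_smp_processor_id();
	void *nolocal_lru_map;

	nolocal_lru_map = bpf_map_lookup_elem(&array_of_lru_hashs, &cpu);
	if (!nolocal_lru_map)
		return -ENOENT;	/* no inner map installed for this cpu */

	return bpf_map_update_elem(nolocal_lru_map, &key, &val, BPF_ANY);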

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 samples/bpf/map_perf_test_kern.c | 34 ++++++++++++++++++++--
 samples/bpf/map_perf_test_user.c | 62 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 93 insertions(+), 3 deletions(-)

diff --git a/samples/bpf/map_perf_test_kern.c b/samples/bpf/map_perf_test_kern.c
index 404ed53b8a53..245165817fbe 100644
--- a/samples/bpf/map_perf_test_kern.c
+++ b/samples/bpf/map_perf_test_kern.c
@@ -11,6 +11,7 @@
 #include "bpf_helpers.h"
 
 #define MAX_ENTRIES 1000
+#define MAX_NR_CPUS 1024
 
 struct bpf_map_def SEC("maps") hash_map = {
 	.type = BPF_MAP_TYPE_HASH,
@@ -34,6 +35,19 @@ struct bpf_map_def SEC("maps") nocommon_lru_hash_map = {
 	.map_flags = BPF_F_NO_COMMON_LRU,
 };
 
+struct bpf_map_def SEC("maps") inner_lru_hash_map = {
+	.type = BPF_MAP_TYPE_LRU_HASH,
+	.key_size = sizeof(u32),
+	.value_size = sizeof(long),
+	.max_entries = MAX_ENTRIES,
+};
+
+struct bpf_map_def SEC("maps") array_of_lru_hashs = {
+	.type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
+	.key_size = sizeof(u32),
+	.max_entries = MAX_NR_CPUS,
+};
+
 struct bpf_map_def SEC("maps") percpu_hash_map = {
 	.type = BPF_MAP_TYPE_PERCPU_HASH,
 	.key_size = sizeof(u32),
@@ -154,13 +168,27 @@ int stress_lru_hmap_alloc(struct pt_regs *ctx)
 
 	test_case = dst6[7];
 
-	if (test_case == 0)
+	if (test_case == 0) {
 		ret = bpf_map_update_elem(&lru_hash_map, &key, &val, BPF_ANY);
-	else if (test_case == 1)
+	} else if (test_case == 1) {
 		ret = bpf_map_update_elem(&nocommon_lru_hash_map, &key, &val,
 					  BPF_ANY);
-	else
+	} else if (test_case == 2) {
+		void *nolocal_lru_map;
+		int cpu = bpf_get_smp_processor_id();
+
+		nolocal_lru_map = bpf_map_lookup_elem(&array_of_lru_hashs,
+						      &cpu);
+		if (!nolocal_lru_map) {
+			ret = -ENOENT;
+			goto done;
+		}
+
+		ret = bpf_map_update_elem(nolocal_lru_map, &key, &val,
+					  BPF_ANY);
+	} else {
 		ret = -EINVAL;
+	}
 
 done:
 	if (ret)
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 2a12f48b5c6d..6ac778153315 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -25,6 +25,7 @@
 #include "bpf_load.h"
 
 #define TEST_BIT(t) (1U << (t))
+#define MAX_NR_CPUS 1024
 
 static __u64 time_get_ns(void)
 {
@@ -44,6 +45,7 @@ enum test_type {
 	LPM_KMALLOC,
 	HASH_LOOKUP,
 	ARRAY_LOOKUP,
+	INNER_LRU_HASH_PREALLOC,
 	NR_TESTS,
 };
 
@@ -57,10 +59,14 @@ const char *test_map_names[NR_TESTS] = {
 	[LPM_KMALLOC] = "lpm_trie_map_alloc",
 	[HASH_LOOKUP] = "hash_map",
 	[ARRAY_LOOKUP] = "array_map",
+	[INNER_LRU_HASH_PREALLOC] = "inner_lru_hash_map",
 };
 
 static int test_flags = ~0;
 static uint32_t num_map_entries;
+static uint32_t inner_lru_hash_size;
+static int inner_lru_hash_idx = -1;
+static int array_of_lru_hashs_idx = -1;
 static uint32_t max_cnt = 1000000;
 
 static int check_test_flags(enum test_type t)
@@ -82,11 +88,42 @@ static void test_hash_prealloc(int cpu)
 
 static void do_test_lru(enum test_type test, int cpu)
 {
+	static int inner_lru_map_fds[MAX_NR_CPUS];
+
 	struct sockaddr_in6 in6 = { .sin6_family = AF_INET6 };
 	const char *test_name;
 	__u64 start_time;
 	int i, ret;
 
+	if (test == INNER_LRU_HASH_PREALLOC) {
+		int outer_fd = map_fd[array_of_lru_hashs_idx];
+
+		assert(cpu < MAX_NR_CPUS);
+
+		if (cpu) {
+			inner_lru_map_fds[cpu] =
+				bpf_create_map(BPF_MAP_TYPE_LRU_HASH,
+					       sizeof(uint32_t), sizeof(long),
+					       inner_lru_hash_size, 0);
+			if (inner_lru_map_fds[cpu] == -1) {
+				printf("cannot create BPF_MAP_TYPE_LRU_HASH %s(%d)\n",
+				       strerror(errno), errno);
+				exit(1);
+			}
+		} else {
+			inner_lru_map_fds[cpu] = map_fd[inner_lru_hash_idx];
+		}
+
+		ret = bpf_map_update_elem(outer_fd, &cpu,
+					  &inner_lru_map_fds[cpu],
+					  BPF_ANY);
+		if (ret) {
+			printf("cannot update ARRAY_OF_LRU_HASHS with key:%u. %s(%d)\n",
+			       cpu, strerror(errno), errno);
+			exit(1);
+		}
+	}
+
 	in6.sin6_addr.s6_addr16[0] = 0xdead;
 	in6.sin6_addr.s6_addr16[1] = 0xbeef;
 
@@ -96,6 +133,9 @@ static void do_test_lru(enum test_type test, int cpu)
 	} else if (test == NOCOMMON_LRU_HASH_PREALLOC) {
 		test_name = "nocommon_lru_hash_map_perf";
 		in6.sin6_addr.s6_addr16[7] = 1;
+	} else if (test == INNER_LRU_HASH_PREALLOC) {
+		test_name = "inner_lru_hash_map_perf";
+		in6.sin6_addr.s6_addr16[7] = 2;
 	} else {
 		assert(0);
 	}
@@ -120,6 +160,11 @@ static void test_nocommon_lru_hash_prealloc(int cpu)
 	do_test_lru(NOCOMMON_LRU_HASH_PREALLOC, cpu);
 }
 
+static void test_inner_lru_hash_prealloc(int cpu)
+{
+	do_test_lru(INNER_LRU_HASH_PREALLOC, cpu);
+}
+
 static void test_percpu_hash_prealloc(int cpu)
 {
 	__u64 start_time;
@@ -203,6 +248,7 @@ const test_func test_funcs[] = {
 	[LPM_KMALLOC] = test_lpm_kmalloc,
 	[HASH_LOOKUP] = test_hash_lookup,
 	[ARRAY_LOOKUP] = test_array_lookup,
+	[INNER_LRU_HASH_PREALLOC] = test_inner_lru_hash_prealloc,
 };
 
 static void loop(int cpu)
@@ -278,9 +324,25 @@ static void fixup_map(struct bpf_map_def *map, const char *name, int idx)
 {
 	int i;
 
+	if (!strcmp("inner_lru_hash_map", name)) {
+		inner_lru_hash_idx = idx;
+		inner_lru_hash_size = map->max_entries;
+	}
+
+	if (!strcmp("array_of_lru_hashs", name)) {
+		if (inner_lru_hash_idx == -1) {
+			printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n");
+			exit(1);
+		}
+		map->inner_map_idx = inner_lru_hash_idx;
+		array_of_lru_hashs_idx = idx;
+	}
+
 	if (num_map_entries <= 0)
 		return;
 
+	inner_lru_hash_size = num_map_entries;
+
 	/* Only change the max_entries for the enabled test(s) */
 	for (i = 0; i < NR_TESTS; i++) {
 		if (!strcmp(test_map_names[i], name) &&
-- 
2.9.3

* Re: [PATCH net-next 0/6] bpf: LRU performance and test-program improvements
From: David Miller @ 2017-04-17 17:56 UTC
  To: kafai; +Cc: netdev, ast, daniel

From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 14 Apr 2017 10:30:24 -0700

> The first 4 patches make a few improvements to the LRU tests.
> 
> Patch 5/6 is to improve the performance of BPF_F_NO_COMMON_LRU map.
> 
> Patch 6/6 adds an example in using LRU map with map-in-map.

Series applied, thank you.
