netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Alexei Starovoitov <ast@plumgrid.com>
To: "David S. Miller" <davem@davemloft.net>
Cc: Ingo Molnar <mingo@kernel.org>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Andy Lutomirski <luto@amacapital.net>,
	Steven Rostedt <rostedt@goodmis.org>,
	Daniel Borkmann <dborkman@redhat.com>,
	Chema Gonzalez <chema@google.com>,
	Eric Dumazet <edumazet@google.com>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Brendan Gregg <brendan.d.gregg@gmail.com>,
	Namhyung Kim <namhyung@kernel.org>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Kees Cook <keescook@chromium.org>,
	linux-api@vger.kernel.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH RFC v7 net-next 28/28] samples: bpf: IO latency analysis (iosnoop/heatmap)
Date: Tue, 26 Aug 2014 19:29:42 -0700	[thread overview]
Message-ID: <1409106582-10095-29-git-send-email-ast@plumgrid.com> (raw)
In-Reply-To: <1409106582-10095-1-git-send-email-ast@plumgrid.com>

eBPF C program attaches to block_rq_issue/block_rq_complete events to calculate
IO latency. Then it waits for the first 100 events to compute average latency
and uses range [0 .. ave_lat * 2] to record histogram of events in this latency
range.
User space reads this histogram map every 2 seconds and prints it as a 'heatmap'
using gray shades of text terminal. Black spaces have many events and white
spaces have very few events. Left most space is the smallest latency, right most
space is the largest latency in the range.
If kernel sees too many events that fall out of histogram range, user space
adjusts the range up, so heatmap for next 2 seconds will be more accurate.

Usage:
$ sudo ./ex3
and do 'sudo dd if=/dev/sda of=/dev/null' in other terminal.
Observe IO latencies and how different activity (like 'make kernel') affects it.

Similar experiments can be done for network transmit latencies, syscalls, etc

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
---
 samples/bpf/Makefile   |    6 +-
 samples/bpf/ex3_kern.c |  104 +++++++++++++++++++++++++++++++++
 samples/bpf/ex3_user.c |  149 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 257 insertions(+), 2 deletions(-)
 create mode 100644 samples/bpf/ex3_kern.c
 create mode 100644 samples/bpf/ex3_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index d2de86188925..9e7a9bc2194d 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -2,21 +2,23 @@
 obj- := dummy.o
 
 # List of programs to build
-hostprogs-y := dropmon test_verifier ex1 ex2
+hostprogs-y := dropmon test_verifier ex1 ex2 ex3
 
 dropmon-objs := dropmon.o libbpf.o
 test_verifier-objs := test_verifier.o libbpf.o
 ex1-objs := bpf_load.o libbpf.o ex1_user.o
 ex2-objs := bpf_load.o libbpf.o ex2_user.o
+ex3-objs := bpf_load.o libbpf.o ex3_user.o
 
 # Tell kbuild to always build the programs
-always := $(hostprogs-y) ex1_kern.o ex2_kern.o
+always := $(hostprogs-y) ex1_kern.o ex2_kern.o ex3_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
 HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
 HOSTLOADLIBES_ex1 += -lelf
 HOSTLOADLIBES_ex2 += -lelf
+HOSTLOADLIBES_ex3 += -lelf
 
 LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc
 
diff --git a/samples/bpf/ex3_kern.c b/samples/bpf/ex3_kern.c
new file mode 100644
index 000000000000..45ff40ff1077
--- /dev/null
+++ b/samples/bpf/ex3_kern.c
@@ -0,0 +1,104 @@
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <uapi/linux/bpf.h>
+#include <trace/bpf_trace.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") my_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(long),
+	.value_size = sizeof(u64),
+	.max_entries = 4096,
+};
+
+/* alternative events:
+ * SEC("events/syscalls/sys_enter_write")
+ * SEC("events/net/net_dev_start_xmit")
+ */
+SEC("events/block/block_rq_issue")
+int bpf_prog1(struct bpf_context *ctx)
+{
+	long rq = ctx->arg2; /* long rq = bpf_get_current(); */
+	u64 val = bpf_ktime_get_ns();
+
+	bpf_map_update_elem(&my_map, &rq, &val);
+	return 0;
+}
+
+struct globals {
+	u64 lat_ave;
+	u64 lat_sum;
+	u64 missed;
+	u64 max_lat;
+	int num_samples;
+};
+
+struct bpf_map_def SEC("maps") global_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct globals),
+	.max_entries = 1,
+};
+
+#define MAX_SLOT 32
+
+struct bpf_map_def SEC("maps") lat_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(int),
+	.value_size = sizeof(u64),
+	.max_entries = MAX_SLOT,
+};
+
+/* alternative evenets:
+ * SEC("events/syscalls/sys_exit_write")
+ * SEC("events/net/net_dev_xmit")
+ */
+SEC("events/block/block_rq_complete")
+int bpf_prog2(struct bpf_context *ctx)
+{
+	long rq = ctx->arg2;
+	void *value;
+
+	value = bpf_map_lookup_elem(&my_map, &rq);
+	if (!value)
+		return 0;
+
+	u64 cur_time = bpf_ktime_get_ns();
+	u64 delta = (cur_time - *(u64 *)value) / 1000;
+
+	bpf_map_delete_elem(&my_map, &rq);
+
+	int ind = 1;
+	struct globals *g = bpf_map_lookup_elem(&global_map, &ind);
+	if (!g)
+		return 0;
+	if (g->lat_ave == 0) {
+		g->num_samples++;
+		g->lat_sum += delta;
+		if (g->num_samples >= 100) {
+			g->lat_ave = g->lat_sum / g->num_samples;
+			if (0/* debug */) {
+				char fmt[] = "after %d samples average latency %ld usec\n";
+				bpf_printk(fmt, sizeof(fmt), g->num_samples,
+					   g->lat_ave);
+			}
+		}
+	} else {
+		u64 max_lat = g->lat_ave * 2;
+		if (delta > max_lat) {
+			g->missed++;
+			if (delta > g->max_lat)
+				g->max_lat = delta;
+			return 0;
+		}
+
+		ind = delta * MAX_SLOT / max_lat;
+		value = bpf_map_lookup_elem(&lat_map, &ind);
+		if (!value)
+			return 0;
+		(*(u64 *)value) ++;
+	}
+
+	return 0;
+}
+char license[] SEC("license") = "GPL";
diff --git a/samples/bpf/ex3_user.c b/samples/bpf/ex3_user.c
new file mode 100644
index 000000000000..508a7c3b61c5
--- /dev/null
+++ b/samples/bpf/ex3_user.c
@@ -0,0 +1,149 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+
+struct globals {
+	__u64 lat_ave;
+	__u64 lat_sum;
+	__u64 missed;
+	__u64 max_lat;
+	int num_samples;
+};
+
+static void clear_stats(int fd)
+{
+	int key;
+	__u64 value = 0;
+	for (key = 0; key < 32; key++)
+		bpf_update_elem(fd, &key, &value);
+}
+
+const char *color[] = {
+	"\033[48;5;255m",
+	"\033[48;5;252m",
+	"\033[48;5;250m",
+	"\033[48;5;248m",
+	"\033[48;5;246m",
+	"\033[48;5;244m",
+	"\033[48;5;242m",
+	"\033[48;5;240m",
+	"\033[48;5;238m",
+	"\033[48;5;236m",
+	"\033[48;5;234m",
+	"\033[48;5;232m",
+};
+const int num_colors = ARRAY_SIZE(color);
+
+const char nocolor[] = "\033[00m";
+
+static void print_banner(__u64 max_lat)
+{
+	printf("0 usec     ...          %lld usec\n", max_lat);
+}
+
+static void print_hist(int fd)
+{
+	int key;
+	__u64 value;
+	__u64 cnt[32];
+	__u64 max_cnt = 0;
+	__u64 total_events = 0;
+	int max_bucket = 0;
+
+	for (key = 0; key < 32; key++) {
+		value = 0;
+		bpf_lookup_elem(fd, &key, &value);
+		if (value > 0)
+			max_bucket = key;
+		cnt[key] = value;
+		total_events += value;
+		if (value > max_cnt)
+			max_cnt = value;
+	}
+	clear_stats(fd);
+	for (key = 0; key < 32; key++) {
+		int c = num_colors * cnt[key] / (max_cnt + 1);
+		printf("%s %s", color[c], nocolor);
+	}
+	printf(" captured=%lld", total_events);
+
+	key = 1;
+	struct globals g = {};
+	bpf_lookup_elem(map_fd[1], &key, &g);
+
+	printf(" missed=%lld max_lat=%lld usec\n",
+	       g.missed, g.max_lat);
+
+	if (g.missed > 10 && g.missed > total_events / 10) {
+		printf("adjusting range UP...\n");
+		g.lat_ave = g.max_lat / 2;
+		print_banner(g.lat_ave * 2);
+	} else if (max_bucket < 4 && total_events > 100) {
+		printf("adjusting range DOWN...\n");
+		g.lat_ave = g.lat_ave / 4;
+		print_banner(g.lat_ave * 2);
+	}
+	/* clear some globals */
+	g.missed = 0;
+	g.max_lat = 0;
+	bpf_update_elem(map_fd[1], &key, &g);
+}
+
+static void int_exit(int sig)
+{
+	print_hist(map_fd[2]);
+	exit(0);
+}
+
+int main(int ac, char **argv)
+{
+	char filename[256];
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	clear_stats(map_fd[2]);
+
+	int key = 1;
+	struct globals g = {};
+	if (bpf_update_elem(map_fd[1], &key, &g) != 0) {
+		printf("bug\n");
+		return 1;
+	}
+	signal(SIGINT, int_exit);
+
+	if (fork() == 0) {
+		read_trace_pipe();
+	} else {
+		printf("waiting for events to determine average latency...\n");
+		for (;;) {
+			bpf_lookup_elem(map_fd[1], &key, &g);
+			if (g.lat_ave)
+				break;
+			sleep(1);
+		}
+
+		printf("  IO latency in usec\n"
+		       "  %s %s - many events with this latency\n"
+		       "  %s %s - few events\n",
+		       color[num_colors - 1], nocolor,
+		       color[0], nocolor);
+		print_banner(g.lat_ave * 2);
+		for (;;) {
+			print_hist(map_fd[2]);
+			sleep(2);
+		}
+	}
+
+	return 0;
+}
-- 
1.7.9.5

  parent reply	other threads:[~2014-08-27  2:29 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-27  2:29 [PATCH RFC v7 net-next 00/28] BPF syscall Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 01/28] net: filter: add "load 64-bit immediate" eBPF instruction Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 02/28] net: filter: split filter.h and expose eBPF to user space Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 03/28] bpf: introduce syscall(BPF, ...) and BPF maps Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 04/28] bpf: enable bpf syscall on x64 and i386 Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 05/28] bpf: add lookup/update/delete/iterate methods to BPF maps Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 06/28] bpf: add hashtable type of " Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 07/28] bpf: expand BPF syscall with program load/unload Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 08/28] bpf: handle pseudo BPF_CALL insn Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 09/28] bpf: verifier (add docs) Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 10/28] bpf: verifier (add ability to receive verification log) Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 11/28] bpf: handle pseudo BPF_LD_IMM64 insn Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 12/28] bpf: verifier (add branch/goto checks) Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 13/28] bpf: verifier (add verifier core) Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 14/28] bpf: verifier (add state prunning optimization) Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 17/28] tracing: allow eBPF programs to be attached to events Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 19/28] tracing: allow eBPF programs to be attached to kprobe/kretprobe Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 20/28] tracing: allow eBPF programs to call ktime_get_ns() and get_current() Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 21/28] samples: bpf: add mini eBPF library to manipulate maps and programs Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 22/28] samples: bpf: example of tracing filters with eBPF Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 23/28] bpf: verifier test Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 26/28] samples: bpf: eBPF example in C Alexei Starovoitov
2014-08-27  2:29 ` [PATCH RFC v7 net-next 27/28] samples: bpf: counting " Alexei Starovoitov
2014-08-27  2:29 ` Alexei Starovoitov [this message]
2014-08-27  3:56 ` [PATCH RFC v7 net-next 00/28] BPF syscall Andy Lutomirski
2014-08-27  4:35   ` Alexei Starovoitov
2014-08-27  4:49     ` Andy Lutomirski
2014-08-27  4:57       ` Alexei Starovoitov
     [not found]         ` <CAMEtUuw1n1HzAeyKFj9=nGq7RKZq7TADS-6M_BkHbTsWJ_Gm-Q-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-08-27 18:26           ` Andy Lutomirski
     [not found]             ` <CALCETrXAfZJTsF2nPFw55rHkfbNXKQuF8Frnq3e1wHEoGxLM4w-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2014-08-27 19:18               ` Stephen Hemminger
2014-08-27 19:35                 ` Daniel Borkmann
2014-08-27 19:37                 ` Alexei Starovoitov
     [not found] ` <1409106582-10095-1-git-send-email-ast-uqk4Ao+rVK5Wk0Htik3J/w@public.gmane.org>
2014-08-27  2:29   ` [PATCH RFC v7 net-next 15/28] bpf: allow eBPF programs to use maps Alexei Starovoitov
2014-08-27  2:29   ` [PATCH RFC v7 net-next 16/28] bpf: split eBPF out of NET Alexei Starovoitov
2014-08-27  2:29   ` [PATCH RFC v7 net-next 18/28] tracing: allow eBPF programs call printk() Alexei Starovoitov
2014-08-27  2:29   ` [PATCH RFC v7 net-next 24/28] bpf: llvm backend Alexei Starovoitov
2014-08-27  2:29   ` [PATCH RFC v7 net-next 25/28] samples: bpf: elf file loader Alexei Starovoitov
2014-08-27  6:11   ` [PATCH RFC v7 net-next 00/28] BPF syscall David Miller
     [not found]     ` <20140826.231155.421325307812864648.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
2014-08-27 18:24       ` Steven Stewart-Gallus

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1409106582-10095-29-git-send-email-ast@plumgrid.com \
    --to=ast@plumgrid.com \
    --cc=a.p.zijlstra@chello.nl \
    --cc=akpm@linux-foundation.org \
    --cc=brendan.d.gregg@gmail.com \
    --cc=chema@google.com \
    --cc=davem@davemloft.net \
    --cc=dborkman@redhat.com \
    --cc=edumazet@google.com \
    --cc=hpa@zytor.com \
    --cc=keescook@chromium.org \
    --cc=linux-api@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).