From: Jesper Dangaard Brouer <brouer@redhat.com>
To: netdev@vger.kernel.org
Cc: jakub.kicinski@netronome.com,
"Michael S. Tsirkin" <mst@redhat.com>,
pavel.odintsov@gmail.com, Jason Wang <jasowang@redhat.com>,
mchan@broadcom.com, John Fastabend <john.fastabend@gmail.com>,
peter.waskiewicz.jr@intel.com,
Jesper Dangaard Brouer <brouer@redhat.com>,
Daniel Borkmann <borkmann@iogearbox.net>,
Alexei Starovoitov <alexei.starovoitov@gmail.com>,
Andy Gospodarek <andy@greyhouse.net>
Subject: [net-next V4 PATCH 4/5] bpf: cpumap add tracepoints
Date: Wed, 04 Oct 2017 14:04:00 +0200 [thread overview]
Message-ID: <150711864030.9499.18274057292232018278.stgit@firesoul> (raw)
In-Reply-To: <150711858281.9499.7767364427831352921.stgit@firesoul>
This adds two tracepoint to the cpumap. One for the enqueue side
trace_xdp_cpumap_enqueue() and one for the kthread dequeue side
trace_xdp_cpumap_kthread().
To mitigate the tracepoint overhead, these are invoked during the
enqueue/dequeue bulking phases, thus amortizing the cost.
The obvious use-cases are for debugging and monitoring. The
non-intuitive use-case is using these as a feedback loop to know the
system load. One can imagine auto-scaling by reducing, adding or
activating more worker CPUs on demand.
V4: tracepoint remove time_limit info, instead add sched info
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
---
include/trace/events/xdp.h | 70 ++++++++++++++++++++++++++++++++++++++++++++
kernel/bpf/cpumap.c | 23 +++++++++++---
2 files changed, 88 insertions(+), 5 deletions(-)
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index eb2ece96c1a2..0c8dec61987e 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -150,6 +150,76 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err,
trace_xdp_redirect_map_err(dev, xdp, devmap_ifindex(fwd, map), \
err, map, idx)
+TRACE_EVENT(xdp_cpumap_kthread,
+
+ TP_PROTO(int map_id, unsigned int processed, unsigned int drops,
+ int sched),
+
+ TP_ARGS(map_id, processed, drops, sched),
+
+ TP_STRUCT__entry(
+ __field(int, map_id)
+ __field(u32, act)
+ __field(int, cpu)
+ __field(unsigned int, drops)
+ __field(unsigned int, processed)
+ __field(int, sched)
+ ),
+
+ TP_fast_assign(
+ __entry->map_id = map_id;
+ __entry->act = XDP_REDIRECT;
+ __entry->cpu = smp_processor_id();
+ __entry->drops = drops;
+ __entry->processed = processed;
+ __entry->sched = sched;
+ ),
+
+ TP_printk("kthread"
+ " cpu=%d map_id=%d action=%s"
+ " processed=%u drops=%u"
+ " sched=%d",
+ __entry->cpu, __entry->map_id,
+ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+ __entry->processed, __entry->drops,
+ __entry->sched)
+);
+
+TRACE_EVENT(xdp_cpumap_enqueue,
+
+ TP_PROTO(int map_id, unsigned int processed, unsigned int drops,
+ int to_cpu),
+
+ TP_ARGS(map_id, processed, drops, to_cpu),
+
+ TP_STRUCT__entry(
+ __field(int, map_id)
+ __field(u32, act)
+ __field(int, cpu)
+ __field(unsigned int, drops)
+ __field(unsigned int, processed)
+ __field(int, to_cpu)
+ ),
+
+ TP_fast_assign(
+ __entry->map_id = map_id;
+ __entry->act = XDP_REDIRECT;
+ __entry->cpu = smp_processor_id();
+ __entry->drops = drops;
+ __entry->processed = processed;
+ __entry->to_cpu = to_cpu;
+ ),
+
+ TP_printk("enqueue"
+ " cpu=%d map_id=%d action=%s"
+ " processed=%u drops=%u"
+ " to_cpu=%d",
+ __entry->cpu, __entry->map_id,
+ __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB),
+ __entry->processed, __entry->drops,
+ __entry->to_cpu)
+);
+
#endif /* _TRACE_XDP_H */
#include <trace/define_trace.h>
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 4383d524432f..571c668ac61b 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -23,6 +23,7 @@
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
+#include <trace/events/xdp.h>
#include <linux/netdevice.h> /* netif_receive_skb_core */
#include <linux/etherdevice.h> /* eth_type_trans */
@@ -258,14 +259,15 @@ static int cpu_map_kthread_run(void *data)
set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop()) {
- unsigned int processed = 0, drops = 0;
+ unsigned int processed = 0, drops = 0, sched = 0;
struct xdp_pkt *xdp_pkt;
/* Release CPU reschedule checks */
if (__ptr_ring_empty(rcpu->queue)) {
schedule();
+ sched = 1;
} else {
- cond_resched();
+ sched = cond_resched();
}
/* Process packets in rcpu->queue */
@@ -294,6 +296,9 @@ static int cpu_map_kthread_run(void *data)
if (++processed == 8)
break;
}
+ /* Feedback loop via tracepoint */
+ trace_xdp_cpumap_kthread(rcpu->map_id, processed, drops, sched);
+
local_bh_enable(); /* resched point, may call do_softirq() */
__set_current_state(TASK_INTERRUPTIBLE);
@@ -331,7 +336,10 @@ struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, int map_id)
err = ptr_ring_init(rcpu->queue, qsize, gfp);
if (err)
goto fail;
- rcpu->qsize = qsize;
+
+ rcpu->cpu = cpu;
+ rcpu->map_id = map_id;
+ rcpu->qsize = qsize;
/* Setup kthread */
rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
@@ -557,6 +565,8 @@ const struct bpf_map_ops cpu_map_ops = {
static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
struct xdp_bulk_queue *bq)
{
+ unsigned int processed = 0, drops = 0;
+ const int to_cpu = rcpu->cpu;
struct ptr_ring *q;
int i;
@@ -572,13 +582,16 @@ static int bq_flush_to_queue(struct bpf_cpu_map_entry *rcpu,
err = __ptr_ring_produce(q, xdp_pkt);
if (err) {
- /* Free xdp_pkt */
- page_frag_free(xdp_pkt);
+ drops++;
+ page_frag_free(xdp_pkt); /* Free xdp_pkt */
}
+ processed++;
}
bq->count = 0;
spin_unlock(&q->producer_lock);
+ /* Feedback loop via tracepoints */
+ trace_xdp_cpumap_enqueue(rcpu->map_id, processed, drops, to_cpu);
return 0;
}
next prev parent reply other threads:[~2017-10-04 12:04 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-10-04 12:03 [net-next V4 PATCH 0/5] New bpf cpumap type for XDP_REDIRECT Jesper Dangaard Brouer
2017-10-04 12:03 ` [net-next V4 PATCH 1/5] bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP Jesper Dangaard Brouer
2017-10-04 19:02 ` Alexei Starovoitov
2017-10-05 18:01 ` John Fastabend
2017-10-06 9:03 ` Jesper Dangaard Brouer
2017-10-05 9:40 ` Daniel Borkmann
2017-10-06 10:50 ` Jesper Dangaard Brouer
2017-10-06 14:52 ` Daniel Borkmann
2017-10-06 15:58 ` Jesper Dangaard Brouer
2017-10-25 16:53 ` [bpf] 3ea693a925: BUG:unable_to_handle_kernel kernel test robot
2017-10-25 16:59 ` Michael S. Tsirkin
2017-10-25 10:02 ` [LKP] " Ye Xiaolong
2017-10-25 12:09 ` Ye Xiaolong
2017-10-25 16:54 ` kernel test robot
2017-10-04 12:03 ` [net-next V4 PATCH 2/5] bpf: XDP_REDIRECT enable use of cpumap Jesper Dangaard Brouer
2017-10-05 10:10 ` Daniel Borkmann
2017-10-06 11:17 ` Jesper Dangaard Brouer
2017-10-06 12:01 ` Jesper Dangaard Brouer
2017-10-06 15:45 ` Jesper Dangaard Brouer
2017-10-06 8:30 ` kbuild test robot
2017-10-04 12:03 ` [net-next V4 PATCH 3/5] bpf: cpumap xdp_buff to skb conversion and allocation Jesper Dangaard Brouer
2017-10-05 10:22 ` Daniel Borkmann
2017-10-06 12:11 ` Jesper Dangaard Brouer
2017-10-04 12:04 ` Jesper Dangaard Brouer [this message]
2017-10-04 12:04 ` [net-next V4 PATCH 5/5] samples/bpf: add cpumap sample program xdp_redirect_cpu Jesper Dangaard Brouer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=150711864030.9499.18274057292232018278.stgit@firesoul \
--to=brouer@redhat.com \
--cc=alexei.starovoitov@gmail.com \
--cc=andy@greyhouse.net \
--cc=borkmann@iogearbox.net \
--cc=jakub.kicinski@netronome.com \
--cc=jasowang@redhat.com \
--cc=john.fastabend@gmail.com \
--cc=mchan@broadcom.com \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=pavel.odintsov@gmail.com \
--cc=peter.waskiewicz.jr@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox