From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S932403AbbJUKHE (ORCPT ); Wed, 21 Oct 2015 06:07:04 -0400 Received: from szxga03-in.huawei.com ([119.145.14.66]:50116 "EHLO szxga03-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932370AbbJUKG6 (ORCPT ); Wed, 21 Oct 2015 06:06:58 -0400 Subject: Re: [RFC PATCH] bpf: Add new bpf map type for timer To: , , , , , , References: <1445232889-155696-1-git-send-email-hekuang@huawei.com> CC: , From: He Kuang Message-ID: <562762B8.5060706@huawei.com> Date: Wed, 21 Oct 2015 18:02:32 +0800 User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Thunderbird/38.0 MIME-Version: 1.0 In-Reply-To: <1445232889-155696-1-git-send-email-hekuang@huawei.com> Content-Type: text/plain; charset="gbk"; format=flowed Content-Transfer-Encoding: 7bit X-Originating-IP: [10.110.54.65] X-CFilter-Loop: Reflected X-Mirapoint-Virus-RAPID-Raw: score=unknown(0), refid=str=0001.0A020201.56276392.0140,ss=1,re=0.000,recu=0.000,reip=0.000,cl=1,cld=1,fgs=0, ip=0.0.0.0, so=2013-05-26 15:14:31, dmn=2013-03-21 17:37:32 X-Mirapoint-Loop-Id: f6a299e6b65584c76ef825f58c6e82d4 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org ping and add ast@plumgrid.com, what's your opinion on this? On 2015/10/19 13:34, He Kuang wrote: > This patch implements a timer map type inherited from array map. eBPF > programs can deloy a timer by updating an entry in timer map, and > destroy that by deleting the entry. The timer delay time(ns) is set by > updating the value field of the entries. > > Currently, an intended empty function is called when the timer is > triggered, then eBPF programs can be attatched to this function and > perfrom customized functions. > > Signed-off-by: He Kuang > --- > > Here's a hypothetical scenario to illustrate the use of timer map. > > A video frame is updated between frame_refresh_start() and > frame_refresh_end(), in most cases, the interval between these two > functions is less than 40ms, but occasionally over 200ms. > > We can set a timer which alarm after the frame_refresh_start() has > been executed 42ms(slightly larger than 40ms) and destory this timer > if frame_refresh_end() is called. So, for most cases, the timer is not > triggered, this can significantly reduce the amount of trace data. > > eBPF progs: > > struct bpf_map_def SEC("maps") timer_map = { > .type = BPF_MAP_TYPE_TIMER_ARRAY, > .key_size = sizeof(int), > .value_size = sizeof(unsigned long long), > .max_entries = 4, > }; > > SEC("func1=frame_refresh_start") > int bpf_prog1(struct pt_regs *ctx) > { > int delay_ns = 42 * NSEC_PER_MSEC; /* 42 milliseconds */ > unsigned int index = 0; > > bpf_map_update_elem(&timer_map, &index, &delay_ns, BPF_ANY); > return 0; > } > > SEC("func2=frame_refresh_end") > int bpf_prog2(struct pt_regs *ctx) > { > unsigned int index = 0; > > bpf_map_delete_elem(&timer_map, &index); > return 0; > } > > SEC("func3=bpf_timer_callback") > int bpf_prog3(struct pt_regs *ctx, int result) > { > char fmt[] = "timer triggered\n"; > > bpf_trace_printk(fmt, sizeof(fmt)); > > /* If comes here, frame refresh time is beyond the threshold > we can switch on more tracepoints or enable more detailed > trace paramaters to diagnose the problems. > */ > return 0; > } > > Then we can write an exec to test the ebpf progs above, each timeout > frame updates will trigger the timer. > > --- > include/uapi/linux/bpf.h | 1 + > kernel/bpf/arraymap.c | 161 +++++++++++++++++++++++++++++++++++++++++++---- > 2 files changed, 149 insertions(+), 13 deletions(-) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 92a48e2..c41c80e 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -115,6 +115,7 @@ enum bpf_map_type { > BPF_MAP_TYPE_ARRAY, > BPF_MAP_TYPE_PROG_ARRAY, > BPF_MAP_TYPE_PERF_EVENT_ARRAY, > + BPF_MAP_TYPE_TIMER_ARRAY, > }; > > enum bpf_prog_type { > diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c > index 29ace10..1e03c70 100644 > --- a/kernel/bpf/arraymap.c > +++ b/kernel/bpf/arraymap.c > @@ -16,18 +16,10 @@ > #include > #include > > -/* Called from syscall */ > -static struct bpf_map *array_map_alloc(union bpf_attr *attr) > +static struct bpf_map *__array_map_alloc(union bpf_attr *attr, u32 elem_size) > { > struct bpf_array *array; > - u32 elem_size, array_size; > - > - /* check sanity of attributes */ > - if (attr->max_entries == 0 || attr->key_size != 4 || > - attr->value_size == 0) > - return ERR_PTR(-EINVAL); > - > - elem_size = round_up(attr->value_size, 8); > + u32 array_size; > > /* check round_up into zero and u32 overflow */ > if (elem_size == 0 || > @@ -54,6 +46,20 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) > return &array->map; > } > > +/* Called from syscall */ > +static struct bpf_map *array_map_alloc(union bpf_attr *attr) > +{ > + u32 elem_size; > + > + /* check sanity of attributes */ > + if (attr->max_entries == 0 || attr->key_size != 4 || > + attr->value_size == 0) > + return ERR_PTR(-EINVAL); > + > + elem_size = round_up(attr->value_size, 8); > + return __array_map_alloc(attr, elem_size); > +} > + > /* Called from syscall or from eBPF program */ > static void *array_map_lookup_elem(struct bpf_map *map, void *key) > { > @@ -171,7 +177,7 @@ static void fd_array_map_free(struct bpf_map *map) > kvfree(array); > } > > -static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key) > +static void *empty_array_map_lookup_elem(struct bpf_map *map, void *key) > { > return NULL; > } > @@ -255,7 +261,7 @@ static const struct bpf_map_ops prog_array_ops = { > .map_alloc = fd_array_map_alloc, > .map_free = fd_array_map_free, > .map_get_next_key = array_map_get_next_key, > - .map_lookup_elem = fd_array_map_lookup_elem, > + .map_lookup_elem = empty_array_map_lookup_elem, > .map_update_elem = fd_array_map_update_elem, > .map_delete_elem = fd_array_map_delete_elem, > .map_fd_get_ptr = prog_fd_array_get_ptr, > @@ -312,7 +318,7 @@ static const struct bpf_map_ops perf_event_array_ops = { > .map_alloc = fd_array_map_alloc, > .map_free = perf_event_array_map_free, > .map_get_next_key = array_map_get_next_key, > - .map_lookup_elem = fd_array_map_lookup_elem, > + .map_lookup_elem = empty_array_map_lookup_elem, > .map_update_elem = fd_array_map_update_elem, > .map_delete_elem = fd_array_map_delete_elem, > .map_fd_get_ptr = perf_event_fd_array_get_ptr, > @@ -330,3 +336,132 @@ static int __init register_perf_event_array_map(void) > return 0; > } > late_initcall(register_perf_event_array_map); > + > +/* Intended empty function as ebpf stub */ > +enum hrtimer_restart bpf_timer_callback(struct hrtimer *timer __maybe_unused) > +{ > + return HRTIMER_NORESTART; > +} > + > +static void timer_array_map_init_timers(struct bpf_map *map) > +{ > + struct bpf_array *array = container_of(map, struct bpf_array, map); > + int i; > + struct hrtimer *timer; > + > + /* init all timer */ > + for (i = 0; i < array->map.max_entries; i++) { > + timer = (struct hrtimer *)(array->value + > + array->elem_size * i); > + hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); > + timer->function = bpf_timer_callback; > + } > +} > + > +static struct bpf_map *timer_array_map_alloc(union bpf_attr *attr) > +{ > + u32 elem_size; > + struct bpf_map *map; > + > + if (attr->value_size != sizeof(u64)) > + return ERR_PTR(-EINVAL); > + > + /* check sanity of attributes */ > + if (attr->max_entries == 0 || attr->key_size != 4 || > + attr->value_size == 0) > + return ERR_PTR(-EINVAL); > + > + elem_size = round_up(sizeof(struct hrtimer), 8); > + map = __array_map_alloc(attr, elem_size); > + if (IS_ERR(map)) > + return map; > + > + timer_array_map_init_timers(map); > + > + return map; > +} > + > +static void timer_array_map_free(struct bpf_map *map) > +{ > + struct bpf_array *array = container_of(map, struct bpf_array, map); > + int i; > + struct hrtimer *timer; > + > + synchronize_rcu(); > + > + /* cancel all untriggered timer */ > + for (i = 0; i < array->map.max_entries; i++) { > + timer = (struct hrtimer *)(array->value + > + array->elem_size * i); > + hrtimer_cancel(timer); > + } > + kvfree(array); > +} > + > +/* Timer activate */ > +static int timer_array_map_update_elem(struct bpf_map *map, void *key, > + void *value, u64 map_flags) > +{ > + struct bpf_array *array = container_of(map, struct bpf_array, map); > + u32 index = *(u32 *)key; > + struct hrtimer *timer; > + int delay_ns = *(u64 *)value; > + ktime_t kt; > + > + if (map_flags > BPF_EXIST) > + /* unknown flags */ > + return -EINVAL; > + > + if (index >= array->map.max_entries) > + /* all elements were pre-allocated, cannot insert a new one */ > + return -E2BIG; > + > + if (map_flags == BPF_NOEXIST) > + /* all elements already exist */ > + return -EEXIST; > + > + timer = (struct hrtimer *)(array->value + array->elem_size * index); > + kt = ktime_set(0, delay_ns); > + hrtimer_start(timer, kt, HRTIMER_MODE_REL); > + > + return 0; > +} > + > +/* Timer inactivate */ > +static int timer_array_map_delete_elem(struct bpf_map *map, void *key) > +{ > + struct bpf_array *array = container_of(map, struct bpf_array, map); > + u32 index = *(u32 *)key; > + struct hrtimer *timer; > + > + if (index >= array->map.max_entries) > + return -E2BIG; > + > + timer = (struct hrtimer *)(array->value + array->elem_size * index); > + > + if (hrtimer_try_to_cancel(timer) >= 0) > + return 0; > + else > + return -EBUSY; > +} > + > +static const struct bpf_map_ops timer_array_ops = { > + .map_alloc = timer_array_map_alloc, > + .map_free = timer_array_map_free, > + .map_get_next_key = array_map_get_next_key, > + .map_lookup_elem = empty_array_map_lookup_elem, > + .map_update_elem = timer_array_map_update_elem, > + .map_delete_elem = timer_array_map_delete_elem, > +}; > + > +static struct bpf_map_type_list timer_array_type __read_mostly = { > + .ops = &timer_array_ops, > + .type = BPF_MAP_TYPE_TIMER_ARRAY, > +}; > + > +static int __init register_timer_array_map(void) > +{ > + bpf_register_map_type(&timer_array_type); > + return 0; > +} > +late_initcall(register_timer_array_map); >