From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1756569AbdJKJDh (ORCPT ); Wed, 11 Oct 2017 05:03:37 -0400 Received: from szxga05-in.huawei.com ([45.249.212.191]:7561 "EHLO szxga05-in.huawei.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752011AbdJKJDd (ORCPT ); Wed, 11 Oct 2017 05:03:33 -0400 Message-ID: <59DDDE24.5050909@huawei.com> Date: Wed, 11 Oct 2017 17:02:28 +0800 From: zhouchengming User-Agent: Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20120428 Thunderbird/12.0.1 MIME-Version: 1.0 To: Peter Zijlstra CC: , , , Subject: Re: [PATCH 2/4] perf/ftrace: Fix function trace events References: <20171011074528.764500836@infradead.org> <20171011080224.257804988@infradead.org> In-Reply-To: <20171011080224.257804988@infradead.org> Content-Type: text/plain; charset="UTF-8"; format=flowed Content-Transfer-Encoding: 7bit X-Originating-IP: [10.177.236.183] X-CFilter-Loop: Reflected X-Mirapoint-Virus-RAPID-Raw: score=unknown(0), refid=str=0001.0A020203.59DDDE63.0022,ss=1,re=0.000,recu=0.000,reip=0.000,cl=1,cld=1,fgs=0, ip=0.0.0.0, so=2014-11-16 11:51:01, dmn=2013-03-21 17:37:32 X-Mirapoint-Loop-Id: 307084c61c21755708f8426ba99dfa45 Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On 2017/10/11 15:45, Peter Zijlstra wrote: > The function-trace<-> perf interface is a tad messed up. Where all > the other trace<-> perf interfaces use a single trace hook > registration and use per-cpu RCU based hlist to iterate the events, > function-trace actually needs multiple hook registrations in order to > minimize function entry patching when filters are present. > > The end result is that we iterate events both on the trace hook and on > the hlist, which results in reporting events multiple times. > > Since function-trace cannot use the regular scheme, fix it the other > way around, use singleton hlists. 
> > Signed-off-by: Peter Zijlstra (Intel) > --- > include/linux/trace_events.h | 5 ++ > kernel/trace/trace_event_perf.c | 82 ++++++++++++++++++++++++---------------- > 2 files changed, 55 insertions(+), 32 deletions(-) > > --- a/include/linux/trace_events.h > +++ b/include/linux/trace_events.h > @@ -173,6 +173,11 @@ enum trace_reg { > TRACE_REG_PERF_UNREGISTER, > TRACE_REG_PERF_OPEN, > TRACE_REG_PERF_CLOSE, > + /* > + * These (ADD/DEL) use a 'boolean' return value, where 1 (true) means a > + * custom action was taken and the default action is not to be > + * performed. > + */ > TRACE_REG_PERF_ADD, > TRACE_REG_PERF_DEL, > #endif > --- a/kernel/trace/trace_event_perf.c > +++ b/kernel/trace/trace_event_perf.c > @@ -240,27 +240,41 @@ void perf_trace_destroy(struct perf_even > int perf_trace_add(struct perf_event *p_event, int flags) > { > struct trace_event_call *tp_event = p_event->tp_event; > - struct hlist_head __percpu *pcpu_list; > - struct hlist_head *list; > > - pcpu_list = tp_event->perf_events; > - if (WARN_ON_ONCE(!pcpu_list)) > - return -EINVAL; > + /* > + * If TRACE_REG_PERF_ADD returns false; no custom action was performed > + * and we need to take the default action of enqueueing our event on > + * the right per-cpu hlist. > + */ > + if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event)) { > + struct hlist_head __percpu *pcpu_list; > + struct hlist_head *list; > + > + pcpu_list = tp_event->perf_events; > + if (WARN_ON_ONCE(!pcpu_list)) > + return -EINVAL; > > - if (!(flags& PERF_EF_START)) > - p_event->hw.state = PERF_HES_STOPPED; > + if (!(flags& PERF_EF_START)) > + p_event->hw.state = PERF_HES_STOPPED; Don't we also need to check the flags for the ftrace perf_event? If so, shouldn't this PERF_EF_START check be moved outside the if (!tp_event->class->reg()) block? 
> > - list = this_cpu_ptr(pcpu_list); > - hlist_add_head_rcu(&p_event->hlist_entry, list); > + list = this_cpu_ptr(pcpu_list); > + hlist_add_head_rcu(&p_event->hlist_entry, list); > + } Since we no longer add the perf_event to the pcpu_list here, we can also avoid allocating the pcpu_list for the function tp_event in perf_trace_event_reg(). Thanks. > > - return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event); > + return 0; > } > > void perf_trace_del(struct perf_event *p_event, int flags) > { > struct trace_event_call *tp_event = p_event->tp_event; > - hlist_del_rcu(&p_event->hlist_entry); > - tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event); > + > + /* > + * If TRACE_REG_PERF_DEL returns false; no custom action was performed > + * and we need to take the default action of dequeueing our event from > + * the right per-cpu hlist. > + */ > + if (!tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event)) > + hlist_del_rcu(&p_event->hlist_entry); > } > > void *perf_trace_buf_alloc(int size, struct pt_regs **regs, int *rctxp) > @@ -307,14 +321,24 @@ perf_ftrace_function_call(unsigned long > struct ftrace_ops *ops, struct pt_regs *pt_regs) > { > struct ftrace_entry *entry; > - struct hlist_head *head; > + struct perf_event *event; > + struct hlist_head head; > struct pt_regs regs; > int rctx; > > - head = this_cpu_ptr(event_function.perf_events); > - if (hlist_empty(head)) > + if ((unsigned long)ops->private != smp_processor_id()) > return; > > + event = container_of(ops, struct perf_event, ftrace_ops); > + > + /* > + * @event->hlist entry is NULL (per INIT_HLIST_NODE), and all > + * the perf code does is hlist_for_each_entry_rcu(), so we can > + * get away with simply setting the @head.first pointer in order > + * to create a singular list. 
> + */ > + head.first =&event->hlist_entry; > + > #define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \ > sizeof(u64)) - sizeof(u32)) > > @@ -330,7 +354,7 @@ perf_ftrace_function_call(unsigned long > entry->ip = ip; > entry->parent_ip = parent_ip; > perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, TRACE_FN, > - 1,&regs, head, NULL); > + 1,&regs,&head, NULL); > > #undef ENTRY_SIZE > } > @@ -339,8 +363,10 @@ static int perf_ftrace_function_register > { > struct ftrace_ops *ops =&event->ftrace_ops; > > - ops->flags |= FTRACE_OPS_FL_PER_CPU | FTRACE_OPS_FL_RCU; > - ops->func = perf_ftrace_function_call; > + ops->flags |= FTRACE_OPS_FL_RCU; > + ops->func = perf_ftrace_function_call; > + ops->private = (void *)(unsigned long)nr_cpu_ids; > + > return register_ftrace_function(ops); > } > > @@ -352,19 +378,11 @@ static int perf_ftrace_function_unregist > return ret; > } > > -static void perf_ftrace_function_enable(struct perf_event *event) > -{ > - ftrace_function_local_enable(&event->ftrace_ops); > -} > - > -static void perf_ftrace_function_disable(struct perf_event *event) > -{ > - ftrace_function_local_disable(&event->ftrace_ops); > -} > - > int perf_ftrace_event_register(struct trace_event_call *call, > enum trace_reg type, void *data) > { > + struct perf_event *event = data; > + > switch (type) { > case TRACE_REG_REGISTER: > case TRACE_REG_UNREGISTER: > @@ -377,11 +395,11 @@ int perf_ftrace_event_register(struct tr > case TRACE_REG_PERF_CLOSE: > return perf_ftrace_function_unregister(data); > case TRACE_REG_PERF_ADD: > - perf_ftrace_function_enable(data); > - return 0; > + event->ftrace_ops.private = (void *)(unsigned long)smp_processor_id(); > + return 1; > case TRACE_REG_PERF_DEL: > - perf_ftrace_function_disable(data); > - return 0; > + event->ftrace_ops.private = (void *)(unsigned long)nr_cpu_ids; > + return 1; > } > > return -EINVAL; > > > > . >