public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC/PATCH] ftrace: add set_graph_notrace filter
@ 2013-09-03  5:05 Namhyung Kim
  2013-09-30  7:04 ` Namhyung Kim
  2013-10-11  4:17 ` Steven Rostedt
  0 siblings, 2 replies; 13+ messages in thread
From: Namhyung Kim @ 2013-09-03  5:05 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Frederic Weisbecker, Ingo Molnar, LKML, Namhyung Kim

From: Namhyung Kim <namhyung.kim@lge.com>

The set_graph_notrace filter is analogous to set_ftrace_notrace and
can be used for eliminating uninteresting part of function graph trace
output.  It also works with set_graph_function nicely.

  # cd /sys/kernel/debug/tracing/
  # echo do_page_fault > set_graph_function
  # perf ftrace live true
   2)               |  do_page_fault() {
   2)               |    __do_page_fault() {
   2)   0.381 us    |      down_read_trylock();
   2)   0.055 us    |      __might_sleep();
   2)   0.696 us    |      find_vma();
   2)               |      handle_mm_fault() {
   2)               |        handle_pte_fault() {
   2)               |          __do_fault() {
   2)               |            filemap_fault() {
   2)               |              find_get_page() {
   2)   0.033 us    |                __rcu_read_lock();
   2)   0.035 us    |                __rcu_read_unlock();
   2)   1.696 us    |              }
   2)   0.031 us    |              __might_sleep();
   2)   2.831 us    |            }
   2)               |            _raw_spin_lock() {
   2)   0.046 us    |              add_preempt_count();
   2)   0.841 us    |            }
   2)   0.033 us    |            page_add_file_rmap();
   2)               |            _raw_spin_unlock() {
   2)   0.057 us    |              sub_preempt_count();
   2)   0.568 us    |            }
   2)               |            unlock_page() {
   2)   0.084 us    |              page_waitqueue();
   2)   0.126 us    |              __wake_up_bit();
   2)   1.117 us    |            }
   2)   7.729 us    |          }
   2)   8.397 us    |        }
   2)   8.956 us    |      }
   2)   0.085 us    |      up_read();
   2) + 12.745 us   |    }
   2) + 13.401 us   |  }
  ...

  # echo handle_mm_fault > set_graph_notrace
  # perf ftrace live true
   1)               |  do_page_fault() {
   1)               |    __do_page_fault() {
   1)   0.205 us    |      down_read_trylock();
   1)   0.041 us    |      __might_sleep();
   1)   0.344 us    |      find_vma();
   1)   0.069 us    |      up_read();
   1)   4.692 us    |    }
   1)   5.311 us    |  }
  ...

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 include/linux/ftrace.h               |   1 +
 kernel/trace/ftrace.c                | 118 ++++++++++++++++++++++++++++++++++-
 kernel/trace/trace.h                 |  23 +++++++
 kernel/trace/trace_functions_graph.c |  21 ++++++-
 4 files changed, 159 insertions(+), 4 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9f15c0064c50..ec85d48619e1 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -721,6 +721,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
 extern char __irqentry_text_start[];
 extern char __irqentry_text_end[];
 
+#define FTRACE_NOTRACE_DEPTH 65536
 #define FTRACE_RETFUNC_DEPTH 50
 #define FTRACE_RETSTACK_ALLOC_SIZE 32
 extern int register_ftrace_graph(trace_func_graph_ret_t retfunc,
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a6d098c6df3f..1b1f3409f788 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3819,6 +3819,43 @@ static const struct seq_operations ftrace_graph_seq_ops = {
 	.show = g_show,
 };
 
+int ftrace_graph_notrace_count;
+int ftrace_graph_notrace_enabled;
+unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
+
+static void *
+__n_next(struct seq_file *m, loff_t *pos)
+{
+	if (*pos >= ftrace_graph_notrace_count)
+		return NULL;
+	return &ftrace_graph_notrace_funcs[*pos];
+}
+
+static void *
+n_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	(*pos)++;
+	return __n_next(m, pos);
+}
+
+static void *n_start(struct seq_file *m, loff_t *pos)
+{
+	mutex_lock(&graph_lock);
+
+	/* Nothing, tell g_show to print all functions are enabled */
+	if (!ftrace_graph_notrace_enabled && !*pos)
+		return (void *)1;
+
+	return __n_next(m, pos);
+}
+
+static const struct seq_operations ftrace_graph_notrace_seq_ops = {
+	.start = n_start,
+	.next = n_next,
+	.stop = g_stop,
+	.show = g_show,
+};
+
 static int
 ftrace_graph_open(struct inode *inode, struct file *file)
 {
@@ -3843,6 +3880,30 @@ ftrace_graph_open(struct inode *inode, struct file *file)
 }
 
 static int
+ftrace_graph_notrace_open(struct inode *inode, struct file *file)
+{
+	int ret = 0;
+
+	if (unlikely(ftrace_disabled))
+		return -ENODEV;
+
+	mutex_lock(&graph_lock);
+	if ((file->f_mode & FMODE_WRITE) &&
+	    (file->f_flags & O_TRUNC)) {
+		ftrace_graph_notrace_enabled = 0;
+		ftrace_graph_notrace_count = 0;
+		memset(ftrace_graph_notrace_funcs, 0,
+		       sizeof(ftrace_graph_notrace_funcs));
+	}
+	mutex_unlock(&graph_lock);
+
+	if (file->f_mode & FMODE_READ)
+		ret = seq_open(file, &ftrace_graph_notrace_seq_ops);
+
+	return ret;
+}
+
+static int
 ftrace_graph_release(struct inode *inode, struct file *file)
 {
 	if (file->f_mode & FMODE_READ)
@@ -3910,8 +3971,6 @@ out:
 	if (fail)
 		return -EINVAL;
 
-	ftrace_graph_filter_enabled = !!(*idx);
-
 	return 0;
 }
 
@@ -3946,6 +4005,50 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
 
 	ret = read;
 
+	ftrace_graph_filter_enabled = ftrace_graph_count > 0;
+
+out_free:
+	trace_parser_put(&parser);
+out_unlock:
+	mutex_unlock(&graph_lock);
+
+	return ret;
+}
+
+static ssize_t
+ftrace_graph_notrace_write(struct file *file, const char __user *ubuf,
+			   size_t cnt, loff_t *ppos)
+{
+	struct trace_parser parser;
+	ssize_t read, ret;
+
+	if (!cnt)
+		return 0;
+
+	mutex_lock(&graph_lock);
+
+	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+
+	read = trace_get_user(&parser, ubuf, cnt, ppos);
+
+	if (read >= 0 && trace_parser_loaded((&parser))) {
+		parser.buffer[parser.idx] = 0;
+
+		/* we allow only one expression at a time */
+		ret = ftrace_set_func(ftrace_graph_notrace_funcs,
+				      &ftrace_graph_notrace_count,
+				      parser.buffer);
+		if (ret)
+			goto out_free;
+	}
+
+	ret = read;
+
+	ftrace_graph_notrace_enabled = ftrace_graph_notrace_count > 0;
+
 out_free:
 	trace_parser_put(&parser);
 out_unlock:
@@ -3961,6 +4064,14 @@ static const struct file_operations ftrace_graph_fops = {
 	.llseek		= ftrace_filter_lseek,
 	.release	= ftrace_graph_release,
 };
+
+static const struct file_operations ftrace_graph_notrace_fops = {
+	.open		= ftrace_graph_notrace_open,
+	.read		= seq_read,
+	.write		= ftrace_graph_notrace_write,
+	.llseek		= ftrace_filter_lseek,
+	.release	= ftrace_graph_release,
+};
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
@@ -3982,6 +4093,9 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
 	trace_create_file("set_graph_function", 0444, d_tracer,
 				    NULL,
 				    &ftrace_graph_fops);
+	trace_create_file("set_graph_notrace", 0444, d_tracer,
+				    NULL,
+				    &ftrace_graph_notrace_fops);
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 	return 0;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fe39acd4c1aa..797f6f7f3f56 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -732,6 +732,9 @@ extern void __trace_graph_return(struct trace_array *tr,
 extern int ftrace_graph_filter_enabled;
 extern int ftrace_graph_count;
 extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
+extern int ftrace_graph_notrace_enabled;
+extern int ftrace_graph_notrace_count;
+extern unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS];
 
 static inline int ftrace_graph_addr(unsigned long addr)
 {
@@ -757,11 +760,31 @@ static inline int ftrace_graph_addr(unsigned long addr)
 
 	return 0;
 }
+
+static inline int ftrace_graph_notrace_addr(unsigned long addr)
+{
+	int i;
+
+	if (!ftrace_graph_notrace_enabled)
+		return 0;
+
+	for (i = 0; i < ftrace_graph_notrace_count; i++) {
+		if (addr == ftrace_graph_notrace_funcs[i])
+			return 1;
+	}
+
+	return 0;
+}
 #else
 static inline int ftrace_graph_addr(unsigned long addr)
 {
 	return 1;
 }
+
+static inline int ftrace_graph_notrace_addr(unsigned long addr)
+{
+	return 0;
+}
 #endif /* CONFIG_DYNAMIC_FTRACE */
 #else /* CONFIG_FUNCTION_GRAPH_TRACER */
 static inline enum print_line_t
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b5c09242683d..3ba240daa5e0 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -114,16 +114,22 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
 		return -EBUSY;
 	}
 
+	/* The function was filtered out */
+	if (current->curr_ret_stack < -1)
+		return -EBUSY;
+
 	calltime = trace_clock_local();
 
 	index = ++current->curr_ret_stack;
+	if (ftrace_graph_notrace_addr(func))
+		current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;
 	barrier();
 	current->ret_stack[index].ret = ret;
 	current->ret_stack[index].func = func;
 	current->ret_stack[index].calltime = calltime;
 	current->ret_stack[index].subtime = 0;
 	current->ret_stack[index].fp = frame_pointer;
-	*depth = index;
+	*depth = current->curr_ret_stack;
 
 	return 0;
 }
@@ -137,6 +143,9 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
 
 	index = current->curr_ret_stack;
 
+	if (index < 0)
+		index += FTRACE_NOTRACE_DEPTH;
+
 	if (unlikely(index < 0)) {
 		ftrace_graph_stop();
 		WARN_ON(1);
@@ -193,6 +202,10 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 	trace.rettime = trace_clock_local();
 	barrier();
 	current->curr_ret_stack--;
+	if (current->curr_ret_stack < -1) {
+		current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
+		return ret;
+	}
 
 	/*
 	 * The trace should run after decrementing the ret counter
@@ -259,10 +272,14 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
 
 	/* trace it when it is-nested-in or is a function enabled. */
 	if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
-	     ftrace_graph_ignore_irqs()) ||
+	     ftrace_graph_ignore_irqs()) || (trace->depth < 0) ||
 	    (max_depth && trace->depth >= max_depth))
 		return 0;
 
+	/* need to reserve a ret_stack entry to recover the depth */
+	if (ftrace_graph_notrace_addr(trace->func))
+		return 1;
+
 	local_irq_save(flags);
 	cpu = raw_smp_processor_id();
 	data = per_cpu_ptr(tr->trace_buffer.data, cpu);
-- 
1.7.11.7


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH] ftrace: add set_graph_notrace filter
  2013-09-03  5:05 [RFC/PATCH] ftrace: add set_graph_notrace filter Namhyung Kim
@ 2013-09-30  7:04 ` Namhyung Kim
  2013-10-01  3:58   ` Steven Rostedt
  2013-10-11  4:17 ` Steven Rostedt
  1 sibling, 1 reply; 13+ messages in thread
From: Namhyung Kim @ 2013-09-30  7:04 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Frederic Weisbecker, Ingo Molnar, LKML, Namhyung Kim

Ping!

On Tue,  3 Sep 2013 14:05:08 +0900, Namhyung Kim wrote:
> From: Namhyung Kim <namhyung.kim@lge.com>
>
> The set_graph_notrace filter is analogous to set_ftrace_notrace and
> can be used for eliminating uninteresting part of function graph trace
> output.  It also works with set_graph_function nicely.
>
>   # cd /sys/kernel/debug/tracing/
>   # echo do_page_fault > set_graph_function
>   # perf ftrace live true
>    2)               |  do_page_fault() {
>    2)               |    __do_page_fault() {
>    2)   0.381 us    |      down_read_trylock();
>    2)   0.055 us    |      __might_sleep();
>    2)   0.696 us    |      find_vma();
>    2)               |      handle_mm_fault() {
>    2)               |        handle_pte_fault() {
>    2)               |          __do_fault() {
>    2)               |            filemap_fault() {
>    2)               |              find_get_page() {
>    2)   0.033 us    |                __rcu_read_lock();
>    2)   0.035 us    |                __rcu_read_unlock();
>    2)   1.696 us    |              }
>    2)   0.031 us    |              __might_sleep();
>    2)   2.831 us    |            }
>    2)               |            _raw_spin_lock() {
>    2)   0.046 us    |              add_preempt_count();
>    2)   0.841 us    |            }
>    2)   0.033 us    |            page_add_file_rmap();
>    2)               |            _raw_spin_unlock() {
>    2)   0.057 us    |              sub_preempt_count();
>    2)   0.568 us    |            }
>    2)               |            unlock_page() {
>    2)   0.084 us    |              page_waitqueue();
>    2)   0.126 us    |              __wake_up_bit();
>    2)   1.117 us    |            }
>    2)   7.729 us    |          }
>    2)   8.397 us    |        }
>    2)   8.956 us    |      }
>    2)   0.085 us    |      up_read();
>    2) + 12.745 us   |    }
>    2) + 13.401 us   |  }
>   ...
>
>   # echo handle_mm_fault > set_graph_notrace
>   # perf ftrace live true
>    1)               |  do_page_fault() {
>    1)               |    __do_page_fault() {
>    1)   0.205 us    |      down_read_trylock();
>    1)   0.041 us    |      __might_sleep();
>    1)   0.344 us    |      find_vma();
>    1)   0.069 us    |      up_read();
>    1)   4.692 us    |    }
>    1)   5.311 us    |  }
>   ...
>
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  include/linux/ftrace.h               |   1 +
>  kernel/trace/ftrace.c                | 118 ++++++++++++++++++++++++++++++++++-
>  kernel/trace/trace.h                 |  23 +++++++
>  kernel/trace/trace_functions_graph.c |  21 ++++++-
>  4 files changed, 159 insertions(+), 4 deletions(-)
>
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 9f15c0064c50..ec85d48619e1 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -721,6 +721,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
>  extern char __irqentry_text_start[];
>  extern char __irqentry_text_end[];
>  
> +#define FTRACE_NOTRACE_DEPTH 65536
>  #define FTRACE_RETFUNC_DEPTH 50
>  #define FTRACE_RETSTACK_ALLOC_SIZE 32
>  extern int register_ftrace_graph(trace_func_graph_ret_t retfunc,
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index a6d098c6df3f..1b1f3409f788 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -3819,6 +3819,43 @@ static const struct seq_operations ftrace_graph_seq_ops = {
>  	.show = g_show,
>  };
>  
> +int ftrace_graph_notrace_count;
> +int ftrace_graph_notrace_enabled;
> +unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
> +
> +static void *
> +__n_next(struct seq_file *m, loff_t *pos)
> +{
> +	if (*pos >= ftrace_graph_notrace_count)
> +		return NULL;
> +	return &ftrace_graph_notrace_funcs[*pos];
> +}
> +
> +static void *
> +n_next(struct seq_file *m, void *v, loff_t *pos)
> +{
> +	(*pos)++;
> +	return __n_next(m, pos);
> +}
> +
> +static void *n_start(struct seq_file *m, loff_t *pos)
> +{
> +	mutex_lock(&graph_lock);
> +
> +	/* Nothing, tell g_show to print all functions are enabled */
> +	if (!ftrace_graph_notrace_enabled && !*pos)
> +		return (void *)1;
> +
> +	return __n_next(m, pos);
> +}
> +
> +static const struct seq_operations ftrace_graph_notrace_seq_ops = {
> +	.start = n_start,
> +	.next = n_next,
> +	.stop = g_stop,
> +	.show = g_show,
> +};
> +
>  static int
>  ftrace_graph_open(struct inode *inode, struct file *file)
>  {
> @@ -3843,6 +3880,30 @@ ftrace_graph_open(struct inode *inode, struct file *file)
>  }
>  
>  static int
> +ftrace_graph_notrace_open(struct inode *inode, struct file *file)
> +{
> +	int ret = 0;
> +
> +	if (unlikely(ftrace_disabled))
> +		return -ENODEV;
> +
> +	mutex_lock(&graph_lock);
> +	if ((file->f_mode & FMODE_WRITE) &&
> +	    (file->f_flags & O_TRUNC)) {
> +		ftrace_graph_notrace_enabled = 0;
> +		ftrace_graph_notrace_count = 0;
> +		memset(ftrace_graph_notrace_funcs, 0,
> +		       sizeof(ftrace_graph_notrace_funcs));
> +	}
> +	mutex_unlock(&graph_lock);
> +
> +	if (file->f_mode & FMODE_READ)
> +		ret = seq_open(file, &ftrace_graph_notrace_seq_ops);
> +
> +	return ret;
> +}
> +
> +static int
>  ftrace_graph_release(struct inode *inode, struct file *file)
>  {
>  	if (file->f_mode & FMODE_READ)
> @@ -3910,8 +3971,6 @@ out:
>  	if (fail)
>  		return -EINVAL;
>  
> -	ftrace_graph_filter_enabled = !!(*idx);
> -
>  	return 0;
>  }
>  
> @@ -3946,6 +4005,50 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
>  
>  	ret = read;
>  
> +	ftrace_graph_filter_enabled = ftrace_graph_count > 0;
> +
> +out_free:
> +	trace_parser_put(&parser);
> +out_unlock:
> +	mutex_unlock(&graph_lock);
> +
> +	return ret;
> +}
> +
> +static ssize_t
> +ftrace_graph_notrace_write(struct file *file, const char __user *ubuf,
> +			   size_t cnt, loff_t *ppos)
> +{
> +	struct trace_parser parser;
> +	ssize_t read, ret;
> +
> +	if (!cnt)
> +		return 0;
> +
> +	mutex_lock(&graph_lock);
> +
> +	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
> +		ret = -ENOMEM;
> +		goto out_unlock;
> +	}
> +
> +	read = trace_get_user(&parser, ubuf, cnt, ppos);
> +
> +	if (read >= 0 && trace_parser_loaded((&parser))) {
> +		parser.buffer[parser.idx] = 0;
> +
> +		/* we allow only one expression at a time */
> +		ret = ftrace_set_func(ftrace_graph_notrace_funcs,
> +				      &ftrace_graph_notrace_count,
> +				      parser.buffer);
> +		if (ret)
> +			goto out_free;
> +	}
> +
> +	ret = read;
> +
> +	ftrace_graph_notrace_enabled = ftrace_graph_notrace_count > 0;
> +
>  out_free:
>  	trace_parser_put(&parser);
>  out_unlock:
> @@ -3961,6 +4064,14 @@ static const struct file_operations ftrace_graph_fops = {
>  	.llseek		= ftrace_filter_lseek,
>  	.release	= ftrace_graph_release,
>  };
> +
> +static const struct file_operations ftrace_graph_notrace_fops = {
> +	.open		= ftrace_graph_notrace_open,
> +	.read		= seq_read,
> +	.write		= ftrace_graph_notrace_write,
> +	.llseek		= ftrace_filter_lseek,
> +	.release	= ftrace_graph_release,
> +};
>  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
>  
>  static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
> @@ -3982,6 +4093,9 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
>  	trace_create_file("set_graph_function", 0444, d_tracer,
>  				    NULL,
>  				    &ftrace_graph_fops);
> +	trace_create_file("set_graph_notrace", 0444, d_tracer,
> +				    NULL,
> +				    &ftrace_graph_notrace_fops);
>  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
>  
>  	return 0;
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index fe39acd4c1aa..797f6f7f3f56 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -732,6 +732,9 @@ extern void __trace_graph_return(struct trace_array *tr,
>  extern int ftrace_graph_filter_enabled;
>  extern int ftrace_graph_count;
>  extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
> +extern int ftrace_graph_notrace_enabled;
> +extern int ftrace_graph_notrace_count;
> +extern unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS];
>  
>  static inline int ftrace_graph_addr(unsigned long addr)
>  {
> @@ -757,11 +760,31 @@ static inline int ftrace_graph_addr(unsigned long addr)
>  
>  	return 0;
>  }
> +
> +static inline int ftrace_graph_notrace_addr(unsigned long addr)
> +{
> +	int i;
> +
> +	if (!ftrace_graph_notrace_enabled)
> +		return 0;
> +
> +	for (i = 0; i < ftrace_graph_notrace_count; i++) {
> +		if (addr == ftrace_graph_notrace_funcs[i])
> +			return 1;
> +	}
> +
> +	return 0;
> +}
>  #else
>  static inline int ftrace_graph_addr(unsigned long addr)
>  {
>  	return 1;
>  }
> +
> +static inline int ftrace_graph_notrace_addr(unsigned long addr)
> +{
> +	return 0;
> +}
>  #endif /* CONFIG_DYNAMIC_FTRACE */
>  #else /* CONFIG_FUNCTION_GRAPH_TRACER */
>  static inline enum print_line_t
> diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
> index b5c09242683d..3ba240daa5e0 100644
> --- a/kernel/trace/trace_functions_graph.c
> +++ b/kernel/trace/trace_functions_graph.c
> @@ -114,16 +114,22 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
>  		return -EBUSY;
>  	}
>  
> +	/* The function was filtered out */
> +	if (current->curr_ret_stack < -1)
> +		return -EBUSY;
> +
>  	calltime = trace_clock_local();
>  
>  	index = ++current->curr_ret_stack;
> +	if (ftrace_graph_notrace_addr(func))
> +		current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;
>  	barrier();
>  	current->ret_stack[index].ret = ret;
>  	current->ret_stack[index].func = func;
>  	current->ret_stack[index].calltime = calltime;
>  	current->ret_stack[index].subtime = 0;
>  	current->ret_stack[index].fp = frame_pointer;
> -	*depth = index;
> +	*depth = current->curr_ret_stack;
>  
>  	return 0;
>  }
> @@ -137,6 +143,9 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
>  
>  	index = current->curr_ret_stack;
>  
> +	if (index < 0)
> +		index += FTRACE_NOTRACE_DEPTH;
> +
>  	if (unlikely(index < 0)) {
>  		ftrace_graph_stop();
>  		WARN_ON(1);
> @@ -193,6 +202,10 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
>  	trace.rettime = trace_clock_local();
>  	barrier();
>  	current->curr_ret_stack--;
> +	if (current->curr_ret_stack < -1) {
> +		current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
> +		return ret;
> +	}
>  
>  	/*
>  	 * The trace should run after decrementing the ret counter
> @@ -259,10 +272,14 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
>  
>  	/* trace it when it is-nested-in or is a function enabled. */
>  	if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
> -	     ftrace_graph_ignore_irqs()) ||
> +	     ftrace_graph_ignore_irqs()) || (trace->depth < 0) ||
>  	    (max_depth && trace->depth >= max_depth))
>  		return 0;
>  
> +	/* need to reserve a ret_stack entry to recover the depth */
> +	if (ftrace_graph_notrace_addr(trace->func))
> +		return 1;
> +
>  	local_irq_save(flags);
>  	cpu = raw_smp_processor_id();
>  	data = per_cpu_ptr(tr->trace_buffer.data, cpu);

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH] ftrace: add set_graph_notrace filter
  2013-09-30  7:04 ` Namhyung Kim
@ 2013-10-01  3:58   ` Steven Rostedt
  2013-10-10  1:51     ` Namhyung Kim
  0 siblings, 1 reply; 13+ messages in thread
From: Steven Rostedt @ 2013-10-01  3:58 UTC (permalink / raw)
  To: Namhyung Kim; +Cc: Frederic Weisbecker, Ingo Molnar, LKML, Namhyung Kim

On Mon, 2013-09-30 at 16:04 +0900, Namhyung Kim wrote:
> Ping!

Ah I missed this patch. And I'm currently traveling.

I'll start testing it when I get back on Friday, but feel free to ping
me again next Monday.

-- Steve

> 
> On Tue,  3 Sep 2013 14:05:08 +0900, Namhyung Kim wrote:
> > From: Namhyung Kim <namhyung.kim@lge.com>
> >
> > The set_graph_notrace filter is analogous to set_ftrace_notrace and
> > can be used for eliminating uninteresting part of function graph trace
> > output.  It also works with set_graph_function nicely.
> >
> >   # cd /sys/kernel/debug/tracing/
> >   # echo do_page_fault > set_graph_function
> >   # perf ftrace live true
> >    2)               |  do_page_fault() {
> >    2)               |    __do_page_fault() {
> >    2)   0.381 us    |      down_read_trylock();
> >    2)   0.055 us    |      __might_sleep();
> >    2)   0.696 us    |      find_vma();
> >    2)               |      handle_mm_fault() {
> >    2)               |        handle_pte_fault() {
> >    2)               |          __do_fault() {
> >    2)               |            filemap_fault() {
> >    2)               |              find_get_page() {
> >    2)   0.033 us    |                __rcu_read_lock();
> >    2)   0.035 us    |                __rcu_read_unlock();
> >    2)   1.696 us    |              }
> >    2)   0.031 us    |              __might_sleep();
> >    2)   2.831 us    |            }
> >    2)               |            _raw_spin_lock() {
> >    2)   0.046 us    |              add_preempt_count();
> >    2)   0.841 us    |            }
> >    2)   0.033 us    |            page_add_file_rmap();
> >    2)               |            _raw_spin_unlock() {
> >    2)   0.057 us    |              sub_preempt_count();
> >    2)   0.568 us    |            }
> >    2)               |            unlock_page() {
> >    2)   0.084 us    |              page_waitqueue();
> >    2)   0.126 us    |              __wake_up_bit();
> >    2)   1.117 us    |            }
> >    2)   7.729 us    |          }
> >    2)   8.397 us    |        }
> >    2)   8.956 us    |      }
> >    2)   0.085 us    |      up_read();
> >    2) + 12.745 us   |    }
> >    2) + 13.401 us   |  }
> >   ...
> >
> >   # echo handle_mm_fault > set_graph_notrace
> >   # perf ftrace live true
> >    1)               |  do_page_fault() {
> >    1)               |    __do_page_fault() {
> >    1)   0.205 us    |      down_read_trylock();
> >    1)   0.041 us    |      __might_sleep();
> >    1)   0.344 us    |      find_vma();
> >    1)   0.069 us    |      up_read();
> >    1)   4.692 us    |    }
> >    1)   5.311 us    |  }
> >   ...
> >
> > Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> > ---
> >  include/linux/ftrace.h               |   1 +
> >  kernel/trace/ftrace.c                | 118 ++++++++++++++++++++++++++++++++++-
> >  kernel/trace/trace.h                 |  23 +++++++
> >  kernel/trace/trace_functions_graph.c |  21 ++++++-
> >  4 files changed, 159 insertions(+), 4 deletions(-)
> >
> > diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> > index 9f15c0064c50..ec85d48619e1 100644
> > --- a/include/linux/ftrace.h
> > +++ b/include/linux/ftrace.h
> > @@ -721,6 +721,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
> >  extern char __irqentry_text_start[];
> >  extern char __irqentry_text_end[];
> >  
> > +#define FTRACE_NOTRACE_DEPTH 65536
> >  #define FTRACE_RETFUNC_DEPTH 50
> >  #define FTRACE_RETSTACK_ALLOC_SIZE 32
> >  extern int register_ftrace_graph(trace_func_graph_ret_t retfunc,
> > diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> > index a6d098c6df3f..1b1f3409f788 100644
> > --- a/kernel/trace/ftrace.c
> > +++ b/kernel/trace/ftrace.c
> > @@ -3819,6 +3819,43 @@ static const struct seq_operations ftrace_graph_seq_ops = {
> >  	.show = g_show,
> >  };
> >  
> > +int ftrace_graph_notrace_count;
> > +int ftrace_graph_notrace_enabled;
> > +unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
> > +
> > +static void *
> > +__n_next(struct seq_file *m, loff_t *pos)
> > +{
> > +	if (*pos >= ftrace_graph_notrace_count)
> > +		return NULL;
> > +	return &ftrace_graph_notrace_funcs[*pos];
> > +}
> > +
> > +static void *
> > +n_next(struct seq_file *m, void *v, loff_t *pos)
> > +{
> > +	(*pos)++;
> > +	return __n_next(m, pos);
> > +}
> > +
> > +static void *n_start(struct seq_file *m, loff_t *pos)
> > +{
> > +	mutex_lock(&graph_lock);
> > +
> > +	/* Nothing, tell g_show to print all functions are enabled */
> > +	if (!ftrace_graph_notrace_enabled && !*pos)
> > +		return (void *)1;
> > +
> > +	return __n_next(m, pos);
> > +}
> > +
> > +static const struct seq_operations ftrace_graph_notrace_seq_ops = {
> > +	.start = n_start,
> > +	.next = n_next,
> > +	.stop = g_stop,
> > +	.show = g_show,
> > +};
> > +
> >  static int
> >  ftrace_graph_open(struct inode *inode, struct file *file)
> >  {
> > @@ -3843,6 +3880,30 @@ ftrace_graph_open(struct inode *inode, struct file *file)
> >  }
> >  
> >  static int
> > +ftrace_graph_notrace_open(struct inode *inode, struct file *file)
> > +{
> > +	int ret = 0;
> > +
> > +	if (unlikely(ftrace_disabled))
> > +		return -ENODEV;
> > +
> > +	mutex_lock(&graph_lock);
> > +	if ((file->f_mode & FMODE_WRITE) &&
> > +	    (file->f_flags & O_TRUNC)) {
> > +		ftrace_graph_notrace_enabled = 0;
> > +		ftrace_graph_notrace_count = 0;
> > +		memset(ftrace_graph_notrace_funcs, 0,
> > +		       sizeof(ftrace_graph_notrace_funcs));
> > +	}
> > +	mutex_unlock(&graph_lock);
> > +
> > +	if (file->f_mode & FMODE_READ)
> > +		ret = seq_open(file, &ftrace_graph_notrace_seq_ops);
> > +
> > +	return ret;
> > +}
> > +
> > +static int
> >  ftrace_graph_release(struct inode *inode, struct file *file)
> >  {
> >  	if (file->f_mode & FMODE_READ)
> > @@ -3910,8 +3971,6 @@ out:
> >  	if (fail)
> >  		return -EINVAL;
> >  
> > -	ftrace_graph_filter_enabled = !!(*idx);
> > -
> >  	return 0;
> >  }
> >  
> > @@ -3946,6 +4005,50 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
> >  
> >  	ret = read;
> >  
> > +	ftrace_graph_filter_enabled = ftrace_graph_count > 0;
> > +
> > +out_free:
> > +	trace_parser_put(&parser);
> > +out_unlock:
> > +	mutex_unlock(&graph_lock);
> > +
> > +	return ret;
> > +}
> > +
> > +static ssize_t
> > +ftrace_graph_notrace_write(struct file *file, const char __user *ubuf,
> > +			   size_t cnt, loff_t *ppos)
> > +{
> > +	struct trace_parser parser;
> > +	ssize_t read, ret;
> > +
> > +	if (!cnt)
> > +		return 0;
> > +
> > +	mutex_lock(&graph_lock);
> > +
> > +	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
> > +		ret = -ENOMEM;
> > +		goto out_unlock;
> > +	}
> > +
> > +	read = trace_get_user(&parser, ubuf, cnt, ppos);
> > +
> > +	if (read >= 0 && trace_parser_loaded((&parser))) {
> > +		parser.buffer[parser.idx] = 0;
> > +
> > +		/* we allow only one expression at a time */
> > +		ret = ftrace_set_func(ftrace_graph_notrace_funcs,
> > +				      &ftrace_graph_notrace_count,
> > +				      parser.buffer);
> > +		if (ret)
> > +			goto out_free;
> > +	}
> > +
> > +	ret = read;
> > +
> > +	ftrace_graph_notrace_enabled = ftrace_graph_notrace_count > 0;
> > +
> >  out_free:
> >  	trace_parser_put(&parser);
> >  out_unlock:
> > @@ -3961,6 +4064,14 @@ static const struct file_operations ftrace_graph_fops = {
> >  	.llseek		= ftrace_filter_lseek,
> >  	.release	= ftrace_graph_release,
> >  };
> > +
> > +static const struct file_operations ftrace_graph_notrace_fops = {
> > +	.open		= ftrace_graph_notrace_open,
> > +	.read		= seq_read,
> > +	.write		= ftrace_graph_notrace_write,
> > +	.llseek		= ftrace_filter_lseek,
> > +	.release	= ftrace_graph_release,
> > +};
> >  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> >  
> >  static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
> > @@ -3982,6 +4093,9 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
> >  	trace_create_file("set_graph_function", 0444, d_tracer,
> >  				    NULL,
> >  				    &ftrace_graph_fops);
> > +	trace_create_file("set_graph_notrace", 0444, d_tracer,
> > +				    NULL,
> > +				    &ftrace_graph_notrace_fops);
> >  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
> >  
> >  	return 0;
> > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> > index fe39acd4c1aa..797f6f7f3f56 100644
> > --- a/kernel/trace/trace.h
> > +++ b/kernel/trace/trace.h
> > @@ -732,6 +732,9 @@ extern void __trace_graph_return(struct trace_array *tr,
> >  extern int ftrace_graph_filter_enabled;
> >  extern int ftrace_graph_count;
> >  extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
> > +extern int ftrace_graph_notrace_enabled;
> > +extern int ftrace_graph_notrace_count;
> > +extern unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS];
> >  
> >  static inline int ftrace_graph_addr(unsigned long addr)
> >  {
> > @@ -757,11 +760,31 @@ static inline int ftrace_graph_addr(unsigned long addr)
> >  
> >  	return 0;
> >  }
> > +
> > +static inline int ftrace_graph_notrace_addr(unsigned long addr)
> > +{
> > +	int i;
> > +
> > +	if (!ftrace_graph_notrace_enabled)
> > +		return 0;
> > +
> > +	for (i = 0; i < ftrace_graph_notrace_count; i++) {
> > +		if (addr == ftrace_graph_notrace_funcs[i])
> > +			return 1;
> > +	}
> > +
> > +	return 0;
> > +}
> >  #else
> >  static inline int ftrace_graph_addr(unsigned long addr)
> >  {
> >  	return 1;
> >  }
> > +
> > +static inline int ftrace_graph_notrace_addr(unsigned long addr)
> > +{
> > +	return 0;
> > +}
> >  #endif /* CONFIG_DYNAMIC_FTRACE */
> >  #else /* CONFIG_FUNCTION_GRAPH_TRACER */
> >  static inline enum print_line_t
> > diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
> > index b5c09242683d..3ba240daa5e0 100644
> > --- a/kernel/trace/trace_functions_graph.c
> > +++ b/kernel/trace/trace_functions_graph.c
> > @@ -114,16 +114,22 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
> >  		return -EBUSY;
> >  	}
> >  
> > +	/* The function was filtered out */
> > +	if (current->curr_ret_stack < -1)
> > +		return -EBUSY;
> > +
> >  	calltime = trace_clock_local();
> >  
> >  	index = ++current->curr_ret_stack;
> > +	if (ftrace_graph_notrace_addr(func))
> > +		current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;
> >  	barrier();
> >  	current->ret_stack[index].ret = ret;
> >  	current->ret_stack[index].func = func;
> >  	current->ret_stack[index].calltime = calltime;
> >  	current->ret_stack[index].subtime = 0;
> >  	current->ret_stack[index].fp = frame_pointer;
> > -	*depth = index;
> > +	*depth = current->curr_ret_stack;
> >  
> >  	return 0;
> >  }
> > @@ -137,6 +143,9 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
> >  
> >  	index = current->curr_ret_stack;
> >  
> > +	if (index < 0)
> > +		index += FTRACE_NOTRACE_DEPTH;
> > +
> >  	if (unlikely(index < 0)) {
> >  		ftrace_graph_stop();
> >  		WARN_ON(1);
> > @@ -193,6 +202,10 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
> >  	trace.rettime = trace_clock_local();
> >  	barrier();
> >  	current->curr_ret_stack--;
> > +	if (current->curr_ret_stack < -1) {
> > +		current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
> > +		return ret;
> > +	}
> >  
> >  	/*
> >  	 * The trace should run after decrementing the ret counter
> > @@ -259,10 +272,14 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
> >  
> >  	/* trace it when it is-nested-in or is a function enabled. */
> >  	if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
> > -	     ftrace_graph_ignore_irqs()) ||
> > +	     ftrace_graph_ignore_irqs()) || (trace->depth < 0) ||
> >  	    (max_depth && trace->depth >= max_depth))
> >  		return 0;
> >  
> > +	/* need to reserve a ret_stack entry to recover the depth */
> > +	if (ftrace_graph_notrace_addr(trace->func))
> > +		return 1;
> > +
> >  	local_irq_save(flags);
> >  	cpu = raw_smp_processor_id();
> >  	data = per_cpu_ptr(tr->trace_buffer.data, cpu);



^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH] ftrace: add set_graph_notrace filter
  2013-10-01  3:58   ` Steven Rostedt
@ 2013-10-10  1:51     ` Namhyung Kim
  2013-10-10  2:11       ` Steven Rostedt
  0 siblings, 1 reply; 13+ messages in thread
From: Namhyung Kim @ 2013-10-10  1:51 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Frederic Weisbecker, Ingo Molnar, LKML, Namhyung Kim

Hi Steve,

On Mon, 30 Sep 2013 23:58:46 -0400, Steven Rostedt wrote:
> On Mon, 2013-09-30 at 16:04 +0900, Namhyung Kim wrote:
>> Ping!
>
> Ah I missed this patch. And I'm currently traveling.
>
> I'll start testing it when I get back on Friday, but feel free to ping
> me again next Monday.

Would you take a look at it this time? :)

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH] ftrace: add set_graph_notrace filter
  2013-10-10  1:51     ` Namhyung Kim
@ 2013-10-10  2:11       ` Steven Rostedt
  0 siblings, 0 replies; 13+ messages in thread
From: Steven Rostedt @ 2013-10-10  2:11 UTC (permalink / raw)
  To: Namhyung Kim; +Cc: Frederic Weisbecker, Ingo Molnar, LKML, Namhyung Kim

On Thu, 10 Oct 2013 10:51:54 +0900
Namhyung Kim <namhyung@kernel.org> wrote:

> Hi Steve,
> 
> On Mon, 30 Sep 2013 23:58:46 -0400, Steven Rostedt wrote:
> > On Mon, 2013-09-30 at 16:04 +0900, Namhyung Kim wrote:
> >> Ping!
> >
> > Ah I missed this patch. And I'm currently traveling.
> >
> > I'll start testing it when I get back on Friday, but feel free to ping
> > me again next Monday.
> 
> Would you take a look at it this time? :)

Yep! I just finished a bunch of stuff, and now have time to look into
patches. I'm working backward (although I should be working forward).
I'll definitely get to this tomorrow.

-- Steve

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH] ftrace: add set_graph_notrace filter
  2013-09-03  5:05 [RFC/PATCH] ftrace: add set_graph_notrace filter Namhyung Kim
  2013-09-30  7:04 ` Namhyung Kim
@ 2013-10-11  4:17 ` Steven Rostedt
  2013-10-11  7:21   ` Heiko Carstens
  2013-10-11  8:19   ` Namhyung Kim
  1 sibling, 2 replies; 13+ messages in thread
From: Steven Rostedt @ 2013-10-11  4:17 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Frederic Weisbecker, Ingo Molnar, LKML, Namhyung Kim,
	Martin Schwidefsky, Heiko Carstens

Sorry for the very late reply, finally got some time to look at other
people's code.

On Tue,  3 Sep 2013 14:05:08 +0900
Namhyung Kim <namhyung@kernel.org> wrote:

> From: Namhyung Kim <namhyung.kim@lge.com>
> 
> The set_graph_notrace filter is analogous to set_ftrace_notrace and
> can be used for eliminating uninteresting part of function graph trace
> output.  It also works with set_graph_function nicely.
> 
>   # cd /sys/kernel/debug/tracing/
>   # echo do_page_fault > set_graph_function
>   # perf ftrace live true
>    2)               |  do_page_fault() {
>    2)               |    __do_page_fault() {
>    2)   0.381 us    |      down_read_trylock();
>    2)   0.055 us    |      __might_sleep();
>    2)   0.696 us    |      find_vma();
>    2)               |      handle_mm_fault() {
>    2)               |        handle_pte_fault() {
>    2)               |          __do_fault() {
>    2)               |            filemap_fault() {
>    2)               |              find_get_page() {
>    2)   0.033 us    |                __rcu_read_lock();
>    2)   0.035 us    |                __rcu_read_unlock();
>    2)   1.696 us    |              }
>    2)   0.031 us    |              __might_sleep();
>    2)   2.831 us    |            }
>    2)               |            _raw_spin_lock() {
>    2)   0.046 us    |              add_preempt_count();
>    2)   0.841 us    |            }
>    2)   0.033 us    |            page_add_file_rmap();
>    2)               |            _raw_spin_unlock() {
>    2)   0.057 us    |              sub_preempt_count();
>    2)   0.568 us    |            }
>    2)               |            unlock_page() {
>    2)   0.084 us    |              page_waitqueue();
>    2)   0.126 us    |              __wake_up_bit();
>    2)   1.117 us    |            }
>    2)   7.729 us    |          }
>    2)   8.397 us    |        }
>    2)   8.956 us    |      }
>    2)   0.085 us    |      up_read();
>    2) + 12.745 us   |    }
>    2) + 13.401 us   |  }
>   ...
> 
>   # echo handle_mm_fault > set_graph_notrace
>   # perf ftrace live true
>    1)               |  do_page_fault() {
>    1)               |    __do_page_fault() {
>    1)   0.205 us    |      down_read_trylock();
>    1)   0.041 us    |      __might_sleep();
>    1)   0.344 us    |      find_vma();
>    1)   0.069 us    |      up_read();
>    1)   4.692 us    |    }
>    1)   5.311 us    |  }
>   ...

Concept looks good.

> 
> Signed-off-by: Namhyung Kim <namhyung@kernel.org>
> ---
>  include/linux/ftrace.h               |   1 +
>  kernel/trace/ftrace.c                | 118 ++++++++++++++++++++++++++++++++++-
>  kernel/trace/trace.h                 |  23 +++++++
>  kernel/trace/trace_functions_graph.c |  21 ++++++-
>  4 files changed, 159 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index 9f15c0064c50..ec85d48619e1 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -721,6 +721,7 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
>  extern char __irqentry_text_start[];
>  extern char __irqentry_text_end[];
>  
> +#define FTRACE_NOTRACE_DEPTH 65536
>  #define FTRACE_RETFUNC_DEPTH 50
>  #define FTRACE_RETSTACK_ALLOC_SIZE 32
>  extern int register_ftrace_graph(trace_func_graph_ret_t retfunc,
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index a6d098c6df3f..1b1f3409f788 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -3819,6 +3819,43 @@ static const struct seq_operations ftrace_graph_seq_ops = {
>  	.show = g_show,
>  };
>  
> +int ftrace_graph_notrace_count;
> +int ftrace_graph_notrace_enabled;
> +unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS] __read_mostly;
> +
> +static void *
> +__n_next(struct seq_file *m, loff_t *pos)
> +{
> +	if (*pos >= ftrace_graph_notrace_count)
> +		return NULL;
> +	return &ftrace_graph_notrace_funcs[*pos];
> +}
> +
> +static void *
> +n_next(struct seq_file *m, void *v, loff_t *pos)
> +{
> +	(*pos)++;
> +	return __n_next(m, pos);
> +}
> +
> +static void *n_start(struct seq_file *m, loff_t *pos)
> +{
> +	mutex_lock(&graph_lock);
> +
> +	/* Nothing, tell g_show to print all functions are enabled */
> +	if (!ftrace_graph_notrace_enabled && !*pos)
> +		return (void *)1;
> +
> +	return __n_next(m, pos);
> +}
> +
> +static const struct seq_operations ftrace_graph_notrace_seq_ops = {
> +	.start = n_start,
> +	.next = n_next,
> +	.stop = g_stop,
> +	.show = g_show,
> +};
> +
>  static int
>  ftrace_graph_open(struct inode *inode, struct file *file)
>  {
> @@ -3843,6 +3880,30 @@ ftrace_graph_open(struct inode *inode, struct file *file)
>  }
>  
>  static int
> +ftrace_graph_notrace_open(struct inode *inode, struct file *file)
> +{
> +	int ret = 0;
> +
> +	if (unlikely(ftrace_disabled))
> +		return -ENODEV;
> +
> +	mutex_lock(&graph_lock);
> +	if ((file->f_mode & FMODE_WRITE) &&
> +	    (file->f_flags & O_TRUNC)) {
> +		ftrace_graph_notrace_enabled = 0;
> +		ftrace_graph_notrace_count = 0;
> +		memset(ftrace_graph_notrace_funcs, 0,
> +		       sizeof(ftrace_graph_notrace_funcs));
> +	}
> +	mutex_unlock(&graph_lock);
> +
> +	if (file->f_mode & FMODE_READ)
> +		ret = seq_open(file, &ftrace_graph_notrace_seq_ops);
> +
> +	return ret;
> +}
> +
> +static int
>  ftrace_graph_release(struct inode *inode, struct file *file)
>  {
>  	if (file->f_mode & FMODE_READ)
> @@ -3910,8 +3971,6 @@ out:
>  	if (fail)
>  		return -EINVAL;
>  
> -	ftrace_graph_filter_enabled = !!(*idx);
> -
>  	return 0;
>  }
>  
> @@ -3946,6 +4005,50 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
>  
>  	ret = read;
>  
> +	ftrace_graph_filter_enabled = ftrace_graph_count > 0;

You modified the ftrace_graph_filter code without mentioning this in
the change log. Actually, this should be a separate patch (clean up)
and not just included with the ftrace_graph_notrace code, as it is a
separate change.

> +
> +out_free:
> +	trace_parser_put(&parser);
> +out_unlock:
> +	mutex_unlock(&graph_lock);
> +
> +	return ret;
> +}
> +
> +static ssize_t
> +ftrace_graph_notrace_write(struct file *file, const char __user *ubuf,
> +			   size_t cnt, loff_t *ppos)
> +{
> +	struct trace_parser parser;
> +	ssize_t read, ret;
> +
> +	if (!cnt)
> +		return 0;
> +
> +	mutex_lock(&graph_lock);
> +
> +	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
> +		ret = -ENOMEM;
> +		goto out_unlock;
> +	}
> +
> +	read = trace_get_user(&parser, ubuf, cnt, ppos);
> +
> +	if (read >= 0 && trace_parser_loaded((&parser))) {
> +		parser.buffer[parser.idx] = 0;
> +
> +		/* we allow only one expression at a time */
> +		ret = ftrace_set_func(ftrace_graph_notrace_funcs,
> +				      &ftrace_graph_notrace_count,
> +				      parser.buffer);
> +		if (ret)
> +			goto out_free;
> +	}
> +
> +	ret = read;
> +
> +	ftrace_graph_notrace_enabled = ftrace_graph_notrace_count > 0;
> +

ftrace_graph_notrace_write() and ftrace_graph_write() are basically
identical. Please make a helper function that does the work and passes
in the different variables. That is:

static ssize_t
ftrace_graph_notrace_write(struct file *file, const char __user *ubuf,
		size_t cnt, loff_t *ppos)
{
	return __ftrace_graph_write(file, ubuf, cnt, ppos,
				ftrace_graph_notrace_funcs,
				&ftrace_graph_notrace_count)
}

and do the same with ftrace_graph_write(). This way if we find a bug,
or update the code, it gets done to both and not just one.


>  out_free:
>  	trace_parser_put(&parser);
>  out_unlock:
> @@ -3961,6 +4064,14 @@ static const struct file_operations ftrace_graph_fops = {
>  	.llseek		= ftrace_filter_lseek,
>  	.release	= ftrace_graph_release,
>  };
> +
> +static const struct file_operations ftrace_graph_notrace_fops = {
> +	.open		= ftrace_graph_notrace_open,
> +	.read		= seq_read,
> +	.write		= ftrace_graph_notrace_write,
> +	.llseek		= ftrace_filter_lseek,
> +	.release	= ftrace_graph_release,
> +};
>  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
>  
>  static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
> @@ -3982,6 +4093,9 @@ static __init int ftrace_init_dyn_debugfs(struct dentry *d_tracer)
>  	trace_create_file("set_graph_function", 0444, d_tracer,
>  				    NULL,
>  				    &ftrace_graph_fops);
> +	trace_create_file("set_graph_notrace", 0444, d_tracer,
> +				    NULL,
> +				    &ftrace_graph_notrace_fops);
>  #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
>  
>  	return 0;
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index fe39acd4c1aa..797f6f7f3f56 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -732,6 +732,9 @@ extern void __trace_graph_return(struct trace_array *tr,
>  extern int ftrace_graph_filter_enabled;
>  extern int ftrace_graph_count;
>  extern unsigned long ftrace_graph_funcs[FTRACE_GRAPH_MAX_FUNCS];
> +extern int ftrace_graph_notrace_enabled;
> +extern int ftrace_graph_notrace_count;
> +extern unsigned long ftrace_graph_notrace_funcs[FTRACE_GRAPH_MAX_FUNCS];
>  
>  static inline int ftrace_graph_addr(unsigned long addr)
>  {
> @@ -757,11 +760,31 @@ static inline int ftrace_graph_addr(unsigned long addr)
>  
>  	return 0;
>  }
> +
> +static inline int ftrace_graph_notrace_addr(unsigned long addr)
> +{
> +	int i;
> +
> +	if (!ftrace_graph_notrace_enabled)
> +		return 0;
> +
> +	for (i = 0; i < ftrace_graph_notrace_count; i++) {
> +		if (addr == ftrace_graph_notrace_funcs[i])
> +			return 1;
> +	}
> +
> +	return 0;
> +}
>  #else
>  static inline int ftrace_graph_addr(unsigned long addr)
>  {
>  	return 1;
>  }
> +
> +static inline int ftrace_graph_notrace_addr(unsigned long addr)
> +{
> +	return 0;
> +}
>  #endif /* CONFIG_DYNAMIC_FTRACE */
>  #else /* CONFIG_FUNCTION_GRAPH_TRACER */
>  static inline enum print_line_t
> diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
> index b5c09242683d..3ba240daa5e0 100644
> --- a/kernel/trace/trace_functions_graph.c
> +++ b/kernel/trace/trace_functions_graph.c
> @@ -114,16 +114,22 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
>  		return -EBUSY;
>  	}
>  
> +	/* The function was filtered out */
> +	if (current->curr_ret_stack < -1)
> +		return -EBUSY;

I would be a bit more specific in your comment. Explain that
curr_ret_stack is negative when we hit a function in the
set_graph_notrace file.

> +
>  	calltime = trace_clock_local();
>  
>  	index = ++current->curr_ret_stack;
> +	if (ftrace_graph_notrace_addr(func))
> +		current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;

I really hate this double call to ftrace_graph_notrace_addr(). The
first one in trace_graph_entry(), and then here too.

Isn't there a way we could pass the state? Hmm, I think we could use
depth to do that. As depth is a pointer to trace.depth and not used
before then. We could make it negative and then check that.

/me looks at other archs.

Darn it, s390 calls ftrace_push_return_trace() before
ftrace_graph_entry(). They got fancy, as they must have noticed that
trace.depth is set by ftrace_push_return_trace() and they just figured
to let the ftrace_push_return_trace() do the work.

Hmm, we could add a config to do the check on one side or the other
depending on how the arch handles it.

It needs to be well commented though.



>  	barrier();
>  	current->ret_stack[index].ret = ret;
>  	current->ret_stack[index].func = func;
>  	current->ret_stack[index].calltime = calltime;
>  	current->ret_stack[index].subtime = 0;
>  	current->ret_stack[index].fp = frame_pointer;
> -	*depth = index;
> +	*depth = current->curr_ret_stack;
>  
>  	return 0;
>  }
> @@ -137,6 +143,9 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
>  
>  	index = current->curr_ret_stack;
>  
> +	if (index < 0)
> +		index += FTRACE_NOTRACE_DEPTH;

Speaking of comments. This needs to be commented, and not only that,
perhaps this is where another ftrace_graph_notrace_addr() belongs.

	bool check = false;

	if (index < 0) {
		index += FTRACE_NOTRACE_DEPTH;
		check = true;
	}

	[...]

	if (check && !ftrace_graph_notrace_addr(trace->func))
		[ do the same bug as index < 0 ]

		

Otherwise if the stack does get corrupted, this will just think we are
in a notrace and fail someplace else, making it much harder to debug
what went wrong.

Hmm, this may be racy, as the user could change the notrace file and
this could happen after that, causing a false positive.

Well, we could do other tricks, like adding a flag in
current->ret_stack[index].func (LSB set).

But for now, just comment this better. We can come up with a proper
check later. (/me is rambling as I've been on and off reviewing this
patch all day, and it's now past my bedtime).


> +
>  	if (unlikely(index < 0)) {
>  		ftrace_graph_stop();
>  		WARN_ON(1);
> @@ -193,6 +202,10 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
>  	trace.rettime = trace_clock_local();
>  	barrier();
>  	current->curr_ret_stack--;
> +	if (current->curr_ret_stack < -1) {
> +		current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
> +		return ret;
> +	}
>  
>  	/*
>  	 * The trace should run after decrementing the ret counter
> @@ -259,10 +272,14 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
>  
>  	/* trace it when it is-nested-in or is a function enabled. */
>  	if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
> -	     ftrace_graph_ignore_irqs()) ||
> +	     ftrace_graph_ignore_irqs()) || (trace->depth < 0) ||
>  	    (max_depth && trace->depth >= max_depth))
>  		return 0;
>  
> +	/* need to reserve a ret_stack entry to recover the depth */
> +	if (ftrace_graph_notrace_addr(trace->func))
> +		return 1;
> +

Also, I understand what you are doing here, with making curr_ret_stack
negative to denote we are in a state to not do tracing. But it's more
of a hack (not a bad one), and really needs to be documented somewhere.
That is, the design should be in the comments where it's used, and 5
years from now, nobody will understand how the notrace works without
spending days trying to figure it out. Let's be nice to that poor soul,
and write up what is going on so they don't need to pray to the holy
tuna hoping to get a fish of enlightenment on how turning
curr_ret_stack negative magically makes the function graph tracing not
trace down functions, and magically starts tracing again when it comes
back up.

-- Steve

>  	local_irq_save(flags);
>  	cpu = raw_smp_processor_id();
>  	data = per_cpu_ptr(tr->trace_buffer.data, cpu);


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH] ftrace: add set_graph_notrace filter
  2013-10-11  4:17 ` Steven Rostedt
@ 2013-10-11  7:21   ` Heiko Carstens
  2013-10-11  8:34     ` Namhyung Kim
  2013-10-11 12:22     ` Steven Rostedt
  2013-10-11  8:19   ` Namhyung Kim
  1 sibling, 2 replies; 13+ messages in thread
From: Heiko Carstens @ 2013-10-11  7:21 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Namhyung Kim, Frederic Weisbecker, Ingo Molnar, LKML,
	Namhyung Kim, Martin Schwidefsky

On Fri, Oct 11, 2013 at 12:17:17AM -0400, Steven Rostedt wrote:
> Isn't there a way we could pass the state? Hmm, I think we could use
> depth to do that. As depth is a pointer to trace.depth and not used
> before then. We could make it negative and then check that.
> 
> /me looks at other archs.
> 
> Darn it, s390 calls ftrace_push_return_trace() before
> ftrace_graph_entry(). They got fancy, as they must have noticed that
> trace.depth is set by ftrace_push_return_trace() and they just figured
> to let the ftrace_push_return_trace() do the work.
> 
> Hmm, we could add a config to do the check on one side or the other
> depending on how the arch handles it.
> 
> It needs to be well commented though.

I don't remember why I decided to do that more than four years ago, however
on the other hand I really don't think we need a config for this.
So I just committed the patch below... it compiles and doesn't crash
immediately, so must be good.
It will be merged during the next merge window.

From 8c539d10400ee2efe000d324497a0661a2143dbf Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 11 Oct 2013 08:55:57 +0200
Subject: [PATCH] s390/ftrace: prepare_ftrace_return() function call order

Steven Rostedt noted that s390 is the only architecture which calls
ftrace_push_return_trace() before ftrace_graph_entry() and therefore has
the small advantage that trace.depth gets initialized automatically.

However this small advantage isn't worth the difference and possible subtle
breakage that may result from this.
So change s390 to have the same function call order as all other
architectures: first ftrace_graph_entry(), then ftrace_push_return_trace()

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
---
 arch/s390/kernel/ftrace.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 1014ad5..224db03 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -151,14 +151,13 @@ unsigned long __kprobes prepare_ftrace_return(unsigned long parent,
 	if (unlikely(atomic_read(&current->tracing_graph_pause)))
 		goto out;
 	ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
-	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
-		goto out;
 	trace.func = ip;
+	trace.depth = current->curr_ret_stack + 1;
 	/* Only trace if the calling function expects to. */
-	if (!ftrace_graph_entry(&trace)) {
-		current->curr_ret_stack--;
+	if (!ftrace_graph_entry(&trace))
+		goto out;
+	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
 		goto out;
-	}
 	parent = (unsigned long) return_to_handler;
 out:
 	return parent;
-- 
1.8.3.4


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH] ftrace: add set_graph_notrace filter
  2013-10-11  4:17 ` Steven Rostedt
  2013-10-11  7:21   ` Heiko Carstens
@ 2013-10-11  8:19   ` Namhyung Kim
  2013-10-11 12:31     ` Steven Rostedt
  1 sibling, 1 reply; 13+ messages in thread
From: Namhyung Kim @ 2013-10-11  8:19 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Frederic Weisbecker, Ingo Molnar, LKML, Namhyung Kim,
	Martin Schwidefsky, Heiko Carstens

Hi Steve,

On Fri, 11 Oct 2013 00:17:17 -0400, Steven Rostedt wrote:
> Sorry for the very late reply, finally got some time to look at other
> people's code.

Thank you for taking your time to review this carefully. :)

>
> On Tue,  3 Sep 2013 14:05:08 +0900
> Namhyung Kim <namhyung@kernel.org> wrote:
>
>> From: Namhyung Kim <namhyung.kim@lge.com>
>> 
>> The set_graph_notrace filter is analogous to set_ftrace_notrace and
>> can be used for eliminating uninteresting part of function graph trace
>> output.  It also works with set_graph_function nicely.
>> 
>>   # cd /sys/kernel/debug/tracing/
>>   # echo do_page_fault > set_graph_function
>>   # perf ftrace live true
>>    2)               |  do_page_fault() {
>>    2)               |    __do_page_fault() {
>>    2)   0.381 us    |      down_read_trylock();
>>    2)   0.055 us    |      __might_sleep();
>>    2)   0.696 us    |      find_vma();
>>    2)               |      handle_mm_fault() {
>>    2)               |        handle_pte_fault() {
>>    2)               |          __do_fault() {
>>    2)               |            filemap_fault() {
>>    2)               |              find_get_page() {
>>    2)   0.033 us    |                __rcu_read_lock();
>>    2)   0.035 us    |                __rcu_read_unlock();
>>    2)   1.696 us    |              }
>>    2)   0.031 us    |              __might_sleep();
>>    2)   2.831 us    |            }
>>    2)               |            _raw_spin_lock() {
>>    2)   0.046 us    |              add_preempt_count();
>>    2)   0.841 us    |            }
>>    2)   0.033 us    |            page_add_file_rmap();
>>    2)               |            _raw_spin_unlock() {
>>    2)   0.057 us    |              sub_preempt_count();
>>    2)   0.568 us    |            }
>>    2)               |            unlock_page() {
>>    2)   0.084 us    |              page_waitqueue();
>>    2)   0.126 us    |              __wake_up_bit();
>>    2)   1.117 us    |            }
>>    2)   7.729 us    |          }
>>    2)   8.397 us    |        }
>>    2)   8.956 us    |      }
>>    2)   0.085 us    |      up_read();
>>    2) + 12.745 us   |    }
>>    2) + 13.401 us   |  }
>>   ...
>> 
>>   # echo handle_mm_fault > set_graph_notrace
>>   # perf ftrace live true
>>    1)               |  do_page_fault() {
>>    1)               |    __do_page_fault() {
>>    1)   0.205 us    |      down_read_trylock();
>>    1)   0.041 us    |      __might_sleep();
>>    1)   0.344 us    |      find_vma();
>>    1)   0.069 us    |      up_read();
>>    1)   4.692 us    |    }
>>    1)   5.311 us    |  }
>>   ...
>
> Concept looks good.

hehe, thanks.

[SNIP]
>
>> @@ -3910,8 +3971,6 @@ out:
>>  	if (fail)
>>  		return -EINVAL;
>>  
>> -	ftrace_graph_filter_enabled = !!(*idx);
>> -
>>  	return 0;
>>  }
>>  
>> @@ -3946,6 +4005,50 @@ ftrace_graph_write(struct file *file, const char __user *ubuf,
>>  
>>  	ret = read;
>>  
>> +	ftrace_graph_filter_enabled = ftrace_graph_count > 0;
>
> You modified the ftrace_graph_filter code without mentioning this in
> the change log. Actually, this should be a separate patch (clean up)
> and not just included with the ftrace_graph_notrace code, as it is a
> separate change.

Okay, I'll make it a separate patch.  And I think we can get rid of
ftrace_graph_{filter,notrace}_enabled altogether and just use _count
variable instead.

>
>> +
>> +out_free:
>> +	trace_parser_put(&parser);
>> +out_unlock:
>> +	mutex_unlock(&graph_lock);
>> +
>> +	return ret;
>> +}
>> +
>> +static ssize_t
>> +ftrace_graph_notrace_write(struct file *file, const char __user *ubuf,
>> +			   size_t cnt, loff_t *ppos)
>> +{
>> +	struct trace_parser parser;
>> +	ssize_t read, ret;
>> +
>> +	if (!cnt)
>> +		return 0;
>> +
>> +	mutex_lock(&graph_lock);
>> +
>> +	if (trace_parser_get_init(&parser, FTRACE_BUFF_MAX)) {
>> +		ret = -ENOMEM;
>> +		goto out_unlock;
>> +	}
>> +
>> +	read = trace_get_user(&parser, ubuf, cnt, ppos);
>> +
>> +	if (read >= 0 && trace_parser_loaded((&parser))) {
>> +		parser.buffer[parser.idx] = 0;
>> +
>> +		/* we allow only one expression at a time */
>> +		ret = ftrace_set_func(ftrace_graph_notrace_funcs,
>> +				      &ftrace_graph_notrace_count,
>> +				      parser.buffer);
>> +		if (ret)
>> +			goto out_free;
>> +	}
>> +
>> +	ret = read;
>> +
>> +	ftrace_graph_notrace_enabled = ftrace_graph_notrace_count > 0;
>> +
>
> ftrace_graph_notrace_write() and ftrace_graph_write() are basically
> identical. Please make a helper function that does the work and passes
> in the different variables. That is:
>
> static ssize_t
> ftrace_graph_notrace_write(struct file *file, const char __user *ubuf,
> 		size_t cnt, loff_t *ppos)
> {
> 	return __ftrace_graph_write(file, ubuf, cnt, ppos,
> 				ftrace_graph_notrace_funcs,
> 				&ftrace_graph_notrace_count)
> }
>
> and do the same with ftrace_graph_write(). This way if we find a bug,
> or update the code, it gets done to both and not just one.

Agreed, will do that.

[SNIP]
>
>> diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
>> index b5c09242683d..3ba240daa5e0 100644
>> --- a/kernel/trace/trace_functions_graph.c
>> +++ b/kernel/trace/trace_functions_graph.c
>> @@ -114,16 +114,22 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
>>  		return -EBUSY;
>>  	}
>>  
>> +	/* The function was filtered out */
>> +	if (current->curr_ret_stack < -1)
>> +		return -EBUSY;
>
> I would be a bit more specific in your comment. Explain that
> curr_ret_stack is negative when we hit a function in the
> set_graph_notrace file.

How about this:

	/*
	 * curr_ret_stack is initialized to -1 and gets increased in
	 * this function.  So it can be less than -1 only if it was
	 * filtered out via ftrace_graph_notrace_addr() which can be
	 * set from set_graph_notrace file in debugfs by user.
	 */

>
>> +
>>  	calltime = trace_clock_local();
>>  
>>  	index = ++current->curr_ret_stack;
>> +	if (ftrace_graph_notrace_addr(func))
>> +		current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;
>
> I really hate this double call to ftrace_graph_notrace_addr(). The
> first one in trace_graph_entry(), and then here too.
>
> Isn't there a way we could pass the state? Hmm, I think we could use
> depth to do that. As depth is a pointer to trace.depth and not used
> before then. We could make it negative and then check that.
>
> /me looks at other archs.
>
> Darn it, s390 calls ftrace_push_return_trace() before
> ftrace_graph_entry(). They got fancy, as they must have noticed that
> trace.depth is set by ftrace_push_return_trace() and they just figured
> to let the ftrace_push_return_trace() do the work.

Yes, I thought about it before but as you can see other archs go to the
other way around so I just ended up to call it twice.

>
> Hmm, we could add a config to do the check on one side or the other
> depending on how the arch handles it.
>
> It needs to be well commented though.

Hmm.. maybe.  But it'd better if this function call order is fixed
IMHO.  Anyway, I'll add comments.

>
>
>
>>  	barrier();
>>  	current->ret_stack[index].ret = ret;
>>  	current->ret_stack[index].func = func;
>>  	current->ret_stack[index].calltime = calltime;
>>  	current->ret_stack[index].subtime = 0;
>>  	current->ret_stack[index].fp = frame_pointer;
>> -	*depth = index;
>> +	*depth = current->curr_ret_stack;
>>  
>>  	return 0;
>>  }
>> @@ -137,6 +143,9 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
>>  
>>  	index = current->curr_ret_stack;
>>  
>> +	if (index < 0)
>> +		index += FTRACE_NOTRACE_DEPTH;
>
> Speaking of comments. This needs to be commented, and not only that,
> perhaps this is where another ftrace_graph_notrace_addr() belongs.
>
> 	bool check = false;
>
> 	if (index < 0) {
> 		index += FTRACE_NOTRACE_DEPTH;
> 		check = true;
> 	}
>
> 	[...]
>
> 	if (check && !ftrace_graph_notrace_addr(trace->func))
> 		[ do the same bug as index < 0 ]
>
> 		
>
> Otherwise if the stack does get corrupted, this will just think we are
> in a notrace and fail someplace else, making it much harder to debug
> what went wrong.
>
> Hmm, this may be racy, as the user could change the notrace file and
> this could happen after that, causing a false positive.
>
> Well, we could do other tricks, like adding a flag in
> current->ret_stack[index].func (LSB set).
>
> But for now, just comment this better. We can come up with a proper
> check later. (/me is rambling as I've been on and off reviewing this
> patch all day, and it's now past my bedtime).

Sorry for interrupting your bedtime. :)

Hmm.. never thought about the corruption.  And yes, simply checking it
again here might cause a trouble, I guess.  I'll add a comment for this.

	/*
	 * A negative index here means that it's just returned from a
	 * notrace'd function.  Recover index to get an original return
	 * address.  See ftrace_push_return_trace().
	 *
	 * TODO: Need to check whether the stack gets corrupted.
	 */

>
>
>> +
>>  	if (unlikely(index < 0)) {
>>  		ftrace_graph_stop();
>>  		WARN_ON(1);
>> @@ -193,6 +202,10 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
>>  	trace.rettime = trace_clock_local();
>>  	barrier();
>>  	current->curr_ret_stack--;
>> +	if (current->curr_ret_stack < -1) {
>> +		current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
>> +		return ret;
>> +	}
>>  
>>  	/*
>>  	 * The trace should run after decrementing the ret counter
>> @@ -259,10 +272,14 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
>>  
>>  	/* trace it when it is-nested-in or is a function enabled. */
>>  	if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
>> -	     ftrace_graph_ignore_irqs()) ||
>> +	     ftrace_graph_ignore_irqs()) || (trace->depth < 0) ||
>>  	    (max_depth && trace->depth >= max_depth))
>>  		return 0;
>>  
>> +	/* need to reserve a ret_stack entry to recover the depth */
>> +	if (ftrace_graph_notrace_addr(trace->func))
>> +		return 1;
>> +
>
> Also, I understand what you are doing here, with making curr_ret_stack
> negative to denote we are in a state to not do tracing. But it's more
> of a hack (not a bad one), and really needs to be documented somewhere.
> That is, the design should be in the comments where it's used, and 5
> years from now, nobody will understand how the notrace works without
> spending days trying to figure it out. Let's be nice to that poor soul,
> and write up what is going on so they don't need to pray to the holy
> tuna hoping to get a fish of enlightenment on how turning
> curr_ret_stack negative magically makes the function graph tracing not
> trace down functions, and magically starts tracing again when it comes
> back up.

Fully agreed.  How about this:

/*
 * The curr_ret_stack is an index to ftrace return stack of current
 * task.  Its value should be in [0, FTRACE_RETFUNC_DEPTH) when the
 * function graph tracer is used.  To support filtering out specific
 * functions, it makes the index negative by subtracting huge value
 * (FTRACE_NOTRACE_DEPTH) so when it sees a negative index the ftrace
 * will ignore the record.  And the index gets recovered when returning
 * from the filtered function by adding the FTRACE_NOTRACE_DEPTH and
 * then it will continue to record functions normally.
 */

Thanks,
Namhyung

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH] ftrace: add set_graph_notrace filter
  2013-10-11  7:21   ` Heiko Carstens
@ 2013-10-11  8:34     ` Namhyung Kim
  2013-10-11  8:58       ` Heiko Carstens
  2013-10-11 12:22     ` Steven Rostedt
  1 sibling, 1 reply; 13+ messages in thread
From: Namhyung Kim @ 2013-10-11  8:34 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Steven Rostedt, Frederic Weisbecker, Ingo Molnar, LKML,
	Namhyung Kim, Martin Schwidefsky

Hi Heiko,

On Fri, 11 Oct 2013 09:21:56 +0200, Heiko Carstens wrote:
> On Fri, Oct 11, 2013 at 12:17:17AM -0400, Steven Rostedt wrote:
>> Isn't there a way we could pass the state? Hmm, I think we could use
>> depth to do that. As depth is a pointer to trace.depth and not used
>> before then. We could make it negative and then check that.
>> 
>> /me looks at other archs.
>> 
>> Darn it, s390 calls ftrace_push_return_trace() before
>> ftrace_graph_entry(). They got fancy, as they must have noticed that
>> trace.depth is set by ftrace_push_return_trace() and they just figured
>> to let the ftrace_push_return_trace() do the work.
>> 
>> Hmm, we could add a config to do the check on one side or the other
>> depending on how the arch handles it.
>> 
>> It needs to be well commented though.
>
> I don't remember why I decided to do that more than four years ago, however
> on the other hand I really don't think we need a config for this.
> So I just committed the patch below... it compiles and doesn't crash
> immediately, so must be good.
> It will be merged during the next merge window.

Nice!

>
> From 8c539d10400ee2efe000d324497a0661a2143dbf Mon Sep 17 00:00:00 2001
> From: Heiko Carstens <heiko.carstens@de.ibm.com>
> Date: Fri, 11 Oct 2013 08:55:57 +0200
> Subject: [PATCH] s390/ftrace: prepare_ftrace_return() function call order
>
> Steven Rostedt noted that s390 is the only architecture which calls
> ftrace_push_return_trace() before ftrace_graph_entry() and therefore has
> the small advantage that trace.depth gets initialized automatically.

Well, this is not true.  AFAICS arm, x86 and powerpc have the same order, and
other archs have a different (inverted) order.  And it'd be really great if
all archs agreed on a single ordering.

Thanks,
Namhyung

>
> However this small advantage isn't worth the difference and possible subtle
> breakage that may result from this.
> So change s390 to have the same function call order like all other
> architectures: first ftrace_graph_entry(), then ftrace_push_return_trace()
>
> Reported-by: Steven Rostedt <rostedt@goodmis.org>
> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
> ---
>  arch/s390/kernel/ftrace.c | 9 ++++-----
>  1 file changed, 4 insertions(+), 5 deletions(-)
>
> diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
> index 1014ad5..224db03 100644
> --- a/arch/s390/kernel/ftrace.c
> +++ b/arch/s390/kernel/ftrace.c
> @@ -151,14 +151,13 @@ unsigned long __kprobes prepare_ftrace_return(unsigned long parent,
>  	if (unlikely(atomic_read(&current->tracing_graph_pause)))
>  		goto out;
>  	ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
> -	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
> -		goto out;
>  	trace.func = ip;
> +	trace.depth = current->curr_ret_stack + 1;
>  	/* Only trace if the calling function expects to. */
> -	if (!ftrace_graph_entry(&trace)) {
> -		current->curr_ret_stack--;
> +	if (!ftrace_graph_entry(&trace))
> +		goto out;
> +	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
>  		goto out;
> -	}
>  	parent = (unsigned long) return_to_handler;
>  out:
>  	return parent;

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH] ftrace: add set_graph_notrace filter
  2013-10-11  8:34     ` Namhyung Kim
@ 2013-10-11  8:58       ` Heiko Carstens
  0 siblings, 0 replies; 13+ messages in thread
From: Heiko Carstens @ 2013-10-11  8:58 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Steven Rostedt, Frederic Weisbecker, Ingo Molnar, LKML,
	Namhyung Kim, Martin Schwidefsky

On Fri, Oct 11, 2013 at 05:34:19PM +0900, Namhyung Kim wrote:
> Hi Heiko,
> 
> On Fri, 11 Oct 2013 09:21:56 +0200, Heiko Carstens wrote:
> > On Fri, Oct 11, 2013 at 12:17:17AM -0400, Steven Rostedt wrote:
> >> Isn't there a way we could pass the state? Hmm, I think we could use
> >> depth to do that. As depth is a pointer to trace.depth and not used
> >> before then. We could make it negative and then check that.
> >> 
> >> /me looks at other archs.
> >> 
> >> Darn it, s390 calls ftrace_push_return_trace() before
> >> ftrace_graph_entry(). They got fancy, as they must have noticed that
> >> trace.depth is set by ftrace_push_return_trace() and they just figured
> >> to let the ftrace_push_return_trace() do the work.
> >> 
> >> Hmm, we could add a config to do the check on one side or the other
> >> depending on how the arch handles it.
> >> 
> >> It needs to be well commented though.
> >
> > I don't remember why I decided to do that more than four years ago, however
> > on the other hand I really don't think we need a config for this.
> > So I just committed the patch below... it compiles and doesn't crash
> > immediately, so must be good.
> > It will be merged during the next merge window.
> 
> Nice!
> 
> >
> > From 8c539d10400ee2efe000d324497a0661a2143dbf Mon Sep 17 00:00:00 2001
> > From: Heiko Carstens <heiko.carstens@de.ibm.com>
> > Date: Fri, 11 Oct 2013 08:55:57 +0200
> > Subject: [PATCH] s390/ftrace: prepare_ftrace_return() function call order
> >
> > Steven Rostedt noted that s390 is the only architecture which calls
> > ftrace_push_return_trace() before ftrace_graph_entry() and therefore has
> > the small advantage that trace.depth gets initialized automatically.
> 
> Well, this is not true.  AFAICS arm, x86 and powerpc have the same order, and
> other archs have a different (inverted) order.  And it'd be really great if
> all archs agreed on a single ordering.

Ooops. Anyway.. I just realized that we also have an out-of-tree patch
that relies on the now new order on s390. So that patch got a bit smaller
as an added bonus ;)


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH] ftrace: add set_graph_notrace filter
  2013-10-11  7:21   ` Heiko Carstens
  2013-10-11  8:34     ` Namhyung Kim
@ 2013-10-11 12:22     ` Steven Rostedt
  1 sibling, 0 replies; 13+ messages in thread
From: Steven Rostedt @ 2013-10-11 12:22 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Namhyung Kim, Frederic Weisbecker, Ingo Molnar, LKML,
	Namhyung Kim, Martin Schwidefsky

On Fri, 11 Oct 2013 09:21:56 +0200
Heiko Carstens <heiko.carstens@de.ibm.com> wrote:

> 
> Steven Rostedt noted that s390 is the only architecture which calls
> ftrace_push_return_trace() before ftrace_graph_entry() and therefore has

I don't know if s390 is the only arch. I didn't finish reading all the
arch specific code to make sure.

> the small advantage that trace.depth gets initialized automatically.
> 
> However this small advantage isn't worth the difference and possible subtle
> breakage that may result from this.
> So change s390 to have the same function call order like all other
> architectures: first ftrace_graph_entry(), then ftrace_push_return_trace()
> 
> Reported-by: Steven Rostedt <rostedt@goodmis.org>
> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
> ---
>  arch/s390/kernel/ftrace.c | 9 ++++-----
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
> index 1014ad5..224db03 100644
> --- a/arch/s390/kernel/ftrace.c
> +++ b/arch/s390/kernel/ftrace.c
> @@ -151,14 +151,13 @@ unsigned long __kprobes prepare_ftrace_return(unsigned long parent,
>  	if (unlikely(atomic_read(&current->tracing_graph_pause)))
>  		goto out;
>  	ip = (ip & PSW_ADDR_INSN) - MCOUNT_INSN_SIZE;
> -	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
> -		goto out;
>  	trace.func = ip;
> +	trace.depth = current->curr_ret_stack + 1;
>  	/* Only trace if the calling function expects to. */
> -	if (!ftrace_graph_entry(&trace)) {
> -		current->curr_ret_stack--;
> +	if (!ftrace_graph_entry(&trace))
> +		goto out;
> +	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
>  		goto out;
> -	}

Hmm, if this code is so identical among the archs, I may create a
generic inline function to do the work. That way any change to the flow
can be done for all archs in one place.

Thanks!

-- Steve

>  	parent = (unsigned long) return_to_handler;
>  out:
>  	return parent;


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH] ftrace: add set_graph_notrace filter
  2013-10-11  8:19   ` Namhyung Kim
@ 2013-10-11 12:31     ` Steven Rostedt
  2013-10-14  0:59       ` Namhyung Kim
  0 siblings, 1 reply; 13+ messages in thread
From: Steven Rostedt @ 2013-10-11 12:31 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Frederic Weisbecker, Ingo Molnar, LKML, Namhyung Kim,
	Martin Schwidefsky, Heiko Carstens

On Fri, 11 Oct 2013 17:19:46 +0900
Namhyung Kim <namhyung@kernel.org> wrote:

> Hi Steve,
> 
> On Fri, 11 Oct 2013 00:17:17 -0400, Steven Rostedt wrote:
> > Sorry for the very late reply, finally got some time to look at other
> > peoples code.
> 
> Thank you for taking your time to review this carefully. :)

Sorry for it taking so long.



> > I would be a bit more specific in your comment. Explain that
> > curr_ret_stack is negative when we hit a function in the
> > set_graph_notrace file.
> 
> How about this:
> 
> 	/*
> 	 * curr_ret_stack is initialized to -1 and gets increased in
> 	 * this function.  So it can be less than -1 only if it was
> 	 * filtered out via ftrace_graph_notrace_addr() which can be
> 	 * set from set_graph_notrace file in debugfs by user.
> 	 */

Looks good.

> 
> >
> >> +
> >>  	calltime = trace_clock_local();
> >>  
> >>  	index = ++current->curr_ret_stack;
> >> +	if (ftrace_graph_notrace_addr(func))
> >> +		current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;
> >
> > I really hate this double call to ftrace_graph_notrace_addr(). The
> > first one in trace_graph_entry(), and then here too.
> >
> > Isn't there a way we could pass the state? Hmm, I think we could use
> > depth to do that. As depth is a pointer to trace.depth and not used
> > before then. We could make it negative and then check that.
> >
> > /me looks at other archs.
> >
> > Darn it, s390 calls ftrace_push_return_trace() before
> > ftrace_graph_entry(). They got fancy, as they must have noticed that
> > trace.depth is set by ftrace_push_return_trace() and they just figured
> > to let the ftrace_push_return_trace() do the work.
> 
> Yes, I thought about it before but as you can see other archs go to the
> other way around so I just ended up to call it twice.
> 
> >
> > Hmm, we could add a config to do the check on one side or the other
> > depending on how the arch handles it.
> >
> > It needs to be well commented though.
> 
> Hmm.. maybe.  But it'd better if this function call order is fixed
> IMHO.  Anyway, I'll add comments.

Well, as you probably already saw, s390 changed to help us out :-) Is
there other archs you know about. I haven't looked at all the others
yet. s390 was the first one I saw that didn't follow suit.

Anyway, lets keep your double check for now. I'll look at making the
two function calls from arch into one, where we can optimize this a bit
more.

> 
> >
> >
> >
> >>  	barrier();
> >>  	current->ret_stack[index].ret = ret;
> >>  	current->ret_stack[index].func = func;
> >>  	current->ret_stack[index].calltime = calltime;
> >>  	current->ret_stack[index].subtime = 0;
> >>  	current->ret_stack[index].fp = frame_pointer;
> >> -	*depth = index;
> >> +	*depth = current->curr_ret_stack;
> >>  
> >>  	return 0;
> >>  }
> >> @@ -137,6 +143,9 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
> >>  
> >>  	index = current->curr_ret_stack;
> >>  
> >> +	if (index < 0)
> >> +		index += FTRACE_NOTRACE_DEPTH;
> >
> > Speaking of comments. This needs to be commented, and not only that,
> > perhaps this is where another ftrace_graph_notrace_addr() belongs.
> >
> > 	bool check = false;
> >
> > 	if (index < 0) {
> > 		index += FTRACE_NOTRACE_DEPTH;
> > 		check = true;
> > 	}
> >
> > 	[...]
> >
> > 	if (check && !ftrace_graph_notrace_addr(trace->func))
> > 		[ do the same bug as index < 0 ]
> >
> > 		
> >
> > Otherwise if the stack does get corrupted, this will just think we are
> > in a notrace and fail someplace else, making it much harder to debug
> > what went wrong.
> >
> > Hmm, this may be racy, as the user could change the notrace file and
> > this could happen after that, causing a false positive.
> >
> > Well, we could do other tricks, like adding a flag in
> > current->ret_stack[index].func (LSB set).
> >
> > But for now, just comment this better. We can come up with a proper
> > check later. (/me is rambling as I've been on and off reviewing this
> > patch all day, and it's now past my bedtime).
> 
> Sorry for interrupting your bedtime. :)

hehe, I hit an unrelated bug with acpi on 3.12-rc4 when testing your
patches, and went to figure out why (and reported it). Otherwise, I
would have been wide awake when reviewing all of this.


> 
> Hmm.. never thought about the corruption.  And yes, simply checking it
> again here might cause a trouble, I guess.  I'll add a comment for this.
> 
> 	/*
> 	 * A negative index here means that it's just returned from a
> 	 * notrace'd function.  Recover index to get an original return
> 	 * address.  See ftrace_push_return_trace().
> 	 *
> 	 * TODO: Need to check whether the stack gets corrupted.
> 	 */
> 
> >
> >
> >> +
> >>  	if (unlikely(index < 0)) {
> >>  		ftrace_graph_stop();
> >>  		WARN_ON(1);
> >> @@ -193,6 +202,10 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
> >>  	trace.rettime = trace_clock_local();
> >>  	barrier();
> >>  	current->curr_ret_stack--;
> >> +	if (current->curr_ret_stack < -1) {
> >> +		current->curr_ret_stack += FTRACE_NOTRACE_DEPTH;
> >> +		return ret;
> >> +	}
> >>  
> >>  	/*
> >>  	 * The trace should run after decrementing the ret counter
> >> @@ -259,10 +272,14 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
> >>  
> >>  	/* trace it when it is-nested-in or is a function enabled. */
> >>  	if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
> >> -	     ftrace_graph_ignore_irqs()) ||
> >> +	     ftrace_graph_ignore_irqs()) || (trace->depth < 0) ||
> >>  	    (max_depth && trace->depth >= max_depth))
> >>  		return 0;
> >>  
> >> +	/* need to reserve a ret_stack entry to recover the depth */
> >> +	if (ftrace_graph_notrace_addr(trace->func))
> >> +		return 1;
> >> +
> >
> > Also, I understand what you are doing here, with making curr_ret_stack
> > negative to denote we are in a state to not do tracing. But it's more
> > of a hack (not a bad one), and really needs to be documented somewhere.
> > That is, the design should be in the comments where it's used, and 5
> > years from now, nobody will understand how the notrace works without
> > spending days trying to figure it out. Let's be nice to that poor soul,
> > and write up what is going on so they don't need to pray to the holy
> > tuna hoping to get a fish of enlightenment on how turning
> > curr_ret_stack negative magically makes the function graph tracing not
> > trace down functions, and magically starts tracing again when it comes
> > back up.
> 
> Fully agreed.  How about this:
> 
> /*
>  * The curr_ret_stack is an index to ftrace return stack of current
>  * task.  Its value should be in [0, FTRACE_RETFUNC_DEPTH) when the
>  * function graph tracer is used.  To support filtering out specific
>  * functions, it makes the index negative by subtracting huge value

                                                s/huge/the max/

>  * (FTRACE_NOTRACE_DEPTH) so when it sees a negative index the ftrace
>  * will ignore the record.  And the index gets recovered when returning
>  * from the filtered function by adding the FTRACE_NOTRACE_DEPTH and
>  * then it will continue to record functions normally.
>  */

Sounds good.

Thanks!

-- Steve

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH] ftrace: add set_graph_notrace filter
  2013-10-11 12:31     ` Steven Rostedt
@ 2013-10-14  0:59       ` Namhyung Kim
  0 siblings, 0 replies; 13+ messages in thread
From: Namhyung Kim @ 2013-10-14  0:59 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Frederic Weisbecker, Ingo Molnar, LKML, Namhyung Kim,
	Martin Schwidefsky, Heiko Carstens

Hi,

On Fri, 11 Oct 2013 08:31:45 -0400, Steven Rostedt wrote:
> On Fri, 11 Oct 2013 17:19:46 +0900
> Namhyung Kim <namhyung@kernel.org> wrote:
>
>> Hi Steve,
>> 
>> On Fri, 11 Oct 2013 00:17:17 -0400, Steven Rostedt wrote:
>> > Sorry for the very late reply, finally got some time to look at other
>> > peoples code.
>> 
>> Thank you for taking your time to review this carefully. :)
>
> Sorry for it taking so long.

Nevermind. :)

>
>> > I would be a bit more specific in your comment. Explain that
>> > curr_ret_stack is negative when we hit a function in the
>> > set_graph_notrace file.
>> 
>> How about this:
>> 
>> 	/*
>> 	 * curr_ret_stack is initialized to -1 and gets increased in
>> 	 * this function.  So it can be less than -1 only if it was
>> 	 * filtered out via ftrace_graph_notrace_addr() which can be
>> 	 * set from set_graph_notrace file in debugfs by user.
>> 	 */
>
> Looks good.
>
>> 
>> >
>> >> +
>> >>  	calltime = trace_clock_local();
>> >>  
>> >>  	index = ++current->curr_ret_stack;
>> >> +	if (ftrace_graph_notrace_addr(func))
>> >> +		current->curr_ret_stack -= FTRACE_NOTRACE_DEPTH;
>> >
>> > I really hate this double call to ftrace_graph_notrace_addr(). The
>> > first one in trace_graph_entry(), and then here too.
>> >
>> > Isn't there a way we could pass the state? Hmm, I think we could use
>> > depth to do that. As depth is a pointer to trace.depth and not used
>> > before then. We could make it negative and then check that.
>> >
>> > /me looks at other archs.
>> >
>> > Darn it, s390 calls ftrace_push_return_trace() before
>> > ftrace_graph_entry(). They got fancy, as they must have noticed that
>> > trace.depth is set by ftrace_push_return_trace() and they just figured
>> > to let the ftrace_push_return_trace() do the work.
>> 
>> Yes, I thought about it before but as you can see other archs go to the
>> other way around so I just ended up to call it twice.
>> 
>> >
>> > Hmm, we could add a config to do the check on one side or the other
>> > depending on how the arch handles it.
>> >
>> > It needs to be well commented though.
>> 
>> Hmm.. maybe.  But it'd better if this function call order is fixed
>> IMHO.  Anyway, I'll add comments.
>
> Well, as you probably already saw, s390 changed to help us out :-) Is
> there other archs you know about. I haven't looked at all the others
> yet. s390 was the first one I saw that didn't follow suit.

As you can see that most other archs don't follow the ordering.

>
> Anyway, lets keep your double check for now. I'll look at making the
> two function calls from arch into one, where we can optimize this a bit
> more.

Okay.

>
[SNIP]

>> >> @@ -259,10 +272,14 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
>> >>  
>> >>  	/* trace it when it is-nested-in or is a function enabled. */
>> >>  	if ((!(trace->depth || ftrace_graph_addr(trace->func)) ||
>> >> -	     ftrace_graph_ignore_irqs()) ||
>> >> +	     ftrace_graph_ignore_irqs()) || (trace->depth < 0) ||
>> >>  	    (max_depth && trace->depth >= max_depth))
>> >>  		return 0;
>> >>  
>> >> +	/* need to reserve a ret_stack entry to recover the depth */
>> >> +	if (ftrace_graph_notrace_addr(trace->func))
>> >> +		return 1;
>> >> +
>> >
>> > Also, I understand what you are doing here, with making curr_ret_stack
>> > negative to denote we are in a state to not do tracing. But it's more
>> > of a hack (not a bad one), and really needs to be documented somewhere.
>> > That is, the design should be in the comments where it's used, and 5
>> > years from now, nobody will understand how the notrace works without
>> > spending days trying to figure it out. Let's be nice to that poor soul,
>> > and write up what is going on so they don't need to pray to the holy
>> > tuna hoping to get a fish of enlightenment on how turning
>> > curr_ret_stack negative magically makes the function graph tracing not
>> > trace down functions, and magically starts tracing again when it comes
>> > back up.
>> 
>> Fully agreed.  How about this:
>> 
>> /*
>>  * The curr_ret_stack is an index to ftrace return stack of current
>>  * task.  Its value should be in [0, FTRACE_RETFUNC_DEPTH) when the
>>  * function graph tracer is used.  To support filtering out specific
>>  * functions, it makes the index negative by subtracting huge value
>
>                                                 s/huge/the max/

Hmm.. I introduced FTRACE_NOTRACE_DEPTH (not FTRACE_RETFUNC_DEPTH) in
order to prevent possible off-by-one bug in any case.  Do you think just
using FTRACE_RETFUNC_DEPTH is enough?

>
>>  * (FTRACE_NOTRACE_DEPTH) so when it sees a negative index the ftrace
>>  * will ignore the record.  And the index gets recovered when returning
>>  * from the filtered function by adding the FTRACE_NOTRACE_DEPTH and
>>  * then it will continue to record functions normally.
>>  */
>
> Sounds good.

Thank you for the review!
Namhyung

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2013-10-14  0:59 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-09-03  5:05 [RFC/PATCH] ftrace: add set_graph_notrace filter Namhyung Kim
2013-09-30  7:04 ` Namhyung Kim
2013-10-01  3:58   ` Steven Rostedt
2013-10-10  1:51     ` Namhyung Kim
2013-10-10  2:11       ` Steven Rostedt
2013-10-11  4:17 ` Steven Rostedt
2013-10-11  7:21   ` Heiko Carstens
2013-10-11  8:34     ` Namhyung Kim
2013-10-11  8:58       ` Heiko Carstens
2013-10-11 12:22     ` Steven Rostedt
2013-10-11  8:19   ` Namhyung Kim
2013-10-11 12:31     ` Steven Rostedt
2013-10-14  0:59       ` Namhyung Kim

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox