Re: [Patch 11/12] ftrace plugin for kernel symbol tracing using HWBreakpoint interfaces - v4

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Frederic Weisbecker <fweisbec@gmail.com>
To: "K.Prasad" <prasad@linux.vnet.ibm.com>
Cc: Alan Stern <stern@rowland.harvard.edu>,
	Steven Rostedt <rostedt@goodmis.org>, Ingo Molnar <mingo@elte.hu>,
	Linux Kernel Mailing List <linux-kernel@vger.kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	Benjamin Herrenschmidt <benh@au1.ibm.com>,
	maneesh@linux.vnet.ibm.com, Roland McGrath <roland@redhat.com>,
	Masami Hiramatsu <mhiramat@redhat.com>
Subject: Re: [Patch 11/12] ftrace plugin for kernel symbol tracing using HWBreakpoint interfaces - v4
Date: Tue, 12 May 2009 17:15:05 +0200	[thread overview]
Message-ID: <20090512151504.GB6255@nowhere> (raw)
In-Reply-To: <20090512141944.GB6033@in.ibm.com>

On Tue, May 12, 2009 at 07:49:44PM +0530, K.Prasad wrote:
> On Tue, May 12, 2009 at 12:14:29AM +0200, Frederic Weisbecker wrote:
> > On Mon, May 11, 2009 at 05:25:02PM +0530, K.Prasad wrote:
> > > This patch adds an ftrace plugin to detect and profile memory access over kernel
> > > > variables. It uses HW Breakpoint interfaces to 'watch' memory addresses.
> > > 
> > > +void ksym_collect_stats(unsigned long hbp_hit_addr)
> > > +{
> > > +	struct hlist_node *node;
> > > +	struct trace_ksym *entry;
> > > +
> > > +	rcu_read_lock();
> > > +	hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
> > > +		if ((entry->ksym_addr == hbp_hit_addr) &&
> > > +		    (entry->counter <= MAX_UL_INT)) {
> > > +			entry->counter++;
> > > +			break;
> > > +		}
> > > +	}
> > > +	rcu_read_unlock();
> > 
> > 
> > 
> > > RCU looks like a good idea to maintain your list.
> >
> 
> True, and there weren't many choices either. The earlier implementations
> with mutex/spin_lock turned out to be incorrect in their own way (while
> mutexes cannot be used inside exception handler context, spinlocks led to
> potential circular dependency).
>  
> > > +static int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
> > > +{
> > > +	struct trace_ksym *entry;
> > > +	int ret;
> > > +
> > > +	if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
> > > +		printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
> > > +		" new requests for tracing can be accepted now.\n",
> > > +			KSYM_TRACER_MAX);
> > > +		return -ENOSPC;
> > > +	}
> > > +
> > > +	entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
> > > +	if (!entry)
> > > +		return -ENOMEM;
> > > +
> > > +	entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
> > > +	if (!entry->ksym_hbp) {
> > > +		kfree(entry);
> > > +		return -ENOMEM;
> > > +	}
> > > +
> > > +	entry->ksym_hbp->info.name = ksymname;
> > > +	entry->ksym_hbp->info.type = op;
> > > +	entry->ksym_addr = entry->ksym_hbp->info.address = addr;
> > > +#ifdef CONFIG_X86
> > > +	entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
> > > +#endif
> > > +	entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
> > > +
> > > +	ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
> > > +	if (ret < 0) {
> > > +		printk(KERN_INFO "ksym_tracer request failed. Try again"
> > > +					" later!!\n");
> > > +		kfree(entry->ksym_hbp);
> > > +		kfree(entry);
> > > +		return -EAGAIN;
> > > +	}
> > > +	hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
> > 
> > 
> > > And then ksym_tracer_mutex protects against concurrent writers.
> > 
> > 
> 
> Yes, they synchronise read/write operations over the list pointed by
> ksym_filter_head.
> 
> > > +	ksym_filter_entry_count++;
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
> > > +						size_t count, loff_t *ppos)
> > > +{
> > > +	struct trace_ksym *entry;
> > > +	struct hlist_node *node;
> > > +	char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
> > > +	ssize_t ret, cnt = 0;
> > > +
> > > +	mutex_lock(&ksym_tracer_mutex);
> > > +
> > > +	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> > 
> > 
> > 
> > And here you don't use the rcu version.
> > I guess it's fine since you're protected by the writer lock...
> > 
> 
> I couldn't use RCU here because simple_read_from_buffer() is
> non-atomic (it may sleep), hence the mutex.



Ah indeed, it might sleep, you're right.


 
> > > +		cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
> > > +				entry->ksym_hbp->info.name);
> > > +		if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
> > > +			cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> > > +								"-w-\n");
> > > +		else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
> > > +			cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> > > +								"rw-\n");
> > > +	}
> > > +	ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
> > > +	mutex_unlock(&ksym_tracer_mutex);
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +static ssize_t ksym_trace_filter_write(struct file *file,
> > > +					const char __user *buffer,
> > > +						size_t count, loff_t *ppos)
> > > +{
> > > +	struct trace_ksym *entry;
> > > +	struct hlist_node *node;
> > > +	char *input_string, *ksymname = NULL;
> > > +	unsigned long ksym_addr = 0;
> > > +	int ret, op, changed = 0;
> > > +
> > > +	/* Ignore echo "" > ksym_trace_filter */
> > > +	if (count == 0)
> > > +		return 0;
> > > +
> > > +	input_string = kzalloc(count, GFP_KERNEL);
> > > +	if (!input_string)
> > > +		return -ENOMEM;
> > > +
> > > +	if (copy_from_user(input_string, buffer, count)) {
> > > +		kfree(input_string);
> > > +		return -EFAULT;
> > > +	}
> > > +
> > > +	ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
> > > +	if (ret < 0) {
> > > +		kfree(input_string);
> > > +		return ret;
> > > +	}
> > > +
> > > +	mutex_lock(&ksym_tracer_mutex);
> > > +
> > > +	ret = -EINVAL;
> > > +	hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
> > 
> > 
> > 
> > Same here, ok...
> > 
> > 
> > > +static int ksym_trace_init(struct trace_array *tr)
> > > +{
> > > +	int cpu, ret = 0;
> > > +
> > > +	for_each_online_cpu(cpu)
> > > +		tracing_reset(tr, cpu);
> > > +
> > > +	ksym_tracing_enabled = 1;
> > > +	ksym_trace_array = tr;
> > > +
> > > +#ifdef CONFIG_FTRACE_SELFTEST
> > > +	/* Check if we are re-entering self-test code during initialisation */
> > > +	if (ksym_selftest_dummy)
> > > +		goto ret_path;
> > > +
> > > +	ksym_selftest_dummy = 0;
> > > +
> > > +	/* Register the read-write tracing request */
> > > +	ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
> > > +					(unsigned long)(&ksym_selftest_dummy));
> > > +
> > > +	if (ret < 0) {
> > > +		printk(KERN_CONT "ksym_trace read-write startup test failed\n");
> > > +		goto ret_path;
> > > +	}
> > > +	/* Perform a read and a write operation over the dummy variable to
> > > +	 * trigger the tracer
> > > +	 */
> > > +	if (ksym_selftest_dummy == 0)
> > > +		ksym_selftest_dummy++;
> > > +ret_path:
> > > +#endif /* CONFIG_FTRACE_SELFTEST */
> > 
> > 
> > It means that each time your tracer is selected, it will perform a selftest.
> > I think we only need this selftest once during the boot.
> > > I would rather see that in the real selftest callback (trace_selftest_startup_ksym).
> >
> 
> > > +   if (ksym_selftest_dummy)
> > > +           goto ret_path;
> 
> The above check will help prevent a re-run of the test every time init is
> executed.
> 
> A part of the selftest was kept in trace_ksym.c (and hence in
> ksym_trace_init()) in order to use functions local to this file, such as
> process_new_ksym_entry().



Ok.
Maybe you could just run your selftests on the breakpoints themselves,
not on the entries in the ring buffer.

That's what other tracers do because it's usually convenient, but the main
focus is the event itself, because the ring buffer is already tested.
The only thing you need to test is the hardware breakpoint triggering; then
you would no longer need the trace_selftest.c helpers and you could
implement your selftest directly in your tracer file.

I remember such a thing being discussed recently, but I'm not sure
whether it concerned the kprobe tracer or yours.


  
> > > +__init static int init_ksym_trace(void)
> > > +{
> > > +	struct dentry *d_tracer;
> > > +	struct dentry *entry;
> > > +
> > > +	d_tracer = tracing_init_dentry();
> > > +	ksym_filter_entry_count = 0;
> > > +
> > > +	entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
> > 
> > 
> > 
> > Still writeable for everyone?
> > 
> > Thanks,
> > Frederic.
> >
> 
> Looks like I missed the change! Please find the next patchset to contain
> a '644' permission mode.



Thanks, I'm looking forward to the next version.

Frederic.


 
> Thanks for reviewing the code.
> 
> -- K.Prasad
>

next prev parent reply	other threads:[~2009-05-12 15:15 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20090511114422.133566343@prasadkr_t60p.in.ibm.com>
2009-05-11 11:52 ` [Patch 01/12] Prepare the code for Hardware Breakpoint interfaces K.Prasad
2009-05-28  5:28   ` David Gibson
2009-05-28 11:10     ` K.Prasad
2009-05-11 11:52 ` [Patch 02/12] Introducing generic hardware breakpoint handler interfaces K.Prasad
2009-05-11 12:12   ` Bharata B Rao
2009-05-11 12:16     ` K.Prasad
2009-05-28  6:15   ` David Gibson
2009-05-28 11:55     ` K.Prasad
2009-05-29  2:59       ` David Gibson
2009-05-11 11:53 ` [Patch 03/12] x86 architecture implementation of Hardware Breakpoint interfaces K.Prasad
2009-05-28  6:35   ` David Gibson
2009-05-28 13:41     ` K.Prasad
2009-05-29  3:15       ` David Gibson
2009-05-11 11:53 ` [Patch 04/12] Modifying generic debug exception to use thread-specific debug registers K.Prasad
2009-05-11 11:53 ` [Patch 05/12] Use wrapper routines around debug registers in processor related functions K.Prasad
2009-05-11 11:53 ` [Patch 06/12] Use the new wrapper routines to access debug registers in process/thread code K.Prasad
2009-05-28  6:42   ` David Gibson
2009-05-29  9:01     ` K.Prasad
2009-05-29 10:49       ` Frederic Weisbecker
2009-05-29 13:52         ` K.Prasad
2009-05-29 14:07           ` Frédéric Weisbecker
2009-05-30 11:00             ` K.Prasad
2009-05-29 13:54         ` Alan Stern
2009-05-11 11:53 ` [Patch 07/12] Modify signal handling code to refrain from re-enabling HW Breakpoints K.Prasad
2009-05-11 11:54 ` [Patch 08/12] Modify Ptrace routines to access breakpoint registers K.Prasad
2009-05-11 11:54 ` [Patch 09/12] Cleanup HW Breakpoint registers before kexec K.Prasad
2009-05-11 11:54 ` [Patch 10/12] Sample HW breakpoint over kernel data address K.Prasad
2009-05-11 11:55 ` [Patch 11/12] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v4 K.Prasad
2009-05-11 22:14   ` Frederic Weisbecker
2009-05-12 14:19     ` [Patch 11/12] ftrace plugin for kernel symbol tracing using HWBreakpoint " K.Prasad
2009-05-12 15:15       ` Frederic Weisbecker [this message]
2009-05-12 20:02         ` [Patch 11/12] ftrace plugin for kernel symbol tracing usingHWBreakpoint " K.Prasad
2009-05-11 11:55 ` [Patch 12/12] Reset bits in dr6 after the corresponding exception is handled K.Prasad

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090512151504.GB6255@nowhere \
    --to=fweisbec@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=benh@au1.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maneesh@linux.vnet.ibm.com \
    --cc=mhiramat@redhat.com \
    --cc=mingo@elte.hu \
    --cc=prasad@linux.vnet.ibm.com \
    --cc=roland@redhat.com \
    --cc=rostedt@goodmis.org \
    --cc=stern@rowland.harvard.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox