Re: [PATCH v2 7/11] Uprobes Implementation

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Oleg Nesterov <oleg@redhat.com>
To: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@elte.hu>,
	Andrew Morton <akpm@linux-foundation.org>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Masami Hiramatsu <mhiramat@redhat.com>,
	Randy Dunlap <rdunlap@xenotime.net>,
	Ananth N Mavinakayanahalli <ananth@in.ibm.com>,
	Jim Keniston <jkenisto@linux.vnet.ibm.com>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	"Frank Ch. Eigler" <fche@redhat.com>,
	LKML <linux-kernel@vger.kernel.org>,
	Roland McGrath <roland@redhat.com>
Subject: Re: [PATCH v2 7/11] Uprobes Implementation
Date: Tue, 13 Apr 2010 20:35:37 +0200	[thread overview]
Message-ID: <20100413183537.GA17538@redhat.com> (raw)
In-Reply-To: <20100331155228.4181.61294.sendpatchset@localhost6.localdomain6>

On 03/31, Srikar Dronamraju wrote:
>
> --- /dev/null
> +++ b/kernel/uprobes.c
> ...
> +static struct uprobe_process *find_uprocess(struct pid *tg_leader)
> +{
> +	struct uprobe_process *uproc;
> +	struct task_struct *tsk = get_pid_task(tg_leader, PIDTYPE_PID);
> +
> +	if (!tsk)
> +		return NULL;
> +
> +	if (!tsk->utask || !tsk->utask->uproc) {
> +		put_task_struct(tsk);
> +		return NULL;
> +	}
> +
> +	uproc = tsk->utask->uproc;
> +	BUG_ON(uproc->tg_leader != tg_leader);
> +	atomic_inc(&uproc->refcount);
> +	put_task_struct(tsk);
> +	return uproc;

Looks like, this doesn't need get/put task_struct, you could just
use pid_task() under rcu_read_lock().

> +static void cleanup_uprocess(struct uprobe_process *uproc,
> +					struct task_struct *start)
> +{
> +	struct task_struct *tsk = start;
> +
> +	if (!start)
> +		return;
> +
> +	rcu_read_lock();
> +	do {
> +		if (tsk->utask) {
> +			kfree(tsk->utask);
> +			tsk->utask = NULL;
> +		}
> +		tsk = next_thread(tsk);

This doesn't look right. We can't trust ->thread_group list even under
rcu_read_lock(). The task can exit and __exit_signal() can remove it
from ->thread_group list before we take rcu_read_lock().

> +static struct uprobe_task *add_utask(struct task_struct *t,
> +					struct uprobe_process *uproc)
> +{
> +	struct uprobe_task *utask;
> +
> +	if (!t)
> +		return NULL;
> +	utask = kzalloc(sizeof *utask, GFP_USER);
> +	if (unlikely(utask == NULL))
> +		return ERR_PTR(-ENOMEM);
> +
> +	utask->uproc = uproc;
> +	utask->active_ppt = NULL;
> +	t->utask = utask;
> +	atomic_inc(&uproc->refcount);
> +
> +	return utask;
> +}

This is called by create_uprocess(). Who will free t->utask if t has
already passed tracehook_report_exit() ?

> +static struct task_struct *find_next_thread(struct uprobe_process *uproc,
> +						struct task_struct *start)
> +{
> +	struct task_struct *next_t = NULL;
> +
> +	rcu_read_lock();
> +	if (start) {
> +		struct task_struct *t = start;
> +
> +		do {
> +			if (unlikely(t->flags & PF_EXITING))
> +				goto dont_add;

not sure I understand this check. Somehow we should prevent the races
with tracehook_report_exit/tracehook_report_exec, but PF_EXITING can't
help ?

> +dont_add:
> +			t = next_thread(t);
> +		} while (t != start);

again, this doesn't look right. Btw, I'd suggest to use while_each_thread().

> +static struct uprobe_process *create_uprocess(struct pid *tg_leader)
> +{
> ...
> +	/*
> +	 * Create and populate one utask per thread in this process.  We
> +	 * can't call add_utask() while holding RCU lock, so we:
> +	 *	1. rcu_read_lock()
> +	 *	2. Find the next thread, add_me, in this process that's not
> +	 *	having a utask struct allocated.
> +	 *	3. rcu_read_unlock()
> +	 *	4. add_utask(add_me, uproc)
> +	 *	Repeat 1-4 'til we have utasks for all threads.
> +	 */
> +	cur_t = get_pid_task(tg_leader, PIDTYPE_PID);
> +	do {
> +		utask = add_utask(cur_t, uproc);
> +		if (IS_ERR(utask)) {
> +			err = PTR_ERR(utask);
> +			goto fail;
> +		}
> +		add_me = find_next_thread(uproc, cur_t);
> +		put_task_struct(cur_t);
> +		cur_t = add_me;
> +	} while (add_me != NULL);

can't we race with clone(CLONE_THREAD) and miss the new thread? Probably
I missed something, but afaics we need some barriers to ensure that either
tracehook_report_clone() sees current->utask != NULL or find_next_thread()
sees the new thread in ->thread_group.

> +static struct pid *get_tg_leader(pid_t p)
> +{
> +	struct pid *pid = NULL;
> +
> +	rcu_read_lock();
> +	if (current->nsproxy)
> +		pid = find_vpid(p);

Is it really possible to call register/unregister with nsproxy == NULL?

> +	if (pid) {
> +		struct task_struct *t = pid_task(pid, PIDTYPE_PID);
> +
> +		if (!t || unlikely(t->flags & PF_EXITING))

Why do we check PF_EXITING?

> +int register_uprobe(struct uprobe *u)
> +{
> +	struct uprobe_process *uproc;
> +	struct uprobe_probept *ppt;
> +	struct pid *p;
> +	int ret = 0;
> +
> +	if (!u || !u->handler)
> +		return -EINVAL;
> +
> +	p = get_tg_leader(u->pid);
> +	if (!p)
> +		return -ESRCH;
> +
> +
> +	/* Get the uprobe_process for this pid, or make a new one. */
> +	mutex_lock(&uprobe_mutex);
> +	uproc = find_uprocess(p);
> +
> +	if (!uproc) {
> +		uproc = create_uprocess(p);
> +		if (IS_ERR(uproc)) {
> +			ret = (int) PTR_ERR(uproc);
> +			mutex_unlock(&uprobe_mutex);
> +			goto fail_tsk;
> +		}
> +	}
> +	mutex_unlock(&uprobe_mutex);
> +	mutex_lock(&uproc->mutex);
> +
> +	if (uproc->n_ppts >= MAX_USER_BKPT_XOL_SLOTS)
> +		goto fail_uproc;
> +
> +	ret = xol_validate_vaddr(p, u->vaddr, uproc->xol_area);

OK, uproc and p can't go away. But why it is safe to use pid_task(p) ?

I am looking at 6th patch http://marc.info/?l=linux-kernel&m=127005086102256
and xol_validate_vaddr() calls pid_task() without rcu and doesn't check
the result is not NULL.

We already dropped uprobe_mutex, can't this task exit?

> +void uprobe_handle_clone(unsigned long clone_flags,
> +				struct task_struct *child)
> +{
> +	struct uprobe_process *uproc;
> +	struct uprobe_task *ptask, *ctask;
> +
> +	ptask = current->utask;
> +	if (!ptask)
> +		return;
> +
> +	uproc = ptask->uproc;
> +
> +	if (clone_flags & CLONE_THREAD) {
> +		mutex_lock(&uprobe_mutex);
> +		/* New thread in the same process. */
> +		ctask = child->utask;
> +		if (unlikely(ctask)) {
> +			/*
> +			 * create_uprocess() ran just as this clone
> +			 * happened, and has already accounted for the
> +			 * new child.
> +			 */
> +		} else
> +			ctask = add_utask(child, uproc);

This looks a bit strange. Why do we need "ctask" at all? It is not used,
you could just do

	if (likely(!child->utask))
		add_utask(child, uproc);

The same for "else" branch.

> +	} else {
> +		struct uprobe_probept *ppt;
> +		int ret;
> +
> +		/*
> +		 * New process spawned by parent.  Remove the probepoints
> +		 * in the child's text.
> +		 *
> +		 * We also hold the uproc->mutex for the parent - so no
> +		 * new uprobes will be registered 'til we return.
> +		 */
> +		mutex_lock(&uproc->mutex);
> +		ctask = child->utask;
> +		if (unlikely(ctask)) {
> +			/*
> +			 * create_uprocess() ran just as this fork
> +			 * happened, and has already created a new utask.
> +			 */
> +			mutex_unlock(&uproc->mutex);
> +			return;
> +		}
> +		list_for_each_entry(ppt, &uproc->uprobe_list, ut_node) {
> +			ret = user_bkpt_remove_bkpt(child, &ppt->user_bkpt);

OK, iiuc this should restore the original instruction, right?

But what about clone(CLONE_VM)? In this case this child shares ->mm with
parent.

Oleg.

next prev parent reply	other threads:[~2010-04-13 18:38 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-03-31 15:51 [PATCH v2 0/11] Uprobes patches Srikar Dronamraju
2010-03-31 15:51 ` [PATCH v2 1/11] Move Macro W to insn.h Srikar Dronamraju
2010-03-31 15:51 ` [PATCH v2 2/11] Move replace_page() to mm/memory.c Srikar Dronamraju
2010-03-31 15:51 ` [PATCH v2 3/11] Enhance replace_page() to support pagecache Srikar Dronamraju
2010-03-31 15:51 ` [PATCH v2 4/11] User Space Breakpoint Assistance Layer Srikar Dronamraju
2010-03-31 15:52 ` [PATCH v2 5/11] X86 details for user space breakpoint assistance Srikar Dronamraju
2010-03-31 15:52 ` [PATCH v2 6/11] Slot allocation for Execution out of line Srikar Dronamraju
2010-03-31 15:52 ` [PATCH v2 7/11] Uprobes Implementation Srikar Dronamraju
2010-04-13 18:35   ` Oleg Nesterov [this message]
2010-04-15  9:35     ` Srikar Dronamraju
2010-04-19 19:31       ` Oleg Nesterov
2010-04-20 12:43         ` Srikar Dronamraju
2010-04-20 15:30           ` Oleg Nesterov
2010-04-21  6:59             ` Srikar Dronamraju
2010-04-21 16:05               ` Oleg Nesterov
2010-04-22 13:31                 ` Srikar Dronamraju
2010-04-22 15:40                   ` Oleg Nesterov
2010-04-23 14:58                     ` Srikar Dronamraju
2010-04-23 18:53                       ` Oleg Nesterov
2010-05-11 20:47                       ` Peter Zijlstra
2010-05-11 20:44                     ` Peter Zijlstra
2010-05-11 20:45                     ` Peter Zijlstra
2010-05-12 10:31                       ` Srikar Dronamraju
2010-05-13 19:40                       ` Oleg Nesterov
2010-05-13 19:59                         ` Linus Torvalds
2010-05-13 22:12                           ` Andi Kleen
2010-05-13 22:25                             ` Linus Torvalds
2010-05-14  0:56                           ` Roland McGrath
2010-05-14  5:42                           ` Srikar Dronamraju
2010-05-11 20:43                   ` Peter Zijlstra
2010-05-12 10:41                     ` Srikar Dronamraju
2010-05-12 11:12                       ` Peter Zijlstra
2010-05-12 14:24                         ` Srikar Dronamraju
2010-05-11 20:32             ` Peter Zijlstra
2010-05-11 20:57               ` Frank Ch. Eigler
2010-05-11 21:01                 ` Peter Zijlstra
2010-03-31 15:52 ` [PATCH v2 8/11] X86 details for uprobes Srikar Dronamraju
2010-03-31 15:52 ` [PATCH v2 9/11] Uprobes Documentation patch Srikar Dronamraju
2010-03-31 15:52 ` [PATCH v2 10/11] Uprobes samples Srikar Dronamraju
2010-03-31 15:53 ` [PATCH v2 11/11] Uprobes traceevents patch Srikar Dronamraju
2010-03-31 21:24   ` Steven Rostedt
2010-04-01  4:16     ` Masami Hiramatsu
2010-05-12 14:57       ` Frederic Weisbecker
2010-05-12 11:02   ` Frederic Weisbecker
2010-05-12 14:34     ` Srikar Dronamraju
2010-05-12 15:15   ` Frederic Weisbecker

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100413183537.GA17538@redhat.com \
    --to=oleg@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=ananth@in.ibm.com \
    --cc=fche@redhat.com \
    --cc=fweisbec@gmail.com \
    --cc=jkenisto@linux.vnet.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mhiramat@redhat.com \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    --cc=rdunlap@xenotime.net \
    --cc=roland@redhat.com \
    --cc=srikar@linux.vnet.ibm.com \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.