All of lore.kernel.org
 help / color / mirror / Atom feed
From: Namhyung Kim <namhyung@kernel.org>
To: Steven Rostedt <rostedt@goodmis.org>
Cc: <linux-kernel@vger.kernel.org>, Ingo Molnar <mingo@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Jiri Olsa <jolsa@kernel.org>,
	Masami Hiramatsu <mhiramat@kernel.org>,
	<linux-trace-users@vger.kernel.org>
Subject: Re: [RFC][PATCH 2/4] tracing: Use pid bitmap instead of a pid array for set_event_pid
Date: Fri, 22 Apr 2016 11:45:30 +0900	[thread overview]
Message-ID: <20160422024530.GA1790@sejong> (raw)
In-Reply-To: <20160419143725.295928551@goodmis.org>

Hi Steve,

On Tue, Apr 19, 2016 at 10:34:23AM -0400, Steven Rostedt wrote:
> From: Steven Rostedt <rostedt@goodmis.org>
> 
> In order to add the ability to let tasks that are filtered by the events
> have their children also be traced on fork (and then not traced on exit),
> convert the array into a pid bitmask. Most of the time the number of pids is
> only 32768 pids or a 4k bitmask, which is the same size as the default list
> currently is, and that list could grow if more pids are listed.
> 
> This also greatly simplifies the code.
> 
> Suggested-by: "H. Peter Anvin" <hpa@zytor.com>
> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
> ---

[SNIP]
> @@ -1571,7 +1572,7 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
>  	struct seq_file *m = filp->private_data;
>  	struct trace_array *tr = m->private;
>  	struct trace_pid_list *filtered_pids = NULL;
> -	struct trace_pid_list *pid_list = NULL;
> +	struct trace_pid_list *pid_list;
>  	struct trace_event_file *file;
>  	struct trace_parser parser;
>  	unsigned long val;
> @@ -1579,7 +1580,7 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
>  	ssize_t read = 0;
>  	ssize_t ret = 0;
>  	pid_t pid;
> -	int i;
> +	int nr_pids = 0;
>  
>  	if (!cnt)
>  		return 0;
> @@ -1592,10 +1593,43 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
>  		return -ENOMEM;
>  
>  	mutex_lock(&event_mutex);
> +	filtered_pids = rcu_dereference_protected(tr->filtered_pids,
> +					     lockdep_is_held(&event_mutex));
> +
>  	/*
> -	 * Load as many pids into the array before doing a
> -	 * swap from the tr->filtered_pids to the new list.
> +	 * Always recreate a new array. The write is an all or nothing
> +	 * operation. Always create a new array when adding new pids by
> +	 * the user. If the operation fails, then the current list is
> +	 * not modified.
>  	 */
> +	pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
> +	if (!pid_list) {
> +		read = -ENOMEM;
> +		goto out;
> +	}
> +	pid_list->pid_max = READ_ONCE(pid_max);
> +	/* Only truncating will shrink pid_max */
> +	if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
> +		pid_list->pid_max = filtered_pids->pid_max;
> +	pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
> +	if (!pid_list->pids) {
> +		kfree(pid_list);
> +		read = -ENOMEM;
> +		goto out;
> +	}
> +	if (filtered_pids) {
> +		/* copy the current bits to the new max */
> +		pid = find_first_bit(filtered_pids->pids,
> +				     filtered_pids->pid_max);
> +		while (pid < filtered_pids->pid_max) {
> +			set_bit(pid, pid_list->pids);
> +			pid = find_next_bit(filtered_pids->pids,
> +					    filtered_pids->pid_max,
> +					    pid + 1);
> +			nr_pids++;
> +		}

Why not just use memcpy and keep nr_pids in the pid_list?

Thanks,
Namhyung



> +	}
> +
>  	while (cnt > 0) {
>  
>  		this_pos = 0;
> @@ -1613,92 +1647,35 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
>  		ret = -EINVAL;
>  		if (kstrtoul(parser.buffer, 0, &val))
>  			break;
> -		if (val > INT_MAX)
> +		if (val >= pid_list->pid_max)
>  			break;
>  
>  		pid = (pid_t)val;
>  
> -		ret = -ENOMEM;
> -		if (!pid_list) {
> -			pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
> -			if (!pid_list)
> -				break;
> -
> -			filtered_pids = rcu_dereference_protected(tr->filtered_pids,
> -							lockdep_is_held(&event_mutex));
> -			if (filtered_pids)
> -				pid_list->order = filtered_pids->order;
> -			else
> -				pid_list->order = 0;
> -
> -			pid_list->pids = (void *)__get_free_pages(GFP_KERNEL,
> -								  pid_list->order);
> -			if (!pid_list->pids)
> -				break;
> -
> -			if (filtered_pids) {
> -				pid_list->nr_pids = filtered_pids->nr_pids;
> -				memcpy(pid_list->pids, filtered_pids->pids,
> -				       pid_list->nr_pids * sizeof(pid_t));
> -			} else
> -				pid_list->nr_pids = 0;
> -		}
> -
> -		if (pid_list->nr_pids >= max_pids(pid_list)) {
> -			pid_t *pid_page;
> -
> -			pid_page = (void *)__get_free_pages(GFP_KERNEL,
> -							    pid_list->order + 1);
> -			if (!pid_page)
> -				break;
> -			memcpy(pid_page, pid_list->pids,
> -			       pid_list->nr_pids * sizeof(pid_t));
> -			free_pages((unsigned long)pid_list->pids, pid_list->order);
> -
> -			pid_list->order++;
> -			pid_list->pids = pid_page;
> -		}
> +		set_bit(pid, pid_list->pids);
> +		nr_pids++;
>  
> -		pid_list->pids[pid_list->nr_pids++] = pid;
>  		trace_parser_clear(&parser);
>  		ret = 0;
>  	}
>  	trace_parser_put(&parser);
>  
>  	if (ret < 0) {
> -		if (pid_list)
> -			free_pages((unsigned long)pid_list->pids, pid_list->order);
> +		vfree(pid_list->pids);
>  		kfree(pid_list);
> -		mutex_unlock(&event_mutex);
> -		return ret;
> -	}
> -
> -	if (!pid_list) {
> -		mutex_unlock(&event_mutex);
> -		return ret;
> +		read = ret;
> +		goto out;
>  	}
>  
> -	sort(pid_list->pids, pid_list->nr_pids, sizeof(pid_t), cmp_pid, NULL);
> -
> -	/* Remove duplicates */
> -	for (i = 1; i < pid_list->nr_pids; i++) {
> -		int start = i;
> -
> -		while (i < pid_list->nr_pids &&
> -		       pid_list->pids[i - 1] == pid_list->pids[i])
> -			i++;
> -
> -		if (start != i) {
> -			if (i < pid_list->nr_pids) {
> -				memmove(&pid_list->pids[start], &pid_list->pids[i],
> -					(pid_list->nr_pids - i) * sizeof(pid_t));
> -				pid_list->nr_pids -= i - start;
> -				i = start;
> -			} else
> -				pid_list->nr_pids = start;
> -		}
> +	if (!nr_pids) {
> +		/* Cleared the list of pids */
> +		vfree(pid_list->pids);
> +		kfree(pid_list);
> +		read = ret;
> +		if (!filtered_pids)
> +			goto out;
> +		pid_list = NULL;
>  	}
> -
>  	rcu_assign_pointer(tr->filtered_pids, pid_list);
>  
>  	list_for_each_entry(file, &tr->events, list) {
> @@ -1708,7 +1685,7 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
>  	if (filtered_pids) {
>  		synchronize_sched();
>  
> -		free_pages((unsigned long)filtered_pids->pids, filtered_pids->order);
> +		vfree(filtered_pids->pids);
>  		kfree(filtered_pids);
>  	} else {
>  		/*
> @@ -1745,10 +1722,12 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
>  	 */
>  	on_each_cpu(ignore_task_cpu, tr, 1);
>  
> + out:
>  	mutex_unlock(&event_mutex);
>  
>  	ret = read;
> -	*ppos += read;
> +	if (read > 0)
> +		*ppos += read;
>  
>  	return ret;
>  }
> -- 
> 2.8.0.rc3
> 
> 

  parent reply	other threads:[~2016-04-22  2:45 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-19 14:34 [RFC][PATCH 0/4] tracing: Add event-fork to trace tasks children Steven Rostedt
2016-04-19 14:34 ` [RFC][PATCH 1/4] tracing: Rename check_ignore_pid() to ignore_this_task() Steven Rostedt
2016-04-19 14:34 ` [RFC][PATCH 2/4] tracing: Use pid bitmap instead of a pid array for set_event_pid Steven Rostedt
2016-04-19 16:55   ` Mathieu Desnoyers
2016-04-19 17:19     ` Steven Rostedt
2016-04-19 18:57       ` H. Peter Anvin
2016-04-19 19:41         ` Steven Rostedt
2016-04-19 20:17           ` Mathieu Desnoyers
2016-04-19 20:50             ` Steven Rostedt
2016-04-19 21:22               ` Mathieu Desnoyers
2016-04-19 22:49                 ` Steven Rostedt
2016-04-19 22:59                   ` Mathieu Desnoyers
2016-04-19 23:06                     ` Steven Rostedt
2016-04-22  2:45   ` Namhyung Kim [this message]
2016-04-22 15:30     ` Steven Rostedt
2016-04-19 14:34 ` [RFC][PATCH 3/4] tracing: Add infrastructure to allow set_event_pid to follow children Steven Rostedt
2016-04-19 16:55   ` Mathieu Desnoyers
2016-04-19 17:13     ` Steven Rostedt
2016-04-19 20:30       ` Mathieu Desnoyers
2016-04-19 14:34 ` [RFC][PATCH 4/4] tracing: Update the documentation to describe "event-fork" option Steven Rostedt
2016-04-20  2:05 ` [RFC][PATCH 0/4] tracing: Add event-fork to trace tasks children Daniel Bristot de Oliveira
2016-04-20  2:30   ` Steven Rostedt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160422024530.GA1790@sejong \
    --to=namhyung@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=hpa@zytor.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-trace-users@vger.kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=mingo@kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.