linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
To: Christoph Lameter <cl@linux-foundation.org>
Cc: Tejun Heo <tj@kernel.org>, linux-kernel@vger.kernel.org
Subject: Re: [RFC local_t removal V1 2/4] Replace local_t use in trace subsystem
Date: Tue, 5 Jan 2010 17:57:10 -0500	[thread overview]
Message-ID: <20100105225710.GC32584@Krystal> (raw)
In-Reply-To: <20100105220438.462143211@quilx.com>

* Christoph Lameter (cl@linux-foundation.org) wrote:
> Replace the local_t use with longs in the trace subsystem. The longs can then be
> updated with the required level of concurrency protection through cmpxchg_local()
> and add_local().
> 
> Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
> 
> ---
>  kernel/trace/ring_buffer.c           |  150 +++++++++++++++++------------------
>  kernel/trace/ring_buffer_benchmark.c |    5 -
>  kernel/trace/trace_branch.c          |    1 
>  3 files changed, 77 insertions(+), 79 deletions(-)
> 
> Index: linux-2.6/kernel/trace/ring_buffer.c
> ===================================================================
> --- linux-2.6.orig/kernel/trace/ring_buffer.c	2010-01-05 15:35:35.000000000 -0600
> +++ linux-2.6/kernel/trace/ring_buffer.c	2010-01-05 15:36:07.000000000 -0600
> @@ -20,7 +20,7 @@
>  #include <linux/cpu.h>
>  #include <linux/fs.h>
>  
> -#include <asm/local.h>
> +#include <asm/add-local.h>
>  #include "trace.h"
>  
>  /*
> @@ -312,7 +312,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data
>  
>  struct buffer_data_page {
>  	u64		 time_stamp;	/* page time stamp */
> -	local_t		 commit;	/* write committed index */
> +	long		 commit;	/* write committed index */
>  	unsigned char	 data[];	/* data of buffer page */
>  };
>  
> @@ -326,9 +326,9 @@ struct buffer_data_page {
>   */
>  struct buffer_page {
>  	struct list_head list;		/* list of buffer pages */
> -	local_t		 write;		/* index for next write */
> +	long		 write;		/* index for next write */
>  	unsigned	 read;		/* index for next read */
> -	local_t		 entries;	/* entries on this page */
> +	long		 entries;	/* entries on this page */
>  	struct buffer_data_page *page;	/* Actual data page */
>  };
>  
> @@ -349,7 +349,7 @@ struct buffer_page {
>  
>  static void rb_init_page(struct buffer_data_page *bpage)
>  {
> -	local_set(&bpage->commit, 0);
> +	bpage->commit = 0;

This is incorrect. You are turning a "volatile" write into a
non-volatile write, which can be turned into multiple writes by the
compiler and therefore expose inconsistent state to interrupt handlers.

>  }
>  
>  /**
> @@ -360,7 +360,7 @@ static void rb_init_page(struct buffer_d
>   */
>  size_t ring_buffer_page_len(void *page)
>  {
> -	return local_read(&((struct buffer_data_page *)page)->commit)
> +	return ((struct buffer_data_page *)page)->commit
>  		+ BUF_PAGE_HDR_SIZE;
>  }
>  
> @@ -431,11 +431,11 @@ struct ring_buffer_per_cpu {
>  	struct buffer_page		*tail_page;	/* write to tail */
>  	struct buffer_page		*commit_page;	/* committed pages */
>  	struct buffer_page		*reader_page;
> -	local_t				commit_overrun;
> -	local_t				overrun;
> -	local_t				entries;
> -	local_t				committing;
> -	local_t				commits;
> +	long				commit_overrun;
> +	long				overrun;
> +	long				entries;
> +	long				committing;
> +	long				commits;
>  	unsigned long			read;
>  	u64				write_stamp;
>  	u64				read_stamp;
> @@ -824,8 +824,8 @@ static int rb_tail_page_update(struct ri
>  	 *
>  	 * We add a counter to the write field to denote this.
>  	 */
> -	old_write = local_add_return(RB_WRITE_INTCNT, &next_page->write);
> -	old_entries = local_add_return(RB_WRITE_INTCNT, &next_page->entries);
> +	old_write = add_return_local(&next_page->write, RB_WRITE_INTCNT);
> +	old_entries = add_return_local(&next_page->entries, RB_WRITE_INTCNT);
>  
>  	/*
>  	 * Just make sure we have seen our old_write and synchronize
> @@ -853,15 +853,15 @@ static int rb_tail_page_update(struct ri
>  		 * cmpxchg to only update if an interrupt did not already
>  		 * do it for us. If the cmpxchg fails, we don't care.
>  		 */
> -		(void)local_cmpxchg(&next_page->write, old_write, val);
> -		(void)local_cmpxchg(&next_page->entries, old_entries, eval);
> +		(void)cmpxchg_local(&next_page->write, old_write, val);
> +		(void)cmpxchg_local(&next_page->entries, old_entries, eval);
>  
>  		/*
>  		 * No need to worry about races with clearing out the commit.
>  		 * it only can increment when a commit takes place. But that
>  		 * only happens in the outer most nested commit.
>  		 */
> -		local_set(&next_page->page->commit, 0);
> +		next_page->page->commit = 0;
>  
>  		old_tail = cmpxchg(&cpu_buffer->tail_page,
>  				   tail_page, next_page);
> @@ -1394,17 +1394,17 @@ rb_iter_head_event(struct ring_buffer_it
>  
>  static inline unsigned long rb_page_write(struct buffer_page *bpage)
>  {
> -	return local_read(&bpage->write) & RB_WRITE_MASK;
> +	return bpage->write & RB_WRITE_MASK;

Same problem here: missing volatile for read. The same applies throughout
the patch.

>  }
>  
>  static inline unsigned rb_page_commit(struct buffer_page *bpage)
>  {
> -	return local_read(&bpage->page->commit);
> +	return bpage->page->commit;
>  }
>  
>  static inline unsigned long rb_page_entries(struct buffer_page *bpage)
>  {
> -	return local_read(&bpage->entries) & RB_WRITE_MASK;
> +	return bpage->entries & RB_WRITE_MASK;
>  }
>  
>  /* Size is determined by what has been commited */
> @@ -1463,8 +1463,8 @@ rb_set_commit_to_write(struct ring_buffe
>  		if (RB_WARN_ON(cpu_buffer,
>  			       rb_is_reader_page(cpu_buffer->tail_page)))
>  			return;
> -		local_set(&cpu_buffer->commit_page->page->commit,
> -			  rb_page_write(cpu_buffer->commit_page));
> +		cpu_buffer->commit_page->page->commit =
> +			  rb_page_write(cpu_buffer->commit_page);
>  		rb_inc_page(cpu_buffer, &cpu_buffer->commit_page);
>  		cpu_buffer->write_stamp =
>  			cpu_buffer->commit_page->page->time_stamp;
> @@ -1474,10 +1474,10 @@ rb_set_commit_to_write(struct ring_buffe
>  	while (rb_commit_index(cpu_buffer) !=
>  	       rb_page_write(cpu_buffer->commit_page)) {
>  
> -		local_set(&cpu_buffer->commit_page->page->commit,
> -			  rb_page_write(cpu_buffer->commit_page));
> +		cpu_buffer->commit_page->page->commit =
> +			  rb_page_write(cpu_buffer->commit_page);
>  		RB_WARN_ON(cpu_buffer,
> -			   local_read(&cpu_buffer->commit_page->page->commit) &
> +			   cpu_buffer->commit_page->page->commit &
>  			   ~RB_WRITE_MASK);
>  		barrier();
>  	}
> @@ -1600,7 +1600,7 @@ rb_handle_head_page(struct ring_buffer_p
>  		 * it is our responsibility to update
>  		 * the counters.
>  		 */
> -		local_add(entries, &cpu_buffer->overrun);
> +		add_local(&cpu_buffer->overrun, entries);
>  
>  		/*
>  		 * The entries will be zeroed out when we move the
> @@ -1741,7 +1741,7 @@ rb_reset_tail(struct ring_buffer_per_cpu
>  	 * must fill the old tail_page with padding.
>  	 */
>  	if (tail >= BUF_PAGE_SIZE) {
> -		local_sub(length, &tail_page->write);
> +		add_local(&tail_page->write, -length);

[...]

If we can have inc/dec/sub already, that would be good, rather than
going with add -val. This would ensure that we don't do too much
ping-pong with the code using these primitives.

In the end, the fact that the missing volatile access bug crept in as
part of this patch makes me think that keeping local_t was doing a fine
encapsulation job. However, if we really want to go down the path of
removing this encapsulation, then we should:

- make sure that _all_ variable accesses are encapsulated, even
  read_local and set_local.
- put all this API into a single header per architecture, easy for
  people to find and understand, rather than multiple headers sprinkled
  all over the place.
- document that accessing the variables without the API violates the
  consistency guarantees.

Thanks,

Mathieu

-- 
Mathieu Desnoyers
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

  reply	other threads:[~2010-01-05 22:57 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-01-05 22:04 [RFC local_t removal V1 0/4] Remove local_t Christoph Lameter
2010-01-05 22:04 ` [RFC local_t removal V1 1/4] Add add_local() and add_local_return() Christoph Lameter
2010-01-05 22:17   ` Mike Frysinger
2010-01-05 22:29     ` Christoph Lameter
2010-01-06 19:23       ` Mike Frysinger
2010-01-07 17:03         ` Christoph Lameter
2010-01-05 22:49   ` Mathieu Desnoyers
2010-01-07 13:45     ` Arnd Bergmann
2010-01-07 13:57       ` Mathieu Desnoyers
2010-01-07 14:22         ` Arnd Bergmann
2010-01-07 17:07     ` Christoph Lameter
2010-01-05 22:04 ` [RFC local_t removal V1 2/4] Replace local_t use in trace subsystem Christoph Lameter
2010-01-05 22:57   ` Mathieu Desnoyers [this message]
2010-01-07 17:15     ` Christoph Lameter
2010-01-14  2:56   ` Steven Rostedt
2010-01-14 14:49     ` Christoph Lameter
2010-01-05 22:04 ` [RFC local_t removal V1 3/4] Optimized add_local() Christoph Lameter
2010-01-05 22:59   ` Mathieu Desnoyers
2010-01-07 17:16     ` Christoph Lameter
2010-01-05 22:04 ` [RFC local_t removal V1 4/4] Remove local_t support Christoph Lameter
2010-01-05 22:23 ` [RFC local_t removal V1 0/4] Remove local_t Mathieu Desnoyers
2010-01-05 22:34   ` Christoph Lameter
2010-01-05 22:45     ` Mathieu Desnoyers
2010-01-07 17:05       ` Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100105225710.GC32584@Krystal \
    --to=mathieu.desnoyers@polymtl.ca \
    --cc=cl@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).