Re: [Patch 3/3] OProfile SPU event profiling support for IBM Cell processor

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Arnd Bergmann <arnd@arndb.de>
To: Carl Love <cel@us.ibm.com>
Cc: linuxppc-dev@ozlabs.org, cel <cel@linux.vnet.ibm.com>,
	linux-kernel <linux-kernel@vger.kernel.org>,
	oprofile-list@lists.sourceforge.net, cbe-oss-dev@ozlabs.org
Subject: Re: [Patch 3/3] OProfile SPU event profiling support for IBM Cell processor
Date: Tue, 25 Nov 2008 16:58:31 +0100	[thread overview]
Message-ID: <200811251658.32327.arnd@arndb.de> (raw)
In-Reply-To: <1227569215.6509.216.camel@carll-linux-desktop>

On Tuesday 25 November 2008, Carl Love wrote:
> 
> This is the second of the two kernel  patches for adding SPU profiling 
> for the IBM Cell processor.  This patch contains the spu event profiling
> setup, start and stop routines.
> 
> Signed-off-by: Carl Love <carll@us.ibm.com>

Maybe a little more detailed description would be good. Explain
what this patch adds that isn't already there and why people would
want to have that in the kernel.
 
>  
>  static void cell_global_stop_spu_cycles(void);
> +static void cell_global_stop_spu_events(void);

Can you reorder the functions so that you don't need any forward
declarations? In general, it gets easier to understand if the
definition order matches the call graph.

>  static unsigned int spu_cycle_reset;
>  static unsigned int profiling_mode;
> -
> +static int spu_evnt_phys_spu_indx;
>  
>  struct pmc_cntrl_data {
>  	unsigned long vcntr;
> @@ -111,6 +126,8 @@ struct pm_cntrl {
>  	u16 trace_mode;
>  	u16 freeze;
>  	u16 count_mode;
> +	u16 spu_addr_trace;
> +	u8  trace_buf_ovflw;
>  };
>  
>  static struct {
> @@ -128,7 +145,7 @@ static struct {
>  #define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2)
>  
>  static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
> -
> +static unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE];
>  static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];

Can't you add this data to an existing data structure, e.g. struct spu,
rather than adding a new global?
  
>  static u32 reset_value[NR_PHYS_CTRS];
>  static int num_counters;
>  static int oprofile_running;
> -static DEFINE_SPINLOCK(virt_cntr_lock);
> +static DEFINE_SPINLOCK(cntr_lock);
>  
>  static u32 ctr_enabled;
>  
> @@ -242,7 +260,6 @@ static int pm_rtas_activate_signals(u32 
>  	i = 0;
>  	for (j = 0; j < count; j++) {
>  		if (pm_signal[j].signal_group != PPU_CYCLES_GRP_NUM) {
> -
>  			/* fw expects physical cpu # */
>  			pm_signal_local[i].cpu = node;
>  			pm_signal_local[i].signal_group
> @@ -265,7 +282,6 @@ static int pm_rtas_activate_signals(u32 
>  			return -EIO;
>  		}
>  	}
> -
>  	return 0;
>  }
>  

These look like a cleanups that should go into the first patch, right?

> +static void spu_evnt_swap(unsigned long data)

This function could use a comment about why we need to swap and what
the overall effect of swapping is.

>  int spu_prof_running;
>  static unsigned int profiling_interval;
> +DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck);
> +unsigned long oprof_spu_smpl_arry_lck_flags;
>  
>  #define NUM_SPU_BITS_TRBUF 16
>  #define SPUS_PER_TB_ENTRY   4
> +#define SPUS_PER_NODE       8
>  
>  #define SPU_PC_MASK	     0xFFFF
>  
> -static DEFINE_SPINLOCK(sample_array_lock);
> -unsigned long sample_array_lock_flags;
> -

This also looks like a rename that should go into the first patch.

> +/*
> + * Entry point for SPU event profiling.
> + * NOTE:  SPU profiling is done system-wide, not per-CPU.
> + *
> + * cycles_reset is the count value specified by the user when
> + * setting up OProfile to count SPU_CYCLES.
> + */
> +void start_spu_profiling_events(void)
> +{
> +	spu_prof_running = 1;
> +	schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
> +
> +	return;
> +}
> +
> +void stop_spu_profiling_cycles(void)
>  {
>  	spu_prof_running = 0;
>  	hrtimer_cancel(&timer);
>  	kfree(samples);
> -	pr_debug("SPU_PROF: stop_spu_profiling issued\n");
> +	pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n");
> +}
> +
> +void stop_spu_profiling_events(void)
> +{
> +	spu_prof_running = 0;
>  }

Is this atomic? What if two CPUs access the spu_prof_running variable at
the same time?

	Arnd <><

WARNING: multiple messages have this Message-ID (diff)

From: Arnd Bergmann <arnd@arndb.de>
To: Carl Love <cel@us.ibm.com>
Cc: linux-kernel <linux-kernel@vger.kernel.org>,
	linuxppc-dev@ozlabs.org, oprofile-list@lists.sourceforge.net,
	cel <cel@linux.vnet.ibm.com>,
	cbe-oss-dev@ozlabs.org
Subject: Re: [Patch 3/3] OProfile SPU event profiling support for IBM Cell processor
Date: Tue, 25 Nov 2008 16:58:31 +0100	[thread overview]
Message-ID: <200811251658.32327.arnd@arndb.de> (raw)
In-Reply-To: <1227569215.6509.216.camel@carll-linux-desktop>

On Tuesday 25 November 2008, Carl Love wrote:
> 
> This is the second of the two kernel  patches for adding SPU profiling 
> for the IBM Cell processor.  This patch contains the spu event profiling
> setup, start and stop routines.
> 
> Signed-off-by: Carl Love <carll@us.ibm.com>

Maybe a little more detailed description would be good. Explain
what this patch adds that isn't already there and why people would
want to have that in the kernel.
 
>  
>  static void cell_global_stop_spu_cycles(void);
> +static void cell_global_stop_spu_events(void);

Can you reorder the functions so that you don't need any forward
declarations? In general, it gets easier to understand if the
definition order matches the call graph.

>  static unsigned int spu_cycle_reset;
>  static unsigned int profiling_mode;
> -
> +static int spu_evnt_phys_spu_indx;
>  
>  struct pmc_cntrl_data {
>  	unsigned long vcntr;
> @@ -111,6 +126,8 @@ struct pm_cntrl {
>  	u16 trace_mode;
>  	u16 freeze;
>  	u16 count_mode;
> +	u16 spu_addr_trace;
> +	u8  trace_buf_ovflw;
>  };
>  
>  static struct {
> @@ -128,7 +145,7 @@ static struct {
>  #define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2)
>  
>  static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
> -
> +static unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE];
>  static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];

Can't you add this data to an existing data structure, e.g. struct spu,
rather than adding a new global?
  
>  static u32 reset_value[NR_PHYS_CTRS];
>  static int num_counters;
>  static int oprofile_running;
> -static DEFINE_SPINLOCK(virt_cntr_lock);
> +static DEFINE_SPINLOCK(cntr_lock);
>  
>  static u32 ctr_enabled;
>  
> @@ -242,7 +260,6 @@ static int pm_rtas_activate_signals(u32 
>  	i = 0;
>  	for (j = 0; j < count; j++) {
>  		if (pm_signal[j].signal_group != PPU_CYCLES_GRP_NUM) {
> -
>  			/* fw expects physical cpu # */
>  			pm_signal_local[i].cpu = node;
>  			pm_signal_local[i].signal_group
> @@ -265,7 +282,6 @@ static int pm_rtas_activate_signals(u32 
>  			return -EIO;
>  		}
>  	}
> -
>  	return 0;
>  }
>  

These look like a cleanups that should go into the first patch, right?

> +static void spu_evnt_swap(unsigned long data)

This function could use a comment about why we need to swap and what
the overall effect of swapping is.

>  int spu_prof_running;
>  static unsigned int profiling_interval;
> +DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck);
> +unsigned long oprof_spu_smpl_arry_lck_flags;
>  
>  #define NUM_SPU_BITS_TRBUF 16
>  #define SPUS_PER_TB_ENTRY   4
> +#define SPUS_PER_NODE       8
>  
>  #define SPU_PC_MASK	     0xFFFF
>  
> -static DEFINE_SPINLOCK(sample_array_lock);
> -unsigned long sample_array_lock_flags;
> -

This also looks like a rename that should go into the first patch.

> +/*
> + * Entry point for SPU event profiling.
> + * NOTE:  SPU profiling is done system-wide, not per-CPU.
> + *
> + * cycles_reset is the count value specified by the user when
> + * setting up OProfile to count SPU_CYCLES.
> + */
> +void start_spu_profiling_events(void)
> +{
> +	spu_prof_running = 1;
> +	schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
> +
> +	return;
> +}
> +
> +void stop_spu_profiling_cycles(void)
>  {
>  	spu_prof_running = 0;
>  	hrtimer_cancel(&timer);
>  	kfree(samples);
> -	pr_debug("SPU_PROF: stop_spu_profiling issued\n");
> +	pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n");
> +}
> +
> +void stop_spu_profiling_events(void)
> +{
> +	spu_prof_running = 0;
>  }

Is this atomic? What if two CPUs access the spu_prof_running variable at
the same time?

	Arnd <><

next prev parent reply	other threads:[~2008-11-25 15:58 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-11-24 23:26 [Patch 3/3] OProfile SPU event profiling support for IBM Cell processor Carl Love
2008-11-25 15:58 ` Arnd Bergmann [this message]
2008-11-25 15:58   ` Arnd Bergmann
2008-11-25 23:13   ` Carl Love
2008-11-25 23:13     ` Carl Love

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200811251658.32327.arnd@arndb.de \
    --to=arnd@arndb.de \
    --cc=cbe-oss-dev@ozlabs.org \
    --cc=cel@linux.vnet.ibm.com \
    --cc=cel@us.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linuxppc-dev@ozlabs.org \
    --cc=oprofile-list@lists.sourceforge.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.