LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* RE: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices
From: Sethi Varun-B16395 @ 2013-10-16 17:07 UTC (permalink / raw)
  To: Bhushan Bharat-R65777, joro@8bytes.org,
	iommu@lists.linux-foundation.org, linuxppc-dev@lists.ozlabs.org,
	linux-kernel@vger.kernel.org, Yoder Stuart-B08248,
	Wood Scott-B07421, alex.williamson@redhat.com
In-Reply-To: <6A3DF150A5B70D4F9B66A25E3F7C888D071C0835@039-SN2MPN1-013.039d.mgd.msft.net>



> -----Original Message-----
> From: Bhushan Bharat-R65777
> Sent: Wednesday, October 16, 2013 10:20 PM
> To: Sethi Varun-B16395; joro@8bytes.org; iommu@lists.linux-
> foundation.org; linuxppc-dev@lists.ozlabs.org; linux-
> kernel@vger.kernel.org; Yoder Stuart-B08248; Wood Scott-B07421;
> alex.williamson@redhat.com
> Subject: RE: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe
> devices
>=20
>=20
>=20
> > -----Original Message-----
> > From: Sethi Varun-B16395
> > Sent: Wednesday, October 16, 2013 4:53 PM
> > To: joro@8bytes.org; iommu@lists.linux-foundation.org; linuxppc-
> > dev@lists.ozlabs.org; linux-kernel@vger.kernel.org; Yoder
> > Stuart-B08248; Wood Scott-B07421; alex.williamson@redhat.com; Bhushan
> > Bharat-R65777
> > Cc: Sethi Varun-B16395
> > Subject: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe
> > devices
> >
> > Once the PCIe device assigned to a guest VM (via VFIO) gets detached
> > from the iommu domain (when guest terminates), its PAMU table entry is
> > disabled. So, this would prevent the device from being used once it's
> > assigned back to the host.
> >
> > This patch allows for creation of a default DMA window corresponding
> > to the device and subsequently enabling the PAMU table entry. Before
> > we enable the entry, we ensure that the device's bus master capability
> > is disabled (device quiesced).
> >
> > Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
> > ---
> >  drivers/iommu/fsl_pamu.c        |   43 ++++++++++++++++++++++++++++---
> -----
> >  drivers/iommu/fsl_pamu.h        |    1 +
> >  drivers/iommu/fsl_pamu_domain.c |   46
> ++++++++++++++++++++++++++++++++++++---
> >  3 files changed, 78 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index
> > cba0498..fb4a031 100644
> > --- a/drivers/iommu/fsl_pamu.c
> > +++ b/drivers/iommu/fsl_pamu.c
> > @@ -225,6 +225,21 @@ static struct paace *pamu_get_spaace(struct paace
> > *paace,
> > u32 wnum)
> >  	return spaace;
> >  }
> >
> > +/*
> > + * Defaul PPAACE settings for an LIODN.
> > + */
> > +static void setup_default_ppaace(struct paace *ppaace) {
> > +	pamu_init_ppaace(ppaace);
> > +	/* window size is 2^(WSE+1) bytes */
> > +	set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35);
> > +	ppaace->wbah =3D 0;
> > +	set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0);
> > +	set_bf(ppaace->impl_attr, PAACE_IA_ATM,
> > +		PAACE_ATM_NO_XLATE);
> > +	set_bf(ppaace->addr_bitfields, PAACE_AF_AP,
> > +		PAACE_AP_PERMS_ALL);
> > +}
> >  /**
> >   * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves
> subwindows
> >   *                                required for primary PAACE in the
> secondary
> > @@ -253,6 +268,24 @@ static unsigned long
> > pamu_get_fspi_and_allocate(u32
> > subwin_cnt)
> >  	return (spaace_addr - (unsigned long)spaact) / (sizeof(struct
> > paace));  }
> >
> > +/* Reset the PAACE entry to the default state */ void
> > +enable_default_dma_window(int liodn) {
> > +	struct paace *ppaace;
> > +
> > +	ppaace =3D pamu_get_ppaace(liodn);
> > +	if (!ppaace) {
> > +		pr_debug("Invalid liodn entry\n");
> > +		return;
> > +	}
> > +
> > +	memset(ppaace, 0, sizeof(struct paace));
> > +
> > +	setup_default_ppaace(ppaace);
> > +	mb();
> > +	pamu_enable_liodn(liodn);
> > +}
> > +
> >  /* Release the subwindows reserved for a particular LIODN */  void
> > pamu_free_subwins(int liodn)  { @@ -752,15 +785,7 @@ static void
> > __init
> > setup_liodns(void)
> >  				continue;
> >  			}
> >  			ppaace =3D pamu_get_ppaace(liodn);
> > -			pamu_init_ppaace(ppaace);
> > -			/* window size is 2^(WSE+1) bytes */
> > -			set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35);
> > -			ppaace->wbah =3D 0;
> > -			set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0);
> > -			set_bf(ppaace->impl_attr, PAACE_IA_ATM,
> > -				PAACE_ATM_NO_XLATE);
> > -			set_bf(ppaace->addr_bitfields, PAACE_AF_AP,
> > -				PAACE_AP_PERMS_ALL);
> > +			setup_default_ppaace(ppaace);
> >  			if (of_device_is_compatible(node, "fsl,qman-portal"))
> >  				setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE);
> >  			if (of_device_is_compatible(node, "fsl,qman")) diff --
> git
> > a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index
> > 8fc1a12..0edcbbbb
> > 100644
> > --- a/drivers/iommu/fsl_pamu.h
> > +++ b/drivers/iommu/fsl_pamu.h
> > @@ -406,5 +406,6 @@ void get_ome_index(u32 *omi_index, struct device
> > *dev);  int pamu_update_paace_stash(int liodn, u32 subwin, u32 value);
> > int pamu_disable_spaace(int liodn, u32 subwin);
> >  u32 pamu_get_max_subwin_cnt(void);
> > +void enable_default_dma_window(int liodn);
> >
> >  #endif  /* __FSL_PAMU_H */
> > diff --git a/drivers/iommu/fsl_pamu_domain.c
> > b/drivers/iommu/fsl_pamu_domain.c index 966ae70..dd6cafc 100644
> > --- a/drivers/iommu/fsl_pamu_domain.c
> > +++ b/drivers/iommu/fsl_pamu_domain.c
> > @@ -340,17 +340,57 @@ static inline struct device_domain_info
> > *find_domain(struct device *dev)
> >  	return dev->archdata.iommu_domain;
> >  }
> >
> > +/* Disable device DMA capability and enable default DMA window */
> > +static void disable_device_dma(struct device_domain_info *info,
> > +				int enable_dma_window)
> > +{
> > +#ifdef CONFIG_PCI
> > +	if (info->dev->bus =3D=3D &pci_bus_type) {
> > +		struct pci_dev *pdev =3D NULL;
> > +		pdev =3D to_pci_dev(info->dev);
> > +		if (pci_is_enabled(pdev))
> > +			pci_disable_device(pdev);
> > +	}
> > +#endif
> > +
> > +	if (enable_dma_window)
> > +		enable_default_dma_window(info->liodn);
> > +}
> > +
> > +static int check_for_shared_liodn(struct device_domain_info *info) {
> > +	struct device_domain_info *tmp;
> > +
> > +	/*
> > +	 * Sanity check, to ensure that this is not a
> > +	 * shared LIODN. In case of a PCIe controller
> > +	 * it's possible that all PCIe devices share
> > +	 * the same LIODN.
> > +	 */
> > +	list_for_each_entry(tmp, &info->domain->devices, link) {
> > +		if (info->liodn =3D=3D tmp->liodn)
> > +			return 1;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> >  static void remove_device_ref(struct device_domain_info *info, u32
> win_cnt)  {
> >  	unsigned long flags;
> > +	int enable_dma_window =3D 0;
> >
> >  	list_del(&info->link);
> >  	spin_lock_irqsave(&iommu_lock, flags);
> > -	if (win_cnt > 1)
> > -		pamu_free_subwins(info->liodn);
> > -	pamu_disable_liodn(info->liodn);
> > +	if (!check_for_shared_liodn(info)) {
>
> One query; Do we really need to check for this?
>
[Sethi Varun-B16395] Yes, just a sanity check to ensure that there are no more
devices linked to this LIODN and we can disable it.

-Varun

> Otherwise this patch series looks good to me.
>=20
> Thanks
> -Bharat
>=20
> > +		if (win_cnt > 1)
> > +			pamu_free_subwins(info->liodn);
> > +		pamu_disable_liodn(info->liodn);
> > +		enable_dma_window =3D 1;
> > +	}
> >  	spin_unlock_irqrestore(&iommu_lock, flags);
> >  	spin_lock_irqsave(&device_domain_lock, flags);
> > +	disable_device_dma(info, enable_dma_window);
> >  	info->dev->archdata.iommu_domain =3D NULL;
> >  	kmem_cache_free(iommu_devinfo_cache, info);
> >  	spin_unlock_irqrestore(&device_domain_lock, flags);
> > --
> > 1.7.9.5

^ permalink raw reply

* RE: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and altivec idle
From: Bhushan Bharat-R65777 @ 2013-10-16 17:01 UTC (permalink / raw)
  To: Wang Dongsheng-B40534, Wood Scott-B07421; +Cc: linuxppc-dev@lists.ozlabs.org
In-Reply-To: <1381828871-17110-4-git-send-email-dongsheng.wang@freescale.com>



> -----Original Message-----
> From: Wang Dongsheng-B40534
> Sent: Tuesday, October 15, 2013 2:51 PM
> To: Wood Scott-B07421
> Cc: Bhushan Bharat-R65777; linuxppc-dev@lists.ozlabs.org; Wang Dongsheng-B40534
> Subject: [PATCH v5 4/4] powerpc/85xx: add sysfs for pw20 state and altivec idle
>=20
> From: Wang Dongsheng <dongsheng.wang@freescale.com>
>=20
> Add a sys interface to enable/disable pw20 state or altivec idle, and control the
> wait entry time.
>=20
> Enable/Disable interface:
> 0, disable. 1, enable.
> /sys/devices/system/cpu/cpuX/pw20_state
> /sys/devices/system/cpu/cpuX/altivec_idle
>=20
> Set wait time interface:(Nanosecond)
> /sys/devices/system/cpu/cpuX/pw20_wait_time
> /sys/devices/system/cpu/cpuX/altivec_idle_wait_time
> Example: Base on TBfreq is 41MHZ.
> 1~48(ns): TB[63]
> 49~97(ns): TB[62]
> 98~195(ns): TB[61]
> 196~390(ns): TB[60]
> 391~780(ns): TB[59]
> 781~1560(ns): TB[58]
> ...
>=20
> Signed-off-by: Wang Dongsheng <dongsheng.wang@freescale.com>
> ---
> *v5:
> Change get_idle_ticks_bit function implementation.
>=20
> *v4:
> Move code from 85xx/common.c to kernel/sysfs.c.
>=20
> Remove has_pw20_altivec_idle function.
>=20
> Change wait "entry_bit" to wait time.
>=20
> diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c in=
dex
> 27a90b9..10d1128 100644
> --- a/arch/powerpc/kernel/sysfs.c
> +++ b/arch/powerpc/kernel/sysfs.c
> @@ -85,6 +85,284 @@ __setup("smt-snooze-delay=3D", setup_smt_snooze_delay=
);
>=20
>  #endif /* CONFIG_PPC64 */
>=20
> +#ifdef CONFIG_FSL_SOC
> +#define MAX_BIT				63
> +
> +static u64 pw20_wt;
> +static u64 altivec_idle_wt;
> +
> +static unsigned int get_idle_ticks_bit(u64 ns) {
> +	u64 cycle;
> +
> +	if (ns >=3D 10000)
> +		cycle =3D div_u64(ns + 500, 1000) * tb_ticks_per_usec;
> +	else
> +		cycle =3D div_u64(ns * tb_ticks_per_usec, 1000);
> +
> +	if (!cycle)
> +		return 0;
> +
> +	return ilog2(cycle);
> +}
> +
> +static void do_show_pwrmgtcr0(void *val) {
> +	u32 *value =3D val;
> +
> +	*value =3D mfspr(SPRN_PWRMGTCR0);
> +}
> +
> +static ssize_t show_pw20_state(struct device *dev,
> +				struct device_attribute *attr, char *buf) {
> +	u32 value;
> +	unsigned int cpu =3D dev->id;
> +
> +	smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
> +
> +	value &=3D PWRMGTCR0_PW20_WAIT;
> +
> +	return sprintf(buf, "%u\n", value ? 1 : 0); }
> +
> +static void do_store_pw20_state(void *val) {
> +	u32 *value =3D val;
> +	u32 pw20_state;
> +
> +	pw20_state =3D mfspr(SPRN_PWRMGTCR0);
> +
> +	if (*value)
> +		pw20_state |=3D PWRMGTCR0_PW20_WAIT;
> +	else
> +		pw20_state &=3D ~PWRMGTCR0_PW20_WAIT;
> +
> +	mtspr(SPRN_PWRMGTCR0, pw20_state);
> +}
> +
> +static ssize_t store_pw20_state(struct device *dev,
> +				struct device_attribute *attr,
> +				const char *buf, size_t count)
> +{
> +	u32 value;
> +	unsigned int cpu =3D dev->id;
> +
> +	if (kstrtou32(buf, 0, &value))
> +		return -EINVAL;
> +
> +	if (value > 1)
> +		return -EINVAL;
> +
> +	smp_call_function_single(cpu, do_store_pw20_state, &value, 1);
> +
> +	return count;
> +}
> +
> +static ssize_t show_pw20_wait_time(struct device *dev,
> +				struct device_attribute *attr, char *buf) {
> +	u32 value;
> +	u64 tb_cycle;
> +	s64 time;
> +
> +	unsigned int cpu =3D dev->id;
> +
> +	if (!pw20_wt) {
> +		smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
> +		value =3D (value & PWRMGTCR0_PW20_ENT) >>
> +					PWRMGTCR0_PW20_ENT_SHIFT;
> +
> +		tb_cycle =3D (1 << (MAX_BIT - value)) * 2;

Is value = 0 and value = 1 legal? These will make tb_cycle = 0,

> +		time =3D div_u64(tb_cycle * 1000, tb_ticks_per_usec) - 1;

And time = -1;


> +	} else {
> +		time =3D pw20_wt;
> +	}
> +
> +	return sprintf(buf, "%llu\n", time > 0 ? time : 0);
> }
> +
> +static void set_pw20_wait_entry_bit(void *val) {
> +	u32 *value =3D val;
> +	u32 pw20_idle;
> +
> +	pw20_idle =3D mfspr(SPRN_PWRMGTCR0);
> +
> +	/* Set Automatic PW20 Core Idle Count */
> +	/* clear count */
> +	pw20_idle &=3D ~PWRMGTCR0_PW20_ENT;
> +
> +	/* set count */
> +	pw20_idle |=3D ((MAX_BIT - *value) << PWRMGTCR0_PW20_ENT_SHIFT);
> +
> +	mtspr(SPRN_PWRMGTCR0, pw20_idle);
> +}
> +
> +static ssize_t store_pw20_wait_time(struct device *dev,
> +				struct device_attribute *attr,
> +				const char *buf, size_t count)
> +{
> +	u32 entry_bit;
> +	u64 value;
> +
> +	unsigned int cpu =3D dev->id;
> +
> +	if (kstrtou64(buf, 0, &value))
> +		return -EINVAL;
> +
> +	if (!value)
> +		return -EINVAL;
> +
> +	entry_bit =3D get_idle_ticks_bit(value);
> +	if (entry_bit > MAX_BIT)
> +		return -EINVAL;
> +
> +	pw20_wt =3D value;
> +	smp_call_function_single(cpu, set_pw20_wait_entry_bit,
> +				&entry_bit, 1);
> +
> +	return count;
> +}
> +
> +static ssize_t show_altivec_idle(struct device *dev,
> +				struct device_attribute *attr, char *buf) {
> +	u32 value;
> +	unsigned int cpu =3D dev->id;
> +
> +	smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
> +
> +	value &=3D PWRMGTCR0_AV_IDLE_PD_EN;
> +
> +	return sprintf(buf, "%u\n", value ? 1 : 0); }
> +
> +static void do_store_altivec_idle(void *val) {
> +	u32 *value =3D val;
> +	u32 altivec_idle;
> +
> +	altivec_idle =3D mfspr(SPRN_PWRMGTCR0);
> +
> +	if (*value)
> +		altivec_idle |=3D PWRMGTCR0_AV_IDLE_PD_EN;
> +	else
> +		altivec_idle &=3D ~PWRMGTCR0_AV_IDLE_PD_EN;
> +
> +	mtspr(SPRN_PWRMGTCR0, altivec_idle);
> +}
> +
> +static ssize_t store_altivec_idle(struct device *dev,
> +				struct device_attribute *attr,
> +				const char *buf, size_t count)
> +{
> +	u32 value;
> +	unsigned int cpu =3D dev->id;
> +
> +	if (kstrtou32(buf, 0, &value))
> +		return -EINVAL;
> +
> +	if (value > 1)
> +		return -EINVAL;
> +
> +	smp_call_function_single(cpu, do_store_altivec_idle, &value, 1);
> +
> +	return count;
> +}
> +
> +static ssize_t show_altivec_idle_wait_time(struct device *dev,
> +				struct device_attribute *attr, char *buf) {
> +	u32 value;
> +	u64 tb_cycle;
> +	s64 time;
> +
> +	unsigned int cpu =3D dev->id;
> +
> +	if (!altivec_idle_wt) {
> +		smp_call_function_single(cpu, do_show_pwrmgtcr0, &value, 1);
> +		value =3D (value & PWRMGTCR0_AV_IDLE_CNT) >>
> +					PWRMGTCR0_AV_IDLE_CNT_SHIFT;
> +
> +		tb_cycle =3D (1 << (MAX_BIT - value)) * 2;
> +		time =3D div_u64(tb_cycle * 1000, tb_ticks_per_usec) - 1;

Likewise

Thanks
-Bharat

> +	} else {
> +		time =3D altivec_idle_wt;
> +	}
> +
> +	return sprintf(buf, "%llu\n", time > 0 ? time : 0); }
> +
> +static void set_altivec_idle_wait_entry_bit(void *val) {
> +	u32 *value =3D val;
> +	u32 altivec_idle;
> +
> +	altivec_idle =3D mfspr(SPRN_PWRMGTCR0);
> +
> +	/* Set Automatic AltiVec Idle Count */
> +	/* clear count */
> +	altivec_idle &=3D ~PWRMGTCR0_AV_IDLE_CNT;
> +
> +	/* set count */
> +	altivec_idle |=3D ((MAX_BIT - *value) << PWRMGTCR0_AV_IDLE_CNT_SHIFT);
> +
> +	mtspr(SPRN_PWRMGTCR0, altivec_idle);
> +}
> +
> +static ssize_t store_altivec_idle_wait_time(struct device *dev,
> +				struct device_attribute *attr,
> +				const char *buf, size_t count)
> +{
> +	u32 entry_bit;
> +	u64 value;
> +
> +	unsigned int cpu =3D dev->id;
> +
> +	if (kstrtou64(buf, 0, &value))
> +		return -EINVAL;
> +
> +	if (!value)
> +		return -EINVAL;
> +
> +	entry_bit =3D get_idle_ticks_bit(value);
> +	if (entry_bit > MAX_BIT)
> +		return -EINVAL;
> +
> +	altivec_idle_wt =3D value;
> +	smp_call_function_single(cpu, set_altivec_idle_wait_entry_bit,
> +				&entry_bit, 1);
> +
> +	return count;
> +}
> +
> +/*
> + * Enable/Disable interface:
> + * 0, disable. 1, enable.
> + */
> +static DEVICE_ATTR(pw20_state, 0600, show_pw20_state,
> +store_pw20_state); static DEVICE_ATTR(altivec_idle, 0600,
> +show_altivec_idle, store_altivec_idle);
> +
> +/*
> + * Set wait time interface:(Nanosecond)
> + * Example: Base on TBfreq is 41MHZ.
> + * 1~48(ns): TB[63]
> + * 49~97(ns): TB[62]
> + * 98~195(ns): TB[61]
> + * 196~390(ns): TB[60]
> + * 391~780(ns): TB[59]
> + * 781~1560(ns): TB[58]
> + * ...
> + */
> +static DEVICE_ATTR(pw20_wait_time, 0600,
> +			show_pw20_wait_time,
> +			store_pw20_wait_time);
> +static DEVICE_ATTR(altivec_idle_wait_time, 0600,
> +			show_altivec_idle_wait_time,
> +			store_altivec_idle_wait_time);
> +#endif
> +
>  /*
>   * Enabling PMCs will slow partition context switch times so we only do
>   * it the first time we write to the PMCs.
> @@ -407,6 +685,15 @@ static void register_cpu_online(unsigned int cpu)
>  		device_create_file(s, &dev_attr_pir);  #endif /* CONFIG_PPC64 */
>=20
> +#ifdef CONFIG_FSL_SOC
> +	if (PVR_VER(cur_cpu_spec->pvr_value) =3D=3D PVR_VER_E6500) {
> +		device_create_file(s, &dev_attr_pw20_state);
> +		device_create_file(s, &dev_attr_pw20_wait_time);
> +
> +		device_create_file(s, &dev_attr_altivec_idle);
> +		device_create_file(s, &dev_attr_altivec_idle_wait_time);
> +	}
> +#endif
>  	cacheinfo_cpu_online(cpu);
>  }
>=20
> @@ -479,6 +766,15 @@ static void unregister_cpu_online(unsigned int cpu)
>  		device_remove_file(s, &dev_attr_pir);  #endif /* CONFIG_PPC64 */
>=20
> +#ifdef CONFIG_FSL_SOC
> +	if (PVR_VER(cur_cpu_spec->pvr_value) =3D=3D PVR_VER_E6500) {
> +		device_remove_file(s, &dev_attr_pw20_state);
> +		device_remove_file(s, &dev_attr_pw20_wait_time);
> +
> +		device_remove_file(s, &dev_attr_altivec_idle);
> +		device_remove_file(s, &dev_attr_altivec_idle_wait_time);
> +	}
> +#endif
>  	cacheinfo_cpu_offline(cpu);
>  }
>=20
> --
> 1.8.0

^ permalink raw reply

* RE: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices
From: Bhushan Bharat-R65777 @ 2013-10-16 16:50 UTC (permalink / raw)
  To: Sethi Varun-B16395, joro@8bytes.org,
	iommu@lists.linux-foundation.org, linuxppc-dev@lists.ozlabs.org,
	linux-kernel@vger.kernel.org, Yoder Stuart-B08248,
	Wood Scott-B07421, alex.williamson@redhat.com
In-Reply-To: <1381922582-28724-3-git-send-email-Varun.Sethi@freescale.com>



> -----Original Message-----
> From: Sethi Varun-B16395
> Sent: Wednesday, October 16, 2013 4:53 PM
> To: joro@8bytes.org; iommu@lists.linux-foundation.org; linuxppc-
> dev@lists.ozlabs.org; linux-kernel@vger.kernel.org; Yoder Stuart-B08248; Wood
> Scott-B07421; alex.williamson@redhat.com; Bhushan Bharat-R65777
> Cc: Sethi Varun-B16395
> Subject: [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices
>=20
> Once the PCIe device assigned to a guest VM (via VFIO) gets detached from the
> iommu domain (when guest terminates), its PAMU table entry is disabled. So, this
> would prevent the device from being used once it's assigned back to the host.
>=20
> This patch allows for creation of a default DMA window corresponding to the
> device and subsequently enabling the PAMU table entry. Before we enable the
> entry, we ensure that the device's bus master capability is disabled (device
> quiesced).
>=20
> Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
> ---
>  drivers/iommu/fsl_pamu.c        |   43 ++++++++++++++++++++++++++++-----=
---
>  drivers/iommu/fsl_pamu.h        |    1 +
>  drivers/iommu/fsl_pamu_domain.c |   46 +++++++++++++++++++++++++++++++++=
+++---
>  3 files changed, 78 insertions(+), 12 deletions(-)
>=20
> diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index
> cba0498..fb4a031 100644
> --- a/drivers/iommu/fsl_pamu.c
> +++ b/drivers/iommu/fsl_pamu.c
> @@ -225,6 +225,21 @@ static struct paace *pamu_get_spaace(struct paace *p=
aace,
> u32 wnum)
>  	return spaace;
>  }
>=20
> +/*
> + * Defaul PPAACE settings for an LIODN.
> + */
> +static void setup_default_ppaace(struct paace *ppaace) {
> +	pamu_init_ppaace(ppaace);
> +	/* window size is 2^(WSE+1) bytes */
> +	set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35);
> +	ppaace->wbah =3D 0;
> +	set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0);
> +	set_bf(ppaace->impl_attr, PAACE_IA_ATM,
> +		PAACE_ATM_NO_XLATE);
> +	set_bf(ppaace->addr_bitfields, PAACE_AF_AP,
> +		PAACE_AP_PERMS_ALL);
> +}
>  /**
>   * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves subw=
indows
>   *                                required for primary PAACE in the seco=
ndary
> @@ -253,6 +268,24 @@ static unsigned long pamu_get_fspi_and_allocate(u32
> subwin_cnt)
>  	return (spaace_addr - (unsigned long)spaact) / (sizeof(struct paace)); =
 }
>=20
> +/* Reset the PAACE entry to the default state */ void
> +enable_default_dma_window(int liodn) {
> +	struct paace *ppaace;
> +
> +	ppaace =3D pamu_get_ppaace(liodn);
> +	if (!ppaace) {
> +		pr_debug("Invalid liodn entry\n");
> +		return;
> +	}
> +
> +	memset(ppaace, 0, sizeof(struct paace));
> +
> +	setup_default_ppaace(ppaace);
> +	mb();
> +	pamu_enable_liodn(liodn);
> +}
> +
>  /* Release the subwindows reserved for a particular LIODN */  void
> pamu_free_subwins(int liodn)  { @@ -752,15 +785,7 @@ static void __init
> setup_liodns(void)
>  				continue;
>  			}
>  			ppaace =3D pamu_get_ppaace(liodn);
> -			pamu_init_ppaace(ppaace);
> -			/* window size is 2^(WSE+1) bytes */
> -			set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35);
> -			ppaace->wbah =3D 0;
> -			set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0);
> -			set_bf(ppaace->impl_attr, PAACE_IA_ATM,
> -				PAACE_ATM_NO_XLATE);
> -			set_bf(ppaace->addr_bitfields, PAACE_AF_AP,
> -				PAACE_AP_PERMS_ALL);
> +			setup_default_ppaace(ppaace);
>  			if (of_device_is_compatible(node, "fsl,qman-portal"))
>  				setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE);
>  			if (of_device_is_compatible(node, "fsl,qman")) diff --git
> a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index 8fc1a12..0edc=
bbbb
> 100644
> --- a/drivers/iommu/fsl_pamu.h
> +++ b/drivers/iommu/fsl_pamu.h
> @@ -406,5 +406,6 @@ void get_ome_index(u32 *omi_index, struct device *dev=
);  int
> pamu_update_paace_stash(int liodn, u32 subwin, u32 value);  int
> pamu_disable_spaace(int liodn, u32 subwin);
>  u32 pamu_get_max_subwin_cnt(void);
> +void enable_default_dma_window(int liodn);
>=20
>  #endif  /* __FSL_PAMU_H */
> diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_dom=
ain.c
> index 966ae70..dd6cafc 100644
> --- a/drivers/iommu/fsl_pamu_domain.c
> +++ b/drivers/iommu/fsl_pamu_domain.c
> @@ -340,17 +340,57 @@ static inline struct device_domain_info
> *find_domain(struct device *dev)
>  	return dev->archdata.iommu_domain;
>  }
>=20
> +/* Disable device DMA capability and enable default DMA window */
> +static void disable_device_dma(struct device_domain_info *info,
> +				int enable_dma_window)
> +{
> +#ifdef CONFIG_PCI
> +	if (info->dev->bus =3D=3D &pci_bus_type) {
> +		struct pci_dev *pdev =3D NULL;
> +		pdev =3D to_pci_dev(info->dev);
> +		if (pci_is_enabled(pdev))
> +			pci_disable_device(pdev);
> +	}
> +#endif
> +
> +	if (enable_dma_window)
> +		enable_default_dma_window(info->liodn);
> +}
> +
> +static int check_for_shared_liodn(struct device_domain_info *info) {
> +	struct device_domain_info *tmp;
> +
> +	/*
> +	 * Sanity check, to ensure that this is not a
> +	 * shared LIODN. In case of a PCIe controller
> +	 * it's possible that all PCIe devices share
> +	 * the same LIODN.
> +	 */
> +	list_for_each_entry(tmp, &info->domain->devices, link) {
> +		if (info->liodn =3D=3D tmp->liodn)
> +			return 1;
> +	}
> +
> +	return 0;
> +}
> +
>  static void remove_device_ref(struct device_domain_info *info, u32 win_c=
nt)  {
>  	unsigned long flags;
> +	int enable_dma_window =3D 0;
>=20
>  	list_del(&info->link);
>  	spin_lock_irqsave(&iommu_lock, flags);
> -	if (win_cnt > 1)
> -		pamu_free_subwins(info->liodn);
> -	pamu_disable_liodn(info->liodn);
> +	if (!check_for_shared_liodn(info)) {

One query; Do we really need to check for this?

Otherwise this patch series looks good to me.

Thanks
-Bharat

> +		if (win_cnt > 1)
> +			pamu_free_subwins(info->liodn);
> +		pamu_disable_liodn(info->liodn);
> +		enable_dma_window =3D 1;
> +	}
>  	spin_unlock_irqrestore(&iommu_lock, flags);
>  	spin_lock_irqsave(&device_domain_lock, flags);
> +	disable_device_dma(info, enable_dma_window);
>  	info->dev->archdata.iommu_domain =3D NULL;
>  	kmem_cache_free(iommu_devinfo_cache, info);
>  	spin_unlock_irqrestore(&device_domain_lock, flags);
> --
> 1.7.9.5

^ permalink raw reply

* Re: [PATCH v5] powerpc/mpc85xx: Update the clock nodes in device tree
From: Scott Wood @ 2013-10-16 16:46 UTC (permalink / raw)
  To: Tang Yuantian-B29983
  Cc: Mark Rutland, Wood Scott-B07421, linuxppc-dev@lists.ozlabs.org,
	Li Yang-Leo-R58472, devicetree@vger.kernel.org
In-Reply-To: <D07C73A334FF604B95B3CBD2A545D07B150C312F@039-SN2MPN1-013.039d.mgd.msft.net>

On Tue, 2013-10-15 at 21:57 -0500, Tang Yuantian-B29983 wrote:
> > > > > >
> > > > > The device tree makes that quite clear.
> > > >
> > > > You chose to model it that way in the device tree; that doesn't make
> > > > it clear that the hardware works that way or that it's a good way to
> > > > model it.
> > > >
> > > > > Each PLL has several output which MUX node can take from.
> > > >
> > > > Point out where in the hardware documentation it says this.  What I
> > > > see is a PLL that has one output, and a MUX register that can choose
> > > > from multiple PLL and divider options.
> > > >
> > > Take T4240 for example: see section 4.6.5.1 , (Page 141) in T4240RM Rev.
> > D, 09/2012.
> > 
> > That shows the dividers as being somewhere in between the PLL and the MUX.
> > The MUX is where the divider is selected.  There's nothing in the PLL's
> > programming interface that relates to the dividers.  As such it's simpler
> > to model it as being part of the MUX.
> > 
> > -Scott
> > 
> I don't know whether it is simpler, but "modeling divider as being part of the MUX"
> is your guess, right?
> If the "divider" is included in MUX, the MUX would not be called "MUX".

It's still selecting from multiple PLLs.

> I don't know whether "divider" module exists or not. If it exists, it should be part
> of PLL or between PLL and MUX. wherever it was, the device tree binding is appropriate.

The device tree binding is unnecessarily complicated.

> The P3041RM shows exactly each PLL has 2 outputs which definitely have no "divider" at all.

That diagram is a bit weird -- one of the outputs is used as is, and the
other is split into 1/2 and 1/4.  It doesn't really matter though; the
end result is the same.  We're describing the programming interface, not
artwork choices in the manual.

-Scott

^ permalink raw reply

* Re: [PATCH 02/10][v6] powerpc/Power7: detect load/store instructions
From: Sukadev Bhattiprolu @ 2013-10-16 15:39 UTC (permalink / raw)
  To: Anshuman Khandual
  Cc: Michael Ellerman, linux-kernel, Stephane Eranian, linuxppc-dev,
	David Laight, Paul Mackerras, Arnaldo Carvalho de Melo
In-Reply-To: <525E5E79.6010905@linux.vnet.ibm.com>

Anshuman Khandual [khandual@linux.vnet.ibm.com] wrote:
| On 10/16/2013 01:55 PM, David Laight wrote:
| >> Implement instr_is_load_store_2_06() to detect whether a given instruction
| >> is one of the fixed-point or floating-point load/store instructions in the
| >> POWER Instruction Set Architecture v2.06.
| > ...
| 
| The op code encoding is dependent on the ISA version ? Does the basic load
| and store instructions change with newer ISA versions ?

TBH, I don't know whether the encoding is dependent on the ISA version.

We need this for a very narrow/specific purpose on Power7 _and_ did not
want to set up expectations that it will work with all versions. Hence
the horribly named function :-)

| BTW we have got a
| newer version for the ISA "PowerISA_V2.07_PUBLIC.pdf" here at power.org
| 
| https://www.power.org/documentation/power-isa-version-2-07/

Yes, but on Power8 there is a bit in the SIER that tells us whether it
is a load or store instruction. We use that and don't need to determine
in software.

Power7 does not have such a bit and we need this only for Power7. We are
not targeting this "memory hierarchy" feature for Power6 or older processors.

| 
| Does not sound like a good idea to analyse the instructions with functions
| names which specify ISA version number. Besides, this function does not
| belong to specific processor or platform. It has to be bit generic.
| 
| >> +int instr_is_load_store_2_06(const unsigned int *instr)
| >> +{
| >> +	unsigned int op, upper, lower;
| >> +
| >> +	op = instr_opcode(*instr);
| >> +
| >> +	if ((op >= 32 && op <= 58) || (op == 61 || op == 62))
| >> +		return true;
| >> +
| >> +	if (op != 31)
| >> +		return false;
| >> +
| >> +	upper = op >> 5;
| >> +	lower = op & 0x1f;
| >> +
| >> +	/* Short circuit as many misses as we can */
| >> +	if (lower < 3 || lower > 23)
| >> +		return false;
| >> +
| >> +	if (lower == 3) {
| >> +		if (upper >= 16)
| >> +			return true;
| >> +
| >> +		return false;
| >> +	}
| >> +
| >> +	if (lower == 7 || lower == 12)
| >> +		return true;
| >> +
| >> +	if (lower >= 20) /* && lower <= 23 (implicit) */
| >> +		return true;
| >> +
| >> +	return false;
| >> +}
| > 
| > I can't help feeling the code could do with some comments about
| > which actual instructions are selected where.
| 
| Yeah, I agree. At least which category of load-store instructions are
| getting selected in each case.

Like I mentioned in the other message, how about adding a couple
of lines in the function header ?

^ permalink raw reply

* Re: [PATCH 02/10][v6] powerpc/Power7: detect load/store instructions
From: Sukadev Bhattiprolu @ 2013-10-16 15:27 UTC (permalink / raw)
  To: David Laight
  Cc: Michael Ellerman, linux-kernel, Stephane Eranian, linuxppc-dev,
	Paul Mackerras, Arnaldo Carvalho de Melo, Anshuman Khandual
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B738E@saturn3.aculab.com>

David Laight [David.Laight@aculab.com] wrote:
| 
| I can't help feeling the code could do with some comments about
| which actual instructions are selected where.

At a high level, only the load and store instructions are selected.

I added a reference to the Appendix F (Opcode maps) in the function
header.  The opcode maps is a table of upper x lower values. From
that table it should be fairly straightforward which instructions
are selected.

How about I add this to the function header ?

 * Please use the table in Appendix F (opcode maps) to determine
 * events selected by this function.

There are over 100 instructions selected by this list and wasn't
sure if we should list them all.

Sukadev

^ permalink raw reply

* Re: Perf not resolving all symbols, showing 0x7ffffxxx
From: Martin Hicks @ 2013-10-16 15:05 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Scott Wood, linuxppc-dev, Anton Blanchard
In-Reply-To: <1381869590.17841.23.camel@pasglop>

Actually, I was wrong, the mpc8379 is an e300c4.

So it seems clear to me that we compile in the book3s code because
this is an 83xx CPU part.  I also see that Kconfig knows that I have
a core-fsl-emb but we don't actually compile the PMU backend for it
because there's no support for anything but e500.

mort@chinook:~/src/s4v2-glibc/linux-mpc$ grep PERF .config
CONFIG_FSL_EMB_PERFMON=y
CONFIG_PPC_PERF_CTRS=y
CONFIG_HAVE_PERF_EVENTS=y
CONFIG_PERF_EVENTS=y
# CONFIG_DEBUG_PERF_USE_VMALLOC is not set
mort@chinook:~/src/s4v2-glibc/linux-mpc$ grep BOOK3S .config
CONFIG_PPC_BOOK3S_32=y
CONFIG_PPC_BOOK3S=y

more below...

On Tue, Oct 15, 2013 at 4:39 PM, Benjamin Herrenschmidt
<benh@kernel.crashing.org> wrote:
> On Tue, 2013-10-15 at 15:22 -0500, Scott Wood wrote:
>> On Tue, 2013-10-15 at 14:53 -0500, Benjamin Herrenschmidt wrote:
>> > On Tue, 2013-10-15 at 14:44 -0400, Martin Hicks wrote:
>> > > >
>> > > > This is an e300 core right ? (603...). Do that have an SIAR at all
>> > > > (Scott ?)
>> > >
>> > > Yes, e300c3.
>> >
>> > Ok so I have a hard time figuring out how that patch can make a
>> > difference since for all I can see, there is no perf backend upstream
>> > for e300 at all :-(
>> >
>> > I must certainly be missing something ... Scott, can you have a look ?
>>
>> e300c3 has a core-fsl-emb style performance monitor (though Linux
>> doesn't support it yet).  If a bug was bisected to a change in
>> core-book3s.c, then it's probably a coincidence due to moving code
>> around.

CONFIG_PPC_PERF_CTRS seems to give the mpc8379 some kind of basic
performance measuring.  Is this through dummy_perf() in
arch/powerpc/kernel/pmc.c?

>
> Mort, can you see if just that change is enough to cause the problem ?

It is not.  The patch that does get IPs working again in my 3.11 tree
is this one:

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index eeae308..9a3f572 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -122,10 +122,6 @@ void power_pmu_flush_branch_stack(void) {}
 static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
 #endif /* CONFIG_PPC32 */

-static bool regs_use_siar(struct pt_regs *regs)
-{
-       return !!regs->result;
-}

 /*
  * Things that are specific to 64-bit implementations.
@@ -1802,14 +1798,13 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
  */
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
-       bool use_siar = regs_use_siar(regs);
-
-       if (use_siar && siar_valid(regs))
-               return mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
-       else if (use_siar)
-               return 0;               // no valid instruction pointer
-       else
+       unsigned long mmcra = regs->dsisr;
+       if (TRAP(regs) != 0xf00)
+               return regs->nip;
+       if ((ppmu->flags & PPMU_NO_CONT_SAMPLING) &&
+           !(mmcra & MMCRA_SAMPLE_ENABLE))
                return regs->nip;
+       return mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
 }

 static bool pmc_overflow_power7(unsigned long val)


mh

-- 
Martin Hicks P.Eng.      |         mort@bork.org
Bork Consulting Inc.     |   +1 (613) 266-2296

^ permalink raw reply related

* Re: Gianfar driver crashes in Kernel v3.10
From: Thomas Hühn @ 2013-10-16 12:44 UTC (permalink / raw)
  To: Claudiu Manoil; +Cc: linuxppc-dev
In-Reply-To: <525E3BD7.1020208@freescale.com>

Hi,

Together with other OpenWRT users we currently use this workaround patch
(https://dev.openwrt.org/changeset/38409/trunk) that downgrades the
gianfar driver to kernel version 3.9, as 3.10 is just crashing.
With this workaround, several users with TPLink 4900 routers reported
that their system is running stable and without issues.

>
> Please try the following patch:
> http://patchwork.ozlabs.org/patch/283235/
>
> It should help with your issue.
>

Thanks for your patch.
I have adapted your patch to be applicable to the current OpenWRT trunk.
I posted it in our current forum thread, where several users besides me
will test it in the next few days.
(https://forum.openwrt.org/viewtopic.php?pid=214931#p214931)
You can expect a report after the weekend.

Greetings Thomas

> claudiu
>
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* [PATCH 3/3] Add maintainers entry for the Freescale PAMU driver.
From: Varun Sethi @ 2013-10-16 11:23 UTC (permalink / raw)
  To: joro, iommu, linuxppc-dev, linux-kernel, stuart.yoder, scottwood,
	alex.williamson, r65777
  Cc: Varun Sethi
In-Reply-To: <1381922582-28724-1-git-send-email-Varun.Sethi@freescale.com>

Add maintainers entry for Freescale PAMU driver.

Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
---
 MAINTAINERS |    7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8a0cbf3..5b6ea5c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3511,6 +3511,13 @@ S:	Maintained
 F:	drivers/net/ethernet/freescale/fs_enet/
 F:	include/linux/fs_enet_pd.h
 
+FREESCALE PAMU DRIVER
+M:	Varun Sethi <varun.sethi@freescale.com>
+L:	linuxppc-dev@lists.ozlabs.org
+L:	iommu@lists.linux-foundation.org
+S:	Maintained
+F:	drivers/iommu/fsl_pamu*
+
 FREESCALE QUICC ENGINE LIBRARY
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Orphan
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 2/3 v2] iommu/fsl: Enable default DMA window for PCIe devices
From: Varun Sethi @ 2013-10-16 11:23 UTC (permalink / raw)
  To: joro, iommu, linuxppc-dev, linux-kernel, stuart.yoder, scottwood,
	alex.williamson, r65777
  Cc: Varun Sethi
In-Reply-To: <1381922582-28724-1-git-send-email-Varun.Sethi@freescale.com>

Once the PCIe device assigned to a guest VM (via VFIO) gets detached from the iommu domain
(when guest terminates), its PAMU table entry is disabled. So, this would prevent the device
from being used once it's assigned back to the host.

This patch allows for creation of a default DMA window corresponding to the device
and subsequently enabling the PAMU table entry. Before we enable the entry, we ensure that
the device's bus master capability is disabled (device quiesced).

Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
---
 drivers/iommu/fsl_pamu.c        |   43 ++++++++++++++++++++++++++++--------
 drivers/iommu/fsl_pamu.h        |    1 +
 drivers/iommu/fsl_pamu_domain.c |   46 ++++++++++++++++++++++++++++++++++++---
 3 files changed, 78 insertions(+), 12 deletions(-)

diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
index cba0498..fb4a031 100644
--- a/drivers/iommu/fsl_pamu.c
+++ b/drivers/iommu/fsl_pamu.c
@@ -225,6 +225,21 @@ static struct paace *pamu_get_spaace(struct paace *paace, u32 wnum)
 	return spaace;
 }
 
+/*
+ * Default PPAACE settings for an LIODN.
+ */
+static void setup_default_ppaace(struct paace *ppaace)
+{
+	pamu_init_ppaace(ppaace);
+	/* window size is 2^(WSE+1) bytes */
+	set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35);
+	ppaace->wbah = 0;
+	set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0);
+	set_bf(ppaace->impl_attr, PAACE_IA_ATM,
+		PAACE_ATM_NO_XLATE);
+	set_bf(ppaace->addr_bitfields, PAACE_AF_AP,
+		PAACE_AP_PERMS_ALL);
+}
 /**
  * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves subwindows
  *                                required for primary PAACE in the secondary
@@ -253,6 +268,24 @@ static unsigned long pamu_get_fspi_and_allocate(u32 subwin_cnt)
 	return (spaace_addr - (unsigned long)spaact) / (sizeof(struct paace));
 }
 
+/* Reset the PAACE entry to the default state */
+void enable_default_dma_window(int liodn)
+{
+	struct paace *ppaace;
+
+	ppaace = pamu_get_ppaace(liodn);
+	if (!ppaace) {
+		pr_debug("Invalid liodn entry\n");
+		return;
+	}
+
+	memset(ppaace, 0, sizeof(struct paace));
+
+	setup_default_ppaace(ppaace);
+	mb();
+	pamu_enable_liodn(liodn);
+}
+
 /* Release the subwindows reserved for a particular LIODN */
 void pamu_free_subwins(int liodn)
 {
@@ -752,15 +785,7 @@ static void __init setup_liodns(void)
 				continue;
 			}
 			ppaace = pamu_get_ppaace(liodn);
-			pamu_init_ppaace(ppaace);
-			/* window size is 2^(WSE+1) bytes */
-			set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35);
-			ppaace->wbah = 0;
-			set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0);
-			set_bf(ppaace->impl_attr, PAACE_IA_ATM,
-				PAACE_ATM_NO_XLATE);
-			set_bf(ppaace->addr_bitfields, PAACE_AF_AP,
-				PAACE_AP_PERMS_ALL);
+			setup_default_ppaace(ppaace);
 			if (of_device_is_compatible(node, "fsl,qman-portal"))
 				setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE);
 			if (of_device_is_compatible(node, "fsl,qman"))
diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h
index 8fc1a12..0edcbbbb 100644
--- a/drivers/iommu/fsl_pamu.h
+++ b/drivers/iommu/fsl_pamu.h
@@ -406,5 +406,6 @@ void get_ome_index(u32 *omi_index, struct device *dev);
 int  pamu_update_paace_stash(int liodn, u32 subwin, u32 value);
 int pamu_disable_spaace(int liodn, u32 subwin);
 u32 pamu_get_max_subwin_cnt(void);
+void enable_default_dma_window(int liodn);
 
 #endif  /* __FSL_PAMU_H */
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index 966ae70..dd6cafc 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -340,17 +340,57 @@ static inline struct device_domain_info *find_domain(struct device *dev)
 	return dev->archdata.iommu_domain;
 }
 
+/* Disable device DMA capability and enable default DMA window */
+static void disable_device_dma(struct device_domain_info *info,
+				int enable_dma_window)
+{
+#ifdef CONFIG_PCI
+	if (info->dev->bus == &pci_bus_type) {
+		struct pci_dev *pdev = NULL;
+		pdev = to_pci_dev(info->dev);
+		if (pci_is_enabled(pdev))
+			pci_disable_device(pdev);
+	}
+#endif
+
+	if (enable_dma_window)
+		enable_default_dma_window(info->liodn);
+}
+
+static int check_for_shared_liodn(struct device_domain_info *info)
+{
+	struct device_domain_info *tmp;
+
+	/*
+	 * Sanity check, to ensure that this is not a
+	 * shared LIODN. In case of a PCIe controller
+	 * it's possible that all PCIe devices share
+	 * the same LIODN.
+	 */
+	list_for_each_entry(tmp, &info->domain->devices, link) {
+		if (info->liodn == tmp->liodn)
+			return 1;
+	}
+
+	return 0;
+}
+
 static void remove_device_ref(struct device_domain_info *info, u32 win_cnt)
 {
 	unsigned long flags;
+	int enable_dma_window = 0;
 
 	list_del(&info->link);
 	spin_lock_irqsave(&iommu_lock, flags);
-	if (win_cnt > 1)
-		pamu_free_subwins(info->liodn);
-	pamu_disable_liodn(info->liodn);
+	if (!check_for_shared_liodn(info)) {
+		if (win_cnt > 1)
+			pamu_free_subwins(info->liodn);
+		pamu_disable_liodn(info->liodn);
+		enable_dma_window = 1;
+	}
 	spin_unlock_irqrestore(&iommu_lock, flags);
 	spin_lock_irqsave(&device_domain_lock, flags);
+	disable_device_dma(info, enable_dma_window);
 	info->dev->archdata.iommu_domain = NULL;
 	kmem_cache_free(iommu_devinfo_cache, info);
 	spin_unlock_irqrestore(&device_domain_lock, flags);
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH 0/3 v2] iommu/fsl: PAMU driver fixes.
From: Varun Sethi @ 2013-10-16 11:22 UTC (permalink / raw)
  To: joro, iommu, linuxppc-dev, linux-kernel, stuart.yoder, scottwood,
	alex.williamson, r65777
  Cc: Varun Sethi

The first patch fixes a build failure, when we try to build for a Freescale
platform without PCI support.

The second patch enables a default DMA window for the device, once it's
detached from a domain. In case of vfio, once device is detached from a
guest it can be again used by the host.

The last patch adds the maintainer entry for the Freescale PAMU driver.

Varun Sethi (3):
  iommu/fsl: Factor out PCI specific code.
  iommu/fsl: Enable default DMA window for PCIe devices once detached
  Add maintainers entry for the Freescale PAMU driver.

 MAINTAINERS                     |    7 ++
 drivers/iommu/fsl_pamu.c        |   43 ++++++++++---
 drivers/iommu/fsl_pamu.h        |    1 +
 drivers/iommu/fsl_pamu_domain.c |  134 +++++++++++++++++++++++++--------------
 4 files changed, 128 insertions(+), 57 deletions(-)

-- 
1.7.9.5

^ permalink raw reply

* [PATCH 1/3 v2] iommu/fsl: Factor out PCI specific code.
From: Varun Sethi @ 2013-10-16 11:23 UTC (permalink / raw)
  To: joro, iommu, linuxppc-dev, linux-kernel, stuart.yoder, scottwood,
	alex.williamson, r65777
  Cc: Varun Sethi
In-Reply-To: <1381922582-28724-1-git-send-email-Varun.Sethi@freescale.com>

Factor out PCI specific code in the PAMU driver.

Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
---
 drivers/iommu/fsl_pamu_domain.c |   88 +++++++++++++++++++--------------------
 1 file changed, 43 insertions(+), 45 deletions(-)

diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index c857c30..966ae70 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -677,21 +677,15 @@ static int handle_attach_device(struct fsl_dma_domain *dma_domain,
 	return ret;
 }
 
-static int fsl_pamu_attach_device(struct iommu_domain *domain,
-				  struct device *dev)
+static struct device *get_dma_device(struct device *dev)
 {
-	struct fsl_dma_domain *dma_domain = domain->priv;
-	const u32 *liodn;
-	u32 liodn_cnt;
-	int len, ret = 0;
-	struct pci_dev *pdev = NULL;
-	struct pci_controller *pci_ctl;
+	struct device *dma_dev = dev;
+#ifdef CONFIG_PCI
 
-	/*
-	 * Use LIODN of the PCI controller while attaching a
-	 * PCI device.
-	 */
 	if (dev->bus == &pci_bus_type) {
+		struct pci_controller *pci_ctl;
+		struct pci_dev *pdev;
+
 		pdev = to_pci_dev(dev);
 		pci_ctl = pci_bus_to_host(pdev->bus);
 		/*
@@ -699,17 +693,31 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain,
 		 * so we can get the LIODN programmed by
 		 * u-boot.
 		 */
-		dev = pci_ctl->parent;
+		dma_dev = pci_ctl->parent;
 	}
+#endif
+	return dma_dev;
+}
+
+static int fsl_pamu_attach_device(struct iommu_domain *domain,
+				  struct device *dev)
+{
+	struct fsl_dma_domain *dma_domain = domain->priv;
+	struct device *dma_dev;
+	const u32 *liodn;
+	u32 liodn_cnt;
+	int len, ret = 0;
+
+	dma_dev = get_dma_device(dev);
 
-	liodn = of_get_property(dev->of_node, "fsl,liodn", &len);
+	liodn = of_get_property(dma_dev->of_node, "fsl,liodn", &len);
 	if (liodn) {
 		liodn_cnt = len / sizeof(u32);
 		ret = handle_attach_device(dma_domain, dev,
 					 liodn, liodn_cnt);
 	} else {
 		pr_debug("missing fsl,liodn property at %s\n",
-		          dev->of_node->full_name);
+		          dma_dev->of_node->full_name);
 			ret = -EINVAL;
 	}
 
@@ -720,32 +728,18 @@ static void fsl_pamu_detach_device(struct iommu_domain *domain,
 				      struct device *dev)
 {
 	struct fsl_dma_domain *dma_domain = domain->priv;
+	struct device *dma_dev;
 	const u32 *prop;
 	int len;
-	struct pci_dev *pdev = NULL;
-	struct pci_controller *pci_ctl;
 
-	/*
-	 * Use LIODN of the PCI controller while detaching a
-	 * PCI device.
-	 */
-	if (dev->bus == &pci_bus_type) {
-		pdev = to_pci_dev(dev);
-		pci_ctl = pci_bus_to_host(pdev->bus);
-		/*
-		 * make dev point to pci controller device
-		 * so we can get the LIODN programmed by
-		 * u-boot.
-		 */
-		dev = pci_ctl->parent;
-	}
+	dma_dev = get_dma_device(dev);
 
-	prop = of_get_property(dev->of_node, "fsl,liodn", &len);
+	prop = of_get_property(dma_dev->of_node, "fsl,liodn", &len);
 	if (prop)
 		detach_device(dev, dma_domain);
 	else
 		pr_debug("missing fsl,liodn property at %s\n",
-		          dev->of_node->full_name);
+		          dma_dev->of_node->full_name);
 }
 
 static  int configure_domain_geometry(struct iommu_domain *domain, void *data)
@@ -905,6 +899,7 @@ static struct iommu_group *get_device_iommu_group(struct device *dev)
 	return group;
 }
 
+#ifdef CONFIG_PCI
 static  bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl)
 {
 	u32 version;
@@ -945,13 +940,18 @@ static struct iommu_group *get_shared_pci_device_group(struct pci_dev *pdev)
 	return NULL;
 }
 
-static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
+static struct iommu_group *get_pci_device_group(struct device *dev)
 {
 	struct pci_controller *pci_ctl;
 	bool pci_endpt_partioning;
 	struct iommu_group *group = NULL;
-	struct pci_dev *bridge, *dma_pdev = NULL;
+	struct pci_dev *bridge, *pdev;
+	struct pci_dev *dma_pdev = NULL;
 
+	pdev = to_pci_dev(dev);
+	/* Don't create device groups for virtual PCI bridges */
+	if (pdev->subordinate)
+		return NULL;
 	pci_ctl = pci_bus_to_host(pdev->bus);
 	pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl);
 	/* We can partition PCIe devices so assign device group to the device */
@@ -1044,11 +1044,11 @@ root_bus:
 
 	return group;
 }
+#endif
 
 static int fsl_pamu_add_device(struct device *dev)
 {
 	struct iommu_group *group = NULL;
-	struct pci_dev *pdev;
 	const u32 *prop;
 	int ret, len;
 
@@ -1056,19 +1056,15 @@ static int fsl_pamu_add_device(struct device *dev)
 	 * For platform devices we allocate a separate group for
 	 * each of the devices.
 	 */
-	if (dev->bus == &pci_bus_type) {
-		pdev = to_pci_dev(dev);
-		/* Don't create device groups for virtual PCI bridges */
-		if (pdev->subordinate)
-			return 0;
-
-		group = get_pci_device_group(pdev);
-
-	} else {
+	if (dev->bus == &platform_bus_type) {
 		prop = of_get_property(dev->of_node, "fsl,liodn", &len);
 		if (prop)
 			group = get_device_iommu_group(dev);
 	}
+#ifdef CONFIG_PCI
+	else
+		group = get_pci_device_group(dev);
+#endif
 
 	if (!group || IS_ERR(group))
 		return PTR_ERR(group);
@@ -1166,7 +1162,9 @@ int pamu_domain_init()
 		return ret;
 
 	bus_set_iommu(&platform_bus_type, &fsl_pamu_ops);
+#ifdef CONFIG_PCI
 	bus_set_iommu(&pci_bus_type, &fsl_pamu_ops);
+#endif
 
 	return ret;
 }
-- 
1.7.9.5

^ permalink raw reply related

* [PATCH] powerpc/52xx: fix build breakage for MPC5200 LPBFIFO module
From: Anatolij Gustschin @ 2013-10-16 11:11 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Wolfgang Denk

The MPC5200 LPBFIFO driver requires the bestcomm module to be
enabled, otherwise building will fail. Fix it.

Cc: <stable@vger.kernel.org> # 3.10+
Reported-by: Wolfgang Denk <wd@denx.de>
Signed-off-by: Anatolij Gustschin <agust@denx.de>
---
 arch/powerpc/platforms/52xx/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/52xx/Kconfig b/arch/powerpc/platforms/52xx/Kconfig
index 90f4496..af54174 100644
--- a/arch/powerpc/platforms/52xx/Kconfig
+++ b/arch/powerpc/platforms/52xx/Kconfig
@@ -57,5 +57,5 @@ config PPC_MPC5200_BUGFIX
 
 config PPC_MPC5200_LPBFIFO
 	tristate "MPC5200 LocalPlus bus FIFO driver"
-	depends on PPC_MPC52xx
+	depends on PPC_MPC52xx && PPC_BESTCOMM
 	select PPC_BESTCOMM_GEN_BD
-- 
1.8.3.1

^ permalink raw reply related

* Re: [PATCH 02/10][v6] powerpc/Power7: detect load/store instructions
From: Anshuman Khandual @ 2013-10-16  9:38 UTC (permalink / raw)
  To: David Laight
  Cc: Michael Ellerman, linux-kernel, Stephane Eranian, linuxppc-dev,
	Paul Mackerras, Arnaldo Carvalho de Melo, Sukadev Bhattiprolu
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B738E@saturn3.aculab.com>

On 10/16/2013 01:55 PM, David Laight wrote:
>> Implement instr_is_load_store_2_06() to detect whether a given instruction
>> is one of the fixed-point or floating-point load/store instructions in the
>> POWER Instruction Set Architecture v2.06.
> ...

Is the opcode encoding dependent on the ISA version? Do the basic load
and store instructions change with newer ISA versions? BTW, there is a
newer version of the ISA, "PowerISA_V2.07_PUBLIC.pdf", here at power.org:

https://www.power.org/documentation/power-isa-version-2-07/

It does not sound like a good idea to analyse the instructions with function
names which specify an ISA version number. Besides, this function does not
belong to a specific processor or platform. It has to be a bit more generic.
 
>> +int instr_is_load_store_2_06(const unsigned int *instr)
>> +{
>> +	unsigned int op, upper, lower;
>> +
>> +	op = instr_opcode(*instr);
>> +
>> +	if ((op >= 32 && op <= 58) || (op == 61 || op == 62))
>> +		return true;
>> +
>> +	if (op != 31)
>> +		return false;
>> +
>> +	upper = op >> 5;
>> +	lower = op & 0x1f;
>> +
>> +	/* Short circuit as many misses as we can */
>> +	if (lower < 3 || lower > 23)
>> +		return false;
>> +
>> +	if (lower == 3) {
>> +		if (upper >= 16)
>> +			return true;
>> +
>> +		return false;
>> +	}
>> +
>> +	if (lower == 7 || lower == 12)
>> +		return true;
>> +
>> +	if (lower >= 20) /* && lower <= 23 (implicit) */
>> +		return true;
>> +
>> +	return false;
>> +}
> 
> I can't help feeling the code could do with some comments about
> which actual instructions are selected where.

Yeah, I agree. At least which category of load-store instructions are
getting selected in each case.

^ permalink raw reply

* Re: [PATCH 1/2] tty/hvc_console: Add DTR/RTS callback to handle HUPCL control
From: Hendrik Brueckner @ 2013-10-16  9:04 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: linux-s390, brueckner, gregkh, heiko.carstens, linuxppc-dev,
	linux-kernel, Hendrik Brueckner, schwidefsky, jslaby
In-Reply-To: <1381870070.17841.24.camel@pasglop>

On Tue, Oct 15, 2013 at 03:47:50PM -0500, Benjamin Herrenschmidt wrote:
> On Tue, 2013-10-15 at 17:36 +0200, Hendrik Brueckner wrote:
> > On Sat, Oct 12, 2013 at 07:43:24AM +1100, Benjamin Herrenschmidt wrote:
> > > On Fri, 2013-10-11 at 14:47 +0200, Hendrik Brueckner wrote:
> > > > The tiocmget/tiocmset callbacks are used to set and get modem status and
> > > > triggered through an tty ioctl.
> > > > 
> > > > The dtr_rts() callback is different and it is used for DTR/RTS handshaking
> > > > between the hvc_console (or any other tty_port) and the tty layer.  The tty
> > > > port layer uses this callback to signal the hvc_console whether to raise or
> > > > lower the DTR/RTS lines.  This is different than the ioctl interface to
> > > > controls the modem status.
> > > 
> > > Well, DTR at least is the same via both callbacks... Also normal handshaking
> > > is normally RTS/CTS, only some HW setups "hijacks" DTR for RTS (old Macs come
> > > to mind).
> > 
> > Yep. DTR is changed in both callbacks but from different layers.  The
> > tiocmget/tiocmset are triggered through the ioctl.  The dtr_rts() callback is
> > called in hvc_close() to properly handle HUPCL to lower modem control lines
> > after last process closes the device (hang up).
> > 
> > This is also done in the hvsilib_close() in hvsi_lib.c:
> > 
> > 	/* Clear our own DTR */
> > 	if (!pv->tty || (pv->tty->termios.c_cflag & HUPCL))
> > 		hvsilib_write_mctrl(pv, 0); 
> > 
> > This is actually what the dtr_rts() callback should trigger and I wonder
> > whether it would be worth to introduce the dtr_rts() callback to encapsulate
> > the "hvsilib_write_mctrl(pv, 0);" call from above.
> > 
> > On the other hand, the dtr_rts() callback is a good encapsulation to not
> > directly access the hp->tty to potentially prevent a layering violation. At
> > least for the hvc_iucv() I do not want to deal with the "underlying" tty layer
> > and introduce additional reference accounting.
> > 
> > I hope this helps you to understand my rationale for introducing the dtr_rts()
> > callback.
> 
> I'm not sure :) We still end up basically with 2 callbacks to do the
> same thing ... change the DTR line. It's odd at best, I still don't
> quite see why hvc_console couldn't just use mctrl...
> 
Indeed, two callbacks change the DTR line.  The main difference is that
tiocmget/tiocmset can be called from user space by ioctl.  That's not the case
for the dtr_rts callback.  Also, tiocmget/tiocmset provide more flags that can
be changed (ST, SR, CTS, CD, RNG, RI,  ...)

Assume we would like to unify them and have a single callback to change DTR;
then we have to take care of these differences.  So the question to you now is
whether you plan for a) other modem flags to be changed and b) the DTR line
(or other control flags) to be changed through an ioctl?

Depending on your results, I could work on something that helps us both and
reduces the callbacks.

Thanks and kind regards,
  Hendrik

^ permalink raw reply

* RE: [PATCH 02/10][v6] powerpc/Power7: detect load/store instructions
From: David Laight @ 2013-10-16  8:25 UTC (permalink / raw)
  To: Sukadev Bhattiprolu, Arnaldo Carvalho de Melo
  Cc: Michael Ellerman, linux-kernel, Stephane Eranian, linuxppc-dev,
	Paul Mackerras, Anshuman Khandual
In-Reply-To: <1381889202-16826-3-git-send-email-sukadev@linux.vnet.ibm.com>

> Implement instr_is_load_store_2_06() to detect whether a given instruction
> is one of the fixed-point or floating-point load/store instructions in the
> POWER Instruction Set Architecture v2.06.
...
> +int instr_is_load_store_2_06(const unsigned int *instr)
> +{
> +	unsigned int op, upper, lower;
> +
> +	op = instr_opcode(*instr);
> +
> +	if ((op >= 32 && op <= 58) || (op == 61 || op == 62))
> +		return true;
> +
> +	if (op != 31)
> +		return false;
> +
> +	upper = op >> 5;
> +	lower = op & 0x1f;
> +
> +	/* Short circuit as many misses as we can */
> +	if (lower < 3 || lower > 23)
> +		return false;
> +
> +	if (lower == 3) {
> +		if (upper >= 16)
> +			return true;
> +
> +		return false;
> +	}
> +
> +	if (lower == 7 || lower == 12)
> +		return true;
> +
> +	if (lower >= 20) /* && lower <= 23 (implicit) */
> +		return true;
> +
> +	return false;
> +}

I can't help feeling the code could do with some comments about
which actual instructions are selected where.

	David

^ permalink raw reply

* Re: [PATCH v2 08/10] of/platform: Resolve interrupt references at probe time
From: Thierry Reding @ 2013-10-16  8:20 UTC (permalink / raw)
  To: Grant Likely
  Cc: devicetree, Russell King, linux-mips, Greg Kroah-Hartman,
	linux-kernel, Ralf Baechle, Rob Herring, sparclinux,
	Thomas Gleixner, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20131015232436.19F61C40099@trevor.secretlab.ca>

[-- Attachment #1: Type: text/plain, Size: 4329 bytes --]

On Wed, Oct 16, 2013 at 12:24:36AM +0100, Grant Likely wrote:
> On Wed, 18 Sep 2013 15:24:50 +0200, Thierry Reding <thierry.reding@gmail.com> wrote:
> > Interrupt references are currently resolved very early (when a device is
> > created). This has the disadvantage that it will fail in cases where the
> > interrupt parent hasn't been probed and no IRQ domain for it has been
> > registered yet. To work around that various drivers use explicit
> > initcall ordering to force interrupt parents to be probed before devices
> > that need them are created. That's error prone and doesn't always work.
> > If a platform device uses an interrupt line connected to a different
> > platform device (such as a GPIO controller), both will be created in the
> > same batch, and the GPIO controller won't have been probed by its driver
> > when the depending platform device is created. Interrupt resolution will
> > fail in that case.
> 
> What is the reason for all the rework on the irq parsing return values?
> A return value of '0' is always an error on irq parsing, regardless of
> architecture even if NO_IRQ is defined as -1. I may have missed it, but
> I don't see any checking for specific error values in the return paths
> of the functions.
> 
> If the specific return value isn't required (and I don't think it is),
> then you can simplify the whole series by getting rid of the rework
> patches.

The whole reason for this patch set is to propagate the precise error
code so that when one of the top-level OF IRQ functions is called (such
as irq_of_parse_and_map()) the caller can actually make a reasonable
choice on how to handle the error.

More precisely, the goal of this series was to propagate failure to
create a mapping, due to an IRQ domain not having been registered yet
for the device node passed into irq_create_of_mapping(), back to the
caller, irq_of_parse_and_map(), which can then propagate it further.
Ultimately this will allow driver probing to fail with EPROBE_DEFER
when IRQ mapping fails and allow deferred probing to be triggered.

This cannot be done if all you have as error status is 0. Mapping of
IRQs can fail for a number of reasons, such as when an IRQ descriptor
cannot be allocated or when an IRQ domain's .xlate() fails. You don't
want to be deferring probe on all errors because some of them are
genuinely fatal and cannot be recovered from by deferring probe.

With the current implementation in the kernel, interrupt references are
resolved very early, usually when a device is instantiated from the
device tree. So unless all interrupt parents of all devices have been
probed by that time (which usually can only be done using explicit
initcall ordering, and even in that case doesn't always work) then many
devices will end up with an invalid interrupt number.

The typical case where this can happen is if you have a GPIO expander on
an I2C bus that provides interrupt services to other devices. With the
current implementation, the GPIO expander will be probed fairly late, at
which point many of its users will already have been instantiated and
assigned an invalid interrupt. Many drivers try to work around that by
explicitly calling irq_of_parse_and_map() within their .probe() function
because that's usually called sometime after the device's instantiation.
However even that isn't guaranteed to work. If the GPIO expander depends
itself on other resources that cause it to require deferred probing, or
if its driver is built as a module, which makes the registration of the
corresponding IRQ domain completely non-deterministic, then this can
fail just as easily.

With this patch series all of these issues should go away. All of the
dependencies should be resolvable by using deferred probing. Furthermore
the mechanism introduced to have the core resolve the IRQ references can
be used to request other standard resources as well. A particular one
that I'm aware of is how IOMMUs are associated with devices. Currently a
variety of quirks have been proposed to work around these issues, such
as reordering nodes in the device tree, which only work because the DTC
implementation that everybody uses happens to keep them ordered in the
same way in the DTB as they were in the DTS.

Thierry

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: Fwd: Gianfar driver crashes in Kernel v3.10
From: Claudiu Manoil @ 2013-10-16  7:10 UTC (permalink / raw)
  To: Thomas Hühn; +Cc: linuxppc-dev
In-Reply-To: <760FFC38-597B-46CB-BA4C-620A1D663C8A@net.t-labs.tu-berlin.de>

On 10/11/2013 11:59 AM, Thomas Hühn wrote:
> Hi Claudiu,
>
>> Does this show up on a half duplex (100Mb/s) link?
>
> In my testsetup I always used 1GBit Ethernet connections, so no 100MBit tested yet.
> Should I do so ?
>
>> Could you provide following for the gianfar interface, on your setup:
>
> sure:
>> # ethtool ethX
> root@Bluse-home:~# ethtool eth0
> Settings for eth0:
>         Supported ports: [ ]
>         Supported link modes:   1000baseT/Full
>         Supported pause frame use: No
>         Supports auto-negotiation: No
>         Advertised link modes:  1000baseT/Full
>         Advertised pause frame use: No
>         Advertised auto-negotiation: No
>         Speed: 1000Mb/s
>         Duplex: Full
>         Port: MII
>         PHYAD: 0
>         Transceiver: external
>         Auto-negotiation: on
>         Current message level: 0x0000003f (63)
>                                drv probe link timer ifdown ifup
>         Link detected: yes
>
>> and
>> # ethtool -d ethX | grep 500
>>
> root@Bluse-home:~# ethtool -d eth0 | grep 500
> 0x0500:         00 00 00 3f 00 00 72 05 40 60 50 60 00 a1 f0 37
>
>
>> Is there any other indication before this Oops? Like a tx timeout WARN?
>>
>
> Nothing there about any timeout.
>

Please try the following patch:
http://patchwork.ozlabs.org/patch/283235/

It should help with your issue.

claudiu

^ permalink raw reply

* [V3 01/10] perf: New conditional branch filter criteria in branch stack sampling
From: Anshuman Khandual @ 2013-10-16  6:56 UTC (permalink / raw)
  To: linuxppc-dev, linux-kernel; +Cc: mikey, sukadev, michaele, eranian
In-Reply-To: <1381906617-11392-1-git-send-email-khandual@linux.vnet.ibm.com>

POWER8 PMU based BHRB supports filtering for conditional branches.
This patch introduces new branch filter PERF_SAMPLE_BRANCH_COND which
will extend the existing perf ABI. Other architectures can provide
this functionality with either HW filtering support (if present) or
with SW filtering of instructions.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Reviewed-by: Stephane Eranian <eranian@google.com>
---
 include/uapi/linux/perf_event.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 0b1df41..5da52b6 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -160,8 +160,9 @@ enum perf_branch_sample_type {
 	PERF_SAMPLE_BRANCH_ABORT_TX	= 1U << 7, /* transaction aborts */
 	PERF_SAMPLE_BRANCH_IN_TX	= 1U << 8, /* in transaction */
 	PERF_SAMPLE_BRANCH_NO_TX	= 1U << 9, /* not in transaction */
+	PERF_SAMPLE_BRANCH_COND		= 1U << 10, /* conditional branches */
 
-	PERF_SAMPLE_BRANCH_MAX		= 1U << 10, /* non-ABI */
+	PERF_SAMPLE_BRANCH_MAX		= 1U << 11, /* non-ABI */
 };
 
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
-- 
1.7.11.7

^ permalink raw reply related

* [V3 00/10] perf: New conditional branch filter
From: Anshuman Khandual @ 2013-10-16  6:56 UTC (permalink / raw)
  To: linuxppc-dev, linux-kernel; +Cc: mikey, sukadev, michaele, eranian

		This patchset is a re-spin of the original branch stack sampling
patchset which introduced the new PERF_SAMPLE_BRANCH_COND branch filter. This patchset
also enables SW based branch filtering support for book3s powerpc platforms which
have PMU HW backed branch stack sampling support.

Summary of code changes in this patchset:

(1) Introduces a new PERF_SAMPLE_BRANCH_COND branch filter
(2) Add the "cond" branch filter options in the "perf record" tool
(3) Enable PERF_SAMPLE_BRANCH_COND in X86 platforms
(4) Enable PERF_SAMPLE_BRANCH_COND in POWER8 platform 
(5) Update the documentation regarding "perf record" tool
(6) Add some new powerpc instruction analysis functions in code-patching library
(7) Enable SW based branch filter support for powerpc book3s
(8) Changed BHRB configuration in POWER8 to accommodate SW branch filters 

With this new SW enablement, the branch filter support for book3s platforms has
been extended to include all the combinations discussed below, demonstrated with a
sample test application program (included here).

Changes in V2
=============
(1) Enabled PPC64 SW branch filtering support
(2) Incorporated changes required for all previous comments

Changes in V3
=============
(1) Split the SW branch filter enablement into multiple patches
(2) Added PMU neutral SW branch filtering code, PMU specific HW branch filtering code
(3) Added new instruction analysis functionality into powerpc code-patching library
(4) Changed name for some of the functions
(5) Fixed a couple of spelling mistakes
(6) Changed code documentation in multiple places

PMU HW branch filters
=====================
(1) perf record -j any_call -e branch-misses:u ./cprog
# Overhead  Command  Source Shared Object          Source Symbol  Target Shared Object             Target Symbol
# ........  .......  ....................  .....................  ....................  ........................
#
     7.00%    cprog  cprog                 [.] sw_3_1             cprog                 [.] sw_3_1_2            
     6.99%    cprog  cprog                 [.] hw_1_1             cprog                 [.] symbol1             
     6.52%    cprog  cprog                 [.] sw_3_1             cprog                 [.] success_3_1_2       
     5.41%    cprog  cprog                 [.] sw_3_1             cprog                 [.] sw_3_1_3            
     5.40%    cprog  cprog                 [.] hw_1_2             cprog                 [.] symbol2             
     5.40%    cprog  cprog                 [.] callme             cprog                 [.] hw_1_2              
     5.40%    cprog  cprog                 [.] sw_3_1             cprog                 [.] success_3_1_1       
     5.40%    cprog  cprog                 [.] callme             cprog                 [.] hw_1_1              
     5.39%    cprog  cprog                 [.] sw_3_1             cprog                 [.] sw_3_1_1            
     5.39%    cprog  cprog                 [.] sw_4_2             cprog                 [.] lr_addr             
     5.39%    cprog  cprog                 [.] callme             cprog                 [.] sw_4_2              
     5.37%    cprog  [unknown]             [.] 00000000           cprog                 [.] ctr_addr            
     4.30%    cprog  cprog                 [.] callme             cprog                 [.] hw_2_1              
     4.28%    cprog  cprog                 [.] callme             cprog                 [.] sw_3_1              
     3.82%    cprog  cprog                 [.] sw_3_1             cprog                 [.] success_3_1_3       
     3.81%    cprog  cprog                 [.] callme             cprog                 [.] hw_2_2              
     3.81%    cprog  cprog                 [.] callme             cprog                 [.] sw_3_2              
     2.71%    cprog  [unknown]             [.] 00000000           cprog                 [.] lr_addr             
     2.70%    cprog  cprog                 [.] main               cprog                 [.] callme              
     2.70%    cprog  cprog                 [.] sw_4_1             cprog                 [.] ctr_addr            
     2.70%    cprog  cprog                 [.] callme             cprog                 [.] sw_4_1              
     0.08%    cprog  [unknown]             [.] 0xf78676c4         [unknown]             [.] 0xf78522c0          
     0.02%    cprog  [unknown]             [k] 00000000           cprog                 [k] ctr_addr            
     0.01%    cprog  [kernel.kallsyms]     [.] .power_pmu_enable  [kernel.kallsyms]     [.] .power8_compute_mmcr
     0.00%    cprog  ld-2.11.2.so          [.] malloc             [unknown]             [.] 0xf786b380          
     0.00%    cprog  ld-2.11.2.so          [.] calloc             [unknown]             [.] 0xf786b390          
     0.00%    cprog  cprog                 [.] main               [unknown]             [.] 0x10000950          
     0.00%    cprog  [unknown]             [.] 00000000           [kernel.kallsyms]     [.] .power_pmu_enable  
    
(2) perf record -j cond -e branch-misses:u ./cprog

# Overhead  Command  Source Shared Object            Source Symbol  Target Shared Object            Target Symbol
# ........  .......  ....................  .......................  ....................  .......................
#
    27.73%    cprog  [unknown]             [.] 00000000             cprog                 [.] callme             
    13.03%    cprog  cprog                 [.] sw_3_1               cprog                 [.] sw_3_1             
     5.64%    cprog  [unknown]             [.] 00000000             cprog                 [.] main               
     5.62%    cprog  [unknown]             [.] 00000000             cprog                 [.] sw_4_2             
     5.46%    cprog  cprog                 [.] sw_4_2               cprog                 [.] lr_addr            
     5.40%    cprog  [unknown]             [.] 00000000             cprog                 [.] sw_4_1             
     3.72%    cprog  cprog                 [.] hw_2_1               cprog                 [.] callme             
     3.71%    cprog  cprog                 [.] main                 cprog                 [.] hw_1_1             
     3.71%    cprog  cprog                 [.] sw_3_1_2             cprog                 [.] sw_3_1             
     3.70%    cprog  cprog                 [.] sw_3_1_3             cprog                 [.] sw_3_1             
     3.70%    cprog  cprog                 [.] sw_4_1               cprog                 [.] ctr_addr           
     3.69%    cprog  cprog                 [.] hw_1_2               cprog                 [.] hw_1_2             
     3.69%    cprog  cprog                 [.] hw_2_2               cprog                 [.] callme             
     3.68%    cprog  cprog                 [.] sw_3_1_1             cprog                 [.] sw_3_1             
     1.93%    cprog  [unknown]             [.] 00000000             cprog                 [.] lr_addr            
     1.78%    cprog  [unknown]             [.] 00000000             cprog                 [.] hw_1_2             
     1.78%    cprog  [unknown]             [.] 00000000             cprog                 [.] sw_3_1             
     1.76%    cprog  [unknown]             [.] 00000000             cprog                 [.] hw_1_1             
     0.12%    cprog  [unknown]             [.] 0xf7bb25dc           [unknown]             [.] 0xf7bb27e4         
     0.07%    cprog  [unknown]             [k] 00000000             cprog                 [k] callme             
     0.07%    cprog  [unknown]             [k] 00000000             cprog                 [k] sw_4_1             
     0.00%    cprog  libc-2.11.2.so        [.] _IO_file_doallocate  libc-2.11.2.so        [.] _IO_file_doallocate
     0.00%    cprog  libc-2.11.2.so        [.] _IO_file_doallocate  libc-2.11.2.so        [.] isatty             
     0.00%    cprog  [unknown]             [.] 00000000             libc-2.11.2.so        [.] _IO_file_doallocate

SW based branch filters
=======================
(3) perf record -j any_ret -e branch-misses:u ./cprog 

# Overhead  Command  Source Shared Object         Source Symbol  Target Shared Object          Target Symbol
# ........  .......  ....................  ....................  ....................  .....................
#
    15.37%    cprog  [unknown]             [.] 00000000          cprog                 [.] sw_3_1           
     6.46%    cprog  cprog                 [.] success_3_1_3     cprog                 [.] sw_3_1           
     6.45%    cprog  cprog                 [.] symbol1           cprog                 [.] hw_1_1           
     6.41%    cprog  [unknown]             [.] 00000000          cprog                 [.] callme           
     6.39%    cprog  cprog                 [.] ctr_addr          cprog                 [.] sw_4_1           
     6.37%    cprog  cprog                 [.] symbol2           cprog                 [.] hw_1_2           
     6.36%    cprog  cprog                 [.] sw_4_2            cprog                 [.] callme           
     6.35%    cprog  cprog                 [.] lr_addr           cprog                 [.] sw_4_2           
     3.97%    cprog  cprog                 [.] back1             cprog                 [.] callme           
     3.93%    cprog  cprog                 [.] sw_3_1_2          cprog                 [.] sw_3_1           
     3.93%    cprog  cprog                 [.] sw_3_1            cprog                 [.] callme           
     3.86%    cprog  cprog                 [.] sw_3_1_3          cprog                 [.] sw_3_1           
     3.84%    cprog  cprog                 [.] sw_3_1_1          cprog                 [.] sw_3_1           
     2.54%    cprog  cprog                 [.] success_3_1_1     cprog                 [.] sw_3_1           
     2.54%    cprog  cprog                 [.] sw_4_1            cprog                 [.] callme           
     2.54%    cprog  cprog                 [.] hw_1_1            cprog                 [.] callme           
     2.53%    cprog  cprog                 [.] sw_3_2            cprog                 [.] callme           
     2.52%    cprog  cprog                 [.] callme            cprog                 [.] main             
     2.51%    cprog  cprog                 [.] hw_1_2            cprog                 [.] callme           
     2.51%    cprog  cprog                 [.] back2             cprog                 [.] callme           
     2.51%    cprog  cprog                 [.] success_3_1_2     cprog                 [.] sw_3_1           
     0.07%    cprog  [unknown]             [k] 00000000          cprog                 [k] callme           
     0.02%    cprog  [unknown]             [.] 00000000          [unknown]             [.] 0xf7e5c004       
     0.01%    cprog  libc-2.11.2.so        [.] __errno_location  libc-2.11.2.so        [.] vfprintf         
     0.01%    cprog  [unknown]             [.] 00000000          libc-2.11.2.so        [.] _IO_file_overflow

(4) perf record -j ind_call  -e branch-misses:u ./cprog

# Overhead  Command  Source Shared Object        Source Symbol  Target Shared Object          Target Symbol
# ........  .......  ....................  ...................  ....................  .....................
#
    48.04%    cprog  [unknown]             [.] 00000000         cprog                 [.] sw_3_1           
    19.96%    cprog  cprog                 [.] sw_4_2           cprog                 [.] lr_addr          
    19.69%    cprog  [unknown]             [.] 00000000         cprog                 [.] callme           
    12.04%    cprog  cprog                 [.] sw_4_1           cprog                 [.] ctr_addr         
     0.18%    cprog  [unknown]             [k] 00000000         cprog                 [k] callme           
     0.02%    cprog  libc-2.11.2.so        [.] _IO_file_xsputn  libc-2.11.2.so        [.] _IO_file_overflow
     0.02%    cprog  [unknown]             [.] 00000000         libc-2.11.2.so        [.] _IO_file_xsputn  
     0.02%    cprog  [unknown]             [.] 00000000         ld-2.11.2.so          [.] malloc           
     0.02%    cprog  [unknown]             [k] 00000000         cprog                 [k] sw_3_1           

(5) perf record -j any_call,any_ret -e branch-misses:u ./cprog

# Overhead  Command  Source Shared Object            Source Symbol  Target Shared Object            Target Symbol
# ........  .......  ....................  .......................  ....................  .......................
#
    10.36%    cprog  [unknown]             [.] 00000000             cprog                 [.] sw_3_1             
     4.18%    cprog  cprog                 [.] symbol1              cprog                 [.] hw_1_1             
     4.18%    cprog  cprog                 [.] success_3_1_3        cprog                 [.] sw_3_1             
     4.17%    cprog  cprog                 [.] sw_4_2               cprog                 [.] lr_addr            
     4.16%    cprog  cprog                 [.] sw_4_2               cprog                 [.] callme             
     4.15%    cprog  cprog                 [.] ctr_addr             cprog                 [.] sw_4_1             
     4.15%    cprog  cprog                 [.] lr_addr              cprog                 [.] sw_4_2             
     4.14%    cprog  cprog                 [.] symbol2              cprog                 [.] hw_1_2             
     4.14%    cprog  [unknown]             [.] 00000000             cprog                 [.] callme             
     2.15%    cprog  cprog                 [.] sw_3_1               cprog                 [.] callme             
     2.14%    cprog  cprog                 [.] hw_1_1               cprog                 [.] symbol1            
     2.14%    cprog  cprog                 [.] callme               cprog                 [.] hw_1_1             
     2.14%    cprog  cprog                 [.] callme               cprog                 [.] sw_4_2             
     2.13%    cprog  cprog                 [.] back1                cprog                 [.] callme             
     2.12%    cprog  cprog                 [.] sw_3_1_2             cprog                 [.] sw_3_1             
     2.12%    cprog  cprog                 [.] sw_3_1               cprog                 [.] sw_3_1_2           
     2.11%    cprog  cprog                 [.] sw_3_1_3             cprog                 [.] sw_3_1             
     2.11%    cprog  cprog                 [.] sw_3_1               cprog                 [.] sw_3_1_3           
     2.11%    cprog  cprog                 [.] sw_4_1               cprog                 [.] ctr_addr           
     2.10%    cprog  cprog                 [.] hw_1_2               cprog                 [.] symbol2            
     2.10%    cprog  cprog                 [.] sw_3_1_1             cprog                 [.] sw_3_1             
     2.10%    cprog  cprog                 [.] sw_3_1               cprog                 [.] sw_3_1_1           
     2.10%    cprog  cprog                 [.] callme               cprog                 [.] hw_1_2             
     2.10%    cprog  cprog                 [.] callme               cprog                 [.] sw_3_1             
     2.05%    cprog  cprog                 [.] success_3_1_1        cprog                 [.] sw_3_1             
     2.05%    cprog  cprog                 [.] sw_3_1               cprog                 [.] success_3_1_1      
     2.05%    cprog  cprog                 [.] success_3_1_2        cprog                 [.] sw_3_1             
     2.05%    cprog  cprog                 [.] sw_3_1               cprog                 [.] success_3_1_2      
     2.04%    cprog  cprog                 [.] hw_1_1               cprog                 [.] callme             
     2.04%    cprog  cprog                 [.] back2                cprog                 [.] callme             
     2.04%    cprog  cprog                 [.] sw_4_1               cprog                 [.] callme             
     2.04%    cprog  cprog                 [.] callme               cprog                 [.] main               
     2.04%    cprog  cprog                 [.] hw_1_2               cprog                 [.] callme             
     2.04%    cprog  cprog                 [.] sw_3_2               cprog                 [.] callme             
     2.04%    cprog  cprog                 [.] callme               cprog                 [.] sw_3_2             
     2.03%    cprog  cprog                 [.] sw_3_1               cprog                 [.] success_3_1_3      
     0.03%    cprog  [unknown]             [k] 00000000             cprog                 [k] callme             
     0.01%    cprog  [unknown]             [.] 0xf7e79bb0           [unknown]             [.] 0xf7e64088         
     0.00%    cprog  libc-2.11.2.so        [.] _IO_file_doallocate  libc-2.11.2.so        [.] mmap               
     0.00%    cprog  libc-2.11.2.so        [.] mmap                 libc-2.11.2.so        [.] _IO_file_doallocate
     0.00%    cprog  [unknown]             [.] 0xf7e7589c           libc-2.11.2.so        [.] printf             
     0.00%    cprog  [unknown]             [k] 00000000             cprog                 [k] sw_3_1          

(6) perf record -j any_call,ind_call -e branch-misses:u ./cprog

# Overhead  Command  Source Shared Object   Source Symbol  Target Shared Object      Target Symbol
# ........  .......  ....................  ..............  ....................  .................
#
    23.09%    cprog  [unknown]             [.] 00000000    cprog                 [.] sw_3_1       
     8.99%    cprog  cprog                 [.] sw_4_2      cprog                 [.] lr_addr      
     8.92%    cprog  [unknown]             [.] 00000000    cprog                 [.] callme       
     5.18%    cprog  cprog                 [.] sw_3_1      cprog                 [.] success_3_1_2
     5.16%    cprog  cprog                 [.] sw_3_1      cprog                 [.] success_3_1_1
     5.16%    cprog  cprog                 [.] callme      cprog                 [.] sw_3_2       
     5.12%    cprog  cprog                 [.] sw_3_1      cprog                 [.] success_3_1_3
     3.85%    cprog  cprog                 [.] sw_3_1      cprog                 [.] sw_3_1_1     
     3.85%    cprog  cprog                 [.] callme      cprog                 [.] sw_3_1       
     3.84%    cprog  cprog                 [.] sw_4_1      cprog                 [.] ctr_addr     
     3.82%    cprog  cprog                 [.] hw_1_1      cprog                 [.] symbol1      
     3.82%    cprog  cprog                 [.] sw_3_1      cprog                 [.] sw_3_1_2     
     3.82%    cprog  cprog                 [.] sw_3_1      cprog                 [.] sw_3_1_3     
     3.82%    cprog  cprog                 [.] callme      cprog                 [.] hw_1_1       
     3.81%    cprog  cprog                 [.] hw_1_2      cprog                 [.] symbol2      
     3.81%    cprog  cprog                 [.] callme      cprog                 [.] hw_1_2       
     3.81%    cprog  cprog                 [.] callme      cprog                 [.] sw_4_2       
     0.05%    cprog  [unknown]             [k] 00000000    cprog                 [k] callme       
     0.03%    cprog  [unknown]             [.] 0xf7f7232c  [unknown]             [.] 0xf7f72334   
     0.01%    cprog  ld-2.11.2.so          [.] malloc      [unknown]             [.] 0xf7f8b380   
     0.01%    cprog  cprog                 [.] main        [unknown]             [.] 0x10000950   
     0.01%    cprog  [unknown]             [.] 00000000    ld-2.11.2.so          [.] malloc       
     0.01%    cprog  [unknown]             [.] 00000000    cprog                 [.] main         

(7) perf record -j cond,any_ret -e branch-misses:u ./cprog

# Overhead  Command  Source Shared Object          Source Symbol  Target Shared Object          Target Symbol
# ........  .......  ....................  .....................  ....................  .....................
#
    12.18%    cprog  [unknown]             [.] 00000000           cprog                 [.] sw_3_1           
     4.90%    cprog  cprog                 [.] sw_4_2             cprog                 [.] lr_addr          
     4.88%    cprog  [unknown]             [.] 00000000           cprog                 [.] callme           
     4.88%    cprog  cprog                 [.] lr_addr            cprog                 [.] sw_4_2           
     4.88%    cprog  cprog                 [.] sw_4_2             cprog                 [.] callme           
     4.86%    cprog  cprog                 [.] symbol1            cprog                 [.] hw_1_1           
     4.86%    cprog  cprog                 [.] success_3_1_3      cprog                 [.] sw_3_1           
     4.85%    cprog  cprog                 [.] symbol2            cprog                 [.] hw_1_2           
     4.85%    cprog  cprog                 [.] ctr_addr           cprog                 [.] sw_4_1           
     2.47%    cprog  cprog                 [.] sw_3_1_3           cprog                 [.] sw_3_1           
     2.46%    cprog  cprog                 [.] back1              cprog                 [.] callme           
     2.45%    cprog  cprog                 [.] hw_1_1             cprog                 [.] callme           
     2.45%    cprog  cprog                 [.] hw_2_1             cprog                 [.] address1         
     2.44%    cprog  cprog                 [.] hw_1_2             cprog                 [.] symbol2          
     2.44%    cprog  cprog                 [.] sw_3_1_1           cprog                 [.] sw_3_1           
     2.44%    cprog  cprog                 [.] sw_3_2             cprog                 [.] callme           
     2.44%    cprog  cprog                 [.] success_3_1_1      cprog                 [.] sw_3_1           
     2.44%    cprog  cprog                 [.] sw_3_1             cprog                 [.] success_3_1_1    
     2.44%    cprog  cprog                 [.] sw_3_1             cprog                 [.] success_3_1_3    
     2.43%    cprog  cprog                 [.] callme             cprog                 [.] main             
     2.43%    cprog  cprog                 [.] hw_2_2             cprog                 [.] address2         
     2.43%    cprog  cprog                 [.] sw_3_1_2           cprog                 [.] sw_3_1           
     2.43%    cprog  cprog                 [.] success_3_1_2      cprog                 [.] sw_3_1           
     2.43%    cprog  cprog                 [.] sw_3_1             cprog                 [.] success_3_1_2    
     2.43%    cprog  cprog                 [.] sw_4_1             cprog                 [.] callme           
     2.42%    cprog  cprog                 [.] sw_3_1             cprog                 [.] callme           
     2.42%    cprog  cprog                 [.] sw_4_1             cprog                 [.] ctr_addr         
     2.42%    cprog  cprog                 [.] back2              cprog                 [.] callme           
     2.40%    cprog  cprog                 [.] hw_1_2             cprog                 [.] callme           
     0.10%    cprog  [unknown]             [.] 0xf78923e0         [unknown]             [.] 0xf78923c0       
     0.03%    cprog  [unknown]             [k] 00000000           cprog                 [k] callme           
     0.01%    cprog  [unknown]             [k] 00000000           cprog                 [k] sw_3_1           
     0.01%    cprog  libc-2.11.2.so        [.] vfprintf           libc-2.11.2.so        [.] vfprintf         
     0.01%    cprog  libc-2.11.2.so        [.] _IO_file_overflow  [unknown]             [.] 0x0fee0100       
     0.01%    cprog  libc-2.11.2.so        [.] strchrnul          libc-2.11.2.so        [.] vfprintf         
     0.01%    cprog  libc-2.11.2.so        [.] strchrnul          libc-2.11.2.so        [.] strchrnul        
     0.01%    cprog  [unknown]             [.] 00000000           libc-2.11.2.so        [.] _IO_file_overflow


(8) perf record -j cond,ind_call -e branch-misses:u ./cprog

# Overhead  Command  Source Shared Object   Source Symbol  Target Shared Object        Target Symbol
# ........  .......  ....................  ..............  ....................  ...................
#
    26.21%    cprog  [unknown]             [.] 00000000    cprog                 [.] sw_3_1         
    10.50%    cprog  cprog                 [.] sw_4_2      cprog                 [.] lr_addr        
    10.38%    cprog  [unknown]             [.] 00000000    cprog                 [.] callme         
     5.31%    cprog  cprog                 [.] sw_3_1_2    cprog                 [.] sw_3_1         
     5.30%    cprog  cprog                 [.] sw_3_1_1    cprog                 [.] sw_3_1         
     5.27%    cprog  cprog                 [.] sw_3_1      cprog                 [.] success_3_1_2  
     5.26%    cprog  cprog                 [.] hw_2_2      cprog                 [.] address2       
     5.25%    cprog  cprog                 [.] hw_1_2      cprog                 [.] symbol2        
     5.25%    cprog  cprog                 [.] sw_3_1      cprog                 [.] success_3_1_3  
     5.24%    cprog  cprog                 [.] hw_2_1      cprog                 [.] address1       
     5.23%    cprog  cprog                 [.] sw_4_1      cprog                 [.] ctr_addr       
     5.20%    cprog  cprog                 [.] sw_3_1_3    cprog                 [.] sw_3_1         
     5.19%    cprog  cprog                 [.] sw_3_1      cprog                 [.] success_3_1_1  
     0.24%    cprog  [unknown]             [.] 0xf7cf23e0  [unknown]             [.] 0xf7cf23c0     
     0.11%    cprog  [unknown]             [k] 00000000    cprog                 [k] callme         
     0.01%    cprog  libc-2.11.2.so        [.] vfprintf    libc-2.11.2.so        [.] vfprintf       
     0.01%    cprog  libc-2.11.2.so        [.] vfprintf    libc-2.11.2.so        [.] _IO_file_xsputn
     0.01%    cprog  [unknown]             [.] 00000000    libc-2.11.2.so        [.] vfprintf       
     0.01%    cprog  [unknown]             [k] 00000000    cprog                 [k] sw_3_1         

(9) perf record -j any_call,cond,any_ret -e branch-misses:u ./cprog

# Overhead  Command  Source Shared Object      Source Symbol  Target Shared Object          Target Symbol
# ........  .......  ....................  .................  ....................  .....................
#
     9.96%    cprog  [unknown]             [.] 00000000       cprog                 [.] sw_3_1           
     4.06%    cprog  cprog                 [.] sw_4_2         cprog                 [.] lr_addr          
     4.04%    cprog  cprog                 [.] lr_addr        cprog                 [.] sw_4_2           
     4.03%    cprog  cprog                 [.] symbol1        cprog                 [.] hw_1_1           
     4.02%    cprog  [unknown]             [.] 00000000       cprog                 [.] callme           
     3.96%    cprog  cprog                 [.] ctr_addr       cprog                 [.] sw_4_1           
     3.94%    cprog  cprog                 [.] symbol2        cprog                 [.] hw_1_2           
     3.94%    cprog  cprog                 [.] success_3_1_3  cprog                 [.] sw_3_1           
     3.93%    cprog  cprog                 [.] sw_4_2         cprog                 [.] callme           
     2.08%    cprog  cprog                 [.] sw_3_2         cprog                 [.] callme           
     2.08%    cprog  cprog                 [.] callme         cprog                 [.] sw_3_2           
     2.07%    cprog  cprog                 [.] hw_2_2         cprog                 [.] address2         
     2.07%    cprog  cprog                 [.] success_3_1_2  cprog                 [.] sw_3_1           
     2.07%    cprog  cprog                 [.] sw_3_1         cprog                 [.] success_3_1_2    
     2.07%    cprog  cprog                 [.] back2          cprog                 [.] callme           
     2.06%    cprog  cprog                 [.] hw_1_1         cprog                 [.] callme           
     1.99%    cprog  cprog                 [.] sw_4_1         cprog                 [.] ctr_addr         
     1.98%    cprog  cprog                 [.] sw_3_1_3       cprog                 [.] sw_3_1           
     1.98%    cprog  cprog                 [.] success_3_1_1  cprog                 [.] sw_3_1           
     1.98%    cprog  cprog                 [.] sw_3_1         cprog                 [.] sw_3_1_3         
     1.98%    cprog  cprog                 [.] sw_3_1         cprog                 [.] success_3_1_1    
     1.98%    cprog  cprog                 [.] callme         cprog                 [.] sw_4_2           
     1.98%    cprog  cprog                 [.] back1          cprog                 [.] callme           
     1.97%    cprog  cprog                 [.] hw_1_1         cprog                 [.] symbol1          
     1.97%    cprog  cprog                 [.] hw_2_1         cprog                 [.] address1         
     1.97%    cprog  cprog                 [.] sw_3_1_1       cprog                 [.] sw_3_1           
     1.97%    cprog  cprog                 [.] sw_3_1         cprog                 [.] sw_3_1_1         
     1.97%    cprog  cprog                 [.] sw_3_1         cprog                 [.] success_3_1_3    
     1.97%    cprog  cprog                 [.] callme         cprog                 [.] hw_1_1           
     1.97%    cprog  cprog                 [.] callme         cprog                 [.] sw_3_1           
     1.97%    cprog  cprog                 [.] hw_1_2         cprog                 [.] symbol2          
     1.97%    cprog  cprog                 [.] hw_1_2         cprog                 [.] callme           
     1.97%    cprog  cprog                 [.] sw_4_1         cprog                 [.] callme           
     1.97%    cprog  cprog                 [.] callme         cprog                 [.] main             
     1.97%    cprog  cprog                 [.] callme         cprog                 [.] hw_1_2           
     1.96%    cprog  cprog                 [.] sw_3_1         cprog                 [.] callme           
     1.96%    cprog  cprog                 [.] sw_3_1_2       cprog                 [.] sw_3_1           
     1.96%    cprog  cprog                 [.] sw_3_1         cprog                 [.] sw_3_1_2         
     0.12%    cprog  [unknown]             [.] 0xf7ab23e0     [unknown]             [.] 0xf7ab23c0       
     0.04%    cprog  [unknown]             [k] 00000000       cprog                 [k] callme           
     0.01%    cprog  [unknown]             [k] 00000000       cprog                 [k] sw_3_1           
     0.00%    cprog  libc-2.11.2.so        [.] vfprintf       libc-2.11.2.so        [.] vfprintf         
     0.00%    cprog  libc-2.11.2.so        [.] _IO_do_write   libc-2.11.2.so        [.] _IO_do_write     
     0.00%    cprog  libc-2.11.2.so        [.] _IO_do_write   libc-2.11.2.so        [.] _IO_file_overflow
     0.00%    cprog  libc-2.11.2.so        [.] strchrnul      libc-2.11.2.so        [.] vfprintf         
     0.00%    cprog  libc-2.11.2.so        [.] strchrnul      libc-2.11.2.so        [.] strchrnul        
     0.00%    cprog  cprog                 [.] callme         cprog                 [.] hw_2_2           
     0.00%    cprog  [unknown]             [.] 00000000       libc-2.11.2.so        [.] _IO_do_write     

(10) perf record -j any_call,cond,ind_call -e branch-misses:u ./cprog

# Overhead  Command  Source Shared Object          Source Symbol  Target Shared Object          Target Symbol
# ........  .......  ....................  .....................  ....................  .....................
#
    17.81%    cprog  [unknown]             [.] 00000000           cprog                 [.] sw_3_1           
     7.19%    cprog  cprog                 [.] sw_4_2             cprog                 [.] lr_addr          
     7.12%    cprog  [unknown]             [.] 00000000           cprog                 [.] callme           
     3.71%    cprog  cprog                 [.] sw_3_1             cprog                 [.] success_3_1_2    
     3.68%    cprog  cprog                 [.] callme             cprog                 [.] sw_3_2           
     3.67%    cprog  cprog                 [.] hw_2_2             cprog                 [.] address2         
     3.57%    cprog  cprog                 [.] hw_2_1             cprog                 [.] address1         
     3.55%    cprog  cprog                 [.] hw_1_1             cprog                 [.] symbol1          
     3.55%    cprog  cprog                 [.] sw_3_1             cprog                 [.] success_3_1_1    
     3.55%    cprog  cprog                 [.] callme             cprog                 [.] hw_1_1           
     3.54%    cprog  cprog                 [.] sw_3_1_1           cprog                 [.] sw_3_1           
     3.54%    cprog  cprog                 [.] sw_3_1             cprog                 [.] sw_3_1_1         
     3.54%    cprog  cprog                 [.] sw_4_1             cprog                 [.] ctr_addr         
     3.54%    cprog  cprog                 [.] callme             cprog                 [.] sw_3_1           
     3.52%    cprog  cprog                 [.] sw_3_1_3           cprog                 [.] sw_3_1           
     3.52%    cprog  cprog                 [.] sw_3_1             cprog                 [.] sw_3_1_3         
     3.52%    cprog  cprog                 [.] sw_3_1             cprog                 [.] success_3_1_3    
     3.52%    cprog  cprog                 [.] sw_3_1_2           cprog                 [.] sw_3_1           
     3.52%    cprog  cprog                 [.] sw_3_1             cprog                 [.] sw_3_1_2         
     3.51%    cprog  cprog                 [.] hw_1_2             cprog                 [.] symbol2          
     3.51%    cprog  cprog                 [.] callme             cprog                 [.] hw_1_2           
     3.49%    cprog  cprog                 [.] callme             cprog                 [.] sw_4_2           
     0.22%    cprog  [unknown]             [.] 0xf7ca23f4         [unknown]             [.] 0xf7ca25d0       
     0.05%    cprog  [unknown]             [k] 00000000           cprog                 [k] callme           
     0.01%    cprog  libc-2.11.2.so        [.] vfprintf           libc-2.11.2.so        [.] vfprintf         
     0.01%    cprog  libc-2.11.2.so        [.] vfprintf           libc-2.11.2.so        [.] strchrnul        
     0.01%    cprog  libc-2.11.2.so        [.] _IO_file_overflow  libc-2.11.2.so        [.] _IO_file_overflow
     0.01%    cprog  libc-2.11.2.so        [.] strchrnul          libc-2.11.2.so        [.] strchrnul        
     0.01%    cprog  [unknown]             [.] 00000000           libc-2.11.2.so        [.] _IO_file_overflow
     0.01%    cprog  [unknown]             [k] 00000000           cprog                 [k] sw_3_1        

(11) perf record -j any_call,cond,any_ret,ind_call -e branch-misses:u ./cprog

# Overhead  Command  Source Shared Object      Source Symbol  Target Shared Object        Target Symbol
# ........  .......  ....................  .................  ....................  ...................
#
     9.72%    cprog  [unknown]             [.] 00000000       cprog                 [.] sw_3_1         
     3.99%    cprog  cprog                 [.] ctr_addr       cprog                 [.] sw_4_1         
     3.98%    cprog  cprog                 [.] success_3_1_3  cprog                 [.] sw_3_1         
     3.98%    cprog  cprog                 [.] symbol1        cprog                 [.] hw_1_1         
     3.98%    cprog  cprog                 [.] symbol2        cprog                 [.] hw_1_2         
     3.98%    cprog  cprog                 [.] sw_4_2         cprog                 [.] lr_addr        
     3.98%    cprog  cprog                 [.] sw_4_2         cprog                 [.] callme         
     3.97%    cprog  cprog                 [.] lr_addr        cprog                 [.] sw_4_2         
     3.91%    cprog  [unknown]             [.] 00000000       cprog                 [.] callme         
     2.22%    cprog  cprog                 [.] sw_4_1         cprog                 [.] ctr_addr       
     2.22%    cprog  cprog                 [.] callme         cprog                 [.] sw_4_2         
     2.22%    cprog  cprog                 [.] hw_2_1         cprog                 [.] address1       
     2.22%    cprog  cprog                 [.] back1          cprog                 [.] callme         
     2.21%    cprog  cprog                 [.] hw_1_2         cprog                 [.] symbol2        
     2.21%    cprog  cprog                 [.] sw_3_1         cprog                 [.] callme         
     2.21%    cprog  cprog                 [.] callme         cprog                 [.] hw_1_2         
     2.21%    cprog  cprog                 [.] sw_3_1_1       cprog                 [.] sw_3_1         
     2.21%    cprog  cprog                 [.] sw_3_1_3       cprog                 [.] sw_3_1         
     2.21%    cprog  cprog                 [.] sw_3_1         cprog                 [.] sw_3_1_1       
     2.21%    cprog  cprog                 [.] sw_3_1         cprog                 [.] sw_3_1_3       
     2.21%    cprog  cprog                 [.] callme         cprog                 [.] sw_3_1         
     2.20%    cprog  cprog                 [.] hw_1_1         cprog                 [.] symbol1        
     2.20%    cprog  cprog                 [.] sw_3_1_2       cprog                 [.] sw_3_1         
     2.20%    cprog  cprog                 [.] sw_3_1         cprog                 [.] sw_3_1_2       
     2.20%    cprog  cprog                 [.] callme         cprog                 [.] hw_1_1         
     1.77%    cprog  cprog                 [.] hw_1_1         cprog                 [.] callme         
     1.77%    cprog  cprog                 [.] success_3_1_1  cprog                 [.] sw_3_1         
     1.77%    cprog  cprog                 [.] sw_3_1         cprog                 [.] success_3_1_1  
     1.77%    cprog  cprog                 [.] success_3_1_2  cprog                 [.] sw_3_1         
     1.77%    cprog  cprog                 [.] sw_3_1         cprog                 [.] success_3_1_2  
     1.77%    cprog  cprog                 [.] sw_3_1         cprog                 [.] success_3_1_3  
     1.76%    cprog  cprog                 [.] hw_1_2         cprog                 [.] callme         
     1.76%    cprog  cprog                 [.] sw_4_1         cprog                 [.] callme         
     1.76%    cprog  cprog                 [.] sw_3_2         cprog                 [.] callme         
     1.76%    cprog  cprog                 [.] callme         cprog                 [.] main           
     1.76%    cprog  cprog                 [.] callme         cprog                 [.] sw_3_2         
     1.75%    cprog  cprog                 [.] hw_2_2         cprog                 [.] address2       
     1.75%    cprog  cprog                 [.] back2          cprog                 [.] callme         
     0.13%    cprog  [unknown]             [.] 0xf7dd23e0     [unknown]             [.] 0xf7dd23c0     
     0.07%    cprog  [unknown]             [k] 00000000       cprog                 [k] callme         
     0.00%    cprog  libc-2.11.2.so        [.] vfprintf       libc-2.11.2.so        [.] vfprintf       
     0.00%    cprog  libc-2.11.2.so        [.] vfprintf       libc-2.11.2.so        [.] _IO_file_xsputn
     0.00%    cprog  [unknown]             [.] 00000000       libc-2.11.2.so        [.] vfprintf       

Test application program
========================
(1) Makefile:
--------------------------------------------
# Build the BHRB branch-filter test program (cprog) from cprog.c and the
# hand-written assembly in sample.s, and dump disassembly of both for
# cross-checking branch addresses against the perf report.
#
# all/clean name actions, not files, so mark them phony.
.PHONY: all clean

all: sample.o cprog of.cprog of.sample

# Recipe lines must start with a TAB character, not spaces.
sample.o: sample.s
	as -o sample.o sample.s
cprog: cprog.c sample.o
	gcc -o cprog cprog.c sample.o
of.sample: sample.o
	objdump -d sample.o > of.sample
of.cprog: cprog
	objdump -d cprog > of.cprog
# -f keeps "make clean" from failing when some outputs were never built.
clean:
	rm -f sample.o cprog of.sample of.cprog
---------------------------------------------
(2) cprog.c
---------------------------------------------
#include <stdio.h>
#define LOOP_COUNT 10000

extern void callme(void);

/*
 * Test driver: invoke the assembly entry point callme() LOOP_COUNT times,
 * then print "end" and exit with status 0. argc/argv are unused.
 */
int main(int argc, char *argv[])
{
        int remaining = LOOP_COUNT;

        while (remaining-- > 0)
                callme();

        printf("end");
        return 0;
}
---------------------------------------------
(3) sample.s
---------------------------------------------
# r25, r26 and r27 are used as the first-, second- and third-level
# save slots for LR. Registers r20, r21, r22, r23 and r24 are used
# as general-purpose scratch registers.

.data

msg:
	.string "BHRB filter tests\n"
	len = . - msg
msg_1_1:
	.string "Test: hw_1_1\n"
	len_1_1 = 13
msg_1_2:
	.string "Test: hw_1_2\n"
	len_1_2 = 13
msg_2_1:
	.string "Test: hw_2_1\n"
	len_2_1 = 13
msg_2_2:
	.string "Test: hw_2_2\n"
	len_2_2 = 13
msg_3_1:
	.string "Test: sw_3_1\n"
	len_3_1 = 13
msg_3_1_1:
	.string "Test: sw_3_1_1\n"
	len_3_1_1 = 15
msg_3_1_2:
	.string "Test: sw_3_1_2\n"
	len_3_1_2 = 15
msg_3_1_3:
        .string "Test: sw_3_1_3\n"
        len_3_1_3 = 15
# Banner loaded by sw_3_2. The length symbol is named after the message
# (len_3_2), matching the msg_X / len_X pairing used by the other banners;
# 13 is the string length without the terminating NUL.
msg_3_2:
	.string "Test: sw_3_2\n"
	len_3_2 = 13
msg_4_1:
	.string "Test: sw_4_1\n"
	len_4_1 = 13
msg_4_2:
	.string "Test: sw_4_2\n"
	len_4_2 = 13

hw_3_1_1_passed:
	.string "\thw_3_1_1_passed\n\n"
	len_hw_3_1_1_passed = 18
hw_3_1_2_passed:
	.string "\thw_3_1_2_passed\n\n"
	len_hw_3_1_2_passed = 18
hw_3_1_3_passed:
	.string "\thw_3_1_3_passed\n\n"
	len_hw_3_1_3_passed = 18

hw_2_1_passed:
	.string "\thw_2_1_passed\n\n"
	len_hw_2_1_passed = 16

hw_2_2_passed:
	.string "\thw_2_2_passed\n\n"
	len_hw_2_2_passed = 16

hw_1_1_passed:
	.string "\thw_1_1_passed\n\n"
	len_hw_1_1_passed = 16

hw_1_2_passed:
	.string "\thw_1_2_passed\n\n"
	len_hw_1_2_passed = 16

hw_4_1_passed:
	.string "\thw_4_1_passed\n\n"
	len_hw_4_1_passed = 16

hw_4_2_passed:
	.string "\thw_4_2_passed\n\n"
	len_hw_4_2_passed = 16

msg_error:
	.string "\tError\n"
	len_error = 7
.text
	.global callme
	.global hw_1_1
	.global hw_1_2
	.global hw_2_1
	.global hw_2_2

# HW filter test symbols
# symbol1: call target of the "bl symbol1" in hw_1_1.
# Prints the hw_1_1_passed marker string and returns through LR.
symbol1:
	# Print "hw_1_1_passed": r0 = 4 / r3 = 1 select the system call and
	# its first argument, r4 = buffer address, r5 = byte count
	li      0, 4
	li      3, 1
	lis     4, hw_1_1_passed@ha
	addi    4, 4, hw_1_1_passed@l
	li      5, len_hw_1_1_passed
	sc

	blr				# PERF_SAMPLE_BRANCH_ANY_RET

# hw_1_1: unconditional direct call test.
# Prints its banner, calls symbol1 with a plain "bl", then returns to the
# caller. LR is parked in r26 because the "bl" overwrites it.
hw_1_1:
        # Save LR - second level
        mflr 26

	# Print the banner msg_1_1 ("Test: hw_1_1")
	li      0, 4
	li      3, 1
	lis     4, msg_1_1@ha
	addi    4, 4, msg_1_1@l
	li      5, len_1_1
	sc

	bl symbol1			# PERF_SAMPLE_BRANCH_ANY_CALL

	# Restore LR
	mtlr 26
	blr				# PERF_SAMPLE_BRANCH_ANY_RET

# symbol2: call target of the conditional "bcl" in hw_1_2.
# Prints the hw_1_2_passed marker string and returns through LR.
symbol2:
        # Print "hw_1_2_passed"
        li      0, 4
        li      3, 1
        lis     4, hw_1_2_passed@ha
        addi    4, 4, hw_1_2_passed@l
        li      5, len_hw_1_2_passed
        sc

	blr				# PERF_SAMPLE_BRANCH_ANY_RET
# hw_1_2: conditional direct call test.
# Prints its banner, then calls symbol2 through a conditional
# branch-and-link whose condition is arranged to be true.
hw_1_2:
	# Save LR - second level
	mflr 26

        # Print the banner msg_1_2 ("Test: hw_1_2")
        li      0, 4
        li      3, 1
        lis     4, msg_1_2@ha
        addi    4, 4, msg_1_2@l
        li      5, len_1_2
        sc

	# r4 = 20 compared with 20 sets the EQ bit of CR0, so the
	# branch-if-true (BO = 12) on that bit is taken
	li 4,20
	cmpi 0,4,20
	bcl 12, 4*cr0+2, symbol2	# PERF_SAMPLE_BRANCH_ANY_CALL | PERF_SAMPLE_BRANCH_COND

	# Restore LR
	mtlr 26
	blr				# PERF_SAMPLE_BRANCH_ANY_RET

# HW filter test

# address1 / hw_2_1 / back1: conditional branch test (equal).
# hw_2_1 is the entry point. Its "bc" jumps to address1 when the compare
# sets the EQ bit of CR3; address1 prints the pass marker and jumps back
# to back1, which returns to hw_2_1's caller. No branch here links, so LR
# still holds the caller's return address and is not saved.
address1: 
	# Print "hw_2_1_passed"
        li      0, 4
        li      3, 1
        lis     4, hw_2_1_passed@ha
        addi    4, 4, hw_2_1_passed@l
        li      5, len_hw_2_1_passed
        sc
	b  back1			# PERF_SAMPLE_BRANCH_ANY

hw_2_1:
	# Print the banner msg_2_1 ("Test: hw_2_1")
	li      0, 4
	li      3, 1
	lis     4, msg_2_1@ha
	addi    4, 4, msg_2_1@l
	li      5, len_2_1
	sc
	
	# Simple conditional branch (equal): r20 = 12 compared with 12
	li	20, 12
	cmpi	3, 20, 12
	bc	12, 4*cr3+2, address1	# PERF_SAMPLE_BRANCH_COND

back1:
	blr				# PERF_SAMPLE_BRANCH_ANY_RET

# address2 / hw_2_2 / back2: conditional branch test (less than).
# hw_2_2 is the entry point. Its "bc" jumps to address2 when the compare
# sets the LT bit of CR4; address2 prints the pass marker and jumps back
# to back2, which returns to hw_2_2's caller. No branch here links, so LR
# is not saved.
address2:
        # Print "hw_2_2_passed"
        li      0, 4
        li      3, 1
        lis     4, hw_2_2_passed@ha
        addi    4, 4, hw_2_2_passed@l
        li      5, len_hw_2_2_passed
        sc
        b  back2			# PERF_SAMPLE_BRANCH_ANY

hw_2_2:
        # Print the banner msg_2_2 ("Test: hw_2_2")
	li      0, 4
	li      3, 1
	lis     4, msg_2_2@ha
	addi    4, 4, msg_2_2@l
	li      5, len_2_2
	sc

	# Simple conditional branch (less than): r20 = 12 compared with 20
	li	20, 12
	cmpi	4, 20, 20
	bc	12, 4*cr4+0, address2	# PERF_SAMPLE_BRANCH_COND
back2:
	blr				# PERF_SAMPLE_BRANCH_ANY_RET

# SW filter test symbols
# sw_3_1_1: conditional return test (equal).
# Clears the status register r22, then returns through a conditional
# "bclr" on the EQ bit of CR0. If that return is not taken, prints the
# error message, sets r22 = 1 and returns unconditionally.
# sw_3_1 inspects r22 after the call.
sw_3_1_1:
	# Print "Test: sw_3_1_1"
        li      0, 4
        li      3, 1
        lis     4, msg_3_1_1@ha
        addi    4, 4, msg_3_1_1@l
        li      5, len_3_1_1
        sc

	# r22 = 0: no error so far
	li	22,0
	# Test the condition and return (r21 = 10 compared with 10 -> EQ)
	li	21, 10
	cmpi	0, 21, 10
	bclr	12, 2			# PERF_SAMPLE_BRANCH_ANY_RET | PERF_SAMPLE_BRANCH_COND

	# Should not have come here
	li      0, 4
	li      3, 1
        lis     4, msg_error@ha
        addi    4, 4, msg_error@l
        li      5, len_error
        sc
	
	# Mark the error
	li 	22, 1
	
	# Safe fall back
	blr				# PERF_SAMPLE_BRANCH_ANY_RET

# sw_3_1_2: conditional return test (less than).
# Clears the status register r23, then returns through a conditional
# "bclr" on the LT bit of CR0. If that return is not taken, prints the
# error message, sets r23 = 1 and returns unconditionally.
# sw_3_1 inspects r23 after the call.
sw_3_1_2:
        # Print "Test: sw_3_1_2"
        li      0, 4
        li      3, 1
        lis     4, msg_3_1_2@ha
        addi    4, 4, msg_3_1_2@l
        li      5, len_3_1_2
        sc

	# r23 = 0: no error so far
	li	23, 0
	# Test the condition and return (r21 = 10 compared with 20 -> LT)
	li	21, 10
	cmpi	0, 21, 20
	bclr	12, 0			# PERF_SAMPLE_BRANCH_ANY_RET | PERF_SAMPLE_BRANCH_COND
        
	# Should not have come here
	li      0, 4
	li      3, 1
        lis     4, msg_error@ha
        addi    4, 4, msg_error@l
        li      5, len_error
        sc

	# Mark the error
	li 	23, 1

	# Safe fall back
	blr				# PERF_SAMPLE_BRANCH_ANY_RET

# sw_3_1_3: conditional return test (greater than).
# Clears the status register r24, then returns through a conditional
# "bclr" on the GT bit of CR0. If that return is not taken, sets r24 = 1,
# prints the error message and returns unconditionally.
# sw_3_1 inspects r24 after the call.
sw_3_1_3:
	# Print "Test: sw_3_1_3"
        li      0, 4
        li      3, 1
        lis     4, msg_3_1_3@ha
        addi    4, 4, msg_3_1_3@l
        li      5, len_3_1_3
        sc

	# r24 = 0: no error so far
	li	24, 0
	# Test the condition and return (r21 = 10 compared with 5 -> GT)
	li	21, 10
	cmpi	0, 21, 5
	bclr	12, 1			# PERF_SAMPLE_BRANCH_ANY_RET | PERF_SAMPLE_BRANCH_COND
	
	# Mark the error
	li 	24, 1

	# Should not have come here
	li      0, 4
	li      3, 1
        lis     4, msg_error@ha
        addi    4, 4, msg_error@l
        li      5, len_error
        sc

	# Safe fall back
	blr				# PERF_SAMPLE_BRANCH_ANY_RET

# success_3_1_1: called from sw_3_1 when sw_3_1_1 left r22 == 0.
# Prints the hw_3_1_1_passed marker string and returns through LR.
success_3_1_1:
	li      0, 4
	li      3, 1
        lis     4, hw_3_1_1_passed@ha
        addi    4, 4, hw_3_1_1_passed@l
        li      5, len_hw_3_1_1_passed
        sc
	blr

# success_3_1_2: called from sw_3_1 when sw_3_1_2 left r23 == 0.
# Prints the hw_3_1_2_passed marker string and returns through LR.
success_3_1_2:
	li      0, 4
	li      3, 1
        lis     4, hw_3_1_2_passed@ha
        addi    4, 4, hw_3_1_2_passed@l
        li      5, len_hw_3_1_2_passed
        sc
	blr

# success_3_1_3: called from sw_3_1 when sw_3_1_3 left r24 == 0.
# Prints the hw_3_1_3_passed marker string and returns through LR.
success_3_1_3:
	li      0, 4
	li      3, 1
        lis     4, hw_3_1_3_passed@ha
        addi    4, 4, hw_3_1_3_passed@l
        li      5, len_hw_3_1_3_passed
        sc
	blr

# sw_3_1: driver for the conditional-return tests.
# Calls sw_3_1_1, sw_3_1_2 and sw_3_1_3 in turn. Each callee leaves a
# status in r22, r23 or r24 (0 = its conditional return was taken). When
# the status is 0 the matching success_3_1_N routine is called through a
# conditional branch-and-link on the EQ bit of CR0.
sw_3_1:
	# Save LR (restored before the final blr; the calls below overwrite it)
	mflr 26

        # Print "Test: sw_3_1"
        li      0, 4
        li      3, 1
        lis     4, msg_3_1@ha
        addi    4, 4, msg_3_1@l
        li      5, len_3_1
        sc

	# Equal comparison condition: callee returns on EQ, status in r22
	bl sw_3_1_1			# PERF_SAMPLE_BRANCH_ANY_CALL
	cmpi	0, 22, 0
	bcl	12, 2, success_3_1_1	# PERF_SAMPLE_BRANCH_ANY_CALL | PERF_SAMPLE_BRANCH_COND

	# LT comparison condition: callee returns on LT, status in r23
	bl sw_3_1_2			# PERF_SAMPLE_BRANCH_ANY_CALL
	cmpi	0, 23, 0
	bcl	12, 2, success_3_1_2	# PERF_SAMPLE_BRANCH_ANY_CALL | PERF_SAMPLE_BRANCH_COND

	# GT comparison condition: callee returns on GT, status in r24
	bl sw_3_1_3			# PERF_SAMPLE_BRANCH_ANY_CALL
	cmpi	0, 24, 0
	bcl	12, 2, success_3_1_3	# PERF_SAMPLE_BRANCH_ANY_CALL | PERF_SAMPLE_BRANCH_COND

	# Restore LR
	mtlr 26
	blr				# PERF_SAMPLE_BRANCH_ANY_RET
# sw_3_2: plain return test.
# Prints its banner and returns; it makes no calls, so LR is not saved.
sw_3_2:
	# Print "Test: sw_3_2"
	# NOTE(review): the byte count passed is len_3_1, not a length symbol
	# specific to msg_3_2. Both banners are 13 bytes, so the output is
	# unaffected.
	li      0, 4
	li      3, 1
	lis     4, msg_3_2@ha
	addi    4, 4, msg_3_2@l
	li      5, len_3_1
	sc

	# FIXME: Anything more here ?
	blr				# PERF_SAMPLE_BRANCH_ANY_RET

# Indirect call tests

# CTR
# ctr_addr: target of the indirect call through CTR made by sw_4_1.
# Prints the hw_4_1_passed marker string and returns through LR.
ctr_addr:
        # Print "hw_4_1_passed" (the bcctrl in sw_4_1 was taken)
        li      0, 4
        li      3, 1
        lis     4, hw_4_1_passed@ha
        addi    4, 4, hw_4_1_passed@l
        li      5, len_hw_4_1_passed
        sc

	blr				# PERF_SAMPLE_BRANCH_ANY_RET
# sw_4_1: indirect call test through the count register.
# Loads the address of ctr_addr into CTR and calls it with a conditional
# branch-and-link to CTR whose condition is arranged to be true.
sw_4_1:
	# Save LR (the linking branch below overwrites it)
	mflr	26

	# Print the banner msg_4_1 ("Test: sw_4_1")
        li      0, 4
        li      3, 1
        lis     4, msg_4_1@ha
        addi    4, 4, msg_4_1@l
        li      5, len_4_1
        sc

	# Save address in CTR
	lis 	20, ctr_addr@ha
	addi	20, 20, ctr_addr@l
	mtctr   20


	# Compare and jump to CTR (r21 = 10 compared with 10 -> EQ set)
	li 	21, 10
	cmpi	0, 21, 10
	bcctrl  12, 4*cr0+2		# PERF_SAMPLE_BRANCH_IND_CALL

	# Restore LR
	mtlr	26
	blr				# PERF_SAMPLE_BRANCH_ANY_RET
# LR
# lr_addr: target of the indirect call through LR made by sw_4_2.
# Prints the hw_4_2_passed marker string and returns through LR.
lr_addr:
	# Print "hw_4_2_passed" (the bclrl in sw_4_2 was taken)
	li      0, 4
	li      3, 1
	lis     4, hw_4_2_passed@ha
	addi    4, 4, hw_4_2_passed@l
	li      5, len_hw_4_2_passed
	sc

	blr				# PERF_SAMPLE_BRANCH_ANY_RET

# sw_4_2: indirect call test through the link register.
# Loads the address of lr_addr into LR and calls it with a conditional
# branch-and-link to LR whose condition is arranged to be true. The
# caller's return address is kept in r26 meanwhile.
sw_4_2:
	# Save LR (it is reused below as the branch target)
	mflr	26

        # Print "Test: sw_4_2"
        li      0, 4
        li      3, 1
        lis     4, msg_4_2@ha
        addi    4, 4, msg_4_2@l
        li      5, len_4_2
        sc

	# Save address in LR
	lis 	20, lr_addr@ha
	addi	20, 20, lr_addr@l
	mtlr	20


	# Compare and jump to LR (r21 = 10 compared with 10 -> EQ set)
	li 	21, 10
	cmpi	0, 21, 10
	bclrl   12, 4*cr0+2		# PERF_SAMPLE_BRANCH_IND_CALL

	# Restore LR
	mtlr	26	
	blr				# PERF_SAMPLE_BRANCH_ANY_RET

# callme: entry point called from main() in cprog.c.
# Prints the test banner and then calls every test routine once, in order:
# hw_1_1, hw_1_2, hw_2_1, hw_2_2, sw_3_1, sw_3_2, sw_4_1, sw_4_2.
# LR is kept in r25 across the calls.
# NOTE(review): this routine and its callees write r20-r26 without saving
# them; the PowerPC SysV ABI treats r14-r31 as callee-saved -- confirm
# this is acceptable for the C caller.
callme:
	# Save LR - first level
	mflr	25

	# Print the banner msg ("BHRB filter tests"); len is computed by the
	# assembler as ". - msg" in the data section
	li	0, 4
	li	3, 1
	lis 	4, msg@ha
	addi	4, 4, msg@l
	li	5, len
	sc

	# PERF_SAMPLE_BRANCH_ANY_CALL
	bl hw_1_1			# PERF_SAMPLE_BRANCH_ANY_CALL
	bl hw_1_2			# PERF_SAMPLE_BRANCH_ANY_CALL
	# PERF_SAMPLE_BRANCH_COND
	bl hw_2_1			# PERF_SAMPLE_BRANCH_ANY_CALL
	bl hw_2_2			# PERF_SAMPLE_BRANCH_ANY_CALL

	# PERF_SAMPLE_BRANCH_ANY_RET
	bl sw_3_1			# PERF_SAMPLE_BRANCH_ANY_CALL
	bl sw_3_2			# PERF_SAMPLE_BRANCH_ANY_CALL
	# PERF_SAMPLE_BRANCH_IND_CALL
	bl sw_4_1			# PERF_SAMPLE_BRANCH_ANY_CALL
	bl sw_4_2			# PERF_SAMPLE_BRANCH_ANY_CALL

	# Restore LR
	mtlr 25
	blr				# PERF_SAMPLE_BRANCH_ANY_RET
--------------------------------------------------------------------

Anshuman Khandual (10):
  perf: New conditional branch filter criteria in branch stack sampling
  powerpc, perf: Enable conditional branch filter for POWER8
  perf, tool: Conditional branch filter 'cond' added to perf record
  x86, perf: Add conditional branch filtering support
  perf, documentation: Description for conditional branch filter
  powerpc, perf: Change the name of HW PMU branch filter tracking variable
  powerpc, lib: Add new branch instruction analysis support functions
  powerpc, perf: Enable SW filtering in branch stack sampling framework
  power8, perf: Change BHRB branch filter configuration
  powerpc, perf: Cleanup SW branch filter list look up

 arch/powerpc/include/asm/code-patching.h     |  30 ++++
 arch/powerpc/include/asm/perf_event_server.h |   6 +-
 arch/powerpc/lib/code-patching.c             |  54 +++++-
 arch/powerpc/perf/core-book3s.c              | 260 +++++++++++++++++++++++++--
 arch/powerpc/perf/power8-pmu.c               |  75 ++++++--
 arch/x86/kernel/cpu/perf_event_intel_lbr.c   |   5 +
 include/uapi/linux/perf_event.h              |   3 +-
 tools/perf/Documentation/perf-record.txt     |   3 +-
 tools/perf/builtin-record.c                  |   1 +
 9 files changed, 404 insertions(+), 33 deletions(-)

-- 
1.7.11.7

^ permalink raw reply

* [V3 03/10] perf, tool: Conditional branch filter 'cond' added to perf record
From: Anshuman Khandual @ 2013-10-16  6:56 UTC (permalink / raw)
  To: linuxppc-dev, linux-kernel; +Cc: mikey, sukadev, michaele, eranian
In-Reply-To: <1381906617-11392-1-git-send-email-khandual@linux.vnet.ibm.com>

Adding perf record support for new branch stack filter criteria
PERF_SAMPLE_BRANCH_COND.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Reviewed-by: Stephane Eranian <eranian@google.com>
---
 tools/perf/builtin-record.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index ecca62e..802d11d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -625,6 +625,7 @@ static const struct branch_mode branch_modes[] = {
 	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
 	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
 	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
+	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
 	BRANCH_END
 };
 
-- 
1.7.11.7

^ permalink raw reply related

* [V3 06/10] powerpc, perf: Change the name of HW PMU branch filter tracking variable
From: Anshuman Khandual @ 2013-10-16  6:56 UTC (permalink / raw)
  To: linuxppc-dev, linux-kernel; +Cc: mikey, sukadev, michaele, eranian
In-Reply-To: <1381906617-11392-1-git-send-email-khandual@linux.vnet.ibm.com>

This patch simply changes the name of the variable from "bhrb_filter" to
"bhrb_hw_filter" in order to add one more variable which will track SW
filters in generic powerpc book3s code which will be implemented in the
subsequent patch.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 arch/powerpc/perf/core-book3s.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index eeae308..bc4dac7 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -47,7 +47,7 @@ struct cpu_hw_events {
 	int n_txn_start;
 
 	/* BHRB bits */
-	u64				bhrb_filter;	/* BHRB HW branch filter */
+	u64				bhrb_hw_filter;	/* BHRB HW branch filter */
 	int				bhrb_users;
 	void				*bhrb_context;
 	struct	perf_branch_stack	bhrb_stack;
@@ -1159,7 +1159,7 @@ static void power_pmu_enable(struct pmu *pmu)
 
  out:
 	if (cpuhw->bhrb_users)
-		ppmu->config_bhrb(cpuhw->bhrb_filter);
+		ppmu->config_bhrb(cpuhw->bhrb_hw_filter);
 
 	local_irq_restore(flags);
 }
@@ -1254,7 +1254,7 @@ nocheck:
  out:
 	if (has_branch_stack(event)) {
 		power_pmu_bhrb_enable(event);
-		cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+		cpuhw->bhrb_hw_filter = ppmu->bhrb_filter_map(
 					event->attr.branch_sample_type);
 	}
 
@@ -1637,10 +1637,10 @@ static int power_pmu_event_init(struct perf_event *event)
 	err = power_check_constraints(cpuhw, events, cflags, n + 1);
 
 	if (has_branch_stack(event)) {
-		cpuhw->bhrb_filter = ppmu->bhrb_filter_map(
+		cpuhw->bhrb_hw_filter = ppmu->bhrb_filter_map(
 					event->attr.branch_sample_type);
 
-		if(cpuhw->bhrb_filter == -1)
+		if(cpuhw->bhrb_hw_filter == -1)
 			return -EOPNOTSUPP;
 	}
 
-- 
1.7.11.7

^ permalink raw reply related

* [V3 05/10] perf, documentation: Description for conditional branch filter
From: Anshuman Khandual @ 2013-10-16  6:56 UTC (permalink / raw)
  To: linuxppc-dev, linux-kernel; +Cc: mikey, sukadev, michaele, eranian
In-Reply-To: <1381906617-11392-1-git-send-email-khandual@linux.vnet.ibm.com>

Adding documentation support for conditional branch filter.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Reviewed-by: Stephane Eranian <eranian@google.com>
---
 tools/perf/Documentation/perf-record.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index e297b74..59ca8d0 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -163,12 +163,13 @@ following filters are defined:
         - any_call: any function call or system call
         - any_ret: any function return or system call return
         - ind_call: any indirect branch
+        - cond: conditional branches
         - u:  only when the branch target is at the user level
         - k: only when the branch target is in the kernel
         - hv: only when the target is at the hypervisor level
 
 +
-The option requires at least one branch type among any, any_call, any_ret, ind_call.
+The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
 The privilege levels may be omitted, in which case, the privilege levels of the associated
 event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
 levels are subject to permissions.  When sampling on multiple events, branch stack sampling
-- 
1.7.11.7

^ permalink raw reply related

* [V3 04/10] x86, perf: Add conditional branch filtering support
From: Anshuman Khandual @ 2013-10-16  6:56 UTC (permalink / raw)
  To: linuxppc-dev, linux-kernel; +Cc: mikey, sukadev, michaele, eranian
In-Reply-To: <1381906617-11392-1-git-send-email-khandual@linux.vnet.ibm.com>

This patch adds conditional branch filtering support,
enabling it for PERF_SAMPLE_BRANCH_COND in perf branch
stack sampling framework by utilizing an available
software filter X86_BR_JCC.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Reviewed-by: Stephane Eranian <eranian@google.com>
---
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index d5be06a..9723773 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -371,6 +371,9 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 	if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
 		mask |= X86_BR_NO_TX;
 
+	if (br_type & PERF_SAMPLE_BRANCH_COND)
+		mask |= X86_BR_JCC;
+
 	/*
 	 * stash actual user request into reg, it may
 	 * be used by fixup code for some CPU
@@ -665,6 +668,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
 	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
 	 */
 	[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
+	[PERF_SAMPLE_BRANCH_COND]     = LBR_JCC,
 };
 
 static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
@@ -676,6 +680,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
 	[PERF_SAMPLE_BRANCH_ANY_CALL]	= LBR_REL_CALL | LBR_IND_CALL
 					| LBR_FAR,
 	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL,
+	[PERF_SAMPLE_BRANCH_COND]       = LBR_JCC,
 };
 
 /* core */
-- 
1.7.11.7

^ permalink raw reply related

* [V3 10/10] powerpc, perf: Cleanup SW branch filter list look up
From: Anshuman Khandual @ 2013-10-16  6:56 UTC (permalink / raw)
  To: linuxppc-dev, linux-kernel; +Cc: mikey, sukadev, michaele, eranian
In-Reply-To: <1381906617-11392-1-git-send-email-khandual@linux.vnet.ibm.com>

This patch adds an enumeration of all available SW branch filters
in the powerpc book3s code and also streamlines the lookup of the
SW branch filter entries when determining which branch filters
can be supported in SW.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
---
 arch/powerpc/perf/core-book3s.c | 38 +++++++++++++-------------------------
 1 file changed, 13 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index f983334..ec2dd61 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -566,6 +566,12 @@ static int match_filters(u64 branch_sample_type, u64 filter_mask)
 	return true;
 }
 
+/* SW implemented branch filters */
+static unsigned int power_sw_filter[] =	      { PERF_SAMPLE_BRANCH_ANY_CALL,
+						PERF_SAMPLE_BRANCH_COND,
+						PERF_SAMPLE_BRANCH_ANY_RETURN,
+						PERF_SAMPLE_BRANCH_IND_CALL };
+
 /*
  * Required SW based branch filters
  *
@@ -578,6 +584,7 @@ static u64 branch_filter_map(u64 branch_sample_type, u64 pmu_bhrb_filter,
 			     					u64 *filter_mask)
 {
 	u64 branch_sw_filter = 0;
+	unsigned int i;
 
 	/* No branch filter requested */
 	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY) {
@@ -593,34 +600,15 @@ static u64 branch_filter_map(u64 branch_sample_type, u64 pmu_bhrb_filter,
 	 * SW implemented filters. But right now, there is now way to
 	 * initimate the user about this decision.
 	 */
-	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
-		if (!(pmu_bhrb_filter & PERF_SAMPLE_BRANCH_ANY_CALL)) {
-			branch_sw_filter |= PERF_SAMPLE_BRANCH_ANY_CALL;
-			*filter_mask |= PERF_SAMPLE_BRANCH_ANY_CALL;
-		}
-	}
-
-	if (branch_sample_type & PERF_SAMPLE_BRANCH_COND) {
-		if (!(pmu_bhrb_filter & PERF_SAMPLE_BRANCH_COND)) {
-			branch_sw_filter |= PERF_SAMPLE_BRANCH_COND;
-			*filter_mask |= PERF_SAMPLE_BRANCH_COND;
-		}
-	}
 
-	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN) {
-		if (!(pmu_bhrb_filter & PERF_SAMPLE_BRANCH_ANY_RETURN)) {
-			branch_sw_filter |= PERF_SAMPLE_BRANCH_ANY_RETURN;
-			*filter_mask |= PERF_SAMPLE_BRANCH_ANY_RETURN;
-		}
-	}
-
-	if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL) {
-		if (!(pmu_bhrb_filter & PERF_SAMPLE_BRANCH_IND_CALL)) {
-			branch_sw_filter |= PERF_SAMPLE_BRANCH_IND_CALL;
-			*filter_mask |= PERF_SAMPLE_BRANCH_IND_CALL;
+	for (i = 0; i < ARRAY_SIZE(power_sw_filter); i++) {
+		if (branch_sample_type & power_sw_filter[i]) {
+			if (!(pmu_bhrb_filter & power_sw_filter[i])) {
+				branch_sw_filter |= power_sw_filter[i];
+				*filter_mask |= power_sw_filter[i];
+			}
 		}
 	}
-
 	return branch_sw_filter;
 }
 
-- 
1.7.11.7

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox