LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH 11/15] powerpc/powernv/sriov: Drop iov->pe_num_map[]
From: Alexey Kardashevskiy @ 2020-07-15  3:31 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev
In-Reply-To: <20200710052340.737567-12-oohall@gmail.com>



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> Currently the iov->pe_num_map[] does one of two things depending on
> whether single PE mode is being used or not. When it is, this contains an
> array which maps a vf_index to the corresponding PE number. When single PE
> mode is not being used this contains a scalar which is the base PE for the
> set of enabled VFs (for for VFn is base + n).
> 
> The array was necessary because when calling pnv_ioda_alloc_pe() there is
> no guarantee that the allocated PEs would be contigious. We can now


s/contigious/contiguous/ here and below.


> allocate contigious blocks of PEs so this is no longer an issue. This
> allows us to drop the if (single_mode) {} .. else {} block scattered
> through the SR-IOV code which is a nice clean up.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
>  arch/powerpc/platforms/powernv/pci-sriov.c | 109 +++++----------------
>  arch/powerpc/platforms/powernv/pci.h       |   4 +-
>  2 files changed, 25 insertions(+), 88 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
> index d53a85ccb538..08f88187d65a 100644
> --- a/arch/powerpc/platforms/powernv/pci-sriov.c
> +++ b/arch/powerpc/platforms/powernv/pci-sriov.c
> @@ -456,11 +456,13 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  
>  
>  			if (iov->m64_single_mode) {
> +				int pe_num = iov->vf_pe_arr[j].pe_number;
> +
>  				size = pci_iov_resource_size(pdev,
>  							PCI_IOV_RESOURCES + i);
>  				start = res->start + size * j;
>  				rc = pnv_ioda_map_m64_single(phb, win,
> -							     iov->pe_num_map[j],
> +							     pe_num,
>  							     start,
>  							     size);
>  			} else {
> @@ -599,38 +601,24 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
>  
>  static void pnv_pci_sriov_disable(struct pci_dev *pdev)
>  {
> +	u16                    num_vfs, base_pe;
>  	struct pnv_phb        *phb;
> -	struct pnv_ioda_pe    *pe;
>  	struct pnv_iov_data   *iov;
> -	u16                    num_vfs, i;
>  
>  	phb = pci_bus_to_pnvhb(pdev->bus);
>  	iov = pnv_iov_get(pdev);
>  	num_vfs = iov->num_vfs;
> +	base_pe = iov->vf_pe_arr[0].pe_number;
>  
>  	/* Release VF PEs */
>  	pnv_ioda_release_vf_PE(pdev);
>  
>  	if (phb->type == PNV_PHB_IODA2) {
>  		if (!iov->m64_single_mode)
> -			pnv_pci_vf_resource_shift(pdev, -*iov->pe_num_map);
> +			pnv_pci_vf_resource_shift(pdev, -base_pe);
>  
>  		/* Release M64 windows */
>  		pnv_pci_vf_release_m64(pdev, num_vfs);
> -
> -		/* Release PE numbers */
> -		if (iov->m64_single_mode) {
> -			for (i = 0; i < num_vfs; i++) {
> -				if (iov->pe_num_map[i] == IODA_INVALID_PE)
> -					continue;
> -
> -				pe = &phb->ioda.pe_array[iov->pe_num_map[i]];
> -				pnv_ioda_free_pe(pe);
> -			}
> -		} else
> -			bitmap_clear(phb->ioda.pe_alloc, *iov->pe_num_map, num_vfs);
> -		/* Releasing pe_num_map */
> -		kfree(iov->pe_num_map);
>  	}
>  }
>  
> @@ -656,13 +644,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>  		int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
>  		struct pci_dn *vf_pdn;
>  
> -		if (iov->m64_single_mode)
> -			pe_num = iov->pe_num_map[vf_index];
> -		else
> -			pe_num = *iov->pe_num_map + vf_index;
> -
> -		pe = &phb->ioda.pe_array[pe_num];
> -		pe->pe_number = pe_num;
> +		pe = &iov->vf_pe_arr[vf_index];
>  		pe->phb = phb;
>  		pe->flags = PNV_IODA_PE_VF;
>  		pe->pbus = NULL;
> @@ -670,6 +652,7 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>  		pe->mve_number = -1;
>  		pe->rid = (vf_bus << 8) | vf_devfn;
>  
> +		pe_num = pe->pe_number;
>  		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
>  			pci_domain_nr(pdev->bus), pdev->bus->number,
>  			PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
> @@ -701,9 +684,9 @@ static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
>  
>  static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>  {
> +	struct pnv_ioda_pe    *base_pe;
>  	struct pnv_iov_data   *iov;
>  	struct pnv_phb        *phb;
> -	struct pnv_ioda_pe    *pe;
>  	int                    ret;
>  	u16                    i;
>  
> @@ -717,55 +700,14 @@ static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>  			return -ENOSPC;
>  		}
>  
> -		/*
> -		 * When M64 BARs functions in Single PE mode, the number of VFs
> -		 * could be enabled must be less than the number of M64 BARs.
> -		 */
> -		if (iov->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) {
> -			dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n");
> +		/* allocate a contigious block of PEs for our VFs */
> +		base_pe = pnv_ioda_alloc_pe(phb, num_vfs);
> +		if (!base_pe) {
> +			pci_err(pdev, "Unable to allocate PEs for %d VFs\n", num_vfs);
>  			return -EBUSY;
>  		}
>  
> -		/* Allocating pe_num_map */
> -		if (iov->m64_single_mode)
> -			iov->pe_num_map = kmalloc_array(num_vfs,
> -							sizeof(*iov->pe_num_map),
> -							GFP_KERNEL);
> -		else
> -			iov->pe_num_map = kmalloc(sizeof(*iov->pe_num_map), GFP_KERNEL);
> -
> -		if (!iov->pe_num_map)
> -			return -ENOMEM;
> -
> -		if (iov->m64_single_mode)
> -			for (i = 0; i < num_vfs; i++)
> -				iov->pe_num_map[i] = IODA_INVALID_PE;
> -
> -		/* Calculate available PE for required VFs */
> -		if (iov->m64_single_mode) {
> -			for (i = 0; i < num_vfs; i++) {
> -				pe = pnv_ioda_alloc_pe(phb);
> -				if (!pe) {
> -					ret = -EBUSY;
> -					goto m64_failed;
> -				}
> -
> -				iov->pe_num_map[i] = pe->pe_number;
> -			}
> -		} else {
> -			mutex_lock(&phb->ioda.pe_alloc_mutex);
> -			*iov->pe_num_map = bitmap_find_next_zero_area(
> -				phb->ioda.pe_alloc, phb->ioda.total_pe_num,
> -				0, num_vfs, 0);
> -			if (*iov->pe_num_map >= phb->ioda.total_pe_num) {
> -				mutex_unlock(&phb->ioda.pe_alloc_mutex);
> -				dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
> -				kfree(iov->pe_num_map);
> -				return -EBUSY;
> -			}
> -			bitmap_set(phb->ioda.pe_alloc, *iov->pe_num_map, num_vfs);
> -			mutex_unlock(&phb->ioda.pe_alloc_mutex);
> -		}
> +		iov->vf_pe_arr = base_pe;
>  		iov->num_vfs = num_vfs;
>  
>  		/* Assign M64 window accordingly */
> @@ -781,9 +723,10 @@ static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>  		 * Otherwise, the PE# for the VF will conflict with others.
>  		 */
>  		if (!iov->m64_single_mode) {
> -			ret = pnv_pci_vf_resource_shift(pdev, *iov->pe_num_map);
> +			ret = pnv_pci_vf_resource_shift(pdev,
> +							base_pe->pe_number);
>  			if (ret)
> -				goto m64_failed;
> +				goto shift_failed;
>  		}
>  	}
>  
> @@ -792,20 +735,12 @@ static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
>  
>  	return 0;
>  
> -m64_failed:
> -	if (iov->m64_single_mode) {
> -		for (i = 0; i < num_vfs; i++) {
> -			if (iov->pe_num_map[i] == IODA_INVALID_PE)
> -				continue;
> -
> -			pe = &phb->ioda.pe_array[iov->pe_num_map[i]];
> -			pnv_ioda_free_pe(pe);
> -		}
> -	} else
> -		bitmap_clear(phb->ioda.pe_alloc, *iov->pe_num_map, num_vfs);
> +shift_failed:
> +	pnv_pci_vf_release_m64(pdev, num_vfs);
>  
> -	/* Releasing pe_num_map */
> -	kfree(iov->pe_num_map);
> +m64_failed:
> +	for (i = 0; i < num_vfs; i++)
> +		pnv_ioda_free_pe(&iov->vf_pe_arr[i]);
>  
>  	return ret;
>  }
> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> index b4c9bdba7217..13555bc549f4 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -238,7 +238,9 @@ struct pnv_iov_data {
>  
>  	/* number of VFs enabled */
>  	u16     num_vfs;
> -	unsigned int *pe_num_map;	/* PE# for the first VF PE or array */
> +
> +	/* pointer to the array of VF PEs. num_vfs long*/

I read the comment and for a second I thought that now you are storing
pnv_ioda_pe structs in pnv_iov_data which is not true: vf_pe_arr
actually points inside phb->ioda.pe_array[]. May be add this to the
comment please.

Otherwise good,


Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>




> +	struct pnv_ioda_pe *vf_pe_arr;
>  
>  	/* Did we map the VF BARs with single-PE IODA BARs? */
>  	bool    m64_single_mode;
> 

-- 
Alexey

^ permalink raw reply

* Re: [PATCH 05/13] cpufreq/arch: powerpc: pasemi: Move prototypes to shared header
From: Olof Johansson @ 2020-07-15  3:26 UTC (permalink / raw)
  To: Lee Jones
  Cc: linux-pm, Rafael J. Wysocki, Linux Kernel Mailing List,
	Paul Mackerras, viresh kumar, linuxppc-dev,
	Linux ARM Mailing List
In-Reply-To: <20200714145049.2496163-6-lee.jones@linaro.org>

On Tue, Jul 14, 2020 at 7:50 AM Lee Jones <lee.jones@linaro.org> wrote:
>
> If function callers and providers do not share the same prototypes the
> compiler complains of missing prototypes.  Fix this by moving the
> already existing prototypes out to a mutually convenient location.
>
> Fixes the following W=1 kernel build warning(s):
>
>  drivers/cpufreq/pasemi-cpufreq.c:109:5: warning: no previous prototype for ‘check_astate’ [-Wmissing-prototypes]
>  109 | int check_astate(void)
>  | ^~~~~~~~~~~~
>  drivers/cpufreq/pasemi-cpufreq.c:114:6: warning: no previous prototype for ‘restore_astate’ [-Wmissing-prototypes]
>  114 | void restore_astate(int cpu)
>  | ^~~~~~~~~~~~~~
>
> Cc: Olof Johansson <olof@lixom.net>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Lee Jones <lee.jones@linaro.org>
> ---
>  arch/powerpc/platforms/pasemi/pasemi.h    | 15 ------------
>  arch/powerpc/platforms/pasemi/powersave.S |  2 ++
>  drivers/cpufreq/pasemi-cpufreq.c          |  1 +
>  include/linux/platform_data/pasemi.h      | 28 +++++++++++++++++++++++
>  4 files changed, 31 insertions(+), 15 deletions(-)
>  create mode 100644 include/linux/platform_data/pasemi.h
>
> diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h
> index 70b56048ed1be..528d81ef748ad 100644
> --- a/arch/powerpc/platforms/pasemi/pasemi.h
> +++ b/arch/powerpc/platforms/pasemi/pasemi.h
> @@ -15,21 +15,6 @@ extern void __init pasemi_map_registers(void);
>  extern void idle_spin(void);
>  extern void idle_doze(void);
>
> -/* Restore astate to last set */
> -#ifdef CONFIG_PPC_PASEMI_CPUFREQ
> -extern int check_astate(void);
> -extern void restore_astate(int cpu);
> -#else
> -static inline int check_astate(void)
> -{
> -       /* Always return >0 so we never power save */
> -       return 1;
> -}
> -static inline void restore_astate(int cpu)
> -{
> -}
> -#endif
> -
>  extern struct pci_controller_ops pasemi_pci_controller_ops;
>
>  #endif /* _PASEMI_PASEMI_H */
> diff --git a/arch/powerpc/platforms/pasemi/powersave.S b/arch/powerpc/platforms/pasemi/powersave.S
> index d0215d5329ca7..7747b48963286 100644
> --- a/arch/powerpc/platforms/pasemi/powersave.S
> +++ b/arch/powerpc/platforms/pasemi/powersave.S
> @@ -5,6 +5,8 @@
>   * Maintained by: Olof Johansson <olof@lixom.net>
>   */
>
> +#include <linux/platform_data/pasemi.h>
> +
>  #include <asm/processor.h>
>  #include <asm/page.h>
>  #include <asm/ppc_asm.h>
> diff --git a/drivers/cpufreq/pasemi-cpufreq.c b/drivers/cpufreq/pasemi-cpufreq.c
> index c66f566a854cb..c6bb3ecc90ef3 100644
> --- a/drivers/cpufreq/pasemi-cpufreq.c
> +++ b/drivers/cpufreq/pasemi-cpufreq.c
> @@ -15,6 +15,7 @@
>  #include <linux/timer.h>
>  #include <linux/module.h>
>  #include <linux/of_address.h>
> +#include <linux/platform_data/pasemi.h>
>
>  #include <asm/hw_irq.h>
>  #include <asm/io.h>
> diff --git a/include/linux/platform_data/pasemi.h b/include/linux/platform_data/pasemi.h
> new file mode 100644
> index 0000000000000..3fed0687fcc9a
> --- /dev/null
> +++ b/include/linux/platform_data/pasemi.h
> @@ -0,0 +1,28 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2020 Linaro Ltd.
> + *
> + * Author: Lee Jones <lee.jones@linaro.org>
> + */

Absolutely not. It's neither your copyright, nor your authorship.


-Olof

^ permalink raw reply

* [PATCH] powerpc: Fix inconsistent function names
From: YueHaibing @ 2020-07-15  3:19 UTC (permalink / raw)
  To: mpe, benh, paulus, haren, dave.hansen, npiggin
  Cc: YueHaibing, linuxppc-dev, linux-kernel

The stub helpers name should be consistent with prototypes.

mm_context_add_vas_windows() --> mm_context_add_vas_window()
mm_context_remove_vas_windows() --> mm_context_remove_vas_window()

Fixes: c420644c0a8f ("powerpc: Use mm_context vas_windows counter to issue CP_ABORT")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
 arch/powerpc/include/asm/mmu_context.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 1a474f6b1992..00fd1d44731a 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -218,8 +218,8 @@ static inline void inc_mm_active_cpus(struct mm_struct *mm) { }
 static inline void dec_mm_active_cpus(struct mm_struct *mm) { }
 static inline void mm_context_add_copro(struct mm_struct *mm) { }
 static inline void mm_context_remove_copro(struct mm_struct *mm) { }
-static inline void mm_context_add_vas_windows(struct mm_struct *mm) { }
-static inline void mm_context_remove_vas_windows(struct mm_struct *mm) { }
+static inline void mm_context_add_vas_window(struct mm_struct *mm) { }
+static inline void mm_context_remove_vas_window(struct mm_struct *mm) { }
 #endif
 
 
-- 
2.17.1



^ permalink raw reply related

* Re: [PATCH 10/15] powerpc/powernv/pci: Refactor pnv_ioda_alloc_pe()
From: Alexey Kardashevskiy @ 2020-07-15  3:15 UTC (permalink / raw)
  To: Oliver O'Halloran; +Cc: linuxppc-dev
In-Reply-To: <CAOSf1CGkQBL5caG7tgCUzC7OP55qVk86_XbAx9L43+hy7YNXSw@mail.gmail.com>



On 15/07/2020 12:53, Oliver O'Halloran wrote:
> On Wed, Jul 15, 2020 at 12:29 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>>
>>
>>
>> On 10/07/2020 15:23, Oliver O'Halloran wrote:
>>> Rework the PE allocation logic to allow allocating blocks of PEs rather
>>> than individually. We'll use this to allocate contigious blocks of PEs for
>>> the SR-IOVs.
>>
>> The patch does not do just this, it also adds missing mutexes (which is
>> good) but still misses them in pnv_pci_sriov_disable() and
>> pnv_pci_ioda_pe_dump().
> 
> The current implementation doesn't need the mutex because alloc,
> reserve and free all use atomic bit ops.

Ah, ok.

> The mutex has been there
> forever with nothing actually using it, but with the change we need to
> prevent modifications to the bitmap while alloc() is scanning it. I
> probably should have mentioned that in the commit message.

but bitmap_clear() (from pnv_pci_sriov_disable()) is not atomic. It
probably does not matter as the next patch gets rid of it anyway.


-- 
Alexey

^ permalink raw reply

* [PATCH -next] powerpc/smp: Remove unused inline functions
From: YueHaibing @ 2020-07-15  3:09 UTC (permalink / raw)
  To: mpe, benh, paulus; +Cc: YueHaibing, linuxppc-dev, linux-kernel

commit 441c19c8a290 ("powerpc/kvm/book3s_hv: Rework the secondary inhibit code")
left behind this, remove it.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
 arch/powerpc/include/asm/smp.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index 49a25e2400f2..5d5a7596d144 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -176,8 +176,6 @@ extern void __cpu_die(unsigned int cpu);
 /* for UP */
 #define hard_smp_processor_id()		get_hard_smp_processor_id(0)
 #define smp_setup_cpu_maps()
-static inline void inhibit_secondary_onlining(void) {}
-static inline void uninhibit_secondary_onlining(void) {}
 static inline const struct cpumask *cpu_sibling_mask(int cpu)
 {
 	return cpumask_of(cpu);
-- 
2.17.1



^ permalink raw reply related

* Re: [PATCH 07/13] cpufreq: powernv-cpufreq: Fix a bunch of kerneldoc related issues
From: Viresh Kumar @ 2020-07-15  3:09 UTC (permalink / raw)
  To: Lee Jones
  Cc: linux-pm, rjw, linux-kernel, Paul Mackerras, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20200714145049.2496163-8-lee.jones@linaro.org>

On 14-07-20, 15:50, Lee Jones wrote:
> Repair problems with formatting and missing attributes/parameters, and
> demote header comments which do not meet the required standards
> applicable to kerneldoc.
> 
> Fixes the following W=1 kernel build warning(s):
> 
>  drivers/cpufreq/powernv-cpufreq.c:84: warning: Function parameter or member 'last_lpstate_idx' not described in 'global_pstate_info'
>  drivers/cpufreq/powernv-cpufreq.c:84: warning: Function parameter or member 'last_gpstate_idx' not described in 'global_pstate_info'
>  drivers/cpufreq/powernv-cpufreq.c:84: warning: Function parameter or member 'policy' not described in 'global_pstate_info'
>  drivers/cpufreq/powernv-cpufreq.c:182: warning: Function parameter or member 'i' not described in 'idx_to_pstate'
>  drivers/cpufreq/powernv-cpufreq.c:201: warning: Function parameter or member 'pstate' not described in 'pstate_to_idx'
>  drivers/cpufreq/powernv-cpufreq.c:670: warning: Function parameter or member 't' not described in 'gpstate_timer_handler'
>  drivers/cpufreq/powernv-cpufreq.c:670: warning: Excess function parameter 'data' description in 'gpstate_timer_handler'
> 
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Lee Jones <lee.jones@linaro.org>
> ---
>  drivers/cpufreq/powernv-cpufreq.c | 11 ++++++-----
>  1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
> index 068cc53abe320..2e5a8b8a4abaa 100644
> --- a/drivers/cpufreq/powernv-cpufreq.c
> +++ b/drivers/cpufreq/powernv-cpufreq.c
> @@ -64,13 +64,14 @@
>   *				highest_lpstate_idx
>   * @last_sampled_time:		Time from boot in ms when global pstates were
>   *				last set
> - * @last_lpstate_idx,		Last set value of local pstate and global
> - * last_gpstate_idx		pstate in terms of cpufreq table index
> + * @last_lpstate_idx:		Last set value of local pstate and global
> + * @last_gpstate_idx:		pstate in terms of cpufreq table index
>   * @timer:			Is used for ramping down if cpu goes idle for
>   *				a long time with global pstate held high
>   * @gpstate_lock:		A spinlock to maintain synchronization between
>   *				routines called by the timer handler and
>   *				governer's target_index calls
> + * @policy:			Associated CPUFreq policy
>   */
>  struct global_pstate_info {
>  	int highest_lpstate_idx;
> @@ -170,7 +171,7 @@ static inline u8 extract_pstate(u64 pmsr_val, unsigned int shift)
>  
>  /* Use following functions for conversions between pstate_id and index */
>  
> -/**
> +/*
>   * idx_to_pstate : Returns the pstate id corresponding to the
>   *		   frequency in the cpufreq frequency table
>   *		   powernv_freqs indexed by @i.
> @@ -188,7 +189,7 @@ static inline u8 idx_to_pstate(unsigned int i)
>  	return powernv_freqs[i].driver_data;
>  }
>  
> -/**
> +/*
>   * pstate_to_idx : Returns the index in the cpufreq frequencytable
>   *		   powernv_freqs for the frequency whose corresponding
>   *		   pstate id is @pstate.
> @@ -660,7 +661,7 @@ static inline void  queue_gpstate_timer(struct global_pstate_info *gpstates)
>  /**
>   * gpstate_timer_handler
>   *
> - * @data: pointer to cpufreq_policy on which timer was queued
> + * @t: Timer context used to fetch global pstate info struct
>   *
>   * This handler brings down the global pstate closer to the local pstate
>   * according quadratic equation. Queues a new timer if it is still not equal

Acked-by: Viresh Kumar <viresh.kumar@linaro.org>

-- 
viresh

^ permalink raw reply

* Re: [PATCH 06/13] cpufreq: powernv-cpufreq: Functions only used in call-backs should be static
From: Viresh Kumar @ 2020-07-15  3:07 UTC (permalink / raw)
  To: Lee Jones
  Cc: linux-pm, rjw, linux-kernel, Paul Mackerras, linuxppc-dev,
	linux-arm-kernel
In-Reply-To: <20200714145049.2496163-7-lee.jones@linaro.org>

On 14-07-20, 15:50, Lee Jones wrote:
> Fixes the following W=1 kernel build warning(s):
> 
>  drivers/cpufreq/powernv-cpufreq.c:669:6: warning: no previous prototype for ‘gpstate_timer_handler’ [-Wmissing-prototypes]
>  drivers/cpufreq/powernv-cpufreq.c:902:6: warning: no previous prototype for ‘powernv_cpufreq_work_fn’ [-Wmissing-prototypes]
> 
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Lee Jones <lee.jones@linaro.org>
> ---
>  drivers/cpufreq/powernv-cpufreq.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c
> index 8646eb197cd96..068cc53abe320 100644
> --- a/drivers/cpufreq/powernv-cpufreq.c
> +++ b/drivers/cpufreq/powernv-cpufreq.c
> @@ -666,7 +666,7 @@ static inline void  queue_gpstate_timer(struct global_pstate_info *gpstates)
>   * according quadratic equation. Queues a new timer if it is still not equal
>   * to local pstate
>   */
> -void gpstate_timer_handler(struct timer_list *t)
> +static void gpstate_timer_handler(struct timer_list *t)
>  {
>  	struct global_pstate_info *gpstates = from_timer(gpstates, t, timer);
>  	struct cpufreq_policy *policy = gpstates->policy;
> @@ -899,7 +899,7 @@ static struct notifier_block powernv_cpufreq_reboot_nb = {
>  	.notifier_call = powernv_cpufreq_reboot_notifier,
>  };
>  
> -void powernv_cpufreq_work_fn(struct work_struct *work)
> +static void powernv_cpufreq_work_fn(struct work_struct *work)
>  {
>  	struct chip *chip = container_of(work, struct chip, throttle);
>  	struct cpufreq_policy *policy;

Acked-by: Viresh Kumar <viresh.kumar@linaro.org>

-- 
viresh

^ permalink raw reply

* Re: [PATCH 05/13] cpufreq/arch: powerpc: pasemi: Move prototypes to shared header
From: Viresh Kumar @ 2020-07-15  3:07 UTC (permalink / raw)
  To: Lee Jones
  Cc: linux-pm, rjw, linux-kernel, Paul Mackerras, Olof Johansson,
	linuxppc-dev, linux-arm-kernel
In-Reply-To: <20200714145049.2496163-6-lee.jones@linaro.org>

On 14-07-20, 15:50, Lee Jones wrote:
> If function callers and providers do not share the same prototypes the
> compiler complains of missing prototypes.  Fix this by moving the
> already existing prototypes out to a mutually convenient location.
> 
> Fixes the following W=1 kernel build warning(s):
> 
>  drivers/cpufreq/pasemi-cpufreq.c:109:5: warning: no previous prototype for ‘check_astate’ [-Wmissing-prototypes]
>  109 | int check_astate(void)
>  | ^~~~~~~~~~~~
>  drivers/cpufreq/pasemi-cpufreq.c:114:6: warning: no previous prototype for ‘restore_astate’ [-Wmissing-prototypes]
>  114 | void restore_astate(int cpu)
>  | ^~~~~~~~~~~~~~
> 
> Cc: Olof Johansson <olof@lixom.net>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Paul Mackerras <paulus@samba.org>
> Cc: linuxppc-dev@lists.ozlabs.org
> Signed-off-by: Lee Jones <lee.jones@linaro.org>
> ---
>  arch/powerpc/platforms/pasemi/pasemi.h    | 15 ------------

Is there no sane way we can include this file directly to the cpufreq
file ?

>  arch/powerpc/platforms/pasemi/powersave.S |  2 ++
>  drivers/cpufreq/pasemi-cpufreq.c          |  1 +
>  include/linux/platform_data/pasemi.h      | 28 +++++++++++++++++++++++
>  4 files changed, 31 insertions(+), 15 deletions(-)
>  create mode 100644 include/linux/platform_data/pasemi.h

-- 
viresh

^ permalink raw reply

* [PATCH -next] powerpc/xive: Remove unused inline function xive_kexec_teardown_cpu()
From: YueHaibing @ 2020-07-15  2:50 UTC (permalink / raw)
  To: mpe, benh, paulus, groug; +Cc: YueHaibing, linuxppc-dev, linux-kernel

commit e27e0a94651e ("powerpc/xive: Remove xive_kexec_teardown_cpu()")
left behind this, remove it.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
 arch/powerpc/include/asm/xive.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index d08ea11b271c..309b4d65b74f 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -155,7 +155,6 @@ static inline void xive_smp_probe(void) { }
 static inline int  xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
 static inline void xive_smp_setup_cpu(void) { }
 static inline void xive_smp_disable_cpu(void) { }
-static inline void xive_kexec_teardown_cpu(int secondary) { }
 static inline void xive_shutdown(void) { }
 static inline void xive_flush_interrupt(void) { }
 
-- 
2.17.1



^ permalink raw reply related

* Re: [PATCH 10/15] powerpc/powernv/pci: Refactor pnv_ioda_alloc_pe()
From: Oliver O'Halloran @ 2020-07-15  2:53 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: linuxppc-dev
In-Reply-To: <dd85f256-0f52-f2f5-f729-2d5b7269f895@ozlabs.ru>

On Wed, Jul 15, 2020 at 12:29 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
>
>
> On 10/07/2020 15:23, Oliver O'Halloran wrote:
> > Rework the PE allocation logic to allow allocating blocks of PEs rather
> > than individually. We'll use this to allocate contigious blocks of PEs for
> > the SR-IOVs.
>
> The patch does not do just this, it also adds missing mutexes (which is
> good) but still misses them in pnv_pci_sriov_disable() and
> pnv_pci_ioda_pe_dump().

The current implementation doesn't need the mutex because alloc,
reserve and free all use atomic bit ops. The mutex has been there
forever with nothing actually using it, but with the change we need to
prevent modifications to the bitmap while alloc() is scanning it. I
probably should have mentioned that in the commit message.

^ permalink raw reply

* [powerpc:fixes-test] BUILD SUCCESS b710d27bf72068b15b2f0305d825988183e2ff28
From: kernel test robot @ 2020-07-15  2:52 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  fixes-test
branch HEAD: b710d27bf72068b15b2f0305d825988183e2ff28  powerpc/pseries/svm: Fix incorrect check for shared_lppaca_size

elapsed time: 721m

configs tested: 110
configs skipped: 6

The following configs have been built successfully.
More configs may be tested in the coming days.

arm                                 defconfig
arm                              allyesconfig
arm                              allmodconfig
arm                               allnoconfig
arm64                            allyesconfig
arm64                               defconfig
arm64                            allmodconfig
arm64                             allnoconfig
sh                           se7724_defconfig
sh                          sdk7786_defconfig
arm                          tango4_defconfig
sh                            migor_defconfig
i386                             allyesconfig
arm                         s3c6400_defconfig
m68k                         amcore_defconfig
c6x                               allnoconfig
xtensa                generic_kc705_defconfig
mips                           ip27_defconfig
mips                  mips_paravirt_defconfig
c6x                        evmc6474_defconfig
powerpc                      pasemi_defconfig
sh                        dreamcast_defconfig
openrisc                         alldefconfig
m68k                        m5407c3_defconfig
arm                          ep93xx_defconfig
arm                          moxart_defconfig
sparc                               defconfig
mips                        maltaup_defconfig
mips                          rb532_defconfig
powerpc                mpc7448_hpc2_defconfig
mips                        nlm_xlp_defconfig
sh                 kfr2r09-romimage_defconfig
openrisc                            defconfig
xtensa                          iss_defconfig
arm                  colibri_pxa270_defconfig
ia64                      gensparse_defconfig
i386                              allnoconfig
i386                                defconfig
i386                              debian-10.3
ia64                             allmodconfig
ia64                                defconfig
ia64                              allnoconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                              allnoconfig
m68k                           sun3_defconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
nios2                            allyesconfig
c6x                              allyesconfig
openrisc                         allyesconfig
nds32                               defconfig
nds32                             allnoconfig
csky                             allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
h8300                            allmodconfig
xtensa                              defconfig
arc                                 defconfig
arc                              allyesconfig
sh                               allmodconfig
sh                                allnoconfig
microblaze                        allnoconfig
mips                             allyesconfig
mips                              allnoconfig
mips                             allmodconfig
parisc                            allnoconfig
parisc                              defconfig
parisc                           allyesconfig
parisc                           allmodconfig
powerpc                          allyesconfig
powerpc                          rhel-kconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
powerpc                             defconfig
i386                 randconfig-a001-20200714
i386                 randconfig-a005-20200714
i386                 randconfig-a002-20200714
i386                 randconfig-a006-20200714
i386                 randconfig-a003-20200714
i386                 randconfig-a004-20200714
i386                 randconfig-a016-20200714
i386                 randconfig-a011-20200714
i386                 randconfig-a015-20200714
i386                 randconfig-a012-20200714
i386                 randconfig-a013-20200714
i386                 randconfig-a014-20200714
riscv                            allyesconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                            allmodconfig
s390                             allyesconfig
s390                              allnoconfig
s390                             allmodconfig
s390                                defconfig
sparc                            allyesconfig
sparc64                             defconfig
sparc64                           allnoconfig
sparc64                          allyesconfig
sparc64                          allmodconfig
x86_64                    rhel-7.6-kselftests
x86_64                               rhel-8.3
x86_64                                  kexec
x86_64                                   rhel
x86_64                                    lkp
x86_64                              fedora-25

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* [powerpc:next-test] BUILD SUCCESS 17babc2734a55cdf4a678d572f316280b820202b
From: kernel test robot @ 2020-07-15  2:52 UTC (permalink / raw)
  To: Michael Ellerman; +Cc: linuxppc-dev

tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git  next-test
branch HEAD: 17babc2734a55cdf4a678d572f316280b820202b  powerpc/fadump: fix race between pstore write and fadump crash trigger

elapsed time: 720m

configs tested: 104
configs skipped: 8

The following configs have been built successfully.
More configs may be tested in the coming days.

arm                                 defconfig
arm                              allyesconfig
arm                              allmodconfig
arm                               allnoconfig
arm64                            allyesconfig
arm64                               defconfig
arm64                            allmodconfig
arm64                             allnoconfig
sh                           se7724_defconfig
sh                          sdk7786_defconfig
arm                          tango4_defconfig
sh                            migor_defconfig
sh                           se7206_defconfig
mips                malta_qemu_32r6_defconfig
sparc64                             defconfig
mips                      pistachio_defconfig
sh                               alldefconfig
powerpc64                           defconfig
arm                            u300_defconfig
i386                             allyesconfig
arm                         s3c6400_defconfig
m68k                         amcore_defconfig
c6x                               allnoconfig
xtensa                generic_kc705_defconfig
mips                           ip27_defconfig
mips                  mips_paravirt_defconfig
c6x                        evmc6474_defconfig
powerpc                      pasemi_defconfig
sh                        dreamcast_defconfig
openrisc                         alldefconfig
m68k                        m5407c3_defconfig
arm                          ep93xx_defconfig
arm                          moxart_defconfig
sparc                               defconfig
mips                        maltaup_defconfig
mips                          rb532_defconfig
powerpc                mpc7448_hpc2_defconfig
mips                        nlm_xlp_defconfig
sh                 kfr2r09-romimage_defconfig
openrisc                            defconfig
xtensa                          iss_defconfig
arm                  colibri_pxa270_defconfig
ia64                      gensparse_defconfig
i386                              allnoconfig
i386                                defconfig
i386                              debian-10.3
ia64                             allmodconfig
ia64                                defconfig
ia64                              allnoconfig
ia64                             allyesconfig
m68k                             allmodconfig
m68k                              allnoconfig
m68k                           sun3_defconfig
m68k                                defconfig
m68k                             allyesconfig
nios2                               defconfig
nios2                            allyesconfig
c6x                              allyesconfig
openrisc                         allyesconfig
nds32                               defconfig
nds32                             allnoconfig
csky                             allyesconfig
csky                                defconfig
alpha                               defconfig
alpha                            allyesconfig
xtensa                           allyesconfig
h8300                            allyesconfig
h8300                            allmodconfig
xtensa                              defconfig
arc                                 defconfig
arc                              allyesconfig
sh                               allmodconfig
sh                                allnoconfig
microblaze                        allnoconfig
mips                             allyesconfig
mips                              allnoconfig
mips                             allmodconfig
parisc                            allnoconfig
parisc                              defconfig
parisc                           allyesconfig
parisc                           allmodconfig
powerpc                          allyesconfig
powerpc                          rhel-kconfig
powerpc                          allmodconfig
powerpc                           allnoconfig
powerpc                             defconfig
riscv                            allyesconfig
riscv                             allnoconfig
riscv                               defconfig
riscv                            allmodconfig
s390                             allyesconfig
s390                              allnoconfig
s390                             allmodconfig
s390                                defconfig
sparc                            allyesconfig
sparc64                           allnoconfig
sparc64                          allyesconfig
sparc64                          allmodconfig
x86_64                    rhel-7.6-kselftests
x86_64                               rhel-8.3
x86_64                                  kexec
x86_64                                   rhel
x86_64                                    lkp
x86_64                              fedora-25

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org

^ permalink raw reply

* Re: [PATCH v3 04/12] ppc64/kexec_file: avoid stomping memory used by special regions
From: Thiago Jung Bauermann @ 2020-07-15  2:39 UTC (permalink / raw)
  To: Hari Bathini
  Cc: Pingfan Liu, Nayna Jain, Kexec-ml, Mahesh J Salgaonkar,
	Mimi Zohar, lkml, linuxppc-dev, Sourabh Jain, Petr Tesarik,
	Andrew Morton, Dave Young, Vivek Goyal, Eric Biederman
In-Reply-To: <159466088775.24747.1248185448154277951.stgit@hbathini.in.ibm.com>


Hari Bathini <hbathini@linux.ibm.com> writes:

> diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h b/arch/powerpc/include/asm/crashdump-ppc64.h
> new file mode 100644
> index 0000000..90deb46
> --- /dev/null
> +++ b/arch/powerpc/include/asm/crashdump-ppc64.h
> @@ -0,0 +1,10 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H
> +#define _ASM_POWERPC_CRASHDUMP_PPC64_H
> +
> +/* min & max addresses for kdump load segments */
> +#define KDUMP_BUF_MIN		(crashk_res.start)
> +#define KDUMP_BUF_MAX		((crashk_res.end < ppc64_rma_size) ? \
> +				 crashk_res.end : (ppc64_rma_size - 1))
> +
> +#endif /* __ASM_POWERPC_CRASHDUMP_PPC64_H */
> diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
> index 7008ea1..bf47a01 100644
> --- a/arch/powerpc/include/asm/kexec.h
> +++ b/arch/powerpc/include/asm/kexec.h
> @@ -100,14 +100,16 @@ void relocate_new_kernel(unsigned long indirection_page, unsigned long reboot_co
>  #ifdef CONFIG_KEXEC_FILE
>  extern const struct kexec_file_ops kexec_elf64_ops;
>
> -#ifdef CONFIG_IMA_KEXEC
>  #define ARCH_HAS_KIMAGE_ARCH
>
>  struct kimage_arch {
> +	struct crash_mem *exclude_ranges;
> +
> +#ifdef CONFIG_IMA_KEXEC
>  	phys_addr_t ima_buffer_addr;
>  	size_t ima_buffer_size;
> -};
>  #endif
> +};
>
>  int setup_purgatory(struct kimage *image, const void *slave_code,
>  		    const void *fdt, unsigned long kernel_load_addr,
> @@ -125,6 +127,7 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
>  			unsigned long initrd_load_addr,
>  			unsigned long initrd_len, const char *cmdline);
>  #endif /* CONFIG_PPC64 */
> +
>  #endif /* CONFIG_KEXEC_FILE */
>
>  #else /* !CONFIG_KEXEC_CORE */
> diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
> index 23ad04c..c695f94 100644
> --- a/arch/powerpc/kexec/elf_64.c
> +++ b/arch/powerpc/kexec/elf_64.c
> @@ -22,6 +22,7 @@
>  #include <linux/of_fdt.h>
>  #include <linux/slab.h>
>  #include <linux/types.h>
> +#include <asm/crashdump-ppc64.h>
>
>  static void *elf64_load(struct kimage *image, char *kernel_buf,
>  			unsigned long kernel_len, char *initrd,
> @@ -46,6 +47,12 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
>  	if (ret)
>  		goto out;
>
> +	if (image->type == KEXEC_TYPE_CRASH) {
> +		/* min & max buffer values for kdump case */
> +		kbuf.buf_min = pbuf.buf_min = KDUMP_BUF_MIN;
> +		kbuf.buf_max = pbuf.buf_max = KDUMP_BUF_MAX;

This is only my personal opinion and an actual maintainer may disagree,
but just looking at the lines above, I would assume that KDUMP_BUF_MIN
and KDUMP_BUF_MAX were constants, when in fact they aren't.

I suggest using static inline macros in <asm/crashdump-ppc64.h>, for
example:

static inline resource_size_t get_kdump_buf_min(void)
{
	return crashk_res.start;
}

static inline resource_size_t get_kdump_buf_max(void)
{
	return (crashk_res.end < ppc64_rma_size) ? \
		 crashk_res.end : (ppc64_rma_size - 1)
}


> +	}
> +
>  	ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
>  	if (ret)
>  		goto out;

<snip>

> +/**
> + * __locate_mem_hole_top_down - Looks top down for a large enough memory hole
> + *                              in the memory regions between buf_min & buf_max
> + *                              for the buffer. If found, sets kbuf->mem.
> + * @kbuf:                       Buffer contents and memory parameters.
> + * @buf_min:                    Minimum address for the buffer.
> + * @buf_max:                    Maximum address for the buffer.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int __locate_mem_hole_top_down(struct kexec_buf *kbuf,
> +				      u64 buf_min, u64 buf_max)
> +{
> +	int ret = -EADDRNOTAVAIL;
> +	phys_addr_t start, end;
> +	u64 i;
> +
> +	for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE,
> +			       MEMBLOCK_NONE, &start, &end, NULL) {
> +		if (start > buf_max)
> +			continue;
> +
> +		/* Memory hole not found */
> +		if (end < buf_min)
> +			break;
> +
> +		/* Adjust memory region based on the given range */
> +		if (start < buf_min)
> +			start = buf_min;
> +		if (end > buf_max)
> +			end = buf_max;
> +
> +		start = ALIGN(start, kbuf->buf_align);
> +		if (start < end && (end - start + 1) >= kbuf->memsz) {

This is why I dislike using start and end to express address ranges:

While struct resource seems to use the [address, end] convention, my
reading of memblock code is that it uses [addres, end). This is
guaranteed to lead to bugs. So the above has an off-by-one error. To
calculate the size of the current range, you need to use `end - start`.

> +			/* Suitable memory range found. Set kbuf->mem */
> +			kbuf->mem = ALIGN_DOWN(end - kbuf->memsz + 1,

Similarly, I believe the `+ 1` here is wrong.

> +					       kbuf->buf_align);
> +			ret = 0;
> +			break;
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +/**
> + * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a
> + *                                  suitable buffer with top down approach.
> + * @kbuf:                           Buffer contents and memory parameters.
> + * @buf_min:                        Minimum address for the buffer.
> + * @buf_max:                        Maximum address for the buffer.
> + * @emem:                           Exclude memory ranges.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf,
> +					  u64 buf_min, u64 buf_max,
> +					  const struct crash_mem *emem)
> +{
> +	int i, ret = 0, err = -EADDRNOTAVAIL;
> +	u64 start, end, tmin, tmax;
> +
> +	tmax = buf_max;
> +	for (i = (emem->nr_ranges - 1); i >= 0; i--) {
> +		start = emem->ranges[i].start;
> +		end = emem->ranges[i].end;
> +
> +		if (start > tmax)
> +			continue;
> +
> +		if (end < tmax) {
> +			tmin = (end < buf_min ? buf_min : end + 1);
> +			ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
> +			if (!ret)
> +				return 0;
> +		}
> +
> +		tmax = start - 1;
> +
> +		if (tmax < buf_min) {
> +			ret = err;
> +			break;
> +		}
> +		ret = 0;
> +	}
> +
> +	if (!ret) {
> +		tmin = buf_min;
> +		ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
> +	}
> +	return ret;
> +}
> +
> +/**
> + * __locate_mem_hole_bottom_up - Looks bottom up for a large enough memory hole
> + *                               in the memory regions between buf_min & buf_max
> + *                               for the buffer. If found, sets kbuf->mem.
> + * @kbuf:                        Buffer contents and memory parameters.
> + * @buf_min:                     Minimum address for the buffer.
> + * @buf_max:                     Maximum address for the buffer.
> + *
> + * Returns 0 on success, negative errno on error.
> + */
> +static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf,
> +				       u64 buf_min, u64 buf_max)
> +{
> +	int ret = -EADDRNOTAVAIL;
> +	phys_addr_t start, end;
> +	u64 i;
> +
> +	for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
> +			   MEMBLOCK_NONE, &start, &end, NULL) {
> +		if (end < buf_min)
> +			continue;
> +
> +		/* Memory hole not found */
> +		if (start > buf_max)
> +			break;
> +
> +		/* Adjust memory region based on the given range */
> +		if (start < buf_min)
> +			start = buf_min;
> +		if (end > buf_max)
> +			end = buf_max;

buf_max is an inclusive end address, right? Then this should read
`end = buf_max + 1`. Same thing in the top-down version above.

> +
> +		start = ALIGN(start, kbuf->buf_align);
> +		if (start < end && (end - start + 1) >= kbuf->memsz) {

Same off-by-one problem. There shouldn't be a `+ 1` here.

> +			/* Suitable memory range found. Set kbuf->mem */
> +			kbuf->mem = start;
> +			ret = 0;
> +			break;
> +		}
> +	}
> +
> +	return ret;
> +}


--
Thiago Jung Bauermann
IBM Linux Technology Center

^ permalink raw reply

* Re: [PATCH 10/15] powerpc/powernv/pci: Refactor pnv_ioda_alloc_pe()
From: Alexey Kardashevskiy @ 2020-07-15  2:29 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev
In-Reply-To: <20200710052340.737567-11-oohall@gmail.com>



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> Rework the PE allocation logic to allow allocating blocks of PEs rather
> than individually. We'll use this to allocate contigious blocks of PEs for
> the SR-IOVs.

The patch does not do just this, it also adds missing mutexes (which is
good) but still misses them in pnv_pci_sriov_disable() and
pnv_pci_ioda_pe_dump().




> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 41 ++++++++++++++++++-----
>  arch/powerpc/platforms/powernv/pci.h      |  2 +-
>  2 files changed, 34 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index 2d36a9ebf0e9..c9c25fb0783c 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -145,23 +145,45 @@ static void pnv_ioda_reserve_pe(struct pnv_phb *phb, int pe_no)
>  		return;
>  	}
>  
> +	mutex_lock(&phb->ioda.pe_alloc_mutex);
>  	if (test_and_set_bit(pe_no, phb->ioda.pe_alloc))
>  		pr_debug("%s: PE %x was reserved on PHB#%x\n",
>  			 __func__, pe_no, phb->hose->global_number);
> +	mutex_unlock(&phb->ioda.pe_alloc_mutex);
>  
>  	pnv_ioda_init_pe(phb, pe_no);
>  }
>  
> -struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
> +struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count)
>  {
> -	long pe;
> +	struct pnv_ioda_pe *ret = NULL;
> +	int run = 0, pe, i;
>  
> +	mutex_lock(&phb->ioda.pe_alloc_mutex);
> +
> +	/* scan backwards for a run of @count cleared bits */
>  	for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) {
> -		if (!test_and_set_bit(pe, phb->ioda.pe_alloc))
> -			return pnv_ioda_init_pe(phb, pe);
> +		if (test_bit(pe, phb->ioda.pe_alloc)) {
> +			run = 0;
> +			continue;
> +		}
> +
> +		run++;
> +		if (run == count)
> +			break;
>  	}
> +	if (run != count)
> +		goto out;
>  
> -	return NULL;
> +	for (i = pe; i < pe + count; i++) {
> +		set_bit(i, phb->ioda.pe_alloc);
> +		pnv_ioda_init_pe(phb, i);
> +	}
> +	ret = &phb->ioda.pe_array[pe];
> +
> +out:
> +	mutex_unlock(&phb->ioda.pe_alloc_mutex);
> +	return ret;
>  }
>  
>  void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
> @@ -173,7 +195,10 @@ void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
>  	WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */
>  	kfree(pe->npucomp);
>  	memset(pe, 0, sizeof(struct pnv_ioda_pe));
> +
> +	mutex_lock(&phb->ioda.pe_alloc_mutex);
>  	clear_bit(pe_num, phb->ioda.pe_alloc);
> +	mutex_unlock(&phb->ioda.pe_alloc_mutex);
>  }
>  
>  /* The default M64 BAR is shared by all PEs */
> @@ -976,7 +1001,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
>  	if (pdn->pe_number != IODA_INVALID_PE)
>  		return NULL;
>  
> -	pe = pnv_ioda_alloc_pe(phb);
> +	pe = pnv_ioda_alloc_pe(phb, 1);
>  	if (!pe) {
>  		pr_warn("%s: Not enough PE# available, disabling device\n",
>  			pci_name(dev));
> @@ -1047,7 +1072,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
>  
>  	/* The PE number isn't pinned by M64 */
>  	if (!pe)
> -		pe = pnv_ioda_alloc_pe(phb);
> +		pe = pnv_ioda_alloc_pe(phb, 1);
>  
>  	if (!pe) {
>  		pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n",
> @@ -3065,7 +3090,7 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
>  		pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx);
>  	} else {
>  		/* otherwise just allocate one */
> -		root_pe = pnv_ioda_alloc_pe(phb);
> +		root_pe = pnv_ioda_alloc_pe(phb, 1);
>  		phb->ioda.root_pe_idx = root_pe->pe_number;
>  	}
>  
> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> index 58c97e60c3db..b4c9bdba7217 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -223,7 +223,7 @@ int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
>  void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
>  void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe);
>  
> -struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb);
> +struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count);
>  void pnv_ioda_free_pe(struct pnv_ioda_pe *pe);
>  
>  #ifdef CONFIG_PCI_IOV
> 

-- 
Alexey

^ permalink raw reply

* Re: [RFC PATCH 00/35] Move all PCIBIOS* definitions into arch/x86
From: Benjamin Herrenschmidt @ 2020-07-15  2:19 UTC (permalink / raw)
  To: Bjorn Helgaas, Arnd Bergmann
  Cc: linux-pci, Keith Busch, Paul Mackerras, sparclinux, Toan Le,
	Kjetil Oftedal, Greg Ungerer, Marek Vasut, Rob Herring,
	Lorenzo Pieralisi, Sagi Grimberg, Russell King, Ley Foon Tan,
	Christoph Hellwig, Geert Uytterhoeven, Kevin Hilman,
	Jakub Kicinski, Matt Turner, linux-kernel-mentees, Guenter Roeck,
	Ray Jui, Jens Axboe, Ivan Kokshaysky, Shuah Khan, bjorn,
	Boris Ostrovsky, Richard Henderson, Juergen Gross, Bjorn Helgaas,
	Thomas Bogendoerfer, Scott Branden, Jingoo Han,
	Saheed O. Bolarinwa, linux-kernel@vger.kernel.org, Philipp Zabel,
	Greg Kroah-Hartman, Gustavo Pimentel, linuxppc-dev,
	David S. Miller, Heiner Kallweit
In-Reply-To: <20200714234625.GA428442@bjorn-Precision-5520>

On Tue, 2020-07-14 at 18:46 -0500, Bjorn Helgaas wrote:
> Yes.  I have no problem with that.  There are a few cases where it's
> important to check for errors, e.g., we read a status register and do
> something based on a bit being set.  A failure will return all bits
> set, and we may do the wrong thing.  But most of the errors we care
> about will be on MMIO reads, not config reads, so we can probably
> ignore most config read errors.

And in both cases, we don't have the plumbing to provide accurate
and reliable error returns for all platforms anyways (esp. not for
MMIO).

I think it makes sense to stick to the good old "if all 1's, then go
out of line" including for config space.

 ../..

> Yep, except for things like device removal or other PCI errors.

A whole bunch of these are reported asynchronously, esp for writes (and
yes, including config writes, they are supposed to be non-posted but
more often than not, the path  from the CPU to the PCI bridge remains
posted for writes including config ones).

> So maybe a good place to start is by removing some of the useless
> error checking for pci_read_config_*() and pci_write_config_*().
> That's a decent-sized but not impractical project that could be done
> per subsystem or something:
> 
>   git grep -E "(if|return|=).*\<pci_(read|write)_config" drivers
> 
> finds about 400 matches.
> 
> Some of those callers probably really *do* want to check for errors,
> and I guess we'd have to identify them and do them separately as you
> mentioned.

I'd be curious about these considering how unreliable our error return
is accross the board.

Cheers,
Ben.



^ permalink raw reply

* Re: [PATCH v3 2/9] powerpc/watchpoint: Fix DAWR exception constraint
From: Jordan Niethe @ 2020-07-15  2:19 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: Christophe Leroy, apopple, mikey, miltonm, peterz, oleg,
	Nicholas Piggin, linux-kernel, Paul Mackerras, jolsa, fweisbec,
	pedromfc, naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200708045046.135702-3-ravi.bangoria@linux.ibm.com>

On Wed, Jul 8, 2020 at 2:52 PM Ravi Bangoria
<ravi.bangoria@linux.ibm.com> wrote:
>
> Pedro Miraglia Franco de Carvalho noticed that on p8, DAR value is
> inconsistent with different type of load/store. Like for byte,word
> etc. load/stores, DAR is set to the address of the first byte of
> overlap between watch range and real access. But for quadword load/
> store it's set to the address of the first byte of real access. This
> issue has been fixed in p10. In p10(ISA 3.1), DAR is always set to
> the address of the first byte of overlap. Commit 27985b2a640e
> ("powerpc/watchpoint: Don't ignore extraneous exceptions blindly")
> wrongly assumes that DAR is set to the address of the first byte of
> overlap for all load/stores on p8 as well. Fix that. With the fix,
> we now rely on 'ea' provided by analyse_instr(). If analyse_instr()
> fails, generate event unconditionally on p8, and on p10 generate
> event only if DAR is within a DAWR range.
>
> Note: 8xx is not affected.
>
> Fixes: 27985b2a640e ("powerpc/watchpoint: Don't ignore extraneous exceptions blindly")
> Fixes: 74c6881019b7 ("powerpc/watchpoint: Prepare handler to handle more than one watchpoint")
> Reported-by: Pedro Miraglia Franco de Carvalho <pedromfc@br.ibm.com>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
> ---
>  arch/powerpc/kernel/hw_breakpoint.c | 93 +++++++++++++++++++----------
>  1 file changed, 63 insertions(+), 30 deletions(-)
>
> diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
> index 031e6defc08e..7a66c370a105 100644
> --- a/arch/powerpc/kernel/hw_breakpoint.c
> +++ b/arch/powerpc/kernel/hw_breakpoint.c
> @@ -498,11 +498,11 @@ static bool dar_in_user_range(unsigned long dar, struct arch_hw_breakpoint *info
>         return ((info->address <= dar) && (dar - info->address < info->len));
>  }
>
> -static bool dar_user_range_overlaps(unsigned long dar, int size,
> -                                   struct arch_hw_breakpoint *info)
> +static bool ea_user_range_overlaps(unsigned long ea, int size,
> +                                  struct arch_hw_breakpoint *info)
>  {
> -       return ((dar < info->address + info->len) &&
> -               (dar + size > info->address));
> +       return ((ea < info->address + info->len) &&
> +               (ea + size > info->address));
>  }
>
>  static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info)
> @@ -515,20 +515,22 @@ static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info)
>         return ((hw_start_addr <= dar) && (hw_end_addr > dar));
>  }
>
> -static bool dar_hw_range_overlaps(unsigned long dar, int size,
> -                                 struct arch_hw_breakpoint *info)
> +static bool ea_hw_range_overlaps(unsigned long ea, int size,
> +                                struct arch_hw_breakpoint *info)
>  {
>         unsigned long hw_start_addr, hw_end_addr;
>
>         hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE);
>         hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
>
> -       return ((dar < hw_end_addr) && (dar + size > hw_start_addr));
> +       return ((ea < hw_end_addr) && (ea + size > hw_start_addr));
>  }
>
>  /*
>   * If hw has multiple DAWR registers, we also need to check all
>   * dawrx constraint bits to confirm this is _really_ a valid event.
> + * If type is UNKNOWN, but privilege level matches, consider it as
> + * a positive match.
>   */
>  static bool check_dawrx_constraints(struct pt_regs *regs, int type,
>                                     struct arch_hw_breakpoint *info)
> @@ -536,7 +538,12 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type,
>         if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ))
>                 return false;
>
> -       if (OP_IS_STORE(type) && !(info->type & HW_BRK_TYPE_WRITE))
> +       /*
> +        * The Cache Management instructions other than dcbz never
> +        * cause a match. i.e. if type is CACHEOP, the instruction
> +        * is dcbz, and dcbz is treated as Store.
> +        */
> +       if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE))
>                 return false;
This change seems seperate to this commit?
>
>         if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL))
> @@ -553,7 +560,8 @@ static bool check_dawrx_constraints(struct pt_regs *regs, int type,
>   * including extraneous exception. Otherwise return false.
>   */
>  static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr,
> -                             int type, int size, struct arch_hw_breakpoint *info)
> +                             unsigned long ea, int type, int size,
> +                             struct arch_hw_breakpoint *info)
>  {
>         bool in_user_range = dar_in_user_range(regs->dar, info);
>         bool dawrx_constraints;
> @@ -569,11 +577,10 @@ static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr,
>         }
>
>         if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) {
> -               if (in_user_range)
> -                       return true;
> -
> -               if (dar_in_hw_range(regs->dar, info)) {
> -                       info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
> +               if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +                       if (dar_in_hw_range(regs->dar, info))
> +                               return true;
> +               } else {
>                         return true;
I think this would be clearer as:
        if (cpu_has_feature(CPU_FTR_ARCH_31) &&
!(dar_in_hw_range(regs->dar, info)))
            return false;
        else
            return true;

>                 }
>                 return false;
> @@ -581,10 +588,20 @@ static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr,
>
>         dawrx_constraints = check_dawrx_constraints(regs, type, info);
>
> -       if (dar_user_range_overlaps(regs->dar, size, info))
> +       if (type == UNKNOWN) {
> +               if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> +                       if (dar_in_hw_range(regs->dar, info))
> +                               return dawrx_constraints;
> +               } else {
> +                       return dawrx_constraints;
> +               }
> +               return false;
> +       }
Similar thing here, it could be:
        if ((cpu_has_feature(CPU_FTR_ARCH_31)) &&
!(dar_in_hw_range(regs->dar, info)))
            return false;
        else
            return dawrx_constraints;
> +
> +       if (ea_user_range_overlaps(ea, size, info))
>                 return dawrx_constraints;
>
> -       if (dar_hw_range_overlaps(regs->dar, size, info)) {
> +       if (ea_hw_range_overlaps(ea, size, info)) {
>                 if (dawrx_constraints) {
>                         info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
>                         return true;
> @@ -593,8 +610,17 @@ static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr,
>         return false;
>  }
>
> +static int cache_op_size(void)
> +{
> +#ifdef __powerpc64__
> +       return ppc64_caches.l1d.block_size;
> +#else
> +       return L1_CACHE_BYTES;
> +#endif
> +}
> +
>  static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
> -                            int *type, int *size, bool *larx_stcx)
> +                            int *type, int *size, unsigned long *ea)
>  {
>         struct instruction_op op;
>
> @@ -602,16 +628,23 @@ static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
>                 return;
>
>         analyse_instr(&op, regs, *instr);
> -
> -       /*
> -        * Set size = 8 if analyse_instr() fails. If it's a userspace
> -        * watchpoint(valid or extraneous), we can notify user about it.
> -        * If it's a kernel watchpoint, instruction  emulation will fail
> -        * in stepping_handler() and watchpoint will be disabled.
> -        */
>         *type = GETTYPE(op.type);
> -       *size = !(*type == UNKNOWN) ? GETSIZE(op.type) : 8;
> -       *larx_stcx = (*type == LARX || *type == STCX);
> +       *ea = op.ea;
> +#ifdef __powerpc64__
> +       if (!(regs->msr & MSR_64BIT))
> +               *ea &= 0xffffffffUL;
> +#endif
> +
> +       *size = GETSIZE(op.type);
> +       if (*type == CACHEOP) {
> +               *size = cache_op_size();
> +               *ea &= ~(*size - 1);
> +       }
Again related to CACHEOP, should these changes be mentioned in the
commit message?
> +}
> +
> +static bool is_larx_stcx_instr(int type)
> +{
> +       return type == LARX || type == STCX;
>  }
>
>  /*
> @@ -678,7 +711,7 @@ int hw_breakpoint_handler(struct die_args *args)
>         struct ppc_inst instr = ppc_inst(0);
>         int type = 0;
>         int size = 0;
> -       bool larx_stcx = false;
> +       unsigned long ea;
>
>         /* Disable breakpoints during exception handling */
>         hw_breakpoint_disable();
> @@ -692,7 +725,7 @@ int hw_breakpoint_handler(struct die_args *args)
>         rcu_read_lock();
>
>         if (!IS_ENABLED(CONFIG_PPC_8xx))
> -               get_instr_detail(regs, &instr, &type, &size, &larx_stcx);
> +               get_instr_detail(regs, &instr, &type, &size, &ea);
>
>         for (i = 0; i < nr_wp_slots(); i++) {
>                 bp[i] = __this_cpu_read(bp_per_reg[i]);
> @@ -702,7 +735,7 @@ int hw_breakpoint_handler(struct die_args *args)
>                 info[i] = counter_arch_bp(bp[i]);
>                 info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
>
> -               if (check_constraints(regs, instr, type, size, info[i])) {
> +               if (check_constraints(regs, instr, ea, type, size, info[i])) {
>                         if (!IS_ENABLED(CONFIG_PPC_8xx) &&
>                             ppc_inst_equal(instr, ppc_inst(0))) {
>                                 handler_error(bp[i], info[i]);
> @@ -744,7 +777,7 @@ int hw_breakpoint_handler(struct die_args *args)
>         }
>
>         if (!IS_ENABLED(CONFIG_PPC_8xx)) {
> -               if (larx_stcx) {
> +               if (is_larx_stcx_instr(type)) {
>                         for (i = 0; i < nr_wp_slots(); i++) {
>                                 if (!hit[i])
>                                         continue;
> --
> 2.26.2
>

^ permalink raw reply

* Re: [PATCH 09/15] powerpc/powernv/sriov: Factor out M64 BAR setup
From: Alexey Kardashevskiy @ 2020-07-15  2:09 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev
In-Reply-To: <20200710052340.737567-10-oohall@gmail.com>



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> The sequence required to use the single PE BAR mode is kinda janky and
> requires a little explanation. The API was designed with P7-IOC style
> windows where the setup process is something like:
> 
> 1. Configure the window start / end address
> 2. Enable the window
> 3. Map the segments of each window to the PE
> 
> For Single PE BARs the process is:
> 
> 1. Set the PE for segment zero on a disabled window
> 2. Set the range
> 3. Enable the window
> 
> Move the OPAL calls into their own helper functions where the quirks can be
> contained.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>


I'd use "segmented" instead of "accordion". Otherwise,

Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>




> ---
>  arch/powerpc/platforms/powernv/pci-sriov.c | 132 ++++++++++++++++-----
>  1 file changed, 103 insertions(+), 29 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
> index e4c65cb49757..d53a85ccb538 100644
> --- a/arch/powerpc/platforms/powernv/pci-sriov.c
> +++ b/arch/powerpc/platforms/powernv/pci-sriov.c
> @@ -320,6 +320,102 @@ static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
>  	return 0;
>  }
>  
> +
> +/*
> + * PHB3 and beyond support "accordion" windows. The window's address range
> + * is subdivided into phb->ioda.total_pe_num segments and there's a 1-1
> + * mapping between PEs and segments.
> + *
> + * They're called that because as the window size changes the segment sizes
> + * change with it. Sort of like an accordion, sort of.
> + */
> +static int64_t pnv_ioda_map_m64_accordion(struct pnv_phb *phb,
> +					  int window_id,
> +					  resource_size_t start,
> +					  resource_size_t size)
> +{
> +	int64_t rc;
> +
> +	rc = opal_pci_set_phb_mem_window(phb->opal_id,
> +					 OPAL_M64_WINDOW_TYPE,
> +					 window_id,
> +					 start,
> +					 0, /* unused */
> +					 size);
> +	if (rc)
> +		goto out;
> +
> +	rc = opal_pci_phb_mmio_enable(phb->opal_id,
> +				      OPAL_M64_WINDOW_TYPE,
> +				      window_id,
> +				      OPAL_ENABLE_M64_SPLIT);
> +out:
> +	if (rc)
> +		pr_err("Failed to map M64 window #%d: %lld\n", window_id, rc);
> +
> +	return rc;
> +}
> +
> +static int64_t pnv_ioda_map_m64_single(struct pnv_phb *phb,
> +				       int pe_num,
> +				       int window_id,
> +				       resource_size_t start,
> +				       resource_size_t size)
> +{
> +	int64_t rc;
> +
> +	/*
> +	 * The API for setting up m64 mmio windows seems to have been designed
> +	 * with P7-IOC in mind. For that chip each M64 BAR (window) had a fixed
> +	 * split of 8 equally sized segments each of which could individually
> +	 * assigned to a PE.
> +	 *
> +	 * The problem with this is that the API doesn't have any way to
> +	 * communicate the number of segments we want on a BAR. This wasn't
> +	 * a problem for p7-ioc since you didn't have a choice, but the
> +	 * single PE windows added in PHB3 don't map cleanly to this API.
> +	 *
> +	 * As a result we've got this slightly awkward process where we
> +	 * call opal_pci_map_pe_mmio_window() to put the single in single
> +	 * PE mode, and set the PE for the window before setting the address
> +	 * bounds. We need to do it this way because the single PE windows
> +	 * for PHB3 have different alignment requirements on PHB3.
> +	 */
> +	rc = opal_pci_map_pe_mmio_window(phb->opal_id,
> +					 pe_num,
> +					 OPAL_M64_WINDOW_TYPE,
> +					 window_id,
> +					 0);
> +	if (rc)
> +		goto out;
> +
> +	/*
> +	 * NB: In single PE mode the window needs to be aligned to 32MB
> +	 */
> +	rc = opal_pci_set_phb_mem_window(phb->opal_id,
> +					 OPAL_M64_WINDOW_TYPE,
> +					 window_id,
> +					 start,
> +					 0, /* ignored by FW, m64 is 1-1 */
> +					 size);
> +	if (rc)
> +		goto out;
> +
> +	/*
> +	 * Now actually enable it. We specified the BAR should be in "non-split"
> +	 * mode so FW will validate that the BAR is in single PE mode.
> +	 */
> +	rc = opal_pci_phb_mmio_enable(phb->opal_id,
> +				      OPAL_M64_WINDOW_TYPE,
> +				      window_id,
> +				      OPAL_ENABLE_M64_NON_SPLIT);
> +out:
> +	if (rc)
> +		pr_err("Error mapping single PE BAR\n");
> +
> +	return rc;
> +}
> +
>  static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  {
>  	struct pnv_iov_data   *iov;
> @@ -330,7 +426,6 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  	int64_t                rc;
>  	int                    total_vfs;
>  	resource_size_t        size, start;
> -	int                    pe_num;
>  	int                    m64_bars;
>  
>  	phb = pci_bus_to_pnvhb(pdev->bus);
> @@ -359,49 +454,28 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  			} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
>  			set_bit(win, iov->used_m64_bar_mask);
>  
> +
>  			if (iov->m64_single_mode) {
>  				size = pci_iov_resource_size(pdev,
>  							PCI_IOV_RESOURCES + i);
>  				start = res->start + size * j;
> +				rc = pnv_ioda_map_m64_single(phb, win,
> +							     iov->pe_num_map[j],
> +							     start,
> +							     size);
>  			} else {
>  				size = resource_size(res);
>  				start = res->start;
> -			}
>  
> -			/* Map the M64 here */
> -			if (iov->m64_single_mode) {
> -				pe_num = iov->pe_num_map[j];
> -				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
> -						pe_num, OPAL_M64_WINDOW_TYPE,
> -						win, 0);
> +				rc = pnv_ioda_map_m64_accordion(phb, win, start,
> +								size);
>  			}
>  
> -			rc = opal_pci_set_phb_mem_window(phb->opal_id,
> -						 OPAL_M64_WINDOW_TYPE,
> -						 win,
> -						 start,
> -						 0, /* unused */
> -						 size);
> -
> -
>  			if (rc != OPAL_SUCCESS) {
>  				dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
>  					win, rc);
>  				goto m64_failed;
>  			}
> -
> -			if (iov->m64_single_mode)
> -				rc = opal_pci_phb_mmio_enable(phb->opal_id,
> -				     OPAL_M64_WINDOW_TYPE, win, 2);
> -			else
> -				rc = opal_pci_phb_mmio_enable(phb->opal_id,
> -				     OPAL_M64_WINDOW_TYPE, win, 1);
> -
> -			if (rc != OPAL_SUCCESS) {
> -				dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
> -					win, rc);
> -				goto m64_failed;
> -			}
>  		}
>  	}
>  	return 0;
> 

-- 
Alexey

^ permalink raw reply

* Re: [RFC PATCH 00/35] Move all PCIBIOS* definitions into arch/x86
From: Benjamin Herrenschmidt @ 2020-07-15  2:14 UTC (permalink / raw)
  To: Kjetil Oftedal, Bjorn Helgaas
  Cc: linux-pci, Keith Busch, Paul Mackerras, sparclinux, Toan Le,
	Greg Ungerer, Marek Vasut, Rob Herring, Lorenzo Pieralisi,
	Sagi Grimberg, Russell King, Ley Foon Tan, Christoph Hellwig,
	Geert Uytterhoeven, Kevin Hilman, Jakub Kicinski, Matt Turner,
	linux-kernel-mentees, Guenter Roeck, Arnd Bergmann, Ray Jui,
	Jens Axboe, Ivan Kokshaysky, Shuah Khan, bjorn, Boris Ostrovsky,
	Richard Henderson, Juergen Gross, Bjorn Helgaas,
	Thomas Bogendoerfer, Scott Branden, Jingoo Han,
	Saheed O. Bolarinwa, linux-kernel@vger.kernel.org, Philipp Zabel,
	Greg Kroah-Hartman, Gustavo Pimentel, linuxppc-dev,
	David S. Miller, Heiner Kallweit
In-Reply-To: <CALMQjD9OVTbLVPGX-9+GDekZ02Wsqdz57-k1uCBMXC7cT3K_7w@mail.gmail.com>

On Tue, 2020-07-14 at 23:02 +0200, Kjetil Oftedal wrote:
> > 
> > > For b), it might be nice to also change other aspects of the
> > > interface, e.g. passing a pci_host_bridge pointer plus bus number
> > > instead of a pci_bus pointer, or having the callback in the
> > > pci_host_bridge structure.
> > 
> > I like this idea a lot, too.  I think the fact that
> > pci_bus_read_config_word() requires a pci_bus * complicates things in
> > a few places.
> > 
> > I think it's completely separate, as you say, and we should defer it
> > for now because even part a) is a lot of work.  I added it to my list
> > of possible future projects.
> > 
> 
> What about strange PCI devices such as Non-Transparent bridges?
> They will require their own PCI Config space accessors that is not
> connected to a host bridge if one wants to do some sort of
> punch-through enumeration.
> I guess the kernel doesn't care much about them?

Well, today they would require a pci_bus anyway.. . so if you want to do
that sort of funny trick you may as well create a "virtual" host bridge.

Cheers,
Ben.



^ permalink raw reply

* Re: [RFC PATCH 00/35] Move all PCIBIOS* definitions into arch/x86
From: Benjamin Herrenschmidt @ 2020-07-15  2:12 UTC (permalink / raw)
  To: Bjorn Helgaas, Arnd Bergmann
  Cc: linux-pci, Keith Busch, Paul Mackerras, sparclinux, Toan Le,
	Greg Ungerer, Marek Vasut, Rob Herring, Lorenzo Pieralisi,
	Sagi Grimberg, Russell King, Ley Foon Tan, Christoph Hellwig,
	Geert Uytterhoeven, Kevin Hilman, Jakub Kicinski, Matt Turner,
	linux-kernel-mentees, Guenter Roeck, Ray Jui, Jens Axboe,
	Ivan Kokshaysky, Shuah Khan, bjorn, Boris Ostrovsky,
	Richard Henderson, Juergen Gross, Bjorn Helgaas,
	Thomas Bogendoerfer, Scott Branden, Jingoo Han,
	Saheed O. Bolarinwa, linux-kernel@vger.kernel.org, Philipp Zabel,
	Greg Kroah-Hartman, Gustavo Pimentel, linuxppc-dev,
	David S. Miller, Heiner Kallweit
In-Reply-To: <20200714184550.GA397277@bjorn-Precision-5520>

On Tue, 2020-07-14 at 13:45 -0500, Bjorn Helgaas wrote:
> 
> > fail for valid arguments on a valid pci_device* ?
> 
> I really like this idea.
> 
> pci_write_config_*() has one return value, and only 100ish of 2500
> callers check for errors.  It's sometimes possible for config
> accessors to detect PCI errors and return failure, e.g., device was
> removed or didn't respond, but most of them don't, and detecting
> these
> errors is not really that valuable.
> 
> pci_read_config_*() is much more interesting because it returns two
> things, the function return value and the value read from the PCI
> device, and it's complicated to check both. 

  .../...

I agree. It's a mess at the moment.

We have separate mechanism to convey PCI errors (among other things the
channel state) which should apply to config space when detection is
possible.

I think returning all 1's is the right thing to do here and avoids odd
duplicate error detection logic which I bet you is never properly
tested.

> > For b), it might be nice to also change other aspects of the
> > interface, e.g. passing a pci_host_bridge pointer plus bus number
> > instead of a pci_bus pointer, or having the callback in the
> > pci_host_bridge structure.
> 
> I like this idea a lot, too.  I think the fact that
> pci_bus_read_config_word() requires a pci_bus * complicates things in
> a few places.
> 
> I think it's completely separate, as you say, and we should defer it
> for now because even part a) is a lot of work.  I added it to my list
> of possible future projects.

Agreed on both points.

Cheers,
Ben.



^ permalink raw reply

* Re: [PATCH 08/15] powerpc/powernv/sriov: Simplify used window tracking
From: Oliver O'Halloran @ 2020-07-15  1:41 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: linuxppc-dev
In-Reply-To: <86273d91-6c38-6d63-4f5b-1ed0d619f465@ozlabs.ru>

On Wed, Jul 15, 2020 at 11:34 AM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
> On 10/07/2020 15:23, Oliver O'Halloran wrote:
> > diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> > index 0156d7d17f7d..58c97e60c3db 100644
> > --- a/arch/powerpc/platforms/powernv/pci.h
> > +++ b/arch/powerpc/platforms/powernv/pci.h
> > @@ -243,8 +243,11 @@ struct pnv_iov_data {
> >       /* Did we map the VF BARs with single-PE IODA BARs? */
> >       bool    m64_single_mode;
> >
> > -     int     (*m64_map)[PCI_SRIOV_NUM_BARS];
> > -#define IODA_INVALID_M64        (-1)
> > +     /*
> > +      * Bit mask used to track which m64 windows that we used to map the
>
>
> Language question: either "which" or "that" but both?

Uhhhh... I don't speak english

^ permalink raw reply

* Re: [PATCH 03/15] powerpc/powernv/pci: Add explicit tracking of the DMA setup state
From: Oliver O'Halloran @ 2020-07-15  1:38 UTC (permalink / raw)
  To: Alexey Kardashevskiy; +Cc: linuxppc-dev, Cédric Le Goater
In-Reply-To: <34f7eea2-4ace-9931-7b5f-98ec159f3532@ozlabs.ru>

On Tue, Jul 14, 2020 at 5:21 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
>
> On 14/07/2020 15:58, Oliver O'Halloran wrote:
> > On Tue, Jul 14, 2020 at 3:37 PM Alexey Kardashevskiy <aik@ozlabs.ru> wrote:
> >>
> >> On 10/07/2020 15:23, Oliver O'Halloran wrote:
> >>> This also means the only remaining user of the old "DMA Weight" code is
> >>> the IODA1 DMA setup code that it was originally added for, which is good.
> >>
> >>
> >> Is ditching IODA1 in the plan? :)
> >
> > That or separating out the pci_controller_ops for IODA1 and IODA2 so
> > we can stop any IODA2 specific changes from breaking it.
>
> Is IODA1 tested at all these days? Or, is anyone running upstream
> kernels anywhere and keeps shouting when it does not work on IODA1? Thanks,

Cedric has a P7 with OPAL. That's probably the one left though.

^ permalink raw reply

* Re: [PATCH 08/15] powerpc/powernv/sriov: Simplify used window tracking
From: Alexey Kardashevskiy @ 2020-07-15  1:34 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev
In-Reply-To: <20200710052340.737567-9-oohall@gmail.com>



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> No need for the multi-dimensional arrays, just use a bitmap.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
> ---
>  arch/powerpc/platforms/powernv/pci-sriov.c | 48 +++++++---------------
>  arch/powerpc/platforms/powernv/pci.h       |  7 +++-
>  2 files changed, 20 insertions(+), 35 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
> index 216ceeff69b0..e4c65cb49757 100644
> --- a/arch/powerpc/platforms/powernv/pci-sriov.c
> +++ b/arch/powerpc/platforms/powernv/pci-sriov.c
> @@ -303,28 +303,20 @@ static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
>  {
>  	struct pnv_iov_data   *iov;
>  	struct pnv_phb        *phb;
> -	int                    i, j;
> -	int                    m64_bars;
> +	int window_id;
>  
>  	phb = pci_bus_to_pnvhb(pdev->bus);
>  	iov = pnv_iov_get(pdev);
>  
> -	if (iov->m64_single_mode)
> -		m64_bars = num_vfs;
> -	else
> -		m64_bars = 1;
> +	for_each_set_bit(window_id, iov->used_m64_bar_mask, 64) {
> +		opal_pci_phb_mmio_enable(phb->opal_id,
> +					 OPAL_M64_WINDOW_TYPE,
> +					 window_id,
> +					 0);
>  
> -	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
> -		for (j = 0; j < m64_bars; j++) {
> -			if (iov->m64_map[j][i] == IODA_INVALID_M64)
> -				continue;
> -			opal_pci_phb_mmio_enable(phb->opal_id,
> -				OPAL_M64_WINDOW_TYPE, iov->m64_map[j][i], 0);
> -			clear_bit(iov->m64_map[j][i], &phb->ioda.m64_bar_alloc);
> -			iov->m64_map[j][i] = IODA_INVALID_M64;
> -		}
> +		clear_bit(window_id, &phb->ioda.m64_bar_alloc);
> +	}
>  
> -	kfree(iov->m64_map);
>  	return 0;
>  }
>  
> @@ -350,23 +342,14 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  	else
>  		m64_bars = 1;
>  
> -	iov->m64_map = kmalloc_array(m64_bars,
> -				     sizeof(*iov->m64_map),
> -				     GFP_KERNEL);
> -	if (!iov->m64_map)
> -		return -ENOMEM;
> -	/* Initialize the m64_map to IODA_INVALID_M64 */
> -	for (i = 0; i < m64_bars ; i++)
> -		for (j = 0; j < PCI_SRIOV_NUM_BARS; j++)
> -			iov->m64_map[i][j] = IODA_INVALID_M64;
> -
> -
>  	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
>  		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>  		if (!res->flags || !res->parent)
>  			continue;
>  
>  		for (j = 0; j < m64_bars; j++) {
> +
> +			/* allocate a window ID for this BAR */
>  			do {
>  				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
>  						phb->ioda.m64_bar_idx + 1, 0);
> @@ -374,8 +357,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  				if (win >= phb->ioda.m64_bar_idx + 1)
>  					goto m64_failed;
>  			} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
> -
> -			iov->m64_map[j][i] = win;
> +			set_bit(win, iov->used_m64_bar_mask);
>  
>  			if (iov->m64_single_mode) {
>  				size = pci_iov_resource_size(pdev,
> @@ -391,12 +373,12 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  				pe_num = iov->pe_num_map[j];
>  				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
>  						pe_num, OPAL_M64_WINDOW_TYPE,
> -						iov->m64_map[j][i], 0);
> +						win, 0);
>  			}
>  
>  			rc = opal_pci_set_phb_mem_window(phb->opal_id,
>  						 OPAL_M64_WINDOW_TYPE,
> -						 iov->m64_map[j][i],
> +						 win,
>  						 start,
>  						 0, /* unused */
>  						 size);
> @@ -410,10 +392,10 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
>  
>  			if (iov->m64_single_mode)
>  				rc = opal_pci_phb_mmio_enable(phb->opal_id,
> -				     OPAL_M64_WINDOW_TYPE, iov->m64_map[j][i], 2);
> +				     OPAL_M64_WINDOW_TYPE, win, 2);
>  			else
>  				rc = opal_pci_phb_mmio_enable(phb->opal_id,
> -				     OPAL_M64_WINDOW_TYPE, iov->m64_map[j][i], 1);
> +				     OPAL_M64_WINDOW_TYPE, win, 1);
>  
>  			if (rc != OPAL_SUCCESS) {
>  				dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
> diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
> index 0156d7d17f7d..58c97e60c3db 100644
> --- a/arch/powerpc/platforms/powernv/pci.h
> +++ b/arch/powerpc/platforms/powernv/pci.h
> @@ -243,8 +243,11 @@ struct pnv_iov_data {
>  	/* Did we map the VF BARs with single-PE IODA BARs? */
>  	bool    m64_single_mode;
>  
> -	int     (*m64_map)[PCI_SRIOV_NUM_BARS];
> -#define IODA_INVALID_M64        (-1)
> +	/*
> +	 * Bit mask used to track which m64 windows that we used to map the


Language question: either "which" or "that" but both?


> +	 * SR-IOV BARs for this device.
> +	 */
> +	DECLARE_BITMAP(used_m64_bar_mask, 64);

64 here is the maximum number of M64's (which is 16 at the moment)? Can
we define this 64 somehow (appears twice in this patch alone)?

Anyway, the change is correct.

Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>



>  
>  	/*
>  	 * If we map the SR-IOV BARs with a segmented window then
> 

-- 
Alexey

^ permalink raw reply

* Re: [PATCH v8 5/8] powerpc/vdso: Prepare for switching VDSO to generic C implementation.
From: Michael Ellerman @ 2020-07-15  1:04 UTC (permalink / raw)
  To: Christophe Leroy, Benjamin Herrenschmidt, Paul Mackerras, nathanl
  Cc: linux-arch, arnd, linux-kernel, Tulio Magno Quites Machado Filho,
	luto, tglx, vincenzo.frascino, linuxppc-dev
In-Reply-To: <2a67c333893454868bbfda773ba4b01c20272a5d.1588079622.git.christophe.leroy@c-s.fr>

Christophe Leroy <christophe.leroy@c-s.fr> writes:
> Prepare for switching VDSO to generic C implementation in following
> patch. Here, we:
> - Modify __get_datapage() to take an offset
> - Prepare the helpers to call the C VDSO functions
> - Prepare the required callbacks for the C VDSO functions
> - Prepare the clocksource.h files to define VDSO_ARCH_CLOCKMODES
> - Add the C trampolines to the generic C VDSO functions
>
> powerpc is a bit special for VDSO as well as system calls in the
> way that it requires setting CR SO bit which cannot be done in C.
> Therefore, entry/exit needs to be performed in ASM.
>
> Implementing __arch_get_vdso_data() would clobber the link register,
> requiring the caller to save it. As the ASM calling function already
> has to set a stack frame and saves the link register before calling
> the C vdso function, retriving the vdso data pointer there is lighter.
...

> diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h
> new file mode 100644
> index 000000000000..4452897f9bd8
> --- /dev/null
> +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h
> @@ -0,0 +1,175 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __ASM_VDSO_GETTIMEOFDAY_H
> +#define __ASM_VDSO_GETTIMEOFDAY_H
> +
> +#include <asm/ptrace.h>
> +
> +#ifdef __ASSEMBLY__
> +
> +.macro cvdso_call funct
> +  .cfi_startproc
> +	PPC_STLU	r1, -STACK_FRAME_OVERHEAD(r1)
> +	mflr		r0
> +  .cfi_register lr, r0
> +	PPC_STL		r0, STACK_FRAME_OVERHEAD + PPC_LR_STKOFF(r1)

This doesn't work for me on ppc64(le) with glibc.

glibc doesn't create a stack frame before making the VDSO call, so the
store of r0 (LR) goes into the caller's frame, corrupting the saved LR,
leading to an infinite loop.

This is an example from a statically built program that calls
clock_gettime():

0000000010030cb0 <__clock_gettime>:
    10030cb0:   0e 10 40 3c     lis     r2,4110
    10030cb4:   00 7a 42 38     addi    r2,r2,31232
    10030cb8:   a6 02 08 7c     mflr    r0
    10030cbc:   ff ff 22 3d     addis   r9,r2,-1
    10030cc0:   58 6d 29 39     addi    r9,r9,27992
    10030cc4:   f0 ff c1 fb     std     r30,-16(r1)			<-- redzone store
    10030cc8:   78 23 9e 7c     mr      r30,r4
    10030ccc:   f8 ff e1 fb     std     r31,-8(r1)			<-- redzone store
    10030cd0:   78 1b 7f 7c     mr      r31,r3
    10030cd4:   10 00 01 f8     std     r0,16(r1)			<-- save LR to caller's frame
    10030cd8:   00 00 09 e8     ld      r0,0(r9)
    10030cdc:   00 00 20 2c     cmpdi   r0,0
    10030ce0:   50 00 82 41     beq     10030d30 <__clock_gettime+0x80>
    10030ce4:   a6 03 09 7c     mtctr   r0
    10030ce8:   21 04 80 4e     bctrl					<-- vdso call
    10030cec:   26 00 00 7c     mfcr    r0
    10030cf0:   00 10 09 74     andis.  r9,r0,4096
    10030cf4:   78 1b 69 7c     mr      r9,r3
    10030cf8:   28 00 82 40     bne     10030d20 <__clock_gettime+0x70>
    10030cfc:   b4 07 23 7d     extsw   r3,r9
    10030d00:   10 00 01 e8     ld      r0,16(r1)			<-- load saved LR, since clobbered by the VDSO
    10030d04:   f0 ff c1 eb     ld      r30,-16(r1)
    10030d08:   f8 ff e1 eb     ld      r31,-8(r1)
    10030d0c:   a6 03 08 7c     mtlr    r0				<-- restore LR
    10030d10:   20 00 80 4e     blr					<-- jumps to 10030cec


I'm kind of confused how it worked for you on 32-bit.

There's also no code to load/restore the TOC pointer on BE, which I
think we'll need to handle.

cheers

^ permalink raw reply

* Re: [PATCH v3 1/9] powerpc/watchpoint: Fix 512 byte boundary limit
From: Jordan Niethe @ 2020-07-15  1:01 UTC (permalink / raw)
  To: Ravi Bangoria
  Cc: Christophe Leroy, apopple, mikey, miltonm, peterz, oleg,
	Nicholas Piggin, linux-kernel, Paul Mackerras, jolsa, fweisbec,
	pedromfc, naveen.n.rao, linuxppc-dev, mingo
In-Reply-To: <20200708045046.135702-2-ravi.bangoria@linux.ibm.com>

On Wed, Jul 8, 2020 at 2:53 PM Ravi Bangoria
<ravi.bangoria@linux.ibm.com> wrote:
>
> Milton Miller reported that we are aligning start and end address to
> wrong size SZ_512M. It should be SZ_512. Fix that.
>
> While doing this change I also found a case where ALIGN() comparison
> fails. Within a given aligned range, ALIGN() of two addresses does not
> match when start address is pointing to the first byte and end address
> is pointing to any other byte except the first one. But that's not true
> for ALIGN_DOWN(). ALIGN_DOWN() of any two addresses within that range
> will always point to the first byte. So use ALIGN_DOWN() instead of
> ALIGN().
>
> Fixes: e68ef121c1f4 ("powerpc/watchpoint: Use builtin ALIGN*() macros")
> Reported-by: Milton Miller <miltonm@us.ibm.com>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
I tested this with the ptrace-hwbreak selftest. Can confirm without
also changing to ALIGN_DOWN() then these tests will fail.
Tested-by: Jordan Niethe <jniethe5@gmail.com>
> ---
>  arch/powerpc/kernel/hw_breakpoint.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
> index 0000daf0e1da..031e6defc08e 100644
> --- a/arch/powerpc/kernel/hw_breakpoint.c
> +++ b/arch/powerpc/kernel/hw_breakpoint.c
> @@ -419,7 +419,7 @@ static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
>         if (dawr_enabled()) {
>                 max_len = DAWR_MAX_LEN;
>                 /* DAWR region can't cross 512 bytes boundary */
> -               if (ALIGN(start_addr, SZ_512M) != ALIGN(end_addr - 1, SZ_512M))
> +               if (ALIGN_DOWN(start_addr, SZ_512) != ALIGN_DOWN(end_addr - 1, SZ_512))
>                         return -EINVAL;
>         } else if (IS_ENABLED(CONFIG_PPC_8xx)) {
>                 /* 8xx can setup a range without limitation */
> --
> 2.26.2
>

^ permalink raw reply

* Re: [PATCH 07/15] powerpc/powernv/sriov: Rename truncate_iov
From: Alexey Kardashevskiy @ 2020-07-15  0:46 UTC (permalink / raw)
  To: Oliver O'Halloran, linuxppc-dev
In-Reply-To: <20200710052340.737567-8-oohall@gmail.com>



On 10/07/2020 15:23, Oliver O'Halloran wrote:
> This prevents SR-IOV being used by making the SR-IOV BAR resources
> unallocatable. Rename it to reflect what it actually does.
> 
> Signed-off-by: Oliver O'Halloran <oohall@gmail.com>


Reviewed-by: Alexey Kardashevskiy <aik@ozlabs.ru>


> ---
>  arch/powerpc/platforms/powernv/pci-sriov.c | 11 ++++++-----
>  1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
> index f4c74ab1284d..216ceeff69b0 100644
> --- a/arch/powerpc/platforms/powernv/pci-sriov.c
> +++ b/arch/powerpc/platforms/powernv/pci-sriov.c
> @@ -155,7 +155,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>  
>  	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
>  	if (!iov)
> -		goto truncate_iov;
> +		goto disable_iov;
>  	pdev->dev.archdata.iov_data = iov;
>  
>  	total_vfs = pci_sriov_get_totalvfs(pdev);
> @@ -170,7 +170,7 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>  			dev_warn(&pdev->dev, "Don't support SR-IOV with"
>  					" non M64 VF BAR%d: %pR. \n",
>  				 i, res);
> -			goto truncate_iov;
> +			goto disable_iov;
>  		}
>  
>  		total_vf_bar_sz += pci_iov_resource_size(pdev,
> @@ -209,7 +209,8 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>  		 * mode is 32MB.
>  		 */
>  		if (iov->m64_single_mode && (size < SZ_32M))
> -			goto truncate_iov;
> +			goto disable_iov;
> +
>  		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
>  		res->end = res->start + size * mul - 1;
>  		dev_dbg(&pdev->dev, "                       %pR\n", res);
> @@ -220,8 +221,8 @@ static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
>  
>  	return;
>  
> -truncate_iov:
> -	/* To save MMIO space, IOV BAR is truncated. */
> +disable_iov:
> +	/* Save ourselves some MMIO space by disabling the unusable BARs */
>  	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
>  		res = &pdev->resource[i + PCI_IOV_RESOURCES];
>  		res->flags = 0;
> 

-- 
Alexey

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox