* Re: [PATCH v4 13/13] mm/debug_vm_pgtable: Avoid none pte in pte_clear_test
From: Anshuman Khandual @ 2020-09-04 4:16 UTC (permalink / raw)
To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <20200902114601.183715-1-aneesh.kumar@linux.ibm.com>
On 09/02/2020 05:16 PM, Aneesh Kumar K.V wrote:
> pte_clear_tests operate on an existing pte entry. Make sure that
> is not a none pte entry.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
> mm/debug_vm_pgtable.c | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 9afa1354326b..c36530c69e33 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -542,9 +542,10 @@ static void __init pgd_populate_tests(struct mm_struct *mm, pgd_t *pgdp,
> #endif /* PAGETABLE_P4D_FOLDED */
>
> static void __init pte_clear_tests(struct mm_struct *mm, pte_t *ptep,
> - unsigned long vaddr)
> + unsigned long pfn, unsigned long vaddr,
> + pgprot_t prot)
> {
> - pte_t pte = ptep_get(ptep);
> + pte_t pte = pfn_pte(pfn, prot);
>
> pr_debug("Validating PTE clear\n");
> pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
> @@ -1049,7 +1050,7 @@ static int __init debug_vm_pgtable(void)
>
> ptl = pte_lockptr(mm, pmdp);
> spin_lock(ptl);
> - pte_clear_tests(mm, ptep, vaddr);
> + pte_clear_tests(mm, ptep, pte_aligned, vaddr, prot);
> pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
> pte_unmap_unlock(ptep, ptl);
>
>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
^ permalink raw reply
* Re: [PATCH v4 05/13] mm/debug_vm_pgtable/savedwrite: Enable savedwrite test with CONFIG_NUMA_BALANCING
From: Anshuman Khandual @ 2020-09-04 4:09 UTC (permalink / raw)
To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <20200902114222.181353-6-aneesh.kumar@linux.ibm.com>
On 09/02/2020 05:12 PM, Aneesh Kumar K.V wrote:
> Saved write support was added to track the write bit of a pte after
> marking the pte protnone. This was done so that AUTONUMA can convert
> a write pte to protnone and still track the old write bit. When converting
> it back we set the pte write bit correctly thereby avoiding a write fault
> again. Hence enable the test only when CONFIG_NUMA_BALANCING is enabled and
> use protnone protflags.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
> mm/debug_vm_pgtable.c | 11 +++++++++--
> 1 file changed, 9 insertions(+), 2 deletions(-)
>
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 4c73e63b4ceb..8704901f6bd8 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -119,10 +119,14 @@ static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
> {
> pte_t pte = pfn_pte(pfn, prot);
>
> + if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
> + return;
> +
> pr_debug("Validating PTE saved write\n");
> WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte))));
> WARN_ON(pte_savedwrite(pte_clear_savedwrite(pte_mk_savedwrite(pte))));
> }
> +
> #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
> {
> @@ -234,6 +238,9 @@ static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
> {
> pmd_t pmd = pfn_pmd(pfn, prot);
>
> + if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
> + return;
> +
> pr_debug("Validating PMD saved write\n");
> WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd))));
> WARN_ON(pmd_savedwrite(pmd_clear_savedwrite(pmd_mk_savedwrite(pmd))));
> @@ -1019,8 +1026,8 @@ static int __init debug_vm_pgtable(void)
> pmd_huge_tests(pmdp, pmd_aligned, prot);
> pud_huge_tests(pudp, pud_aligned, prot);
>
> - pte_savedwrite_tests(pte_aligned, prot);
> - pmd_savedwrite_tests(pmd_aligned, prot);
> + pte_savedwrite_tests(pte_aligned, protnone);
> + pmd_savedwrite_tests(pmd_aligned, protnone);
>
> pte_unmap_unlock(ptep, ptl);
>
>
No more checkpatch.pl warnings.
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
^ permalink raw reply
* Re: [PATCH v4 04/13] mm/debug_vm_pgtables/hugevmap: Use the arch helper to identify huge vmap support.
From: Anshuman Khandual @ 2020-09-04 4:08 UTC (permalink / raw)
To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <20200902114222.181353-5-aneesh.kumar@linux.ibm.com>
On 09/02/2020 05:12 PM, Aneesh Kumar K.V wrote:
> ppc64 supports huge vmap only with radix translation. Hence use arch helper
> to determine the huge vmap support.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
> mm/debug_vm_pgtable.c | 14 ++++++++++++--
> 1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 00649b47f6e0..4c73e63b4ceb 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -28,6 +28,7 @@
> #include <linux/swapops.h>
> #include <linux/start_kernel.h>
> #include <linux/sched/mm.h>
> +#include <linux/io.h>
> #include <asm/pgalloc.h>
> #include <asm/tlbflush.h>
>
> @@ -206,11 +207,12 @@ static void __init pmd_leaf_tests(unsigned long pfn, pgprot_t prot)
> WARN_ON(!pmd_leaf(pmd));
> }
>
> +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
> static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
> {
> pmd_t pmd;
>
> - if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
> + if (!arch_ioremap_pmd_supported())
> return;
>
> pr_debug("Validating PMD huge\n");
> @@ -224,6 +226,9 @@ static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot)
> pmd = READ_ONCE(*pmdp);
> WARN_ON(!pmd_none(pmd));
> }
> +#else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
> +static void __init pmd_huge_tests(pmd_t *pmdp, unsigned long pfn, pgprot_t prot) { }
> +#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
>
> static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
> {
> @@ -320,11 +325,12 @@ static void __init pud_leaf_tests(unsigned long pfn, pgprot_t prot)
> WARN_ON(!pud_leaf(pud));
> }
>
> +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
> static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot)
> {
> pud_t pud;
>
> - if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMAP))
> + if (!arch_ioremap_pud_supported())
> return;
>
> pr_debug("Validating PUD huge\n");
> @@ -338,6 +344,10 @@ static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot)
> pud = READ_ONCE(*pudp);
> WARN_ON(!pud_none(pud));
> }
> +#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
> +static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot) { }
> +#endif /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
> +
> #else /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
> static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot) { }
> static void __init pud_advanced_tests(struct mm_struct *mm,
>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
^ permalink raw reply
* Re: [PATCH v4 03/13] mm/debug_vm_pgtable/ppc64: Avoid setting top bits in radom value
From: Anshuman Khandual @ 2020-09-04 4:03 UTC (permalink / raw)
To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <20200902114222.181353-4-aneesh.kumar@linux.ibm.com>
On 09/02/2020 05:12 PM, Aneesh Kumar K.V wrote:
> ppc64 use bit 62 to indicate a pte entry (_PAGE_PTE). Avoid setting
> that bit in random value.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
> mm/debug_vm_pgtable.c | 13 ++++++++++---
> 1 file changed, 10 insertions(+), 3 deletions(-)
>
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 086309fb9b6f..00649b47f6e0 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -44,10 +44,17 @@
> * entry type. But these bits might affect the ability to clear entries with
> * pxx_clear() because of how dynamic page table folding works on s390. So
> * while loading up the entries do not change the lower 4 bits. It does not
> - * have affect any other platform.
> + * have affect any other platform. Also avoid the 62nd bit on ppc64 that is
> + * used to mark a pte entry.
> */
> -#define S390_MASK_BITS 4
> -#define RANDOM_ORVALUE GENMASK(BITS_PER_LONG - 1, S390_MASK_BITS)
> +#define S390_SKIP_MASK GENMASK(3, 0)
> +#if __BITS_PER_LONG == 64
> +#define PPC64_SKIP_MASK GENMASK(62, 62)
> +#else
> +#define PPC64_SKIP_MASK 0x0
> +#endif
> +#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK)
> +#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)
> #define RANDOM_NZVALUE GENMASK(7, 0)
>
> static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
^ permalink raw reply
* Re: [PATCH v4 08/13] mm/debug_vm_pgtable/locks: Move non page table modifying test together
From: Anshuman Khandual @ 2020-09-04 3:58 UTC (permalink / raw)
To: Aneesh Kumar K.V, linux-mm, akpm; +Cc: linuxppc-dev
In-Reply-To: <20200902114222.181353-9-aneesh.kumar@linux.ibm.com>
On 09/02/2020 05:12 PM, Aneesh Kumar K.V wrote:
> This will help in adding proper locks in a later patch
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> ---
> mm/debug_vm_pgtable.c | 51 ++++++++++++++++++++++++-------------------
> 1 file changed, 28 insertions(+), 23 deletions(-)
>
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index de333871f407..f59cf6a9b05e 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -986,7 +986,7 @@ static int __init debug_vm_pgtable(void)
> p4dp = p4d_alloc(mm, pgdp, vaddr);
> pudp = pud_alloc(mm, p4dp, vaddr);
> pmdp = pmd_alloc(mm, pudp, vaddr);
> - ptep = pte_alloc_map_lock(mm, pmdp, vaddr, &ptl);
> + ptep = pte_alloc_map(mm, pmdp, vaddr);
>
> /*
> * Save all the page table page addresses as the page table
> @@ -1006,33 +1006,12 @@ static int __init debug_vm_pgtable(void)
> p4d_basic_tests(p4d_aligned, prot);
> pgd_basic_tests(pgd_aligned, prot);
>
> - pte_clear_tests(mm, ptep, vaddr);
> - pmd_clear_tests(mm, pmdp);
> - pud_clear_tests(mm, pudp);
> - p4d_clear_tests(mm, p4dp);
> - pgd_clear_tests(mm, pgdp);
> -
> - pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
> - pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot);
> - pud_advanced_tests(mm, vma, pudp, pud_aligned, vaddr, prot);
> - hugetlb_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
> -
> pmd_leaf_tests(pmd_aligned, prot);
> pud_leaf_tests(pud_aligned, prot);
>
> - pmd_huge_tests(pmdp, pmd_aligned, prot);
> - pud_huge_tests(pudp, pud_aligned, prot);
> -
> pte_savedwrite_tests(pte_aligned, protnone);
> pmd_savedwrite_tests(pmd_aligned, protnone);
>
> - pte_unmap_unlock(ptep, ptl);
> -
> - pmd_populate_tests(mm, pmdp, saved_ptep);
> - pud_populate_tests(mm, pudp, saved_pmdp);
> - p4d_populate_tests(mm, p4dp, saved_pudp);
> - pgd_populate_tests(mm, pgdp, saved_p4dp);
> -
> pte_special_tests(pte_aligned, prot);
> pte_protnone_tests(pte_aligned, protnone);
> pmd_protnone_tests(pmd_aligned, protnone);
> @@ -1050,11 +1029,37 @@ static int __init debug_vm_pgtable(void)
> pmd_swap_tests(pmd_aligned, prot);
>
> swap_migration_tests();
> - hugetlb_basic_tests(pte_aligned, prot);
>
> pmd_thp_tests(pmd_aligned, prot);
> pud_thp_tests(pud_aligned, prot);
>
> + hugetlb_basic_tests(pte_aligned, prot);
> +
> + pte_clear_tests(mm, ptep, vaddr);
> + pmd_clear_tests(mm, pmdp);
> + pud_clear_tests(mm, pudp);
> + p4d_clear_tests(mm, p4dp);
> + pgd_clear_tests(mm, pgdp);
> +
> + ptl = pte_lockptr(mm, pmdp);
> + spin_lock(ptl);
> +
> + pte_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
> + pmd_advanced_tests(mm, vma, pmdp, pmd_aligned, vaddr, prot);
> + pud_advanced_tests(mm, vma, pudp, pud_aligned, vaddr, prot);
> + hugetlb_advanced_tests(mm, vma, ptep, pte_aligned, vaddr, prot);
> +
> +
> + pmd_huge_tests(pmdp, pmd_aligned, prot);
> + pud_huge_tests(pudp, pud_aligned, prot);
> +
> + pte_unmap_unlock(ptep, ptl);
> +
> + pmd_populate_tests(mm, pmdp, saved_ptep);
> + pud_populate_tests(mm, pudp, saved_pmdp);
> + p4d_populate_tests(mm, p4dp, saved_pudp);
> + pgd_populate_tests(mm, pgdp, saved_p4dp);
> +
> p4d_free(mm, saved_p4dp);
> pud_free(mm, saved_pudp);
> pmd_free(mm, saved_pmdp);
>
Patches applied till here [i.e PATCH_1..PATCH_8] does not boot on arm64
platform, which might create a potential git bisect problem later on.
static void __init pte_advanced_tests(struct mm_struct *mm,
struct vm_area_struct *vma, pte_t *ptep,
unsigned long pfn, unsigned long vaddr,
pgprot_t prot)
{
pte_t pte = pfn_pte(pfn, prot);
/*
* Architectures optimize set_pte_at by avoiding TLB flush.
* This requires set_pte_at to be not used to update an
* existing pte entry. Clear pte before we do set_pte_at
*/
pr_debug("Validating PTE advanced\n");
pte = pfn_pte(pfn, prot);
set_pte_at(mm, vaddr, ptep, pte); ----------> Crashed here.
............
............
[ 19.031831] Unable to handle kernel paging request at virtual address fffffdfffde00028
[ 19.033181] Mem abort info:
[ 19.033627] ESR = 0x96000006
[ 19.034129] EC = 0x25: DABT (current EL), IL = 32 bits
[ 19.035002] SET = 0, FnV = 0
[ 19.035523] EA = 0, S1PTW = 0
[ 19.036054] Data abort info:
[ 19.036538] ISV = 0, ISS = 0x00000006
[ 19.037170] CM = 0, WnR = 0
[ 19.037662] swapper pgtable: 4k pages, 48-bit VAs, pgdp=000000008158b000
[ 19.038749] [fffffdfffde00028] pgd=0000000081d69003, p4d=0000000081d69003, pud=0000000081d6a003, pmd=0000000000000000
[ 19.040560] Internal error: Oops: 96000006 [#1] PREEMPT SMP
[ 19.041467] Modules linked in:
[ 19.041968] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 5.9.0-rc3-00231-gdef09f62540f #62
[ 19.043472] Hardware name: linux,dummy-virt (DT)
[ 19.044292] pstate: 20400005 (nzCv daif +PAN -UAO BTYPE=--)
[ 19.045235] pc : _raw_spin_lock+0x34/0x70
[ 19.045944] lr : debug_vm_pgtable+0x3c8/0x8b8
[ 19.046713] sp : ffff80001219bcf0
[ 19.047295] x29: ffff80001219bcf0 x28: ffff8000114ac000
[ 19.048224] x27: ffff0005df61f080 x26: ffff8000114ac000
[ 19.049148] x25: 0020000000000fd3 x24: 0020000081400fd3
[ 19.050076] x23: 00003df483b17000 x22: ffff0005ddd63e90
[ 19.051011] x21: ffff0005dddc8000 x20: ffff0005ddd5f0e8
[ 19.051937] x19: ffff0005de3718b8 x18: 0000000000000010
[ 19.052846] x17: 00000000dbb99b48 x16: 00000000cc138a43
[ 19.053774] x15: 0000000000000001 x14: 0000000000000002
[ 19.054703] x13: 000000000055e46d x12: fffffe0017577200
[ 19.055630] x11: 0000000000000008 x10: ffff0005fc455210
[ 19.056553] x9 : ffff0005fc455210 x8 : 0000000000000010
[ 19.057484] x7 : ffff8005ead60000 x6 : ffff0005fcfcee00
[ 19.058410] x5 : ffff0005fc455200 x4 : 0000000000000000
[ 19.059344] x3 : fffffdfffde00028 x2 : 0000000000000001
[ 19.060272] x1 : 0000000000000000 x0 : fffffdfffde00028
[ 19.061203] Call trace:
[ 19.061644] _raw_spin_lock+0x34/0x70
[ 19.062289] debug_vm_pgtable+0x3c8/0x8b8
[ 19.063001] do_one_initcall+0x74/0x1cc
[ 19.063680] kernel_init_freeable+0x1d0/0x238
[ 19.064448] kernel_init+0x14/0x118
[ 19.065067] ret_from_fork+0x10/0x34
[ 19.065703] Code: d503201f 52800001 52800022 2a0103e4 (88e47c02)
[ 19.066835] ---[ end trace ff33eeb13d2a13af ]---
^ permalink raw reply
* Re: [PATCH 12/14] x86: remove address space overrides using set_fs()
From: Al Viro @ 2020-09-04 2:55 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-arch, linuxppc-dev, Kees Cook, x86, linux-kernel,
Luis Chamberlain, linux-fsdevel, Linus Torvalds, Alexey Dobriyan
In-Reply-To: <20200903142242.925828-13-hch@lst.de>
On Thu, Sep 03, 2020 at 04:22:40PM +0200, Christoph Hellwig wrote:
> diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
> index c8a85b512796e1..94f7be4971ed04 100644
> --- a/arch/x86/lib/getuser.S
> +++ b/arch/x86/lib/getuser.S
> @@ -35,10 +35,19 @@
> #include <asm/smap.h>
> #include <asm/export.h>
>
> +#ifdef CONFIG_X86_5LEVEL
> +#define LOAD_TASK_SIZE_MINUS_N(n) \
> + ALTERNATIVE "mov $((1 << 47) - 4096 - (n)),%rdx", \
> + "mov $((1 << 56) - 4096 - (n)),%rdx", X86_FEATURE_LA57
> +#else
> +#define LOAD_TASK_SIZE_MINUS_N(n) \
> + mov $(TASK_SIZE_MAX - (n)),%_ASM_DX
> +#endif
Wait a sec... how is that supposed to build with X86_5LEVEL? Do you mean
#define LOAD_TASK_SIZE_MINUS_N(n) \
ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rdx), \
__stringify(mov $((1 << 56) - 4096 - (n)),%rdx), X86_FEATURE_LA57
there?
^ permalink raw reply
* Re: [PATCH v1 09/10] powerpc/pseries/iommu: Make use of DDW even if it does not map the partition
From: Alexey Kardashevskiy @ 2020-09-04 1:00 UTC (permalink / raw)
To: Leonardo Bras, Michael Ellerman, Benjamin Herrenschmidt,
Paul Mackerras, Christophe Leroy, Joel Stanley,
Thiago Jung Bauermann, Ram Pai, Brian King,
Murilo Fossa Vicentini, David Dai
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <deb7163077326399b3a3170ecb777117f7f6737b.camel@gmail.com>
On 02/09/2020 16:11, Leonardo Bras wrote:
> On Mon, 2020-08-31 at 14:35 +1000, Alexey Kardashevskiy wrote:
>>
>> On 29/08/2020 04:36, Leonardo Bras wrote:
>>> On Mon, 2020-08-24 at 15:17 +1000, Alexey Kardashevskiy wrote:
>>>> On 18/08/2020 09:40, Leonardo Bras wrote:
>>>>> As of today, if the biggest DDW that can be created can't map the whole
>>>>> partition, it's creation is skipped and the default DMA window
>>>>> "ibm,dma-window" is used instead.
>>>>>
>>>>> DDW is 16x bigger than the default DMA window,
>>>>
>>>> 16x only under very specific circumstances which are
>>>> 1. phyp
>>>> 2. sriov
>>>> 3. device class in hmc (or what that priority number is in the lpar config).
>>>
>>> Yeah, missing details.
>>>
>>>>> having the same amount of
>>>>> pages, but increasing the page size to 64k.
>>>>> Besides larger DMA window,
>>>>
>>>> "Besides being larger"?
>>>
>>> You are right there.
>>>
>>>>> it performs better for allocations over 4k,
>>>>
>>>> Better how?
>>>
>>> I was thinking for allocations larger than (512 * 4k), since >2
>>> hypercalls are needed here, and for 64k pages would still be just 1
>>> hypercall up to (512 * 64k).
>>> But yeah, not the usual case anyway.
>>
>> Yup.
>>
>>
>>>>> so it would be nice to use it instead.
>>>>
>>>> I'd rather say something like:
>>>> ===
>>>> So far we assumed we can map the guest RAM 1:1 to the bus which worked
>>>> with a small number of devices. SRIOV changes it as the user can
>>>> configure hundreds VFs and since phyp preallocates TCEs and does not
>>>> allow IOMMU pages bigger than 64K, it has to limit the number of TCEs
>>>> per a PE to limit waste of physical pages.
>>>> ===
>>>
>>> I mixed this in my commit message, it looks like this:
>>>
>>> ===
>>> powerpc/pseries/iommu: Make use of DDW for indirect mapping
>>>
>>> So far it's assumed possible to map the guest RAM 1:1 to the bus, which
>>> works with a small number of devices. SRIOV changes it as the user can
>>> configure hundreds VFs and since phyp preallocates TCEs and does not
>>> allow IOMMU pages bigger than 64K, it has to limit the number of TCEs
>>> per a PE to limit waste of physical pages.
>>>
>>> As of today, if the assumed direct mapping is not possible, DDW
>>> creation is skipped and the default DMA window "ibm,dma-window" is used
>>> instead.
>>>
>>> The default DMA window uses 4k pages instead of 64k pages, and since
>>> the amount of pages is the same,
>>
>> Is the amount really the same? I thought you can prioritize some VFs
>> over others (== allocate different number of TCEs). Does it really
>> matter if it is the same?
>
> On a conversation with Travis Pizel, he explained how it's supposed to
> work, and I understood this:
>
> When a VF is created, it will be assigned a capacity, like 4%, 20%, and
> so on. The number of 'TCE entries' that are available to that partition
> are proportional to that capacity.
>
> If we use the default DMA window, the IOMMU pagesize/entry will be 4k,
> and if we use DDW, we will get 64k pagesize. As the number of entries
> will be the same (for the same capacity), the total space that can be
> addressed by the IOMMU will be 16 times bigger. This sometimes enable
> direct mapping, but sometimes it's still not enough.
Good to know. This is still an implementation detail, QEMU does not
allocate TCEs like this.
>
> On Travis words :
> "A low capacity VF, with less resources available, will certainly have
> less DMA window capability than a high capacity VF. But, an 8GB DMA
> window (with 64k pages) is still 16x larger than an 512MB window (with
> 4K pages).
> A high capacity VF - for example, one that Leonardo has in his scenario
> - will go from 8GB (using 4K pages) to 128GB (using 64K pages) - again,
> 16x larger - but it's obviously still possible to create a partition
> that exceeds 128GB of memory in size."
Right except the default dma window is not 8GB, it is <=2GB.
>
>>
>>
>>> making use of DDW instead of the
>>> default DMA window for indirect mapping will expand in 16x the amount
>>> of memory that can be mapped on DMA.
>>
>> Stop saying "16x", it is not guaranteed by anything :)
>>
>>
>>> The DDW created will be used for direct mapping by default. [...]
>>> ===
>>>
>>> What do you think?
>>>
>>>>> The DDW created will be used for direct mapping by default.
>>>>> If it's not available, indirect mapping will be used instead.
>>>>>
>>>>> For indirect mapping, it's necessary to update the iommu_table so
>>>>> iommu_alloc() can use the DDW created. For this,
>>>>> iommu_table_update_window() is called when everything else succeeds
>>>>> at enable_ddw().
>>>>>
>>>>> Removing the default DMA window for using DDW with indirect mapping
>>>>> is only allowed if there is no current IOMMU memory allocated in
>>>>> the iommu_table. enable_ddw() is aborted otherwise.
>>>>>
>>>>> As there will never have both direct and indirect mappings at the same
>>>>> time, the same property name can be used for the created DDW.
>>>>>
>>>>> So renaming
>>>>> define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
>>>>> to
>>>>> define DMA64_PROPNAME "linux,dma64-ddr-window-info"
>>>>> looks the right thing to do.
>>>>
>>>> I know I suggested this but this does not look so good anymore as I
>>>> suspect it breaks kexec (from older kernel to this one) so you either
>>>> need to check for both DT names or just keep the old one. Changing the
>>>> macro name is fine.
>>>>
>>>
>>> Yeah, having 'direct' in the name don't really makes sense if it's used
>>> for indirect mapping. I will just add this new define instead of
>>> replacing the old one, and check for both.
>>> Is that ok?
>>
>> No, having two of these does not seem right or useful. It is pseries
>> which does not use petitboot (relies on grub instead so until the target
>> kernel is started, there will be no ddw) so realistically we need this
>> property for kexec/kdump which uses the same kernel but different
>> initramdisk so for that purpose we need the same property name.
>>
>> But I can see myself annoyed when I try petitboot in the hacked pseries
>> qemu and things may crash :) On this basis I'd suggest keeping the name
>> and adding a comment next to it that it is not always "direct" anymore.
>>
>
> Keeping the same name should bring more problems than solve.
> If we have indirect mapping and kexec() to an older kernel, it will
> think direct mapping is enabled, and trying to use a DMA address
> without doing H_PUT_* first may cause a crash.
>
> I tested with a new property name, and it doesn't crash.
> As the property is not found, it does try to create a new DDW, which
> fails and it falls back to using the default DMA window.
> The device that need the IOMMU don't work well, but when iommu_map()
> fails, it doesn't try to use the DMA address as valid.
Right, as discussed on slack.
>
>>
>>>>> To make sure the property differentiates both cases, a new u32 for flags
>>>>> was added at the end of the property, where BIT(0) set means direct
>>>>> mapping.
>>>>>
>>>>> Signed-off-by: Leonardo Bras <leobras.c@gmail.com>
>>>>> ---
>>>>> arch/powerpc/platforms/pseries/iommu.c | 108 +++++++++++++++++++------
>>>>> 1 file changed, 84 insertions(+), 24 deletions(-)
>>>>>
>>>>> diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
>>>>> index 3a1ef02ad9d5..9544e3c91ced 100644
>>>>> --- a/arch/powerpc/platforms/pseries/iommu.c
>>>>> +++ b/arch/powerpc/platforms/pseries/iommu.c
>>>>> @@ -350,8 +350,11 @@ struct dynamic_dma_window_prop {
>>>>> __be64 dma_base; /* address hi,lo */
>>>>> __be32 tce_shift; /* ilog2(tce_page_size) */
>>>>> __be32 window_shift; /* ilog2(tce_window_size) */
>>>>> + __be32 flags; /* DDW properties, see bellow */
>>>>> };
>>>>>
>>>>> +#define DDW_FLAGS_DIRECT 0x01
>>>>
>>>> This is set if ((1<<window_shift) >= ddw_memory_hotplug_max()), you
>>>> could simply check window_shift and drop the flags.
>>>>
>>>
>>> Yeah, it's better this way, I will revert this.
>>>
>>>>> +
>>>>> struct direct_window {
>>>>> struct device_node *device;
>>>>> const struct dynamic_dma_window_prop *prop;
>>>>> @@ -377,7 +380,7 @@ static LIST_HEAD(direct_window_list);
>>>>> static DEFINE_SPINLOCK(direct_window_list_lock);
>>>>> /* protects initializing window twice for same device */
>>>>> static DEFINE_MUTEX(direct_window_init_mutex);
>>>>> -#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
>>>>> +#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
>>>>>
>>>>> static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
>>>>> unsigned long num_pfn, const void *arg)
>>>>> @@ -836,7 +839,7 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
>>>>> if (ret)
>>>>> return;
>>>>>
>>>>> - win = of_find_property(np, DIRECT64_PROPNAME, NULL);
>>>>> + win = of_find_property(np, DMA64_PROPNAME, NULL);
>>>>> if (!win)
>>>>> return;
>>>>>
>>>>> @@ -852,7 +855,7 @@ static void remove_ddw(struct device_node *np, bool remove_prop)
>>>>> np, ret);
>>>>> }
>>>>>
>>>>> -static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr)
>>>>> +static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr, bool *direct_mapping)
>>>>> {
>>>>> struct direct_window *window;
>>>>> const struct dynamic_dma_window_prop *direct64;
>>>>> @@ -864,6 +867,7 @@ static bool find_existing_ddw(struct device_node *pdn, u64 *dma_addr)
>>>>> if (window->device == pdn) {
>>>>> direct64 = window->prop;
>>>>> *dma_addr = be64_to_cpu(direct64->dma_base);
>>>>> + *direct_mapping = be32_to_cpu(direct64->flags) & DDW_FLAGS_DIRECT;
>>>>> found = true;
>>>>> break;
>>>>> }
>>>>> @@ -901,8 +905,8 @@ static int find_existing_ddw_windows(void)
>>>>> if (!firmware_has_feature(FW_FEATURE_LPAR))
>>>>> return 0;
>>>>>
>>>>> - for_each_node_with_property(pdn, DIRECT64_PROPNAME) {
>>>>> - direct64 = of_get_property(pdn, DIRECT64_PROPNAME, &len);
>>>>> + for_each_node_with_property(pdn, DMA64_PROPNAME) {
>>>>> + direct64 = of_get_property(pdn, DMA64_PROPNAME, &len);
>>>>> if (!direct64)
>>>>> continue;
>>>>>
>>>>> @@ -1124,7 +1128,8 @@ static void reset_dma_window(struct pci_dev *dev, struct device_node *par_dn)
>>>>> }
>>>>>
>>>>> static int ddw_property_create(struct property **ddw_win, const char *propname,
>>>>> - u32 liobn, u64 dma_addr, u32 page_shift, u32 window_shift)
>>>>> + u32 liobn, u64 dma_addr, u32 page_shift,
>>>>> + u32 window_shift, bool direct_mapping)
>>>>> {
>>>>> struct dynamic_dma_window_prop *ddwprop;
>>>>> struct property *win64;
>>>>> @@ -1144,6 +1149,36 @@ static int ddw_property_create(struct property **ddw_win, const char *propname,
>>>>> ddwprop->dma_base = cpu_to_be64(dma_addr);
>>>>> ddwprop->tce_shift = cpu_to_be32(page_shift);
>>>>> ddwprop->window_shift = cpu_to_be32(window_shift);
>>>>> + if (direct_mapping)
>>>>> + ddwprop->flags = cpu_to_be32(DDW_FLAGS_DIRECT);
>>>>> +
>>>>> + return 0;
>>>>> +}
>>>>> +
>>>>> +static int iommu_table_update_window(struct iommu_table **tbl, int nid, unsigned long liobn,
>>>>> + unsigned long win_addr, unsigned long page_shift,
>>>>> + unsigned long window_size)
>>>>
>>>> Rather strange helper imho. I'd extract the most of
>>>> iommu_table_setparms_lpar() into iommu_table_setparms() (except
>>>> of_parse_dma_window) and call new helper from where you call
>>>> iommu_table_update_window; and do
>>>> iommu_pseries_alloc_table/iommu_tce_table_put there.
>>>>
>>>
>>> I don't see how to extract iommu_table_setparms_lpar() into
>>> iommu_table_setparms(), they look to be used for different machine
>>> types.
>>>
>>> Do mean you extracting most of iommu_table_setparms_lpar() (and maybe
>>> iommu_table_setparms() ) into a new helper, which is called in both
>>> functions and use it instead of iommu_table_update_window() ?
>>
>> Yes, this.
>
> I will do that then, seems better. :)
>
>>
>>
>>>>> +{
>>>>> + struct iommu_table *new_tbl, *old_tbl;
>>>>> +
>>>>> + new_tbl = iommu_pseries_alloc_table(nid);
>>>>> + if (!new_tbl)
>>>>> + return -ENOMEM;
>>>>> +
>>>>> + old_tbl = *tbl;
>>>>> + new_tbl->it_index = liobn;
>>>>> + new_tbl->it_offset = win_addr >> page_shift;
>>>>> + new_tbl->it_page_shift = page_shift;
>>>>> + new_tbl->it_size = window_size >> page_shift;
>>>>> + new_tbl->it_base = old_tbl->it_base;
>>>>
>>>> Should not be used in pseries.
>>>>
>>>
>>> The point here is to migrate the values from the older tbl to the
>>
>> The actual window/table is new (on the hypervisor side), you are not
>> migrating a single TCE, you deleted one whole window and created another
>> whole window, calling it "migration" is confusing, especially when PAPR
>> actually defines TCE migration.
>
> Ok, I understand it's confusing now. I will avoid using this term from
> now on.
>
>>
>>
>>> newer. I Would like to understand why this is bad, if it will still be
>>> 'unused' as the older tbl.
>>
>> Having explicit values is more readable imho.
>
> Ok, I understand why it should be improved.!
>
> Alexey, thank you for reviewing, and for helping me with my questions!
Thanks for doing this all!
>
> Best regards,
>
>>
>>
>>>>> + new_tbl->it_busno = old_tbl->it_busno;
>>>>> + new_tbl->it_blocksize = old_tbl->it_blocksize;
>>>>
>>>> 16 for pseries and does not change (may be even make it a macro).
>>>>
>>>>> + new_tbl->it_type = old_tbl->it_type;
>>>>
>>>> TCE_PCI.
>>>>
>>>
>>> Same as above.
>>>
>>>>> + new_tbl->it_ops = old_tbl->it_ops;
>>>>> +
>>>>> + iommu_init_table(new_tbl, nid, old_tbl->it_reserved_start, old_tbl->it_reserved_end);
>>>>> + iommu_tce_table_put(old_tbl);
>>>>> + *tbl = new_tbl;
>>>>>
>>>>> return 0;
>>>>> }
>>>>> @@ -1171,12 +1206,16 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
>>>>> struct direct_window *window;
>>>>> struct property *win64 = NULL;
>>>>> struct failed_ddw_pdn *fpdn;
>>>>> - bool default_win_removed = false;
>>>>> + bool default_win_removed = false, maps_whole_partition = false;
>>>>
>>>> s/maps_whole_partition/direct_mapping/
>>>>
>>>
>>> Sure, I will get it replaced.
>>>
>>>>> + struct pci_dn *pci = PCI_DN(pdn);
>>>>> + struct iommu_table *tbl = pci->table_group->tables[0];
>>>>>
>>>>> mutex_lock(&direct_window_init_mutex);
>>>>>
>>>>> - if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset))
>>>>> - goto out_unlock;
>>>>> + if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &maps_whole_partition)) {
>>>>> + mutex_unlock(&direct_window_init_mutex);
>>>>> + return maps_whole_partition;
>>>>> + }
>>>>>
>>>>> /*
>>>>> * If we already went through this for a previous function of
>>>>> @@ -1258,16 +1297,24 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
>>>>> query.page_size);
>>>>> goto out_failed;
>>>>> }
>>>>> +
>>>>> /* verify the window * number of ptes will map the partition */
>>>>> - /* check largest block * page size > max memory hotplug addr */
>>>>> max_addr = ddw_memory_hotplug_max();
>>>>> if (query.largest_available_block < (max_addr >> page_shift)) {
>>>>> - dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu "
>>>>> - "%llu-sized pages\n", max_addr, query.largest_available_block,
>>>>> - 1ULL << page_shift);
>>>>> - goto out_failed;
>>>>> + dev_dbg(&dev->dev, "can't map partition max 0x%llx with %llu %llu-sized pages\n",
>>>>> + max_addr, query.largest_available_block,
>>>>> + 1ULL << page_shift);
>>>>> +
>>>>> + len = order_base_2(query.largest_available_block << page_shift);
>>>>> + } else {
>>>>> + maps_whole_partition = true;
>>>>> + len = order_base_2(max_addr);
>>>>> }
>>>>> - len = order_base_2(max_addr);
>>>>> +
>>>>> + /* DDW + IOMMU on single window may fail if there is any allocation */
>>>>> + if (default_win_removed && !maps_whole_partition &&
>>>>> + iommu_table_in_use(tbl))
>>>>> + goto out_failed;
>>>>>
>>>>> ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
>>>>> if (ret != 0)
>>>>> @@ -1277,8 +1324,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
>>>>> create.liobn, dn);
>>>>>
>>>>> win_addr = ((u64)create.addr_hi << 32) | create.addr_lo;
>>>>> - ret = ddw_property_create(&win64, DIRECT64_PROPNAME, create.liobn, win_addr,
>>>>> - page_shift, len);
>>>>> + ret = ddw_property_create(&win64, DMA64_PROPNAME, create.liobn, win_addr,
>>>>> + page_shift, len, maps_whole_partition);
>>>>> if (ret) {
>>>>> dev_info(&dev->dev,
>>>>> "couldn't allocate property, property name, or value\n");
>>>>> @@ -1297,12 +1344,25 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
>>>>> if (!window)
>>>>> goto out_prop_del;
>>>>>
>>>>> - ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
>>>>> - win64->value, tce_setrange_multi_pSeriesLP_walk);
>>>>> - if (ret) {
>>>>> - dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n",
>>>>> - dn, ret);
>>>>> - goto out_free_window;
>>>>> + if (maps_whole_partition) {
>>>>> + /* DDW maps the whole partition, so enable direct DMA mapping */
>>>>> + ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
>>>>> + win64->value, tce_setrange_multi_pSeriesLP_walk);
>>>>> + if (ret) {
>>>>> + dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n",
>>>>> + dn, ret);
>>>>> + goto out_free_window;
>>>>> + }
>>>>> + } else {
>>>>> + /* New table for using DDW instead of the default DMA window */
>>>>> + if (iommu_table_update_window(&tbl, pci->phb->node, create.liobn,
>>>>> + win_addr, page_shift, 1UL << len))
>>>>> + goto out_free_window;
>>>>> +
>>>>> + set_iommu_table_base(&dev->dev, tbl);
>>>>> + WARN_ON(dev->dev.archdata.dma_offset >= SZ_4G);
>>>>
>>>> What is this check for exactly? Why 4G, not >= 0, for example?
>>>
>>> I am not really sure, you suggested adding it here:
>>> http://patchwork.ozlabs.org/project/linuxppc-dev/patch/20200716071658.467820-6-leobras.c@gmail.com/#2488874
>>
>> Ah right I did suggest this :) My bad. I think I suggested it before
>> suggesting to keep the reserved area boundaries checked/adjusted to the
>> window boundaries, may as well drop this. Thanks,
>>
>>
>>> I can remove it if it's ok.
>>>
>>>>> + goto out_unlock;
>>>>> +
>>>>> }
>>>>>
>>>>> dev->dev.archdata.dma_offset = win_addr;
>>>>> @@ -1340,7 +1400,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
>>>>>
>>>>> out_unlock:
>>>>> mutex_unlock(&direct_window_init_mutex);
>>>>> - return win64;
>>>>> + return win64 && maps_whole_partition;
>>>>> }
>>>>>
>>>>> static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
>>>>>
>
--
Alexey
^ permalink raw reply
* [PATCH] spi: fsl-espi: Only process interrupts for expected events
From: Chris Packham @ 2020-09-04 0:28 UTC (permalink / raw)
To: broonie, npiggin, hkallweit1
Cc: Chris Packham, linuxppc-dev, linux-kernel, stable, linux-spi
The SPIE register contains counts for the TX FIFO so any time the irq
handler was invoked we would attempt to process the RX/TX fifos. Use the
SPIM value to mask the events so that we only process interrupts that
were expected.
This was a latent issue exposed by commit 3282a3da25bd ("powerpc/64:
Implement soft interrupt replay in C").
Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Cc: stable@vger.kernel.org
---
Notes:
I've tested this on a T2080RDB and a custom board using the T2081 SoC. With
this change I don't see any spurious instances of the "Transfer done but
SPIE_DON isn't set!" or "Transfer done but rx/tx fifo's aren't empty!" messages
and the updates to spi flash are successful.
I think this should go into the stable trees that contain 3282a3da25bd but I
haven't added a Fixes: tag because I think 3282a3da25bd exposed the issue as
opposed to causing it.
drivers/spi/spi-fsl-espi.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c
index 7e7c92cafdbb..cb120b68c0e2 100644
--- a/drivers/spi/spi-fsl-espi.c
+++ b/drivers/spi/spi-fsl-espi.c
@@ -574,13 +574,14 @@ static void fsl_espi_cpu_irq(struct fsl_espi *espi, u32 events)
static irqreturn_t fsl_espi_irq(s32 irq, void *context_data)
{
struct fsl_espi *espi = context_data;
- u32 events;
+ u32 events, mask;
spin_lock(&espi->lock);
/* Get interrupt events(tx/rx) */
events = fsl_espi_read_reg(espi, ESPI_SPIE);
- if (!events) {
+ mask = fsl_espi_read_reg(espi, ESPI_SPIM);
+ if (!(events & mask)) {
spin_unlock(&espi->lock);
return IRQ_NONE;
}
--
2.28.0
^ permalink raw reply related
* Re: fsl_espi errors on v5.7.15
From: Chris Packham @ 2020-09-03 23:58 UTC (permalink / raw)
To: Nicholas Piggin, benh@kernel.crashing.org, broonie@kernel.org,
Heiner Kallweit, mpe@ellerman.id.au, paulus@samba.org
Cc: linuxppc-dev@lists.ozlabs.org, linux-kernel@vger.kernel.org,
linux-spi@vger.kernel.org
In-Reply-To: <1598940515.6e06nwgi0c.astroid@bobo.none>
On 1/09/20 6:14 pm, Nicholas Piggin wrote:
> Excerpts from Chris Packham's message of September 1, 2020 11:25 am:
>> On 1/09/20 12:33 am, Heiner Kallweit wrote:
>>> On 30.08.2020 23:59, Chris Packham wrote:
>>>> On 31/08/20 9:41 am, Heiner Kallweit wrote:
>>>>> On 30.08.2020 23:00, Chris Packham wrote:
>>>>>> On 31/08/20 12:30 am, Nicholas Piggin wrote:
>>>>>>> Excerpts from Chris Packham's message of August 28, 2020 8:07 am:
>>>>>> <snip>
>>>>>>
>>>>>>>>>>>> I've also now seen the RX FIFO not empty error on the T2080RDB
>>>>>>>>>>>>
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
>>>>>>>>>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>>>>>>>>>>>>
>>>>>>>>>>>> With my current workaround of emptying the RX FIFO. It seems
>>>>>>>>>>>> survivable. Interestingly it only ever seems to be 1 extra byte in the
>>>>>>>>>>>> RX FIFO and it seems to be after either a READ_SR or a READ_FSR.
>>>>>>>>>>>>
>>>>>>>>>>>> fsl_espi ffe110000.spi: tx 70
>>>>>>>>>>>> fsl_espi ffe110000.spi: rx 03
>>>>>>>>>>>> fsl_espi ffe110000.spi: Extra RX 00
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
>>>>>>>>>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>>>>>>>>>>>> fsl_espi ffe110000.spi: tx 05
>>>>>>>>>>>> fsl_espi ffe110000.spi: rx 00
>>>>>>>>>>>> fsl_espi ffe110000.spi: Extra RX 03
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>>>>>>>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
>>>>>>>>>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>>>>>>>>>>>> fsl_espi ffe110000.spi: tx 05
>>>>>>>>>>>> fsl_espi ffe110000.spi: rx 00
>>>>>>>>>>>> fsl_espi ffe110000.spi: Extra RX 03
>>>>>>>>>>>>
>>>>>>>>>>>> From all the Micron SPI-NOR datasheets I've got access to it is
>>>>>>>>>>>> possible to continually read the SR/FSR. But I've no idea why it
>>>>>>>>>>>> happens some times and not others.
>>>>>>>>>>> So I think I've got a reproduction and I think I've bisected the problem
>>>>>>>>>>> to commit 3282a3da25bd ("powerpc/64: Implement soft interrupt replay in
>>>>>>>>>>> C"). My day is just finishing now so I haven't applied too much scrutiny
>>>>>>>>>>> to this result. Given the various rabbit holes I've been down on this
>>>>>>>>>>> issue already I'd take this information with a good degree of skepticism.
>>>>>>>>>>>
>>>>>>>>>> OK, so an easy test should be to re-test with a 5.4 kernel.
>>>>>>>>>> It doesn't have yet the change you're referring to, and the fsl-espi driver
>>>>>>>>>> is basically the same as in 5.7 (just two small changes in 5.7).
>>>>>>>>> There's 6cc0c16d82f88 and maybe also other interrupt related patches
>>>>>>>>> around this time that could affect book E, so it's good if that exact
>>>>>>>>> patch is confirmed.
>>>>>>>> My confirmation is basically that I can induce the issue in a 5.4 kernel
>>>>>>>> by cherry-picking 3282a3da25bd. I'm also able to "fix" the issue in
>>>>>>>> 5.9-rc2 by reverting that one commit.
>>>>>>>>
>>>>>>>> I both cases it's not exactly a clean cherry-pick/revert so I also
>>>>>>>> confirmed the bisection result by building at 3282a3da25bd (which sees
>>>>>>>> the issue) and the commit just before (which does not).
>>>>>>> Thanks for testing, that confirms it well.
>>>>>>>
>>>>>>> [snip patch]
>>>>>>>
>>>>>>>> I still saw the issue with this change applied. PPC_IRQ_SOFT_MASK_DEBUG
>>>>>>>> didn't report anything (either with or without the change above).
>>>>>>> Okay, it was a bit of a shot in the dark. I still can't see what
>>>>>>> else has changed.
>>>>>>>
>>>>>>> What would cause this, a lost interrupt? A spurious interrupt? Or
>>>>>>> higher interrupt latency?
>>>>>>>
>>>>>>> I don't think the patch should cause significantly worse latency,
>>>>>>> (it's supposed to be a bit better if anything because it doesn't set
>>>>>>> up the full interrupt frame). But it's possible.
>>>>>> My working theory is that the SPI_DON indication is all about the TX
>>>>>> direction an now that the interrupts are faster we're hitting an error
>>>>>> because there is still RX activity going on. Heiner disagrees with my
>>>>>> interpretation of the SPI_DON indication and the fact that it doesn't
>>>>>> happen every time does throw doubt on it.
>>>>>>
>>>>> It's right that the eSPI spec can be interpreted that SPI_DON refers to
>>>>> TX only. However this wouldn't really make sense, because also for RX
>>>>> we program the frame length, and therefore want to be notified once the
>>>>> full frame was received. Also practical experience shows that SPI_DON
>>>>> is set also after RX-only transfers.
>>>>> Typical SPI NOR use case is that you write read command + start address,
>>>>> followed by a longer read. If the TX-only interpretation would be right,
>>>>> we'd always end up with SPI_DON not being set.
>>>>>
>>>>>> I can't really explain the extra RX byte in the fifo. We know how many
>>>>>> bytes to expect and we pull that many from the fifo so it's not as if
>>>>>> we're missing an interrupt causing us to skip the last byte. I've been
>>>>>> looking for some kind of off-by-one calculation but again if it were
>>>>>> something like that it'd happen all the time.
>>>>>>
>>>>> Maybe it helps to know what value this extra byte in the FIFO has. Is it:
>>>>> - a duplicate of the last read byte
>>>>> - or the next byte (at <end address> + 1)
>>>>> - or a fixed value, e.g. always 0x00 or 0xff
>>>> The values were up thread a bit but I'll repeat them here
>>>>
>>>> fsl_espi ffe110000.spi: tx 70
>>>> fsl_espi ffe110000.spi: rx 03
>>>> fsl_espi ffe110000.spi: Extra RX 00
>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>>>> fsl_espi ffe110000.spi: tx 05
>>>> fsl_espi ffe110000.spi: rx 00
>>>> fsl_espi ffe110000.spi: Extra RX 03
>>>> fsl_espi ffe110000.spi: Transfer done but SPIE_DON isn't set!
>>>> fsl_espi ffe110000.spi: Transfer done but rx/tx fifo's aren't empty!
>>>> fsl_espi ffe110000.spi: SPIE_RXCNT = 1, SPIE_TXCNT = 32
>>>> fsl_espi ffe110000.spi: tx 05
>>>> fsl_espi ffe110000.spi: rx 00
>>>> fsl_espi ffe110000.spi: Extra RX 03
>>>>
>>>>
>>>> The rx 00 Extra RX 03 is a bit concerning. I've only ever seen them with
>>>> either a READ_SR or a READ_FSR. Never a data read.
>>>>
>>> Just remembered something about SPIE_DON:
>>> Transfers are always full duplex, therefore in case of a read the chip
>>> sends dummy zero's. Having said that in case of a read SPIE_DON means
>>> that the last dummy zero was shifted out.
>>>
>>> READ_SR and READ_FSR are the shortest transfers, 1 byte out and 1 byte in.
>>> So the issue may have a dependency on the length of the transfer.
>>> However I see no good explanation so far. You can try adding a delay of
>>> a few miroseconds between the following to commands in fsl_espi_bufs().
>>>
>>> fsl_espi_write_reg(espi, ESPI_SPIM, mask);
>>>
>>> /* Prevent filling the fifo from getting interrupted */
>>> spin_lock_irq(&espi->lock);
>>>
>>> Maybe enabling interrupts and seeing the SPIE_DON interrupt are too close.
>> I think this might be heading in the right direction. Playing about with
>> a delay does seem to make the two symptoms less likely. Although I have
>> to set it quite high (i.e. msleep(100)) to completely avoid any
>> possibility of seeing either message.
> The patch might replay the interrupt a little bit faster, but it would
> be a few microseconds at most I think (just from improved code).
>
> Would you be able to ftrace the interrupt handler function and see if you
> can see a difference in number or timing of interrupts? I'm at a bit of
> a loss.
I tried ftrace but I really wasn't sure what I was looking for.
Capturing a "bad" case was pretty tricky. But I think I've identified a
fix (I'll send it as a proper patch shortly). The gist is
diff --git a/drivers/spi/spi-fsl-espi.c b/drivers/spi/spi-fsl-espi.c
index 7e7c92cafdbb..cb120b68c0e2 100644
--- a/drivers/spi/spi-fsl-espi.c
+++ b/drivers/spi/spi-fsl-espi.c
@@ -574,13 +574,14 @@ static void fsl_espi_cpu_irq(struct fsl_espi
*espi, u32 events)
static irqreturn_t fsl_espi_irq(s32 irq, void *context_data)
{
struct fsl_espi *espi = context_data;
- u32 events;
+ u32 events, mask;
spin_lock(&espi->lock);
/* Get interrupt events(tx/rx) */
events = fsl_espi_read_reg(espi, ESPI_SPIE);
- if (!events) {
+ mask = fsl_espi_read_reg(espi, ESPI_SPIM);
+ if (!(events & mask)) {
spin_unlock(&espi->lock);
return IRQ_NONE;
}
The SPIE register contains the TXCNT so events is pretty much always
going to have something set. By checking events against what we've
actually requested interrupts for we don't see any spurious events.
I've tested this on the T2080RDB and on our custom hardware and it seems
to resolve the problem.
^ permalink raw reply related
* Re: [PATCH 12/14] x86: remove address space overrides using set_fs()
From: Linus Torvalds @ 2020-09-03 23:25 UTC (permalink / raw)
To: David Laight
Cc: linux-arch@vger.kernel.org, Kees Cook, x86@kernel.org,
linux-kernel@vger.kernel.org, Christoph Hellwig, Luis Chamberlain,
Al Viro, linux-fsdevel@vger.kernel.org,
linuxppc-dev@lists.ozlabs.org, Alexey Dobriyan
In-Reply-To: <9ab40244a2164f7db2ff0c1d23ab59a0@AcuMS.aculab.com>
On Thu, Sep 3, 2020 at 2:30 PM David Laight <David.Laight@aculab.com> wrote:
>
> A non-canonical (is that the right term) address between the highest
> valid user address and the lowest valid kernel address (7ffe to fffe?)
> will fault anyway.
Yes.
But we actually warn against that fault, because it's been a good way
to catch places that didn't use the proper "access_ok()" pattern.
See ex_handler_uaccess() and the
WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in
user access. Non-canonical address?");
warning. It's been good for randomized testing - a missing range check
on a user address will often hit this.
Of course, you should never see it in real life (and hopefully not in
testing either any more). But belt-and-suspenders..
Linus
^ permalink raw reply
* Re: [PATCH] mm: check for memory's node later during boot
From: Andrew Morton @ 2020-09-03 21:35 UTC (permalink / raw)
To: Laurent Dufour
Cc: nathanl, Rafael J. Wysocki, Greg Kroah-Hartman, cheloha,
linux-kernel, linux-mm, linuxppc-dev
In-Reply-To: <20200902090911.11363-1-ldufour@linux.ibm.com>
On Wed, 2 Sep 2020 11:09:11 +0200 Laurent Dufour <ldufour@linux.ibm.com> wrote:
> register_mem_sect_under_nodem() is checking the memory block's node id only
> if the system state is "SYSTEM_BOOTING". On PowerPC, the memory blocks are
> registered while the system state is "SYSTEM_SCHEDULING", the one before
> SYSTEM_RUNNING.
>
> The consequence on PowerPC guest with interleaved memory node's ranges is
> that some memory block could be assigned to multiple nodes on sysfs. This
> lately prevents some memory hot-plug and hot-unplug to succeed because
> links are remaining. Such a panic is then displayed:
>
> ------------[ cut here ]------------
> kernel BUG at /Users/laurent/src/linux-ppc/mm/memory_hotplug.c:1084!
> Oops: Exception in kernel mode, sig: 5 [#1]
> LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
> Modules linked in: rpadlpar_io rpaphp pseries_rng rng_core vmx_crypto gf128mul binfmt_misc ip_tables x_tables xfs libcrc32c crc32c_vpmsum autofs4
> CPU: 8 PID: 10256 Comm: drmgr Not tainted 5.9.0-rc1+ #25
> NIP: c000000000403f34 LR: c000000000403f2c CTR: 0000000000000000
> REGS: c0000004876e3660 TRAP: 0700 Not tainted (5.9.0-rc1+)
> MSR: 800000000282b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE> CR: 24000448 XER: 20040000
> CFAR: c000000000846d20 IRQMASK: 0
> GPR00: c000000000403f2c c0000004876e38f0 c0000000012f6f00 ffffffffffffffef
> GPR04: 0000000000000227 c0000004805ae680 0000000000000000 00000004886f0000
> GPR08: 0000000000000226 0000000000000003 0000000000000002 fffffffffffffffd
> GPR12: 0000000088000484 c00000001ec96280 0000000000000000 0000000000000000
> GPR16: 0000000000000000 0000000000000000 0000000000000004 0000000000000003
> GPR20: c00000047814ffe0 c0000007ffff7c08 0000000000000010 c0000000013332c8
> GPR24: 0000000000000000 c0000000011f6cc0 0000000000000000 0000000000000000
> GPR28: ffffffffffffffef 0000000000000001 0000000150000000 0000000010000000
> NIP [c000000000403f34] add_memory_resource+0x244/0x340
> LR [c000000000403f2c] add_memory_resource+0x23c/0x340
> Call Trace:
> [c0000004876e38f0] [c000000000403f2c] add_memory_resource+0x23c/0x340 (unreliable)
> [c0000004876e39c0] [c00000000040408c] __add_memory+0x5c/0xf0
> [c0000004876e39f0] [c0000000000e2b94] dlpar_add_lmb+0x1b4/0x500
> [c0000004876e3ad0] [c0000000000e3888] dlpar_memory+0x1f8/0xb80
> [c0000004876e3b60] [c0000000000dc0d0] handle_dlpar_errorlog+0xc0/0x190
> [c0000004876e3bd0] [c0000000000dc398] dlpar_store+0x198/0x4a0
> [c0000004876e3c90] [c00000000072e630] kobj_attr_store+0x30/0x50
> [c0000004876e3cb0] [c00000000051f954] sysfs_kf_write+0x64/0x90
> [c0000004876e3cd0] [c00000000051ee40] kernfs_fop_write+0x1b0/0x290
> [c0000004876e3d20] [c000000000438dd8] vfs_write+0xe8/0x290
> [c0000004876e3d70] [c0000000004391ac] ksys_write+0xdc/0x130
> [c0000004876e3dc0] [c000000000034e40] system_call_exception+0x160/0x270
> [c0000004876e3e20] [c00000000000d740] system_call_common+0xf0/0x27c
> Instruction dump:
> 48442e35 60000000 0b030000 3cbe0001 7fa3eb78 7bc48402 38a5fffe 7ca5fa14
> 78a58402 48442db1 60000000 7c7c1b78 <0b030000> 7f23cb78 4bda371d 60000000
> ---[ end trace 562fd6c109cd0fb2 ]---
>
> To prevent this multiple links, make the node checking done for states
> prior to SYSTEM_RUNNING.
Did you consider adding a cc:stable to this fix?
^ permalink raw reply
* RE: [PATCH 12/14] x86: remove address space overrides using set_fs()
From: David Laight @ 2020-09-03 21:30 UTC (permalink / raw)
To: 'Christoph Hellwig', Linus Torvalds, Al Viro,
Michael Ellerman, x86@kernel.org
Cc: linux-arch@vger.kernel.org, Kees Cook,
linux-kernel@vger.kernel.org, Luis Chamberlain,
linux-fsdevel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
Alexey Dobriyan
In-Reply-To: <20200903142242.925828-13-hch@lst.de>
From: Christoph Hellwig
> Sent: 03 September 2020 15:23
>
> Stop providing the possibility to override the address space using
> set_fs() now that there is no need for that any more. To properly
> handle the TASK_SIZE_MAX checking for 4 vs 5-level page tables on
> x86 a new alternative is introduced, which just like the one in
> entry_64.S has to use the hardcoded virtual address bits to escape
> the fact that TASK_SIZE_MAX isn't actually a constant when 5-level
> page tables are enabled.
Why does it matter whether 4 or 5 level page tables are in use?
Surely all access_ok() needs to do is ensure that a valid kernel
address isn't supplied.
A non-canonical (is that the right term) address between the highest
valid user address and the lowest valid kernel address (7ffe to fffe?)
will fault anyway.
So any limit between the valid user and kernel addresses should
work?
So a limit of 1<<63 would seem appropriate.
David
-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
^ permalink raw reply
* Re: [PATCH] powerpc/boot/dts: Fix dtc "pciex" warnings
From: Christian Lamparter @ 2020-09-03 21:21 UTC (permalink / raw)
To: Michael Ellerman, linuxppc-dev
Cc: sfr, Chris Blake,
open list:OPEN FIRMWARE AND FLATTENED DEVICE TREE BINDINGS
In-Reply-To: <20200623130320.405852-1-mpe@ellerman.id.au>
On 2020-06-23 15:03, Michael Ellerman wrote:
> With CONFIG_OF_ALL_DTBS=y, as set by eg. allmodconfig, we see lots of
> warnings about our dts files, such as:
>
> arch/powerpc/boot/dts/glacier.dts:492.26-532.5:
> Warning (pci_bridge): /plb/pciex@d00000000: node name is not "pci"
> or "pcie"
>
> The node name should not particularly matter, it's just a name, and
> AFAICS there's no kernel code that cares whether nodes are *named*
> "pciex" or "pcie". So shutup these warnings by converting to the name
> dtc wants.
>
> As always there's some risk this could break something obscure that
> does rely on the name, in which case we can revert.
Hmm, I noticed this when I was looking up why nobody commented
on my series of adding more devices to the APM82181/bluestone series:
<https://lore.kernel.org/linuxppc-dev/cover.1598124791.git.chunkeey@gmail.com/>
(I'll post a v3 "soonish".)
Unfortunately yes. This patch will break uboot code in Meraki MX60(W) / MX60.
> https://github.com/riptidewave93/meraki-uboot/blob/mx60w-20180413/board/amcc/bluestone/bluestone.c#L1178
| if (!pci_available()) {
| fdt_find_and_setprop(blob, "/plb/pciex@d00000000", "status",
| "disabled", sizeof("disabled"), 1);
| }
Backstory: There are two version of the Meraki MX60. The MX60
and the MX60W. The difference is that the MX60W has a populated
mini-pcie slot on the PCB for a >W<ireless card.
That said, this is not earth shattering.
(In theory, this can also cause problems for the bluestone and canyonlands
dev boards that have the option to be configured as either dual sata or
pcie+sata.... But this is probably not a problem for customer boards)
OT: Please note that the plb, opb and ebc node paths (/plb/opb/ebc) are
hardcoded too :(. Amending the proper unit-addresses will lead to no-longer
working DTBs as the "ranges" are missing.
Cheers,
Christian
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
>
> diff --git a/arch/powerpc/boot/dts/bluestone.dts b/arch/powerpc/boot/dts/bluestone.dts
> index cc965a1816b6..aa1ae94cd776 100644
> --- a/arch/powerpc/boot/dts/bluestone.dts
> +++ b/arch/powerpc/boot/dts/bluestone.dts
> @@ -325,7 +325,7 @@ EMAC0: ethernet@ef600c00 {
> };
> };
>
> - PCIE0: pciex@d00000000 {
> + PCIE0: pcie@d00000000 {
> device_type = "pci";
> #interrupt-cells = <1>;
> #size-cells = <2>;
^ permalink raw reply
* [powerpc:fixes-test] BUILD SUCCESS 4c62285439f80f8996c38e0bda79b1125a192365
From: kernel test robot @ 2020-09-03 19:38 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git fixes-test
branch HEAD: 4c62285439f80f8996c38e0bda79b1125a192365 Revert "powerpc/build: vdso linker warning for orphan sections"
elapsed time: 800m
configs tested: 162
configs skipped: 19
The following configs have been built successfully.
More configs may be tested in the coming days.
gcc tested configs:
arm defconfig
arm64 allyesconfig
arm64 defconfig
arm allyesconfig
arm allmodconfig
powerpc ps3_defconfig
arm mxs_defconfig
powerpc ppc6xx_defconfig
mips rt305x_defconfig
nios2 alldefconfig
sh polaris_defconfig
arm gemini_defconfig
arc nsim_700_defconfig
arm cns3420vb_defconfig
arm vf610m4_defconfig
ia64 bigsur_defconfig
xtensa alldefconfig
arm zx_defconfig
microblaze defconfig
mips vocore2_defconfig
mips maltasmvp_defconfig
powerpc ppc44x_defconfig
arm spitz_defconfig
xtensa common_defconfig
mips e55_defconfig
m68k q40_defconfig
m68k m5475evb_defconfig
arm badge4_defconfig
arm zeus_defconfig
um i386_defconfig
mips mpc30x_defconfig
c6x evmc6474_defconfig
arm jornada720_defconfig
arm h5000_defconfig
arm sunxi_defconfig
sh se7721_defconfig
arm shmobile_defconfig
nds32 allnoconfig
arm omap1_defconfig
mips decstation_64_defconfig
arc axs103_smp_defconfig
mips rb532_defconfig
arc alldefconfig
sh rts7751r2d1_defconfig
arm mvebu_v5_defconfig
arm nhk8815_defconfig
mips ip28_defconfig
openrisc alldefconfig
arm iop32x_defconfig
sh sh7785lcr_defconfig
powerpc tqm8xx_defconfig
c6x alldefconfig
sh hp6xx_defconfig
sh ecovec24_defconfig
arm collie_defconfig
m68k multi_defconfig
sparc allyesconfig
sh microdev_defconfig
mips malta_defconfig
mips workpad_defconfig
c6x evmc6457_defconfig
mips rm200_defconfig
h8300 alldefconfig
mips gpr_defconfig
sh sh7724_generic_defconfig
mips mtx1_defconfig
arm stm32_defconfig
arm hisi_defconfig
microblaze nommu_defconfig
sh lboxre2_defconfig
powerpc mpc885_ads_defconfig
mips pic32mzda_defconfig
mips capcella_defconfig
ia64 allmodconfig
ia64 defconfig
ia64 allyesconfig
m68k allmodconfig
m68k defconfig
m68k allyesconfig
nios2 defconfig
arc allyesconfig
c6x allyesconfig
nds32 defconfig
nios2 allyesconfig
csky defconfig
alpha defconfig
alpha allyesconfig
xtensa allyesconfig
h8300 allyesconfig
arc defconfig
sh allmodconfig
parisc defconfig
s390 allyesconfig
parisc allyesconfig
s390 defconfig
i386 allyesconfig
sparc defconfig
i386 defconfig
mips allyesconfig
mips allmodconfig
powerpc defconfig
powerpc allyesconfig
powerpc allmodconfig
powerpc allnoconfig
x86_64 randconfig-a004-20200903
x86_64 randconfig-a006-20200903
x86_64 randconfig-a003-20200903
x86_64 randconfig-a005-20200903
x86_64 randconfig-a001-20200903
x86_64 randconfig-a002-20200903
i386 randconfig-a004-20200902
i386 randconfig-a005-20200902
i386 randconfig-a006-20200902
i386 randconfig-a002-20200902
i386 randconfig-a001-20200902
i386 randconfig-a003-20200902
i386 randconfig-a004-20200903
i386 randconfig-a005-20200903
i386 randconfig-a006-20200903
i386 randconfig-a002-20200903
i386 randconfig-a001-20200903
i386 randconfig-a003-20200903
x86_64 randconfig-a013-20200902
x86_64 randconfig-a016-20200902
x86_64 randconfig-a011-20200902
x86_64 randconfig-a012-20200902
x86_64 randconfig-a015-20200902
x86_64 randconfig-a014-20200902
i386 randconfig-a016-20200903
i386 randconfig-a015-20200903
i386 randconfig-a011-20200903
i386 randconfig-a013-20200903
i386 randconfig-a014-20200903
i386 randconfig-a012-20200903
i386 randconfig-a016-20200902
i386 randconfig-a015-20200902
i386 randconfig-a011-20200902
i386 randconfig-a013-20200902
i386 randconfig-a014-20200902
i386 randconfig-a012-20200902
riscv allyesconfig
riscv allnoconfig
riscv defconfig
riscv allmodconfig
x86_64 rhel
x86_64 allyesconfig
x86_64 rhel-7.6-kselftests
x86_64 defconfig
x86_64 rhel-8.3
x86_64 kexec
clang tested configs:
x86_64 randconfig-a004-20200902
x86_64 randconfig-a006-20200902
x86_64 randconfig-a003-20200902
x86_64 randconfig-a005-20200902
x86_64 randconfig-a001-20200902
x86_64 randconfig-a002-20200902
x86_64 randconfig-a013-20200903
x86_64 randconfig-a016-20200903
x86_64 randconfig-a011-20200903
x86_64 randconfig-a012-20200903
x86_64 randconfig-a015-20200903
x86_64 randconfig-a014-20200903
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* [powerpc:next-test] BUILD SUCCESS c3ea77ab83a1cd36cca6a54206657a4aeb98c49c
From: kernel test robot @ 2020-09-03 19:39 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next-test
branch HEAD: c3ea77ab83a1cd36cca6a54206657a4aeb98c49c powerpc: Warn about use of smt_snooze_delay
elapsed time: 799m
configs tested: 168
configs skipped: 16
The following configs have been built successfully.
More configs may be tested in the coming days.
gcc tested configs:
arm defconfig
arm64 allyesconfig
arm64 defconfig
arm allyesconfig
arm allmodconfig
powerpc ps3_defconfig
arm mxs_defconfig
powerpc ppc6xx_defconfig
sh se7206_defconfig
mips loongson1b_defconfig
sh lboxre2_defconfig
mips rt305x_defconfig
nios2 alldefconfig
sh polaris_defconfig
arm gemini_defconfig
arc nsim_700_defconfig
arm zx_defconfig
microblaze defconfig
mips vocore2_defconfig
mips maltasmvp_defconfig
powerpc ppc44x_defconfig
arm spitz_defconfig
xtensa common_defconfig
xtensa alldefconfig
mips e55_defconfig
m68k q40_defconfig
m68k m5475evb_defconfig
mips pnx8335_stb225_defconfig
arm badge4_defconfig
arm zeus_defconfig
um i386_defconfig
mips mpc30x_defconfig
c6x evmc6474_defconfig
arm jornada720_defconfig
m68k m5249evb_defconfig
x86_64 alldefconfig
mips sb1250_swarm_defconfig
arm imx_v4_v5_defconfig
sh se7712_defconfig
mips rb532_defconfig
sh hp6xx_defconfig
arm h5000_defconfig
arm sunxi_defconfig
sh se7721_defconfig
powerpc defconfig
arm tct_hammer_defconfig
arm mini2440_defconfig
powerpc allmodconfig
arm shmobile_defconfig
nds32 allnoconfig
arm omap1_defconfig
mips decstation_64_defconfig
arc axs103_smp_defconfig
arc alldefconfig
powerpc tqm8xx_defconfig
mips gcw0_defconfig
arm moxart_defconfig
powerpc adder875_defconfig
mips ip28_defconfig
openrisc alldefconfig
arm iop32x_defconfig
sh sh7785lcr_defconfig
i386 allyesconfig
sh sh7770_generic_defconfig
mips loongson3_defconfig
arm eseries_pxa_defconfig
arm sama5_defconfig
sparc64 defconfig
powerpc pmac32_defconfig
sh microdev_defconfig
mips malta_defconfig
powerpc mpc866_ads_defconfig
arm netwinder_defconfig
mips workpad_defconfig
c6x evmc6457_defconfig
mips rm200_defconfig
h8300 alldefconfig
arc allyesconfig
sh se7724_defconfig
arm mv78xx0_defconfig
microblaze mmu_defconfig
riscv rv32_defconfig
ia64 allmodconfig
ia64 defconfig
ia64 allyesconfig
m68k allmodconfig
m68k defconfig
m68k allyesconfig
nios2 defconfig
c6x allyesconfig
nds32 defconfig
nios2 allyesconfig
csky defconfig
alpha defconfig
alpha allyesconfig
xtensa allyesconfig
h8300 allyesconfig
arc defconfig
sh allmodconfig
parisc defconfig
s390 allyesconfig
parisc allyesconfig
s390 defconfig
sparc allyesconfig
sparc defconfig
i386 defconfig
mips allyesconfig
mips allmodconfig
powerpc allyesconfig
powerpc allnoconfig
x86_64 randconfig-a004-20200903
x86_64 randconfig-a006-20200903
x86_64 randconfig-a003-20200903
x86_64 randconfig-a005-20200903
x86_64 randconfig-a001-20200903
x86_64 randconfig-a002-20200903
i386 randconfig-a004-20200902
i386 randconfig-a005-20200902
i386 randconfig-a006-20200902
i386 randconfig-a002-20200902
i386 randconfig-a001-20200902
i386 randconfig-a003-20200902
i386 randconfig-a004-20200903
i386 randconfig-a005-20200903
i386 randconfig-a006-20200903
i386 randconfig-a002-20200903
i386 randconfig-a001-20200903
i386 randconfig-a003-20200903
x86_64 randconfig-a013-20200902
x86_64 randconfig-a016-20200902
x86_64 randconfig-a011-20200902
x86_64 randconfig-a012-20200902
x86_64 randconfig-a015-20200902
x86_64 randconfig-a014-20200902
i386 randconfig-a016-20200902
i386 randconfig-a015-20200902
i386 randconfig-a011-20200902
i386 randconfig-a013-20200902
i386 randconfig-a014-20200902
i386 randconfig-a012-20200902
i386 randconfig-a016-20200903
i386 randconfig-a015-20200903
i386 randconfig-a011-20200903
i386 randconfig-a013-20200903
i386 randconfig-a014-20200903
i386 randconfig-a012-20200903
riscv allyesconfig
riscv allnoconfig
riscv defconfig
riscv allmodconfig
x86_64 rhel
x86_64 allyesconfig
x86_64 rhel-7.6-kselftests
x86_64 defconfig
x86_64 rhel-8.3
x86_64 kexec
clang tested configs:
x86_64 randconfig-a004-20200902
x86_64 randconfig-a006-20200902
x86_64 randconfig-a003-20200902
x86_64 randconfig-a005-20200902
x86_64 randconfig-a001-20200902
x86_64 randconfig-a002-20200902
x86_64 randconfig-a013-20200903
x86_64 randconfig-a016-20200903
x86_64 randconfig-a011-20200903
x86_64 randconfig-a012-20200903
x86_64 randconfig-a015-20200903
x86_64 randconfig-a014-20200903
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* Re: [PATCH 1/2] dma-mapping: introduce dma_get_seg_boundary_nr_pages()
From: Christophe Leroy @ 2020-09-03 17:53 UTC (permalink / raw)
To: Christoph Hellwig, Andy Shevchenko
Cc: linux-ia64, James Bottomley, Paul Mackerras, H. Peter Anvin,
sparclinux, Stephen Rothwell, Helge Deller,
maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
Christian Borntraeger, Ingo Molnar, Matt Turner, Fenghua Yu,
Vasily Gorbik, schnelle, hca, Nicolin Chen, ink, Thomas Gleixner,
gerald.schaefer, rth, Tony Luck, linux-parisc, linux-s390,
Linux Kernel Mailing List, linux-alpha, Borislav Petkov,
open list:LINUX FOR POWERPC PA SEMI PWRFICIENT, David S. Miller
In-Reply-To: <20200903161252.GA24841@lst.de>
Le 03/09/2020 à 18:12, Christoph Hellwig a écrit :
> On Thu, Sep 03, 2020 at 01:57:39PM +0300, Andy Shevchenko wrote:
>>> +{
>>> + if (!dev)
>>> + return (U32_MAX >> page_shift) + 1;
>>> + return (dma_get_seg_boundary(dev) >> page_shift) + 1;
>>
>> Can it be better to do something like
>> unsigned long boundary = dev ? dma_get_seg_boundary(dev) : U32_MAX;
>>
>> return (boundary >> page_shift) + 1;
>>
>> ?
>
> I don't really see what that would buy us.
>
I guess it would avoid the duplication of >> page_shift) + 1
Christophe
^ permalink raw reply
* Re: [PATCH 0/2] dma-mapping: update default segment_boundary_mask
From: Christoph Hellwig @ 2020-09-03 16:13 UTC (permalink / raw)
To: Niklas Schnelle
Cc: linux-ia64, James.Bottomley, paulus, hpa, sparclinux, hch, sfr,
deller, x86, borntraeger, mingo, mattst88, fenghua.yu, gor,
linux-s390, hca, Nicolin Chen, ink, tglx, gerald.schaefer, rth,
tony.luck, linux-parisc, linux-kernel, linux-alpha, bp,
linuxppc-dev, davem
In-Reply-To: <2c8db0aa-e8b5-e577-b971-1de10ecc6747@linux.ibm.com>
Applied with the recommendation from Michael folded in.
^ permalink raw reply
* Re: [PATCH 1/2] dma-mapping: introduce dma_get_seg_boundary_nr_pages()
From: Christoph Hellwig @ 2020-09-03 16:12 UTC (permalink / raw)
To: Andy Shevchenko
Cc: linux-ia64, James Bottomley, Paul Mackerras, H. Peter Anvin,
sparclinux, Christoph Hellwig, Stephen Rothwell, Helge Deller,
maintainer:X86 ARCHITECTURE (32-BIT AND 64-BIT),
Christian Borntraeger, Ingo Molnar, Matt Turner, Fenghua Yu,
Vasily Gorbik, schnelle, hca, Nicolin Chen, ink, Thomas Gleixner,
gerald.schaefer, rth, Tony Luck, linux-parisc, linux-s390,
Linux Kernel Mailing List, linux-alpha, Borislav Petkov,
open list:LINUX FOR POWERPC PA SEMI PWRFICIENT, David S. Miller
In-Reply-To: <CAHp75VcVJBSnPQ6NfdF8FdEDfM+oQ=Sr+cH5VGX4SrAqrgpf-g@mail.gmail.com>
On Thu, Sep 03, 2020 at 01:57:39PM +0300, Andy Shevchenko wrote:
> > +{
> > + if (!dev)
> > + return (U32_MAX >> page_shift) + 1;
> > + return (dma_get_seg_boundary(dev) >> page_shift) + 1;
>
> Can it be better to do something like
> unsigned long boundary = dev ? dma_get_seg_boundary(dev) : U32_MAX;
>
> return (boundary >> page_shift) + 1;
>
> ?
I don't really see what that would buy us.
^ permalink raw reply
* Re: [PATCH 14/14] powerpc: remove address space overrides using set_fs()
From: Christophe Leroy @ 2020-09-03 16:03 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-arch, linuxppc-dev, Kees Cook, x86, linux-kernel,
Luis Chamberlain, Al Viro, linux-fsdevel, Linus Torvalds,
Alexey Dobriyan
In-Reply-To: <20200903155644.GA23521@lst.de>
Le 03/09/2020 à 17:56, Christoph Hellwig a écrit :
> On Thu, Sep 03, 2020 at 05:49:09PM +0200, Christoph Hellwig wrote:
>> On Thu, Sep 03, 2020 at 05:43:25PM +0200, Christophe Leroy wrote:
>>>
>>>
>>> Le 03/09/2020 à 16:22, Christoph Hellwig a écrit :
>>>> Stop providing the possibility to override the address space using
>>>> set_fs() now that there is no need for that any more.
>>>>
>>>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>>>> ---
>>>
>>>
>>>> -static inline int __access_ok(unsigned long addr, unsigned long size,
>>>> - mm_segment_t seg)
>>>> +static inline bool __access_ok(unsigned long addr, unsigned long size)
>>>> {
>>>> - if (addr > seg.seg)
>>>> - return 0;
>>>> - return (size == 0 || size - 1 <= seg.seg - addr);
>>>> + if (addr >= TASK_SIZE_MAX)
>>>> + return false;
>>>> + return size == 0 || size <= TASK_SIZE_MAX - addr;
>>>> }
>>>
>>> You don't need to test size == 0 anymore. It used to be necessary because
>>> of the 'size - 1', as size is unsigned.
>>>
>>> Now you can directly do
>>>
>>> return size <= TASK_SIZE_MAX - addr;
>>>
>>> If size is 0, this will always be true (because you already know that addr
>>> is not >= TASK_SIZE_MAX
>>
>> True. What do you think of Linus' comment about always using the
>> ppc32 version on ppc64 as well with this?
I have nothing against it. That's only adding a substract, all args are
already in registers so that will be in the noise for a modern CPU.
>
> i.e. something like this folded in:
>
> diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
> index 5363f7fc6dd06c..be070254e50943 100644
> --- a/arch/powerpc/include/asm/uaccess.h
> +++ b/arch/powerpc/include/asm/uaccess.h
> @@ -11,26 +11,14 @@
> #ifdef __powerpc64__
> /* We use TASK_SIZE_USER64 as TASK_SIZE is not constant */
> #define TASK_SIZE_MAX TASK_SIZE_USER64
> -
> -/*
> - * This check is sufficient because there is a large enough gap between user
> - * addresses and the kernel addresses.
> - */
> -static inline bool __access_ok(unsigned long addr, unsigned long size)
> -{
> - return addr < TASK_SIZE_MAX && size < TASK_SIZE_MAX;
> -}
> -
> #else
> #define TASK_SIZE_MAX TASK_SIZE
> +#endif
>
> static inline bool __access_ok(unsigned long addr, unsigned long size)
> {
> - if (addr >= TASK_SIZE_MAX)
> - return false;
> - return size == 0 || size <= TASK_SIZE_MAX - addr;
> + return addr < TASK_SIZE_MAX && size <= TASK_SIZE_MAX - addr;
> }
> -#endif /* __powerpc64__ */
>
> #define access_ok(addr, size) \
> (__chk_user_ptr(addr), \
>
Christophe
^ permalink raw reply
* Re: [PATCH 14/14] powerpc: remove address space overrides using set_fs()
From: Christoph Hellwig @ 2020-09-03 15:56 UTC (permalink / raw)
To: Christophe Leroy
Cc: linux-arch, linuxppc-dev, Kees Cook, x86, linux-kernel,
Alexey Dobriyan, Luis Chamberlain, Al Viro, linux-fsdevel,
Linus Torvalds, Christoph Hellwig
In-Reply-To: <20200903154909.GA23023@lst.de>
On Thu, Sep 03, 2020 at 05:49:09PM +0200, Christoph Hellwig wrote:
> On Thu, Sep 03, 2020 at 05:43:25PM +0200, Christophe Leroy wrote:
> >
> >
> > Le 03/09/2020 à 16:22, Christoph Hellwig a écrit :
> >> Stop providing the possibility to override the address space using
> >> set_fs() now that there is no need for that any more.
> >>
> >> Signed-off-by: Christoph Hellwig <hch@lst.de>
> >> ---
> >
> >
> >> -static inline int __access_ok(unsigned long addr, unsigned long size,
> >> - mm_segment_t seg)
> >> +static inline bool __access_ok(unsigned long addr, unsigned long size)
> >> {
> >> - if (addr > seg.seg)
> >> - return 0;
> >> - return (size == 0 || size - 1 <= seg.seg - addr);
> >> + if (addr >= TASK_SIZE_MAX)
> >> + return false;
> >> + return size == 0 || size <= TASK_SIZE_MAX - addr;
> >> }
> >
> > You don't need to test size == 0 anymore. It used to be necessary because
> > of the 'size - 1', as size is unsigned.
> >
> > Now you can directly do
> >
> > return size <= TASK_SIZE_MAX - addr;
> >
> > If size is 0, this will always be true (because you already know that addr
> > is not >= TASK_SIZE_MAX
>
> True. What do you think of Linus' comment about always using the
> ppc32 version on ppc64 as well with this?
i.e. something like this folded in:
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 5363f7fc6dd06c..be070254e50943 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -11,26 +11,14 @@
#ifdef __powerpc64__
/* We use TASK_SIZE_USER64 as TASK_SIZE is not constant */
#define TASK_SIZE_MAX TASK_SIZE_USER64
-
-/*
- * This check is sufficient because there is a large enough gap between user
- * addresses and the kernel addresses.
- */
-static inline bool __access_ok(unsigned long addr, unsigned long size)
-{
- return addr < TASK_SIZE_MAX && size < TASK_SIZE_MAX;
-}
-
#else
#define TASK_SIZE_MAX TASK_SIZE
+#endif
static inline bool __access_ok(unsigned long addr, unsigned long size)
{
- if (addr >= TASK_SIZE_MAX)
- return false;
- return size == 0 || size <= TASK_SIZE_MAX - addr;
+ return addr < TASK_SIZE_MAX && size <= TASK_SIZE_MAX - addr;
}
-#endif /* __powerpc64__ */
#define access_ok(addr, size) \
(__chk_user_ptr(addr), \
^ permalink raw reply related
* Re: [PATCH v2] cpuidle-pseries: Fix CEDE latency conversion from tb to us
From: Vaidyanathan Srinivasan @ 2020-09-03 15:51 UTC (permalink / raw)
To: Gautham R. Shenoy
Cc: linux-pm, Rafael J. Wysocki, linux-kernel, Joel Stanley,
linuxppc-dev
In-Reply-To: <1599125247-28488-1-git-send-email-ego@linux.vnet.ibm.com>
* Gautham R Shenoy <ego@linux.vnet.ibm.com> [2020-09-03 14:57:27]:
> From: "Gautham R. Shenoy" <ego@linux.vnet.ibm.com>
>
> commit d947fb4c965c ("cpuidle: pseries: Fixup exit latency for
> CEDE(0)") sets the exit latency of CEDE(0) based on the latency values
> of the Extended CEDE states advertised by the platform. The values
> advertised by the platform are in timebase ticks. However the cpuidle
> framework requires the latency values in microseconds.
>
> If the tb-ticks value advertised by the platform correspond to a value
> smaller than 1us, during the conversion from tb-ticks to microseconds,
> in the current code, the result becomes zero. This is incorrect as it
> puts a CEDE state on par with the snooze state.
>
> This patch fixes this by rounding up the result obtained while
> converting the latency value from tb-ticks to microseconds. It also
> prints a warning in case we discover an extended-cede state with
> wakeup latency to be 0. In such a case, ensure that CEDE(0) has a
> non-zero wakeup latency.
>
> Fixes: commit d947fb4c965c ("cpuidle: pseries: Fixup exit latency for
> CEDE(0)")
>
> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Reviewed-by: Vaidyanathan Srinivasan <svaidy@linux.ibm.com>
> ---
> v1-->v2: Added a warning if a CEDE state has 0 wakeup latency (Suggested by Joel Stanley)
> Also added code to ensure that CEDE(0) has a non-zero wakeup latency.
> drivers/cpuidle/cpuidle-pseries.c | 15 +++++++++++----
> 1 file changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
> index ff6d99e..a2b5c6f 100644
> --- a/drivers/cpuidle/cpuidle-pseries.c
> +++ b/drivers/cpuidle/cpuidle-pseries.c
> @@ -361,7 +361,10 @@ static void __init fixup_cede0_latency(void)
> for (i = 0; i < nr_xcede_records; i++) {
> struct xcede_latency_record *record = &payload->records[i];
> u64 latency_tb = be64_to_cpu(record->latency_ticks);
> - u64 latency_us = tb_to_ns(latency_tb) / NSEC_PER_USEC;
> + u64 latency_us = DIV_ROUND_UP_ULL(tb_to_ns(latency_tb), NSEC_PER_USEC);
This would fix the issue of rounding down to zero.
> +
> + if (latency_us == 0)
> + pr_warn("cpuidle: xcede record %d has an unrealistic latency of 0us.\n", i);
+1 This should not happen.
> if (latency_us < min_latency_us)
> min_latency_us = latency_us;
> @@ -378,10 +381,14 @@ static void __init fixup_cede0_latency(void)
> * Perform the fix-up.
> */
> if (min_latency_us < dedicated_states[1].exit_latency) {
> - u64 cede0_latency = min_latency_us - 1;
> + /*
> + * We set a minimum of 1us wakeup latency for cede0 to
> + * distinguish it from snooze
> + */
> + u64 cede0_latency = 1;
>
> - if (cede0_latency <= 0)
> - cede0_latency = min_latency_us;
> + if (min_latency_us > cede0_latency)
> + cede0_latency = min_latency_us - 1;
Good checks to expect cede latency of 1us or more.
> dedicated_states[1].exit_latency = cede0_latency;
> dedicated_states[1].target_residency = 10 * (cede0_latency);
--Vaidy
^ permalink raw reply
* Re: remove the last set_fs() in common code, and remove it for x86 and powerpc v3
From: Linus Torvalds @ 2020-09-03 15:49 UTC (permalink / raw)
To: Christoph Hellwig
Cc: linux-arch, Kees Cook, the arch/x86 maintainers,
Linux Kernel Mailing List, Luis Chamberlain, Al Viro,
linux-fsdevel, linuxppc-dev, Alexey Dobriyan
In-Reply-To: <20200903142242.925828-1-hch@lst.de>
On Thu, Sep 3, 2020 at 7:22 AM Christoph Hellwig <hch@lst.de> wrote:
>
> [Note to Linus: I'd like to get this into linux-next rather earlier
> than later. Do you think it is ok to add this tree to linux-next?]
This whole series looks really good to me now, with each patch looking
like a clear cleanup on its own.
So ack on the whole series, and yes, please get this into linux-next
and ready for 5.10. Whether through Al or something else.
Thanks,
Linus
^ permalink raw reply
* Re: [PATCH 14/14] powerpc: remove address space overrides using set_fs()
From: Christoph Hellwig @ 2020-09-03 15:49 UTC (permalink / raw)
To: Christophe Leroy
Cc: linux-arch, linuxppc-dev, Kees Cook, x86, linux-kernel,
Alexey Dobriyan, Luis Chamberlain, Al Viro, linux-fsdevel,
Linus Torvalds, Christoph Hellwig
In-Reply-To: <e7d2d231-5658-a4d3-0495-2af62f34aa34@csgroup.eu>
On Thu, Sep 03, 2020 at 05:43:25PM +0200, Christophe Leroy wrote:
>
>
> Le 03/09/2020 à 16:22, Christoph Hellwig a écrit :
>> Stop providing the possibility to override the address space using
>> set_fs() now that there is no need for that any more.
>>
>> Signed-off-by: Christoph Hellwig <hch@lst.de>
>> ---
>
>
>> -static inline int __access_ok(unsigned long addr, unsigned long size,
>> - mm_segment_t seg)
>> +static inline bool __access_ok(unsigned long addr, unsigned long size)
>> {
>> - if (addr > seg.seg)
>> - return 0;
>> - return (size == 0 || size - 1 <= seg.seg - addr);
>> + if (addr >= TASK_SIZE_MAX)
>> + return false;
>> + return size == 0 || size <= TASK_SIZE_MAX - addr;
>> }
>
> You don't need to test size == 0 anymore. It used to be necessary because
> of the 'size - 1', as size is unsigned.
>
> Now you can directly do
>
> return size <= TASK_SIZE_MAX - addr;
>
> If size is 0, this will always be true (because you already know that addr
> is not >= TASK_SIZE_MAX
True. What do you think of Linus' comment about always using the
ppc32 version on ppc64 as well with this?
^ permalink raw reply
* Re: [PATCH 14/14] powerpc: remove address space overrides using set_fs()
From: Christophe Leroy @ 2020-09-03 15:43 UTC (permalink / raw)
To: Christoph Hellwig, Linus Torvalds, Al Viro, Michael Ellerman, x86
Cc: linux-arch, Kees Cook, linux-kernel, Luis Chamberlain,
linux-fsdevel, linuxppc-dev, Alexey Dobriyan
In-Reply-To: <20200903142242.925828-15-hch@lst.de>
Le 03/09/2020 à 16:22, Christoph Hellwig a écrit :
> Stop providing the possibility to override the address space using
> set_fs() now that there is no need for that any more.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>
> -static inline int __access_ok(unsigned long addr, unsigned long size,
> - mm_segment_t seg)
> +static inline bool __access_ok(unsigned long addr, unsigned long size)
> {
> - if (addr > seg.seg)
> - return 0;
> - return (size == 0 || size - 1 <= seg.seg - addr);
> + if (addr >= TASK_SIZE_MAX)
> + return false;
> + return size == 0 || size <= TASK_SIZE_MAX - addr;
> }
You don't need to test size == 0 anymore. It used to be necessary
because of the 'size - 1', as size is unsigned.
Now you can directly do
return size <= TASK_SIZE_MAX - addr;
If size is 0, this will always be true (because you already know that
addr is not >= TASK_SIZE_MAX
Christophe
^ permalink raw reply
* Re: remove the last set_fs() in common code, and remove it for x86 and powerpc v3
From: Christoph Hellwig @ 2020-09-03 14:40 UTC (permalink / raw)
To: Al Viro
Cc: linux-arch, linuxppc-dev, Kees Cook, x86, linux-kernel,
Alexey Dobriyan, Luis Chamberlain, linux-fsdevel, Linus Torvalds,
Christoph Hellwig
In-Reply-To: <20200903143629.GN1236603@ZenIV.linux.org.uk>
On Thu, Sep 03, 2020 at 03:36:29PM +0100, Al Viro wrote:
> FWIW, vfs.git#for-next is always a merge of independent branches; I don't
> put stuff directly into #for-next - too easy to lose that way.
>
> IOW, that would be something like #base.set_fs, included into #for-next
> merge set. And I've no problem with never-rebased branches...
>
> Where in the mainline are the most recent prereqs of this series?
I can't think of anything past -rc1, but I haven't actually checked.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox