* [PATCH v3 03/10] powerpc/smp: Move powerpc_topology above
From: Srikar Dronamraju @ 2020-07-23 8:51 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Gautham R Shenoy, Michael Neuling,
Srikar Dronamraju, Peter Zijlstra, Jordan Niethe, LKML,
Nicholas Piggin, Valentin Schneider, Oliver O'Halloran,
linuxppc-dev, Ingo Molnar
In-Reply-To: <20200723085116.4731-1-srikar@linux.vnet.ibm.com>
Move the powerpc_topology definition, along with the helpers it references, higher up in the file.
This allows functions in this file to use it without needing forward declarations.
No functional changes.
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Anton Blanchard <anton@ozlabs.org>
Cc: Oliver O'Halloran <oohall@gmail.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Jordan Niethe <jniethe5@gmail.com>
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
arch/powerpc/kernel/smp.c | 116 +++++++++++++++++++-------------------
1 file changed, 58 insertions(+), 58 deletions(-)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 283a04e54f52..a685915e5941 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -818,6 +818,64 @@ static int init_cpu_l1_cache_map(int cpu)
return err;
}
+static bool shared_caches;
+
+#ifdef CONFIG_SCHED_SMT
+/* cpumask of CPUs with asymmetric SMT dependency */
+static int powerpc_smt_flags(void)
+{
+ int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+
+ if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+ printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+ flags |= SD_ASYM_PACKING;
+ }
+ return flags;
+}
+#endif
+
+/*
+ * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
+ * This topology makes it *much* cheaper to migrate tasks between adjacent cores
+ * since the migrated task remains cache hot. We want to take advantage of this
+ * at the scheduler level so an extra topology level is required.
+ */
+static int powerpc_shared_cache_flags(void)
+{
+ return SD_SHARE_PKG_RESOURCES;
+}
+
+/*
+ * We can't just pass cpu_l2_cache_mask() directly because
+ * returns a non-const pointer and the compiler barfs on that.
+ */
+static const struct cpumask *shared_cache_mask(int cpu)
+{
+ if (shared_caches)
+ return cpu_l2_cache_mask(cpu);
+
+ if (has_big_cores)
+ return cpu_smallcore_mask(cpu);
+
+ return per_cpu(cpu_sibling_map, cpu);
+}
+
+#ifdef CONFIG_SCHED_SMT
+static const struct cpumask *smallcore_smt_mask(int cpu)
+{
+ return cpu_smallcore_mask(cpu);
+}
+#endif
+
+static struct sched_domain_topology_level powerpc_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+ { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+ { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
static int init_big_cores(void)
{
int cpu;
@@ -1247,8 +1305,6 @@ static void add_cpu_to_masks(int cpu)
set_cpus_related(cpu, i, cpu_core_mask);
}
-static bool shared_caches;
-
/* Activate a secondary processor. */
void start_secondary(void *unused)
{
@@ -1312,62 +1368,6 @@ int setup_profiling_timer(unsigned int multiplier)
return 0;
}
-#ifdef CONFIG_SCHED_SMT
-/* cpumask of CPUs with asymmetric SMT dependency */
-static int powerpc_smt_flags(void)
-{
- int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
-
- if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
- printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
- flags |= SD_ASYM_PACKING;
- }
- return flags;
-}
-#endif
-
-/*
- * P9 has a slightly odd architecture where pairs of cores share an L2 cache.
- * This topology makes it *much* cheaper to migrate tasks between adjacent cores
- * since the migrated task remains cache hot. We want to take advantage of this
- * at the scheduler level so an extra topology level is required.
- */
-static int powerpc_shared_cache_flags(void)
-{
- return SD_SHARE_PKG_RESOURCES;
-}
-
-/*
- * We can't just pass cpu_l2_cache_mask() directly because
- * returns a non-const pointer and the compiler barfs on that.
- */
-static const struct cpumask *shared_cache_mask(int cpu)
-{
- if (shared_caches)
- return cpu_l2_cache_mask(cpu);
-
- if (has_big_cores)
- return cpu_smallcore_mask(cpu);
-
- return per_cpu(cpu_sibling_map, cpu);
-}
-
-#ifdef CONFIG_SCHED_SMT
-static const struct cpumask *smallcore_smt_mask(int cpu)
-{
- return cpu_smallcore_mask(cpu);
-}
-#endif
-
-static struct sched_domain_topology_level powerpc_topology[] = {
-#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
-#endif
- { shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE) },
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
- { NULL, },
-};
-
void __init smp_cpus_done(unsigned int max_cpus)
{
/*
--
2.18.2
^ permalink raw reply related
* [PATCH v3 02/10] powerpc/smp: Merge Power9 topology with Power topology
From: Srikar Dronamraju @ 2020-07-23 8:51 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Gautham R Shenoy, Michael Neuling,
Srikar Dronamraju, Peter Zijlstra, Jordan Niethe, LKML,
Nicholas Piggin, Valentin Schneider, Oliver O'Halloran,
linuxppc-dev, Ingo Molnar
In-Reply-To: <20200723085116.4731-1-srikar@linux.vnet.ibm.com>
A new sched_domain_topology_level was added just for Power9. However, the
same can be achieved by merging power9_topology into powerpc_topology,
which makes the code simpler, especially when adding a new sched
domain.
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Anton Blanchard <anton@ozlabs.org>
Cc: Oliver O'Halloran <oohall@gmail.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Jordan Niethe <jniethe5@gmail.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
Changelog v1 -> v2:
Replaced a reference to cpu_smt_mask with per_cpu(cpu_sibling_map, cpu)
since cpu_smt_mask is only defined under CONFIG_SCHED_SMT
arch/powerpc/kernel/smp.c | 33 ++++++++++-----------------------
1 file changed, 10 insertions(+), 23 deletions(-)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index edf94ca64eea..283a04e54f52 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1313,7 +1313,7 @@ int setup_profiling_timer(unsigned int multiplier)
}
#ifdef CONFIG_SCHED_SMT
-/* cpumask of CPUs with asymetric SMT dependancy */
+/* cpumask of CPUs with asymmetric SMT dependency */
static int powerpc_smt_flags(void)
{
int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
@@ -1326,14 +1326,6 @@ static int powerpc_smt_flags(void)
}
#endif
-static struct sched_domain_topology_level powerpc_topology[] = {
-#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
-#endif
- { cpu_cpu_mask, SD_INIT_NAME(DIE) },
- { NULL, },
-};
-
/*
* P9 has a slightly odd architecture where pairs of cores share an L2 cache.
* This topology makes it *much* cheaper to migrate tasks between adjacent cores
@@ -1351,7 +1343,13 @@ static int powerpc_shared_cache_flags(void)
*/
static const struct cpumask *shared_cache_mask(int cpu)
{
- return cpu_l2_cache_mask(cpu);
+ if (shared_caches)
+ return cpu_l2_cache_mask(cpu);
+
+ if (has_big_cores)
+ return cpu_smallcore_mask(cpu);
+
+ return per_cpu(cpu_sibling_map, cpu);
}
#ifdef CONFIG_SCHED_SMT
@@ -1361,7 +1359,7 @@ static const struct cpumask *smallcore_smt_mask(int cpu)
}
#endif
-static struct sched_domain_topology_level power9_topology[] = {
+static struct sched_domain_topology_level powerpc_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
#endif
@@ -1386,21 +1384,10 @@ void __init smp_cpus_done(unsigned int max_cpus)
#ifdef CONFIG_SCHED_SMT
if (has_big_cores) {
pr_info("Big cores detected but using small core scheduling\n");
- power9_topology[0].mask = smallcore_smt_mask;
powerpc_topology[0].mask = smallcore_smt_mask;
}
#endif
- /*
- * If any CPU detects that it's sharing a cache with another CPU then
- * use the deeper topology that is aware of this sharing.
- */
- if (shared_caches) {
- pr_info("Using shared cache scheduler topology\n");
- set_sched_topology(power9_topology);
- } else {
- pr_info("Using standard scheduler topology\n");
- set_sched_topology(powerpc_topology);
- }
+ set_sched_topology(powerpc_topology);
}
#ifdef CONFIG_HOTPLUG_CPU
--
2.18.2
^ permalink raw reply related
* [PATCH v3 01/10] powerpc/smp: Fix a warning under !NEED_MULTIPLE_NODES
From: Srikar Dronamraju @ 2020-07-23 8:51 UTC (permalink / raw)
To: Michael Ellerman
Cc: Nathan Lynch, Gautham R Shenoy, Michael Neuling,
Srikar Dronamraju, Peter Zijlstra, Jordan Niethe, LKML,
Nicholas Piggin, Valentin Schneider, Oliver O'Halloran,
linuxppc-dev, Ingo Molnar
In-Reply-To: <20200723085116.4731-1-srikar@linux.vnet.ibm.com>
Fix a build warning in a non-CONFIG_NEED_MULTIPLE_NODES build:
"error: _numa_cpu_lookup_table_ undeclared"
Cc: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>
Cc: LKML <linux-kernel@vger.kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Anton Blanchard <anton@ozlabs.org>
Cc: Oliver O'Halloran <oohall@gmail.com>
Cc: Nathan Lynch <nathanl@linux.ibm.com>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Gautham R Shenoy <ego@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Valentin Schneider <valentin.schneider@arm.com>
Cc: Jordan Niethe <jniethe5@gmail.com>
Reviewed-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com>
Signed-off-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
Changelog v2 -> v3:
Removed node caching part. Rewrote the Commit msg (Michael Ellerman)
Renamed to powerpc/smp: Fix a warning under !NEED_MULTIPLE_NODES
arch/powerpc/kernel/smp.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 73199470c265..edf94ca64eea 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -860,6 +860,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
GFP_KERNEL, cpu_to_node(cpu));
zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
+#ifdef CONFIG_NEED_MULTIPLE_NODES
/*
* numa_node_id() works after this.
*/
@@ -868,6 +869,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
set_cpu_numa_mem(cpu,
local_memory_node(numa_cpu_lookup_table[cpu]));
}
+#endif
}
/* Init the cpumasks so the boot CPU is related to itself */
--
2.18.2
^ permalink raw reply related
* Re: [v4] powerpc/perf: Initialize power10 PMU registers in cpu setup routine
From: Jordan Niethe @ 2020-07-23 7:40 UTC (permalink / raw)
To: Athira Rajeev; +Cc: Michael Neuling, maddy, linuxppc-dev
In-Reply-To: <1595489557-2047-1-git-send-email-atrajeev@linux.vnet.ibm.com>
On Thu, Jul 23, 2020 at 5:32 PM Athira Rajeev
<atrajeev@linux.vnet.ibm.com> wrote:
>
> Initialize Monitor Mode Control Register 3 (MMCR3)
> SPR which is new in power10. For PowerISA v3.1, BHRB disable
> is controlled via Monitor Mode Control Register A (MMCRA) bit,
> namely "BHRB Recording Disable (BHRBRD)". This patch also initializes
> MMCRA BHRBRD to disable BHRB feature at boot for power10.
>
> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Reviewed-by: Jordan Niethe <jniethe5@gmail.com>
> ---
> Dependency:
> - On power10 PMU base enablement series V3:
> https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=190462
>
> Changes from v3 -> v4
> - Addressed review comments from Jordan and Michael Ellerman.
> This patch was initially part of Power10 PMU base enablement
> series. Moving this as separate patch as suggested by Michael
> Ellerman. Hence dependency of initial series Patch 7 which defines
> MMCRA_BHRB_DISABLE. Addressed review comments from Jordan to make
> sure existing PMU function (__INIT_PMU) will not overwrite ISA 3.1
> updates
>
> Changes from v2 -> v3
> - Addressed review comment from Michael Ellerman to
> call PMU init from __setup_cpu_power10
>
> arch/powerpc/kernel/cpu_setup_power.S | 19 +++++++++++++++----
> 1 file changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
> index efdcfa7..3fa6eef 100644
> --- a/arch/powerpc/kernel/cpu_setup_power.S
> +++ b/arch/powerpc/kernel/cpu_setup_power.S
> @@ -94,13 +94,15 @@ _GLOBAL(__restore_cpu_power8)
> _GLOBAL(__setup_cpu_power10)
> mflr r11
> bl __init_FSCR_power10
> + bl __init_PMU
> + bl __init_PMU_ISA31
> b 1f
>
> _GLOBAL(__setup_cpu_power9)
> mflr r11
> bl __init_FSCR
> -1: bl __init_PMU
> - bl __init_hvmode_206
> + bl __init_PMU
> +1: bl __init_hvmode_206
> mtlr r11
> beqlr
> li r0,0
> @@ -124,13 +126,15 @@ _GLOBAL(__setup_cpu_power9)
> _GLOBAL(__restore_cpu_power10)
> mflr r11
> bl __init_FSCR_power10
> + bl __init_PMU
> + bl __init_PMU_ISA31
> b 1f
>
> _GLOBAL(__restore_cpu_power9)
> mflr r11
> bl __init_FSCR
> -1: bl __init_PMU
> - mfmsr r3
> + bl __init_PMU
> +1: mfmsr r3
> rldicl. r0,r3,4,63
> mtlr r11
> beqlr
> @@ -233,3 +237,10 @@ __init_PMU_ISA207:
> li r5,0
> mtspr SPRN_MMCRS,r5
> blr
> +
> +__init_PMU_ISA31:
> + li r5,0
> + mtspr SPRN_MMCR3,r5
> + LOAD_REG_IMMEDIATE(r5, MMCRA_BHRB_DISABLE)
> + mtspr SPRN_MMCRA,r5
> + blr
> --
> 1.8.3.1
>
^ permalink raw reply
* [v4] powerpc/perf: Initialize power10 PMU registers in cpu setup routine
From: Athira Rajeev @ 2020-07-23 7:32 UTC (permalink / raw)
To: mpe; +Cc: jniethe5, mikey, maddy, linuxppc-dev
Initialize Monitor Mode Control Register 3 (MMCR3)
SPR which is new in power10. For PowerISA v3.1, BHRB disable
is controlled via Monitor Mode Control Register A (MMCRA) bit,
namely "BHRB Recording Disable (BHRBRD)". This patch also initializes
MMCRA BHRBRD to disable BHRB feature at boot for power10.
Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
---
Dependency:
- On power10 PMU base enablement series V3:
https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=190462
Changes from v3 -> v4
- Addressed review comments from Jordan and Michael Ellerman.
This patch was initially part of Power10 PMU base enablement
series. Moving this as separate patch as suggested by Michael
Ellerman. Hence the dependency on Patch 7 of the initial series, which
defines MMCRA_BHRB_DISABLE. Addressed review comments from Jordan to make
sure the existing PMU function (__init_PMU) will not overwrite the ISA 3.1
updates.
Changes from v2 -> v3
- Addressed review comment from Michael Ellerman to
call PMU init from __setup_cpu_power10
arch/powerpc/kernel/cpu_setup_power.S | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index efdcfa7..3fa6eef 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -94,13 +94,15 @@ _GLOBAL(__restore_cpu_power8)
_GLOBAL(__setup_cpu_power10)
mflr r11
bl __init_FSCR_power10
+ bl __init_PMU
+ bl __init_PMU_ISA31
b 1f
_GLOBAL(__setup_cpu_power9)
mflr r11
bl __init_FSCR
-1: bl __init_PMU
- bl __init_hvmode_206
+ bl __init_PMU
+1: bl __init_hvmode_206
mtlr r11
beqlr
li r0,0
@@ -124,13 +126,15 @@ _GLOBAL(__setup_cpu_power9)
_GLOBAL(__restore_cpu_power10)
mflr r11
bl __init_FSCR_power10
+ bl __init_PMU
+ bl __init_PMU_ISA31
b 1f
_GLOBAL(__restore_cpu_power9)
mflr r11
bl __init_FSCR
-1: bl __init_PMU
- mfmsr r3
+ bl __init_PMU
+1: mfmsr r3
rldicl. r0,r3,4,63
mtlr r11
beqlr
@@ -233,3 +237,10 @@ __init_PMU_ISA207:
li r5,0
mtspr SPRN_MMCRS,r5
blr
+
+__init_PMU_ISA31:
+ li r5,0
+ mtspr SPRN_MMCR3,r5
+ LOAD_REG_IMMEDIATE(r5, MMCRA_BHRB_DISABLE)
+ mtspr SPRN_MMCRA,r5
+ blr
--
1.8.3.1
^ permalink raw reply related
* [powerpc:merge] BUILD SUCCESS c27fe454aff795023d2f3f90f41eb1a3104e614f
From: kernel test robot @ 2020-07-23 7:28 UTC (permalink / raw)
To: Michael Ellerman; +Cc: linuxppc-dev
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git merge
branch HEAD: c27fe454aff795023d2f3f90f41eb1a3104e614f Automatic merge of 'master', 'next' and 'fixes' (2020-07-21 00:00)
elapsed time: 3913m
configs tested: 111
configs skipped: 2
The following configs have been built successfully.
More configs may be tested in the coming days.
arm64 allyesconfig
arm64 defconfig
arm64 allmodconfig
arm64 allnoconfig
arm allyesconfig
arm allmodconfig
arm allnoconfig
arm defconfig
sh rsk7264_defconfig
c6x defconfig
c6x evmc6474_defconfig
arm shannon_defconfig
powerpc ppc64_defconfig
arm footbridge_defconfig
s390 alldefconfig
s390 debug_defconfig
arm pxa3xx_defconfig
m68k m5407c3_defconfig
sh sdk7780_defconfig
c6x dsk6455_defconfig
arm h5000_defconfig
i386 allyesconfig
i386 defconfig
i386 debian-10.3
i386 allnoconfig
ia64 allmodconfig
ia64 defconfig
ia64 allnoconfig
ia64 allyesconfig
m68k allmodconfig
m68k allnoconfig
m68k sun3_defconfig
m68k defconfig
m68k allyesconfig
nios2 defconfig
nios2 allyesconfig
openrisc defconfig
c6x allyesconfig
c6x allnoconfig
openrisc allyesconfig
nds32 defconfig
nds32 allnoconfig
csky allyesconfig
csky defconfig
alpha defconfig
alpha allyesconfig
xtensa allyesconfig
h8300 allyesconfig
h8300 allmodconfig
xtensa defconfig
arc defconfig
arc allyesconfig
sh allmodconfig
sh allnoconfig
microblaze allnoconfig
mips allyesconfig
mips allnoconfig
mips allmodconfig
parisc allnoconfig
parisc defconfig
parisc allyesconfig
parisc allmodconfig
powerpc defconfig
powerpc allyesconfig
powerpc rhel-kconfig
powerpc allmodconfig
powerpc allnoconfig
i386 randconfig-a001-20200719
i386 randconfig-a006-20200719
i386 randconfig-a002-20200719
i386 randconfig-a005-20200719
i386 randconfig-a003-20200719
i386 randconfig-a004-20200719
x86_64 randconfig-a014-20200720
x86_64 randconfig-a015-20200720
x86_64 randconfig-a016-20200720
x86_64 randconfig-a012-20200720
x86_64 randconfig-a013-20200720
x86_64 randconfig-a011-20200720
x86_64 randconfig-a005-20200719
x86_64 randconfig-a002-20200719
x86_64 randconfig-a006-20200719
x86_64 randconfig-a001-20200719
x86_64 randconfig-a003-20200719
x86_64 randconfig-a004-20200719
i386 randconfig-a015-20200719
i386 randconfig-a011-20200719
i386 randconfig-a016-20200719
i386 randconfig-a012-20200719
i386 randconfig-a013-20200719
i386 randconfig-a014-20200719
riscv allyesconfig
riscv allnoconfig
riscv defconfig
riscv allmodconfig
s390 allyesconfig
s390 allnoconfig
s390 allmodconfig
s390 defconfig
sparc allyesconfig
sparc defconfig
sparc64 defconfig
sparc64 allnoconfig
sparc64 allyesconfig
sparc64 allmodconfig
x86_64 rhel-7.6-kselftests
x86_64 rhel-8.3
x86_64 kexec
x86_64 rhel
x86_64 lkp
x86_64 fedora-25
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
^ permalink raw reply
* Re: [PATCH 2/2] powerpc/64s: system call support for scv/rfscv instructions
From: Michael Ellerman @ 2020-07-23 6:47 UTC (permalink / raw)
To: Nicholas Piggin, linuxppc-dev; +Cc: libc-dev, musl, Nicholas Piggin, linux-api
In-Reply-To: <20200611081203.995112-3-npiggin@gmail.com>
Nicholas Piggin <npiggin@gmail.com> writes:
> diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
> index 2a39c716c343..b2bdc4de1292 100644
> --- a/arch/powerpc/include/asm/ppc-opcode.h
> +++ b/arch/powerpc/include/asm/ppc-opcode.h
> @@ -257,6 +257,7 @@
> #define PPC_INST_MFVSRD 0x7c000066
> #define PPC_INST_MTVSRD 0x7c000166
> #define PPC_INST_SC 0x44000002
> +#define PPC_INST_SCV 0x44000001
...
> @@ -411,6 +412,7 @@
...
> +#define __PPC_LEV(l) (((l) & 0x7f) << 5)
These conflicted and didn't seem to be used so I dropped them.
> diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
> index 5abe98216dc2..161bfccbc309 100644
> --- a/arch/powerpc/lib/sstep.c
> +++ b/arch/powerpc/lib/sstep.c
> @@ -3378,6 +3382,16 @@ int emulate_step(struct pt_regs *regs, struct ppc_inst instr)
> regs->msr = MSR_KERNEL;
> return 1;
>
> + case SYSCALL_VECTORED_0: /* scv 0 */
> + regs->gpr[9] = regs->gpr[13];
> + regs->gpr[10] = MSR_KERNEL;
> + regs->gpr[11] = regs->nip + 4;
> + regs->gpr[12] = regs->msr & MSR_MASK;
> + regs->gpr[13] = (unsigned long) get_paca();
> + regs->nip = (unsigned long) &system_call_vectored_emulate;
> + regs->msr = MSR_KERNEL;
> + return 1;
> +
This broke the ppc64e build:
ld: arch/powerpc/lib/sstep.o:(.toc+0x0): undefined reference to `system_call_vectored_emulate'
make[1]: *** [/home/michael/linux/Makefile:1139: vmlinux] Error 1
I wrapped it in #ifdef CONFIG_PPC64_BOOK3S.
cheers
^ permalink raw reply
* Re: [PATCH v4 07/12] ppc64/kexec_file: add support to relocate purgatory
From: Michael Ellerman @ 2020-07-23 6:47 UTC (permalink / raw)
To: Hari Bathini, Andrew Morton
Cc: kernel test robot, Pingfan Liu, Kexec-ml, Nayna Jain,
Petr Tesarik, Mahesh J Salgaonkar, Mimi Zohar, lkml, linuxppc-dev,
Sourabh Jain, Vivek Goyal, Dave Young, Thiago Jung Bauermann,
Eric Biederman
In-Reply-To: <2037fa32-28be-5995-1c22-c8b01cafe088@linux.ibm.com>
Hari Bathini <hbathini@linux.ibm.com> writes:
> On 22/07/20 9:55 am, Michael Ellerman wrote:
>> Hari Bathini <hbathini@linux.ibm.com> writes:
>>> Right now purgatory implementation is only minimal. But if purgatory
>>> code is to be enhanced to copy memory to the backup region and verify
>>> sha256 digest, relocations may have to be applied to the purgatory.
>>> So, add support to relocate purgatory in kexec_file_load system call
>>> by setting up TOC pointer and applying RELA relocations as needed.
>>>
>>> Reported-by: kernel test robot <lkp@intel.com>
>>> [lkp: In v1, 'struct mem_sym' was declared in parameter list]
>>> Signed-off-by: Hari Bathini <hbathini@linux.ibm.com>
>>> ---
>>>
>>> * Michael, can you share your opinion on the below:
>>> - https://lore.kernel.org/patchwork/patch/1272027/
>>> - My intention in cover note.
>>
>> It seems like a lot of complexity for little benefit.
>>
>> AFAICS your final purgatory_64.c is only 36 lines, and all it does is a
>> single (open coded) memcpy().
>>
>> It seems like we could write that in not many more lines of assembler
>> and avoid all this code.
>
> Hi Michael,
>
> I am not sure if you would agree with me on this, but I am looking at the
> purgatory code as work in progress. As mentioned in the cover note, I intend
> to add log messaging, sha256 verification into purgatory. And also change it
> to position independent executable after moving common purgatory code (sha256
> verification) to arch-independent code.
I've never understood the desire to put more logic into purgatory. It's
the absolute worst place to detect that something's gone wrong, because
we have no facilities in there to do anything useful. We don't even know
what platform we're on.
> When I initially took this up, I wanted to add all the above changes too, but
> cut down on it, in the interest of time, first to get kdump (kexec -s -p)
> working in v5.9 merge window.
>
> But as the logic in patches 07/12 & 08/12 has been tested in kexec-tools code
> a lot of times and there are unlikely to be any changes to them except for
> __kexec_do_relocs() function (afaics), when -PIE would be used, I submitted them.
> With patch 09/12, I tried for a change that uses relocations while is minimal
> for now.
>
> Would you prefer it to be absolutely minimal by dropping patches 7 & 8 for
> now and writing the backup data copy code in assembler?
Yes please.
cheers
^ permalink raw reply
* Re: [PATCHv3 2/2] powerpc/pseries: update device tree before ejecting hotplug uevents
From: Michael Ellerman @ 2020-07-23 6:41 UTC (permalink / raw)
To: Pingfan Liu; +Cc: Nathan Lynch, Kexec Mailing List, linuxppc-dev, Hari Bathini
In-Reply-To: <CAFgQCTsgX9XWJ476dxT2csTuuhpaO3KSZN-EewZiZ0mBj3N4aQ@mail.gmail.com>
Pingfan Liu <kernelfans@gmail.com> writes:
> On Wed, Jul 22, 2020 at 12:57 PM Michael Ellerman <mpe@ellerman.id.au> wrote:
>>
>> Pingfan Liu <kernelfans@gmail.com> writes:
>> > A bug is observed on pseries by taking the following steps on rhel:
>> ^
>> RHEL
>>
>> I assume it happens on mainline too?
> Yes, it does.
>>
> [...]
>> > diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
>> > index 1a3ac3b..def8cb3f 100644
>> > --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
>> > +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
>> > @@ -372,6 +372,7 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
>> > invalidate_lmb_associativity_index(lmb);
>> > lmb_clear_nid(lmb);
>> > lmb->flags &= ~DRCONF_MEM_ASSIGNED;
>> > + drmem_update_dt();
>>
>> No error checking?
> Hmm, here should be a more careful design. Please see the comment at the end.
>>
>> > __remove_memory(nid, base_addr, block_sz);
>> >
>> > @@ -607,6 +608,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
>> >
>> > lmb_set_nid(lmb);
>> > lmb->flags |= DRCONF_MEM_ASSIGNED;
>> > + drmem_update_dt();
>>
>> And here ..
>> >
>> > block_sz = memory_block_size_bytes();
>> >
>> > @@ -625,6 +627,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
>> > invalidate_lmb_associativity_index(lmb);
>> > lmb_clear_nid(lmb);
>> > lmb->flags &= ~DRCONF_MEM_ASSIGNED;
>> > + drmem_update_dt();
>>
>>
>> And here ..
>>
>> > __remove_memory(nid, base_addr, block_sz);
>> > }
>> > @@ -877,9 +880,6 @@ int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
>> > break;
>> > }
>> >
>> > - if (!rc)
>> > - rc = drmem_update_dt();
>> > -
>> > unlock_device_hotplug();
>> > return rc;
>>
>> Whereas previously we did check it.
>
> drmem_update_dt() fails iff allocating memory fail.
That's true currently, but it might change in future.
> And in the failed case, even the original code does not roll back the
> effect of __add_memory()/__remove_memory().
Yeah hard to know what the desired behaviour is. If something fails we
at least need to print a message though, not silently swallow it.
> And I plan to do the following in V4: if drmem_update_dt() fails in
> dlpar_add_lmb(), then bails out immediately.
That sounds reasonable.
cheers
^ permalink raw reply
* Re: [PATCH v6 3/5] KVM: PPC: clean up redundant kvm_run parameters in assembly
From: Paul Mackerras @ 2020-07-23 6:19 UTC (permalink / raw)
To: Tianjia Zhang
Cc: wanpengli, kvm, david, heiko.carstens, peterx, linux-mips, hpa,
kvmarm, linux-s390, frankja, chenhuacai, maz, joro, x86,
borntraeger, mingo, julien.thierry.kdev, thuth, gor,
suzuki.poulose, kvm-ppc, bp, tglx, linux-arm-kernel, jmattson,
tsbogend, cohuck, christoffer.dall, sean.j.christopherson,
linux-kernel, james.morse, pbonzini, vkuznets, linuxppc-dev
In-Reply-To: <20200623131418.31473-4-tianjia.zhang@linux.alibaba.com>
On Tue, Jun 23, 2020 at 09:14:16PM +0800, Tianjia Zhang wrote:
> In the current kvm version, 'kvm_run' has been included in the 'kvm_vcpu'
> structure. For historical reasons, many kvm-related function parameters
> retain the 'kvm_run' and 'kvm_vcpu' parameters at the same time. This
> patch does a unified cleanup of these remaining redundant parameters.
Thanks, patch applied to my kvm-ppc-next branch, with fixes.
Paul.
^ permalink raw reply
* Re: [PATCH] powerpc/kvm: Enable support for ISA v3.1 guests
From: Paul Mackerras @ 2020-07-23 6:21 UTC (permalink / raw)
To: Alistair Popple; +Cc: mikey, linuxppc-dev, kvm-ppc, ravi.bangoria
In-Reply-To: <20200602055325.6102-1-alistair@popple.id.au>
On Tue, Jun 02, 2020 at 03:53:25PM +1000, Alistair Popple wrote:
> Adds support for emulating ISAv3.1 guests by adding the appropriate PCR
> and FSCR bits.
>
> Signed-off-by: Alistair Popple <alistair@popple.id.au>
Thanks, patch applied to my kvm-ppc-next branch.
Paul.
^ permalink raw reply
* Re: [PATCH kernel] KVM: PPC: Protect kvm_vcpu_read_guest with srcu locks
From: Paul Mackerras @ 2020-07-23 6:19 UTC (permalink / raw)
To: Alexey Kardashevskiy; +Cc: linuxppc-dev, kvm-ppc
In-Reply-To: <20200609021230.103494-1-aik@ozlabs.ru>
On Tue, Jun 09, 2020 at 12:12:29PM +1000, Alexey Kardashevskiy wrote:
> The kvm_vcpu_read_guest/kvm_vcpu_write_guest used for nested guests
> eventually call srcu_dereference_check to dereference a memslot and
> lockdep produces a warning as neither kvm->slots_lock nor
> kvm->srcu lock is held and kvm->users_count is above zero (>100 in fact).
>
> This wraps mentioned VCPU read/write helpers in srcu read lock/unlock as
> it is done in other places. This uses vcpu->srcu_idx when possible.
>
> These helpers are only used for nested KVM so this may explain why
> we did not see these before.
>
> Here is an example of a warning:
>
> =============================
> WARNING: suspicious RCU usage
> 5.7.0-rc3-le_dma-bypass.3.2_a+fstn1 #897 Not tainted
> -----------------------------
> include/linux/kvm_host.h:633 suspicious rcu_dereference_check() usage!
>
> other info that might help us debug this:
>
> rcu_scheduler_active = 2, debug_locks = 1
> 1 lock held by qemu-system-ppc/2752:
> #0: c000200359016be0 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0x144/0xd80 [kvm]
>
> stack backtrace:
> CPU: 80 PID: 2752 Comm: qemu-system-ppc Not tainted 5.7.0-rc3-le_dma-bypass.3.2_a+fstn1 #897
> Call Trace:
> [c0002003591ab240] [c000000000b23ab4] dump_stack+0x190/0x25c (unreliable)
> [c0002003591ab2b0] [c00000000023f954] lockdep_rcu_suspicious+0x140/0x164
> [c0002003591ab330] [c008000004a445f8] kvm_vcpu_gfn_to_memslot+0x4c0/0x510 [kvm]
> [c0002003591ab3a0] [c008000004a44c18] kvm_vcpu_read_guest+0xa0/0x180 [kvm]
> [c0002003591ab410] [c008000004ff9bd8] kvmhv_enter_nested_guest+0x90/0xb80 [kvm_hv]
> [c0002003591ab980] [c008000004fe07bc] kvmppc_pseries_do_hcall+0x7b4/0x1c30 [kvm_hv]
> [c0002003591aba10] [c008000004fe5d30] kvmppc_vcpu_run_hv+0x10a8/0x1a30 [kvm_hv]
> [c0002003591abae0] [c008000004a5d954] kvmppc_vcpu_run+0x4c/0x70 [kvm]
> [c0002003591abb10] [c008000004a56e54] kvm_arch_vcpu_ioctl_run+0x56c/0x7c0 [kvm]
> [c0002003591abba0] [c008000004a3ddc4] kvm_vcpu_ioctl+0x4ac/0xd80 [kvm]
> [c0002003591abd20] [c0000000006ebb58] ksys_ioctl+0x188/0x210
> [c0002003591abd70] [c0000000006ebc28] sys_ioctl+0x48/0xb0
> [c0002003591abdb0] [c000000000042764] system_call_exception+0x1d4/0x2e0
> [c0002003591abe20] [c00000000000cce8] system_call_common+0xe8/0x214
>
> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Thanks, patch applied to my kvm-ppc-next branch.
Paul.
^ permalink raw reply
* Re: [PATCH] KVM: PPC: Book3S HV: increase KVMPPC_NR_LPIDS on POWER8 and POWER9
From: Paul Mackerras @ 2020-07-23 6:20 UTC (permalink / raw)
To: Cédric Le Goater; +Cc: kvm-ppc, linuxppc-dev, Nicholas Piggin, kvm
In-Reply-To: <20200608115714.1139735-1-clg@kaod.org>
On Mon, Jun 08, 2020 at 01:57:14PM +0200, Cédric Le Goater wrote:
> POWER8 and POWER9 have 12-bit LPIDs. Change LPID_RSVD to support up to
> (4096 - 2) guests on these processors. POWER7 is kept the same with a
> limitation of (1024 - 2), but it might be time to drop KVM support for
> POWER7.
>
> Tested with 2048 guests * 4 vCPUs on a witherspoon system with 512G
> RAM and a bit of swap.
>
> Signed-off-by: Cédric Le Goater <clg@kaod.org>
Thanks, patch applied to my kvm-ppc-next branch.
Paul.
^ permalink raw reply
* Re: [v4 4/5] KVM: PPC: Book3S HV: retry page migration before erroring-out
From: Bharata B Rao @ 2020-07-23 6:13 UTC (permalink / raw)
To: Ram Pai
Cc: ldufour, cclaudio, kvm-ppc, sathnaga, aneesh.kumar, sukadev,
linuxppc-dev, bauerman, david
In-Reply-To: <1594972827-13928-5-git-send-email-linuxram@us.ibm.com>
On Fri, Jul 17, 2020 at 01:00:26AM -0700, Ram Pai wrote:
> @@ -812,7 +842,7 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
> struct vm_area_struct *vma;
> int srcu_idx;
> unsigned long gfn = gpa >> page_shift;
> - int ret;
> + int ret, repeat_count = REPEAT_COUNT;
>
> if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
> return H_UNSUPPORTED;
> @@ -826,34 +856,44 @@ unsigned long kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
> if (flags & H_PAGE_IN_SHARED)
> return kvmppc_share_page(kvm, gpa, page_shift);
>
> - ret = H_PARAMETER;
> srcu_idx = srcu_read_lock(&kvm->srcu);
> - mmap_read_lock(kvm->mm);
>
> - start = gfn_to_hva(kvm, gfn);
> - if (kvm_is_error_hva(start))
> - goto out;
> -
> - mutex_lock(&kvm->arch.uvmem_lock);
> /* Fail the page-in request of an already paged-in page */
> - if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
> - goto out_unlock;
> + mutex_lock(&kvm->arch.uvmem_lock);
> + ret = kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL);
> + mutex_unlock(&kvm->arch.uvmem_lock);
Same comment as for the prev patch. I don't think you can release
the lock here.
> + if (ret) {
> + srcu_read_unlock(&kvm->srcu, srcu_idx);
> + return H_PARAMETER;
> + }
>
> - end = start + (1UL << page_shift);
> - vma = find_vma_intersection(kvm->mm, start, end);
> - if (!vma || vma->vm_start > start || vma->vm_end < end)
> - goto out_unlock;
> + do {
> + ret = H_PARAMETER;
> + mmap_read_lock(kvm->mm);
>
> - if (kvmppc_svm_migrate_page(vma, start, end, gpa, kvm, page_shift,
> - true))
> - goto out_unlock;
> + start = gfn_to_hva(kvm, gfn);
> + if (kvm_is_error_hva(start)) {
> + mmap_read_unlock(kvm->mm);
> + break;
> + }
>
> - ret = H_SUCCESS;
> + end = start + (1UL << page_shift);
> + vma = find_vma_intersection(kvm->mm, start, end);
> + if (!vma || vma->vm_start > start || vma->vm_end < end) {
> + mmap_read_unlock(kvm->mm);
> + break;
> + }
> +
> + mutex_lock(&kvm->arch.uvmem_lock);
> + ret = kvmppc_svm_migrate_page(vma, start, end, gpa, kvm, page_shift, true);
> + mutex_unlock(&kvm->arch.uvmem_lock);
> +
> + mmap_read_unlock(kvm->mm);
> + } while (ret == -2 && repeat_count--);
> +
> + if (ret == -2)
> + ret = H_BUSY;
>
> -out_unlock:
> - mutex_unlock(&kvm->arch.uvmem_lock);
> -out:
> - mmap_read_unlock(kvm->mm);
> srcu_read_unlock(&kvm->srcu, srcu_idx);
> return ret;
> }
> --
> 1.8.3.1
^ permalink raw reply
* Re: [v4 3/5] KVM: PPC: Book3S HV: in H_SVM_INIT_DONE, migrate remaining normal-GFNs to secure-GFNs.
From: Bharata B Rao @ 2020-07-23 6:10 UTC (permalink / raw)
To: Ram Pai
Cc: ldufour, cclaudio, kvm-ppc, sathnaga, aneesh.kumar, sukadev,
linuxppc-dev, bauerman, david
In-Reply-To: <1594972827-13928-4-git-send-email-linuxram@us.ibm.com>
On Fri, Jul 17, 2020 at 01:00:25AM -0700, Ram Pai wrote:
>
> +int kvmppc_uv_migrate_mem_slot(struct kvm *kvm,
> + const struct kvm_memory_slot *memslot)
Don't see any callers for this outside of this file, so why not static?
> +{
> + unsigned long gfn = memslot->base_gfn;
> + struct vm_area_struct *vma;
> + unsigned long start, end;
> + int ret = 0;
> +
> + while (kvmppc_next_nontransitioned_gfn(memslot, kvm, &gfn)) {
So you checked the state of gfn under uvmem_lock above, but release
it too.
> +
> + mmap_read_lock(kvm->mm);
> + start = gfn_to_hva(kvm, gfn);
> + if (kvm_is_error_hva(start)) {
> + ret = H_STATE;
> + goto next;
> + }
> +
> + end = start + (1UL << PAGE_SHIFT);
> + vma = find_vma_intersection(kvm->mm, start, end);
> + if (!vma || vma->vm_start > start || vma->vm_end < end) {
> + ret = H_STATE;
> + goto next;
> + }
> +
> + mutex_lock(&kvm->arch.uvmem_lock);
> + ret = kvmppc_svm_migrate_page(vma, start, end,
> + (gfn << PAGE_SHIFT), kvm, PAGE_SHIFT, false);
What is the guarantee that the gfn is in the same earlier state when you
do migration here?
Regards,
Bharata.
^ permalink raw reply
* Re: [PATCH 0/4] ASoC: fsl_asrc: allow selecting arbitrary clocks
From: Nicolin Chen @ 2020-07-23 5:46 UTC (permalink / raw)
To: Arnaud Ferraris
Cc: devicetree, alsa-devel, linuxppc-dev, Timur Tabi, Xiubo Li,
linux-kernel, Takashi Iwai, Liam Girdwood, Rob Herring,
Jaroslav Kysela, Mark Brown, kernel, Fabio Estevam
In-Reply-To: <abdd7265-43d2-49b5-6afd-70d65baac30e@collabora.com>
On Fri, Jul 17, 2020 at 01:16:42PM +0200, Arnaud Ferraris wrote:
> Hi Nic,
>
> Le 02/07/2020 à 20:42, Nicolin Chen a écrit :
> > Hi Arnaud,
> >
> > On Thu, Jul 02, 2020 at 04:22:31PM +0200, Arnaud Ferraris wrote:
> >> The current ASRC driver hardcodes the input and output clocks used for
> >> sample rate conversions. In order to allow greater flexibility and to
> >> cover more use cases, it would be preferable to select the clocks using
> >> device-tree properties.
> >
> > We just recently merged a new change that auto-selects internal
> > clocks based on sample rates as the first option -- ideal ratio
> > mode is the fallback mode now. Please refer to:
> > https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/commit/?h=next-20200702&id=d0250cf4f2abfbea64ed247230f08f5ae23979f0
>
> While working on fixing the automatic clock selection (see my v3), I
> came across another potential issue, which would be better explained
> with an example:
> - Input has sample rate 8kHz and uses clock SSI1 with rate 512kHz
> - Output has sample rate 16kHz and uses clock SSI2 with rate 1024kHz
>
> Let's say my v3 patch is merged, then the selected input clock will be
> SSI1, while the selected output clock will be SSI2. In that case, it's
> all good, as the driver will calculate the dividers right.
>
> Now, suppose a similar board has the input wired to SSI2 and output to
> SSI1, meaning we're now in the following case:
> - Input has sample rate 8kHz and uses clock SSI2 with rate 512kHz
> - Output has sample rate 16kHz and uses clock SSI1 with rate 1024kHz
> (the same result is achieved during capture with the initial example
> setup, as input and output properties are then swapped)
>
> In that case, the selected clocks will still be SSI1 for input (just
> because it appears first in the clock table), and SSI2 for output,
> meaning the calculated dividers will be:
> - input: 512 / 16 => 32 (should be 64)
> - output: 1024 / 8 => 128 (should be 64 here too)
I don't get the 32, 128 and 64 parts. Would you please elaborate
a bit? What you said sounds to me like the driver calculates wrong
dividers?
^ permalink raw reply
* Re: [v3 12/15] powerpc/perf: Add support for outputting extended regs in perf intr_regs
From: kajoljain @ 2020-07-23 5:44 UTC (permalink / raw)
To: Athira Rajeev, mpe, acme, jolsa
Cc: ego, mikey, maddy, kvm, kvm-ppc, svaidyan, linuxppc-dev
In-Reply-To: <1dded891-e5c2-ae1a-301c-4a3806aec3a0@linux.ibm.com>
On 7/21/20 11:32 AM, kajoljain wrote:
>
>
> On 7/17/20 8:08 PM, Athira Rajeev wrote:
>> From: Anju T Sudhakar <anju@linux.vnet.ibm.com>
>>
>> Add support for perf extended register capability in powerpc.
>> The capability flag PERF_PMU_CAP_EXTENDED_REGS is used to indicate the
>> PMUs which support extended registers. The generic code defines the mask
>> of extended registers as 0 for non supported architectures.
>>
>> Patch adds extended regs support for power9 platform by
>> exposing MMCR0, MMCR1 and MMCR2 registers.
>>
>> REG_RESERVED mask needs update to include extended regs.
>> `PERF_REG_EXTENDED_MASK`, contains mask value of the supported registers,
>> is defined at runtime in the kernel based on platform since the supported
>> registers may differ from one processor version to another and hence the
>> MASK value.
>>
>> with patch
>> ----------
>>
>> available registers: r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11
>> r12 r13 r14 r15 r16 r17 r18 r19 r20 r21 r22 r23 r24 r25 r26
>> r27 r28 r29 r30 r31 nip msr orig_r3 ctr link xer ccr softe
>> trap dar dsisr sier mmcra mmcr0 mmcr1 mmcr2
>>
>> PERF_RECORD_SAMPLE(IP, 0x1): 4784/4784: 0 period: 1 addr: 0
>> ... intr regs: mask 0xffffffffffff ABI 64-bit
>> .... r0 0xc00000000012b77c
>> .... r1 0xc000003fe5e03930
>> .... r2 0xc000000001b0e000
>> .... r3 0xc000003fdcddf800
>> .... r4 0xc000003fc7880000
>> .... r5 0x9c422724be
>> .... r6 0xc000003fe5e03908
>> .... r7 0xffffff63bddc8706
>> .... r8 0x9e4
>> .... r9 0x0
>> .... r10 0x1
>> .... r11 0x0
>> .... r12 0xc0000000001299c0
>> .... r13 0xc000003ffffc4800
>> .... r14 0x0
>> .... r15 0x7fffdd8b8b00
>> .... r16 0x0
>> .... r17 0x7fffdd8be6b8
>> .... r18 0x7e7076607730
>> .... r19 0x2f
>> .... r20 0xc00000001fc26c68
>> .... r21 0xc0002041e4227e00
>> .... r22 0xc00000002018fb60
>> .... r23 0x1
>> .... r24 0xc000003ffec4d900
>> .... r25 0x80000000
>> .... r26 0x0
>> .... r27 0x1
>> .... r28 0x1
>> .... r29 0xc000000001be1260
>> .... r30 0x6008010
>> .... r31 0xc000003ffebb7218
>> .... nip 0xc00000000012b910
>> .... msr 0x9000000000009033
>> .... orig_r3 0xc00000000012b86c
>> .... ctr 0xc0000000001299c0
>> .... link 0xc00000000012b77c
>> .... xer 0x0
>> .... ccr 0x28002222
>> .... softe 0x1
>> .... trap 0xf00
>> .... dar 0x0
>> .... dsisr 0x80000000000
>> .... sier 0x0
>> .... mmcra 0x80000000000
>> .... mmcr0 0x82008090
>> .... mmcr1 0x1e000000
>> .... mmcr2 0x0
>> ... thread: perf:4784
>>
>> Signed-off-by: Anju T Sudhakar <anju@linux.vnet.ibm.com>
>> [Defined PERF_REG_EXTENDED_MASK at run time to add support for different platforms ]
>> Signed-off-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
>> Reviewed-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
>> ---
>
> Patch looks good to me.
>
> Reviewed-by: Kajol Jain <kjain@linux.ibm.com>
Hi Arnaldo and Jiri,
Please let me know if you have any comments on these patches. Can you pull/ack these
patches if they seem fine to you.
Thanks,
Kajol Jain
>
> Thanks,
> Kajol Jain
>
>> arch/powerpc/include/asm/perf_event_server.h | 8 +++++++
>> arch/powerpc/include/uapi/asm/perf_regs.h | 14 +++++++++++-
>> arch/powerpc/perf/core-book3s.c | 1 +
>> arch/powerpc/perf/perf_regs.c | 34 +++++++++++++++++++++++++---
>> arch/powerpc/perf/power9-pmu.c | 6 +++++
>> 5 files changed, 59 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
>> index 832450a..bf85d1a 100644
>> --- a/arch/powerpc/include/asm/perf_event_server.h
>> +++ b/arch/powerpc/include/asm/perf_event_server.h
>> @@ -15,6 +15,9 @@
>> #define MAX_EVENT_ALTERNATIVES 8
>> #define MAX_LIMITED_HWCOUNTERS 2
>>
>> +extern u64 PERF_REG_EXTENDED_MASK;
>> +#define PERF_REG_EXTENDED_MASK PERF_REG_EXTENDED_MASK
>> +
>> struct perf_event;
>>
>> struct mmcr_regs {
>> @@ -62,6 +65,11 @@ struct power_pmu {
>> int *blacklist_ev;
>> /* BHRB entries in the PMU */
>> int bhrb_nr;
>> + /*
>> + * set this flag with `PERF_PMU_CAP_EXTENDED_REGS` if
>> + * the pmu supports extended perf regs capability
>> + */
>> + int capabilities;
>> };
>>
>> /*
>> diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h b/arch/powerpc/include/uapi/asm/perf_regs.h
>> index f599064..225c64c 100644
>> --- a/arch/powerpc/include/uapi/asm/perf_regs.h
>> +++ b/arch/powerpc/include/uapi/asm/perf_regs.h
>> @@ -48,6 +48,18 @@ enum perf_event_powerpc_regs {
>> PERF_REG_POWERPC_DSISR,
>> PERF_REG_POWERPC_SIER,
>> PERF_REG_POWERPC_MMCRA,
>> - PERF_REG_POWERPC_MAX,
>> + /* Extended registers */
>> + PERF_REG_POWERPC_MMCR0,
>> + PERF_REG_POWERPC_MMCR1,
>> + PERF_REG_POWERPC_MMCR2,
>> + /* Max regs without the extended regs */
>> + PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
>> };
>> +
>> +#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1)
>> +
>> +/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */
>> +#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK)
>> +
>> +#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1)
>> #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
>> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
>> index 31c0535..d5a9529 100644
>> --- a/arch/powerpc/perf/core-book3s.c
>> +++ b/arch/powerpc/perf/core-book3s.c
>> @@ -2316,6 +2316,7 @@ int register_power_pmu(struct power_pmu *pmu)
>> pmu->name);
>>
>> power_pmu.attr_groups = ppmu->attr_groups;
>> + power_pmu.capabilities |= (ppmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS);
>>
>> #ifdef MSR_HV
>> /*
>> diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
>> index a213a0a..b0cf68f 100644
>> --- a/arch/powerpc/perf/perf_regs.c
>> +++ b/arch/powerpc/perf/perf_regs.c
>> @@ -13,9 +13,11 @@
>> #include <asm/ptrace.h>
>> #include <asm/perf_regs.h>
>>
>> +u64 PERF_REG_EXTENDED_MASK;
>> +
>> #define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
>>
>> -#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
>> +#define REG_RESERVED (~(PERF_REG_EXTENDED_MASK | PERF_REG_PMU_MASK))
>>
>> static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
>> PT_REGS_OFFSET(PERF_REG_POWERPC_R0, gpr[0]),
>> @@ -69,10 +71,26 @@
>> PT_REGS_OFFSET(PERF_REG_POWERPC_MMCRA, dsisr),
>> };
>>
>> +/* Function to return the extended register values */
>> +static u64 get_ext_regs_value(int idx)
>> +{
>> + switch (idx) {
>> + case PERF_REG_POWERPC_MMCR0:
>> + return mfspr(SPRN_MMCR0);
>> + case PERF_REG_POWERPC_MMCR1:
>> + return mfspr(SPRN_MMCR1);
>> + case PERF_REG_POWERPC_MMCR2:
>> + return mfspr(SPRN_MMCR2);
>> + default: return 0;
>> + }
>> +}
>> +
>> u64 perf_reg_value(struct pt_regs *regs, int idx)
>> {
>> - if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
>> - return 0;
>> + u64 PERF_REG_EXTENDED_MAX;
>> +
>> + if (cpu_has_feature(CPU_FTR_ARCH_300))
>> + PERF_REG_EXTENDED_MAX = PERF_REG_MAX_ISA_300;
>>
>> if (idx == PERF_REG_POWERPC_SIER &&
>> (IS_ENABLED(CONFIG_FSL_EMB_PERF_EVENT) ||
>> @@ -85,6 +103,16 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
>> IS_ENABLED(CONFIG_PPC32)))
>> return 0;
>>
>> + if (idx >= PERF_REG_POWERPC_MAX && idx < PERF_REG_EXTENDED_MAX)
>> + return get_ext_regs_value(idx);
>> +
>> + /*
>> + * If the idx is referring to value beyond the
>> + * supported registers, return 0 with a warning
>> + */
>> + if (WARN_ON_ONCE(idx >= PERF_REG_EXTENDED_MAX))
>> + return 0;
>> +
>> return regs_get_register(regs, pt_regs_offset[idx]);
>> }
>>
>> diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
>> index 05dae38..2a57e93 100644
>> --- a/arch/powerpc/perf/power9-pmu.c
>> +++ b/arch/powerpc/perf/power9-pmu.c
>> @@ -90,6 +90,8 @@ enum {
>> #define POWER9_MMCRA_IFM3 0x00000000C0000000UL
>> #define POWER9_MMCRA_BHRB_MASK 0x00000000C0000000UL
>>
>> +extern u64 PERF_REG_EXTENDED_MASK;
>> +
>> /* Nasty Power9 specific hack */
>> #define PVR_POWER9_CUMULUS 0x00002000
>>
>> @@ -434,6 +436,7 @@ static void power9_config_bhrb(u64 pmu_bhrb_filter)
>> .cache_events = &power9_cache_events,
>> .attr_groups = power9_pmu_attr_groups,
>> .bhrb_nr = 32,
>> + .capabilities = PERF_PMU_CAP_EXTENDED_REGS,
>> };
>>
>> int init_power9_pmu(void)
>> @@ -457,6 +460,9 @@ int init_power9_pmu(void)
>> }
>> }
>>
>> + /* Set the PERF_REG_EXTENDED_MASK here */
>> + PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_300;
>> +
>> rc = register_power_pmu(&power9_pmu);
>> if (rc)
>> return rc;
>>
^ permalink raw reply
* RE: [PATCH devicetree 3/4] powerpc: dts: t1040rdb: put SGMII PHY under &mdio0 label
From: Madalin Bucur (OSS) @ 2020-07-23 5:40 UTC (permalink / raw)
To: Vladimir Oltean, robh+dt@kernel.org, shawnguo@kernel.org,
mpe@ellerman.id.au, devicetree@vger.kernel.org
Cc: Madalin Bucur (OSS), linux-kernel@vger.kernel.org,
Radu-andrei Bulie, fido_max@inbox.ru, paulus@samba.org,
netdev@vger.kernel.org, linuxppc-dev@lists.ozlabs.org
In-Reply-To: <20200722172422.2590489-4-olteanv@gmail.com>
> -----Original Message-----
> From: Vladimir Oltean <olteanv@gmail.com>
> Sent: Wednesday, July 22, 2020 8:24 PM
> To: robh+dt@kernel.org; shawnguo@kernel.org; mpe@ellerman.id.au;
> devicetree@vger.kernel.org
> Cc: benh@kernel.crashing.org; paulus@samba.org; linuxppc-
> dev@lists.ozlabs.org; linux-kernel@vger.kernel.org;
> netdev@vger.kernel.org; Madalin Bucur (OSS) <madalin.bucur@oss.nxp.com>;
> Radu-andrei Bulie <radu-andrei.bulie@nxp.com>; fido_max@inbox.ru
> Subject: [PATCH devicetree 3/4] powerpc: dts: t1040rdb: put SGMII PHY
> under &mdio0 label
>
> We're going to add 8 more PHYs in a future patch. It is easier to follow
> the hardware description if we don't need to fish for the path of the
> MDIO controllers inside the SoC and just use the labels.
>
Please align to the existing structure, it may be easier to add something
without paying attention to that but it's better to keep things organized.
This structure is used across all the device trees of the platforms using
DPAA, let's not start diverging now.
> Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
> ---
> arch/powerpc/boot/dts/fsl/t1040rdb.dts | 12 ++++++------
> 1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts
> b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
> index 65ff34c49025..40d7126dbe90 100644
> --- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts
> +++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
> @@ -59,12 +59,6 @@ ethernet@e4000 {
> phy-handle = <&phy_sgmii_2>;
> phy-connection-type = "sgmii";
> };
> -
> - mdio@fc000 {
> - phy_sgmii_2: ethernet-phy@3 {
> - reg = <0x03>;
> - };
> - };
> };
> };
>
> @@ -76,3 +70,9 @@ cpld@3,0 {
> };
>
> #include "t1040si-post.dtsi"
> +
> +&mdio0 {
> + phy_sgmii_2: ethernet-phy@3 {
> + reg = <0x3>;
> + };
> +};
> --
> 2.25.1
^ permalink raw reply
* Re: [PATCH v5 1/4] riscv: Move kernel mapping to vmalloc zone
From: Alex Ghiti @ 2020-07-23 5:36 UTC (permalink / raw)
To: Palmer Dabbelt, benh
Cc: aou, linux-mm, Anup Patel, linux-kernel, Atish Patra, paulus,
zong.li, Paul Walmsley, linux-riscv, linuxppc-dev
In-Reply-To: <mhng-4b49d09a-0267-4879-849f-30c24f26e2c3@palmerdabbelt-glaptop1>
Le 7/21/20 à 7:36 PM, Palmer Dabbelt a écrit :
> On Tue, 21 Jul 2020 16:11:02 PDT (-0700), benh@kernel.crashing.org wrote:
>> On Tue, 2020-07-21 at 14:36 -0400, Alex Ghiti wrote:
>>> > > I guess I don't understand why this is necessary at all.
>>> > > Specifically: why
>>> > > can't we just relocate the kernel within the linear map? That would
>>> > > let the
>>> > > bootloader put the kernel wherever it wants, modulo the physical
>>> > > memory size we
>>> > > support. We'd need to handle the regions that are coupled to the
>>> > > kernel's
>>> > > execution address, but we could just put them in an explicit memory
>>> > > region
>>> > > which is what we should probably be doing anyway.
>>> >
>>> > Virtual relocation in the linear mapping requires to move the kernel
>>> > physically too. Zong implemented this physical move in its KASLR RFC
>>> > patchset, which is cumbersome since finding an available physical spot
>>> > is harder than just selecting a virtual range in the vmalloc range.
>>> >
>>> > In addition, having the kernel mapping in the linear mapping prevents
>>> > the use of hugepage for the linear mapping resulting in performance
>>> loss
>>> > (at least for the GB that encompasses the kernel).
>>> >
>>> > Why do you find this "ugly" ? The vmalloc region is just a bunch of
>>> > available virtual addresses to whatever purpose we want, and as
>>> noted by
>>> > Zong, arm64 uses the same scheme.
>>
>> I don't get it :-)
>>
>> At least on powerpc we move the kernel in the linear mapping and it
>> works fine with huge pages, what is your problem there ? You rely on
>> punching small-page size holes in there ?
>
> That was my original suggestion, and I'm not actually sure it's
> invalid. It
> would mean that both the kernel's physical and virtual addresses are set
> by the
> bootloader, which may or may not be workable if we want to have an
> sv48+sv39
> kernel. My initial approach to sv48+sv39 kernels would be to just throw
> away
> the sv39 memory on sv48 kernels, which would preserve the linear map but
> mean
> that there is no single physical address that's accessible for both. That
> would require some coordination between the bootloader and the kernel as to
> where it should be loaded, but maybe there's a better way to design the
> linear
> map. Right now we have a bunch of unwritten rules about where things
> need to
> be loaded, which is a recipe for disaster.
>
> We could copy the kernel around, but I'm not sure I really like that
> idea. We
> do zero the BSS right now, so it's not like we entirely rely on the
> bootloader
> to set up the kernel image, but with the hart race boot scheme we have
> right
> now we'd at least need to leave a stub sitting around. Maybe we just throw
> away SBI v0.1, though, that's why we called it all legacy in the first
> place.
>
> My bigger worry is that anything that involves running the kernel at
> arbitrary
> virtual addresses means we need a PIC kernel, which means every global
> symbol
> needs an indirection. That's probably not so bad for shared libraries,
> but the
> kernel has a lot of global symbols. PLT references probably aren't so
> scary,
> as we have an incoherent instruction cache so the virtual function
> predictor
> isn't that hard to build, but making all global data accesses GOT-relative
> seems like a disaster for performance. This fixed-VA thing really just
> exists
> so we don't have to be full-on PIC.
>
> In theory I think we could just get away with pretending that medany is
> PIC,
> which I believe works as long as the data and text offset stays
> constant, you
> don't have any symbols between 2GiB and -2GiB (as those may stay fixed,
> even in medany), and you deal with GP accordingly (which should work
> itself out
> in the current startup code). We rely on this for some of the early
> boot code
> (and will soon for kexec), but that's a very controlled code base and we've
> already had some issues. I'd be much more comfortable adding an explicit
> semi-PIC code model, as I tend to miss something when doing these sorts of
> things and then we could at least add it to the GCC test runs and
> guarantee it
> actually works. Not really sure I want to deal with that, though. It
> would,
> however, be the only way to get random virtual addresses during kernel
> execution.
>
>> At least in the old days, there were a number of assumptions that
>> the kernel text/data/bss resides in the linear mapping.
>
> Ya, it terrified me as well. Alex says arm64 puts the kernel in the
> vmalloc
> region, so assuming that's the case it must be possible. I didn't get that
> from reading the arm64 port (I guess it's no secret that pretty much all
> I do
> is copy their code)
See https://elixir.bootlin.com/linux/latest/source/arch/arm64/mm/mmu.c#L615.
>
>> If you change that you need to ensure that it's still physically
>> contiguous and you'll have to tweak __va and __pa, which might induce
>> extra overhead.
>
> I'm operating under the assumption that we don't want to add an
> additional load
> to virt2phys conversions. arm64 bends over backwards to avoid the load,
> and
> I'm assuming they have a reason for doing so. Of course, if we're PIC then
> maybe performance just doesn't matter, but I'm not sure I want to just
> give up.
> Distros will probably build the sv48+sv39 kernels as soon as they show
> up, even
> if there's no sv48 hardware for a while.
^ permalink raw reply
* Re: [PATCH 1/2] ASoC: fsl-asoc-card: Support configuring dai fmt from DT
From: Nicolin Chen @ 2020-07-23 5:35 UTC (permalink / raw)
To: Shengjiu Wang
Cc: devicetree, alsa-devel, timur, Xiubo.Lee, linuxppc-dev, tiwai,
lgirdwood, robh+dt, perex, broonie, festevam, linux-kernel
In-Reply-To: <1595302910-19688-1-git-send-email-shengjiu.wang@nxp.com>
On Tue, Jul 21, 2020 at 11:41:49AM +0800, Shengjiu Wang wrote:
> Support same properties as simple card for configuring fmt
> from DT.
> In order to make this change compatible with old DT, these
> properties are optional.
>
> Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
For both changes:
Acked-by: Nicolin Chen <nicoleotsuka@gmail.com>
^ permalink raw reply
* Re: [PATCH v5 1/4] riscv: Move kernel mapping to vmalloc zone
From: Alex Ghiti @ 2020-07-23 5:32 UTC (permalink / raw)
To: Palmer Dabbelt
Cc: aou, linux-mm, Anup Patel, linux-kernel, Atish Patra, paulus,
zong.li, Paul Walmsley, linux-riscv, linuxppc-dev
In-Reply-To: <mhng-08bff01a-ca15-4bbc-8454-2ca3e823fef8@palmerdabbelt-glaptop1>
Hi Palmer,
Le 7/21/20 à 3:05 PM, Palmer Dabbelt a écrit :
> On Tue, 21 Jul 2020 11:36:10 PDT (-0700), alex@ghiti.fr wrote:
>> Let's try to make progress here: I add linux-mm in CC to get feedback on
>> this patch as it blocks sv48 support too.
>
> Sorry for being slow here. I haven't replied because I hadn't really
> fleshed
No problem :)
> out the design yet, but just so everyone's on the same page my problems
> with
> this are:
>
> * We waste vmalloc space on 32-bit systems, where there isn't a lot of it.
> * On 64-bit systems the VA space around the kernel is precious because
> it's the
> only place we can place text (modules, BPF, whatever). If we start
> putting
> the kernel in the vmalloc space then we either have to pre-allocate a
> bunch
> of space around it (essentially making it a fixed mapping anyway) or it
> becomes likely that we won't be able to find space for modules as they're
> loaded into running systems.
Let's note that we already have this issue for BPF and modules right now.
But keeping the kernel at the end of the vmalloc region largely
mitigates this problem: if we exhaust the vmalloc region in 64bit
and then start allocating here, I think the whole system will have other
problems.
> * Relying on a relocatable kernel for sv48 support introduces a fairly
> large
> performance hit.
I understand the performance penalty but I struggle to see it as "fairly
large": can we benchmark this somehow ?
>
> Roughly, my proposal would be to:
>
> * Leave the 32-bit memory map alone. On 32-bit systems we can load modules
> anywhere and we only have one VA width, so we're not really solving any
> problems with these changes.
Ok that's possible although a lot of ifdef will get involved :)
> * Statically allocate a 2GiB portion of the VA space for all our text, as
> its own
> region. We'd link/relocate the kernel here instead of around
> PAGE_OFFSET,
> which would decouple the kernel from the physical memory layout of the
> system.
> This would have the side effect of sorting out a bunch of bootloader
> headaches
> that we currently have.
This amounts to doing the same as this patch but instead of using the
vmalloc region, we'd use our own right ? I believe we'd then lose the
vmalloc facilities to allocate modules around this zone.
> * Sort out how to maintain a linear map as the canonical hole moves around
> between the VA widths without adding a bunch of overhead to the
> virt2phys and
> friends. This is probably going to be the trickiest part, but I think
> if we
> just change the page table code to essentially lie about VAs when an sv39
> system runs an sv48+sv39 kernel we could make it work -- there'd be some
> logical complexity involved, but it would remain fast.
I have to think about that.
>
> This doesn't solve the problem of virtually relocatable kernels, but it
> does
> let us decouple that from the sv48 stuff. It also lets us stop relying
> on a
> fixed physical address the kernel is loaded into, which is another thing I
> don't like.
>
Agreed on this one.
> I know this may be a more complicated approach, but there aren't any sv48
> systems around right now so I just don't see the rush to support them,
> particularly when there's a cost to what already exists (for those who
> haven't
> been watching, so far all the sv48 patch sets have imposed a significant
> performance penalty on all systems).
>
Alex
>>
>> Alex
>>
>> Le 7/9/20 à 7:11 AM, Alex Ghiti a écrit :
>>> Hi Palmer,
>>>
>>> Le 7/9/20 à 1:05 AM, Palmer Dabbelt a écrit :
>>>> On Sun, 07 Jun 2020 00:59:46 PDT (-0700), alex@ghiti.fr wrote:
>>>>> This is a preparatory patch for relocatable kernel.
>>>>>
>>>>> The kernel used to be linked at PAGE_OFFSET address and used to be
>>>>> loaded
>>>>> physically at the beginning of the main memory. Therefore, we could
>>>>> use
>>>>> the linear mapping for the kernel mapping.
>>>>>
>>>>> But the relocated kernel base address will be different from
>>>>> PAGE_OFFSET
>>>>> and since in the linear mapping, two different virtual addresses
>>>>> cannot
>>>>> point to the same physical address, the kernel mapping needs to lie
>>>>> outside
>>>>> the linear mapping.
>>>>
>>>> I know it's been a while, but I keep opening this up to review it and
>>>> just
>>>> can't get over how ugly it is to put the kernel's linear map in the
>>>> vmalloc
>>>> region.
>>>>
>>>> I guess I don't understand why this is necessary at all.
>>>> Specifically: why
>>>> can't we just relocate the kernel within the linear map? That would
>>>> let the
>>>> bootloader put the kernel wherever it wants, modulo the physical
>>>> memory size we
>>>> support. We'd need to handle the regions that are coupled to the
>>>> kernel's
>>>> execution address, but we could just put them in an explicit memory
>>>> region
>>>> which is what we should probably be doing anyway.
>>>
>>> Virtual relocation in the linear mapping requires to move the kernel
>>> physically too. Zong implemented this physical move in its KASLR RFC
>>> patchset, which is cumbersome since finding an available physical spot
>>> is harder than just selecting a virtual range in the vmalloc range.
>>>
>>> In addition, having the kernel mapping in the linear mapping prevents
>>> the use of hugepage for the linear mapping resulting in performance loss
>>> (at least for the GB that encompasses the kernel).
>>>
>>> Why do you find this "ugly" ? The vmalloc region is just a bunch of
>>> available virtual addresses to whatever purpose we want, and as noted by
>>> Zong, arm64 uses the same scheme.
>>>
>>>>
>>>>> In addition, because modules and BPF must be close to the kernel
>>>>> (inside
>>>>> +-2GB window), the kernel is placed at the end of the vmalloc zone
>>>>> minus
>>>>> 2GB, which leaves room for modules and BPF. The kernel could not be
>>>>> placed at the beginning of the vmalloc zone since other vmalloc
>>>>> allocations from the kernel could get all the +-2GB window around the
>>>>> kernel which would prevent new modules and BPF programs to be loaded.
>>>>
>>>> Well, that's not enough to make sure this doesn't happen -- it's just
>>>> enough to
>>>> make sure it doesn't happen very quickly. That's the same boat we're
>>>> already
>>>> in, though, so it's not like it's worse.
>>>
>>> Indeed, that's not worse, I haven't found a way to reserve vmalloc area
>>> without actually allocating it.
>>>
>>>>
>>>>> Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
>>>>> Reviewed-by: Zong Li <zong.li@sifive.com>
>>>>> ---
>>>>> arch/riscv/boot/loader.lds.S | 3 +-
>>>>> arch/riscv/include/asm/page.h | 10 +++++-
>>>>> arch/riscv/include/asm/pgtable.h | 38 ++++++++++++++-------
>>>>> arch/riscv/kernel/head.S | 3 +-
>>>>> arch/riscv/kernel/module.c | 4 +--
>>>>> arch/riscv/kernel/vmlinux.lds.S | 3 +-
>>>>> arch/riscv/mm/init.c | 58
>>>>> +++++++++++++++++++++++++-------
>>>>> arch/riscv/mm/physaddr.c | 2 +-
>>>>> 8 files changed, 88 insertions(+), 33 deletions(-)
>>>>>
>>>>> diff --git a/arch/riscv/boot/loader.lds.S
>>>>> b/arch/riscv/boot/loader.lds.S
>>>>> index 47a5003c2e28..62d94696a19c 100644
>>>>> --- a/arch/riscv/boot/loader.lds.S
>>>>> +++ b/arch/riscv/boot/loader.lds.S
>>>>> @@ -1,13 +1,14 @@
>>>>> /* SPDX-License-Identifier: GPL-2.0 */
>>>>>
>>>>> #include <asm/page.h>
>>>>> +#include <asm/pgtable.h>
>>>>>
>>>>> OUTPUT_ARCH(riscv)
>>>>> ENTRY(_start)
>>>>>
>>>>> SECTIONS
>>>>> {
>>>>> - . = PAGE_OFFSET;
>>>>> + . = KERNEL_LINK_ADDR;
>>>>>
>>>>> .payload : {
>>>>> *(.payload)
>>>>> diff --git a/arch/riscv/include/asm/page.h
>>>>> b/arch/riscv/include/asm/page.h
>>>>> index 2d50f76efe48..48bb09b6a9b7 100644
>>>>> --- a/arch/riscv/include/asm/page.h
>>>>> +++ b/arch/riscv/include/asm/page.h
>>>>> @@ -90,18 +90,26 @@ typedef struct page *pgtable_t;
>>>>>
>>>>> #ifdef CONFIG_MMU
>>>>> extern unsigned long va_pa_offset;
>>>>> +extern unsigned long va_kernel_pa_offset;
>>>>> extern unsigned long pfn_base;
>>>>> #define ARCH_PFN_OFFSET (pfn_base)
>>>>> #else
>>>>> #define va_pa_offset 0
>>>>> +#define va_kernel_pa_offset 0
>>>>> #define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
>>>>> #endif /* CONFIG_MMU */
>>>>>
>>>>> extern unsigned long max_low_pfn;
>>>>> extern unsigned long min_low_pfn;
>>>>> +extern unsigned long kernel_virt_addr;
>>>>>
>>>>> #define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) +
>>>>> va_pa_offset))
>>>>> -#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)
>>>>> +#define linear_mapping_va_to_pa(x) ((unsigned long)(x) -
>>>>> va_pa_offset)
>>>>> +#define kernel_mapping_va_to_pa(x) \
>>>>> + ((unsigned long)(x) - va_kernel_pa_offset)
>>>>> +#define __va_to_pa_nodebug(x) \
>>>>> + (((x) >= PAGE_OFFSET) ? \
>>>>> + linear_mapping_va_to_pa(x) : kernel_mapping_va_to_pa(x))
>>>>>
>>>>> #ifdef CONFIG_DEBUG_VIRTUAL
>>>>> extern phys_addr_t __virt_to_phys(unsigned long x);
>>>>> diff --git a/arch/riscv/include/asm/pgtable.h
>>>>> b/arch/riscv/include/asm/pgtable.h
>>>>> index 35b60035b6b0..94ef3b49dfb6 100644
>>>>> --- a/arch/riscv/include/asm/pgtable.h
>>>>> +++ b/arch/riscv/include/asm/pgtable.h
>>>>> @@ -11,23 +11,29 @@
>>>>>
>>>>> #include <asm/pgtable-bits.h>
>>>>>
>>>>> -#ifndef __ASSEMBLY__
>>>>> -
>>>>> -/* Page Upper Directory not used in RISC-V */
>>>>> -#include <asm-generic/pgtable-nopud.h>
>>>>> -#include <asm/page.h>
>>>>> -#include <asm/tlbflush.h>
>>>>> -#include <linux/mm_types.h>
>>>>> -
>>>>> -#ifdef CONFIG_MMU
>>>>> +#ifndef CONFIG_MMU
>>>>> +#define KERNEL_VIRT_ADDR PAGE_OFFSET
>>>>> +#define KERNEL_LINK_ADDR PAGE_OFFSET
>>>>> +#else
>>>>> +/*
>>>>> + * Leave 2GB for modules and BPF that must lie within a 2GB range
>>>>> around
>>>>> + * the kernel.
>>>>> + */
>>>>> +#define KERNEL_VIRT_ADDR (VMALLOC_END - SZ_2G + 1)
>>>>> +#define KERNEL_LINK_ADDR KERNEL_VIRT_ADDR
>>>>
>>>> At a bare minimum this is going to make a mess of the 32-bit port, as
>>>> non-relocatable kernels are now going to get linked at 1GiB which is
>>>> where user
>>>> code is supposed to live. That's an easy fix, though, as the 32-bit
>>>> stuff
>>>> doesn't need any module address restrictions.
>>>
>>> Indeed, I will take a look at that.
>>>
>>>>
>>>>> #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
>>>>> #define VMALLOC_END (PAGE_OFFSET - 1)
>>>>> #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
>>>>>
>>>>> #define BPF_JIT_REGION_SIZE (SZ_128M)
>>>>> -#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
>>>>> -#define BPF_JIT_REGION_END (VMALLOC_END)
>>>>> +#define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end)
>>>>> +#define BPF_JIT_REGION_END (BPF_JIT_REGION_START +
>>>>> BPF_JIT_REGION_SIZE)
>>>>> +
>>>>> +#ifdef CONFIG_64BIT
>>>>> +#define VMALLOC_MODULE_START BPF_JIT_REGION_END
>>>>> +#define VMALLOC_MODULE_END (((unsigned long)&_start & PAGE_MASK)
>>>>> + SZ_2G)
>>>>> +#endif
>>>>>
>>>>> /*
>>>>> * Roughly size the vmemmap space to be large enough to fit enough
>>>>> @@ -57,9 +63,16 @@
>>>>> #define FIXADDR_SIZE PGDIR_SIZE
>>>>> #endif
>>>>> #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
>>>>> -
>>>>> #endif
>>>>>
>>>>> +#ifndef __ASSEMBLY__
>>>>> +
>>>>> +/* Page Upper Directory not used in RISC-V */
>>>>> +#include <asm-generic/pgtable-nopud.h>
>>>>> +#include <asm/page.h>
>>>>> +#include <asm/tlbflush.h>
>>>>> +#include <linux/mm_types.h>
>>>>> +
>>>>> #ifdef CONFIG_64BIT
>>>>> #include <asm/pgtable-64.h>
>>>>> #else
>>>>> @@ -483,6 +496,7 @@ static inline void __kernel_map_pages(struct page
>>>>> *page, int numpages, int enabl
>>>>>
>>>>> #define kern_addr_valid(addr) (1) /* FIXME */
>>>>>
>>>>> +extern char _start[];
>>>>> extern void *dtb_early_va;
>>>>> void setup_bootmem(void);
>>>>> void paging_init(void);
>>>>> diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
>>>>> index 98a406474e7d..8f5bb7731327 100644
>>>>> --- a/arch/riscv/kernel/head.S
>>>>> +++ b/arch/riscv/kernel/head.S
>>>>> @@ -49,7 +49,8 @@ ENTRY(_start)
>>>>> #ifdef CONFIG_MMU
>>>>> relocate:
>>>>> /* Relocate return address */
>>>>> - li a1, PAGE_OFFSET
>>>>> + la a1, kernel_virt_addr
>>>>> + REG_L a1, 0(a1)
>>>>> la a2, _start
>>>>> sub a1, a1, a2
>>>>> add ra, ra, a1
>>>>> diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
>>>>> index 8bbe5dbe1341..1a8fbe05accf 100644
>>>>> --- a/arch/riscv/kernel/module.c
>>>>> +++ b/arch/riscv/kernel/module.c
>>>>> @@ -392,12 +392,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const
>>>>> char *strtab,
>>>>> }
>>>>>
>>>>> #if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
>>>>> -#define VMALLOC_MODULE_START \
>>>>> - max(PFN_ALIGN((unsigned long)&_end - SZ_2G), VMALLOC_START)
>>>>> void *module_alloc(unsigned long size)
>>>>> {
>>>>> return __vmalloc_node_range(size, 1, VMALLOC_MODULE_START,
>>>>> - VMALLOC_END, GFP_KERNEL,
>>>>> + VMALLOC_MODULE_END, GFP_KERNEL,
>>>>> PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
>>>>> __builtin_return_address(0));
>>>>> }
>>>>> diff --git a/arch/riscv/kernel/vmlinux.lds.S
>>>>> b/arch/riscv/kernel/vmlinux.lds.S
>>>>> index 0339b6bbe11a..a9abde62909f 100644
>>>>> --- a/arch/riscv/kernel/vmlinux.lds.S
>>>>> +++ b/arch/riscv/kernel/vmlinux.lds.S
>>>>> @@ -4,7 +4,8 @@
>>>>> * Copyright (C) 2017 SiFive
>>>>> */
>>>>>
>>>>> -#define LOAD_OFFSET PAGE_OFFSET
>>>>> +#include <asm/pgtable.h>
>>>>> +#define LOAD_OFFSET KERNEL_LINK_ADDR
>>>>> #include <asm/vmlinux.lds.h>
>>>>> #include <asm/page.h>
>>>>> #include <asm/cache.h>
>>>>> diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
>>>>> index 736de6c8739f..71da78914645 100644
>>>>> --- a/arch/riscv/mm/init.c
>>>>> +++ b/arch/riscv/mm/init.c
>>>>> @@ -22,6 +22,9 @@
>>>>>
>>>>> #include "../kernel/head.h"
>>>>>
>>>>> +unsigned long kernel_virt_addr = KERNEL_VIRT_ADDR;
>>>>> +EXPORT_SYMBOL(kernel_virt_addr);
>>>>> +
>>>>> unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
>>>>> __page_aligned_bss;
>>>>> EXPORT_SYMBOL(empty_zero_page);
>>>>> @@ -178,8 +181,12 @@ void __init setup_bootmem(void)
>>>>> }
>>>>>
>>>>> #ifdef CONFIG_MMU
>>>>> +/* Offset between linear mapping virtual address and kernel load
>>>>> address */
>>>>> unsigned long va_pa_offset;
>>>>> EXPORT_SYMBOL(va_pa_offset);
>>>>> +/* Offset between kernel mapping virtual address and kernel load
>>>>> address */
>>>>> +unsigned long va_kernel_pa_offset;
>>>>> +EXPORT_SYMBOL(va_kernel_pa_offset);
>>>>> unsigned long pfn_base;
>>>>> EXPORT_SYMBOL(pfn_base);
>>>>>
>>>>> @@ -271,7 +278,7 @@ static phys_addr_t __init alloc_pmd(uintptr_t va)
>>>>> if (mmu_enabled)
>>>>> return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
>>>>>
>>>>> - pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
>>>>> + pmd_num = (va - kernel_virt_addr) >> PGDIR_SHIFT;
>>>>> BUG_ON(pmd_num >= NUM_EARLY_PMDS);
>>>>> return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD];
>>>>> }
>>>>> @@ -372,14 +379,30 @@ static uintptr_t __init
>>>>> best_map_size(phys_addr_t base, phys_addr_t size)
>>>>> #error "setup_vm() is called from head.S before relocate so it
>>>>> should not use absolute addressing."
>>>>> #endif
>>>>>
>>>>> +static uintptr_t load_pa, load_sz;
>>>>> +
>>>>> +static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t
>>>>> map_size)
>>>>> +{
>>>>> + uintptr_t va, end_va;
>>>>> +
>>>>> + end_va = kernel_virt_addr + load_sz;
>>>>> + for (va = kernel_virt_addr; va < end_va; va += map_size)
>>>>> + create_pgd_mapping(pgdir, va,
>>>>> + load_pa + (va - kernel_virt_addr),
>>>>> + map_size, PAGE_KERNEL_EXEC);
>>>>> +}
>>>>> +
>>>>> asmlinkage void __init setup_vm(uintptr_t dtb_pa)
>>>>> {
>>>>> uintptr_t va, end_va;
>>>>> - uintptr_t load_pa = (uintptr_t)(&_start);
>>>>> - uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
>>>>> uintptr_t map_size = best_map_size(load_pa,
>>>>> MAX_EARLY_MAPPING_SIZE);
>>>>>
>>>>> + load_pa = (uintptr_t)(&_start);
>>>>> + load_sz = (uintptr_t)(&_end) - load_pa;
>>>>> +
>>>>> va_pa_offset = PAGE_OFFSET - load_pa;
>>>>> + va_kernel_pa_offset = kernel_virt_addr - load_pa;
>>>>> +
>>>>> pfn_base = PFN_DOWN(load_pa);
>>>>>
>>>>> /*
>>>>> @@ -402,26 +425,22 @@ asmlinkage void __init setup_vm(uintptr_t
>>>>> dtb_pa)
>>>>> create_pmd_mapping(fixmap_pmd, FIXADDR_START,
>>>>> (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
>>>>> /* Setup trampoline PGD and PMD */
>>>>> - create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
>>>>> + create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr,
>>>>> (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
>>>>> - create_pmd_mapping(trampoline_pmd, PAGE_OFFSET,
>>>>> + create_pmd_mapping(trampoline_pmd, kernel_virt_addr,
>>>>> load_pa, PMD_SIZE, PAGE_KERNEL_EXEC);
>>>>> #else
>>>>> /* Setup trampoline PGD */
>>>>> - create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
>>>>> + create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr,
>>>>> load_pa, PGDIR_SIZE, PAGE_KERNEL_EXEC);
>>>>> #endif
>>>>>
>>>>> /*
>>>>> - * Setup early PGD covering entire kernel which will allows
>>>>> + * Setup early PGD covering entire kernel which will allow
>>>>> * us to reach paging_init(). We map all memory banks later
>>>>> * in setup_vm_final() below.
>>>>> */
>>>>> - end_va = PAGE_OFFSET + load_sz;
>>>>> - for (va = PAGE_OFFSET; va < end_va; va += map_size)
>>>>> - create_pgd_mapping(early_pg_dir, va,
>>>>> - load_pa + (va - PAGE_OFFSET),
>>>>> - map_size, PAGE_KERNEL_EXEC);
>>>>> + create_kernel_page_table(early_pg_dir, map_size);
>>>>>
>>>>> /* Create fixed mapping for early FDT parsing */
>>>>> end_va = __fix_to_virt(FIX_FDT) + FIX_FDT_SIZE;
>>>>> @@ -441,6 +460,7 @@ static void __init setup_vm_final(void)
>>>>> uintptr_t va, map_size;
>>>>> phys_addr_t pa, start, end;
>>>>> struct memblock_region *reg;
>>>>> + static struct vm_struct vm_kernel = { 0 };
>>>>>
>>>>> /* Set mmu_enabled flag */
>>>>> mmu_enabled = true;
>>>>> @@ -467,10 +487,22 @@ static void __init setup_vm_final(void)
>>>>> for (pa = start; pa < end; pa += map_size) {
>>>>> va = (uintptr_t)__va(pa);
>>>>> create_pgd_mapping(swapper_pg_dir, va, pa,
>>>>> - map_size, PAGE_KERNEL_EXEC);
>>>>> + map_size, PAGE_KERNEL);
>>>>> }
>>>>> }
>>>>>
>>>>> + /* Map the kernel */
>>>>> + create_kernel_page_table(swapper_pg_dir, PMD_SIZE);
>>>>> +
>>>>> + /* Reserve the vmalloc area occupied by the kernel */
>>>>> + vm_kernel.addr = (void *)kernel_virt_addr;
>>>>> + vm_kernel.phys_addr = load_pa;
>>>>> + vm_kernel.size = (load_sz + PMD_SIZE - 1) & ~(PMD_SIZE - 1);
>>>>> + vm_kernel.flags = VM_MAP | VM_NO_GUARD;
>>>>> + vm_kernel.caller = __builtin_return_address(0);
>>>>> +
>>>>> + vm_area_add_early(&vm_kernel);
>>>>> +
>>>>> /* Clear fixmap PTE and PMD mappings */
>>>>> clear_fixmap(FIX_PTE);
>>>>> clear_fixmap(FIX_PMD);
>>>>> diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c
>>>>> index e8e4dcd39fed..35703d5ef5fd 100644
>>>>> --- a/arch/riscv/mm/physaddr.c
>>>>> +++ b/arch/riscv/mm/physaddr.c
>>>>> @@ -23,7 +23,7 @@ EXPORT_SYMBOL(__virt_to_phys);
>>>>>
>>>>> phys_addr_t __phys_addr_symbol(unsigned long x)
>>>>> {
>>>>> - unsigned long kernel_start = (unsigned long)PAGE_OFFSET;
>>>>> + unsigned long kernel_start = (unsigned long)kernel_virt_addr;
>>>>> unsigned long kernel_end = (unsigned long)_end;
>>>>>
>>>>> /*
>>>
>>> Alex
^ permalink raw reply
* Re: [PATCH] ASoC: fsl_esai: add IRQF_SHARED for devm_request_irq
From: Nicolin Chen @ 2020-07-23 5:31 UTC (permalink / raw)
To: Shengjiu Wang
Cc: alsa-devel, timur, Xiubo.Lee, linuxppc-dev, tiwai, perex, broonie,
festevam, linux-kernel
In-Reply-To: <1595476808-28927-1-git-send-email-shengjiu.wang@nxp.com>
On Thu, Jul 23, 2020 at 12:00:08PM +0800, Shengjiu Wang wrote:
> ESAI interfaces may share same interrupt line with EDMA on
> some platforms (e.g. i.MX8QXP, i.MX8QM).
> Add IRQF_SHARED flag to allow sharing the irq among several
> devices
>
> Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
> Signed-off-by: Viorel Suman <viorel.suman@nxp.com>
Acked-by: Nicolin Chen <nicoleotsuka@gmail.com>
^ permalink raw reply
* Re: [PATCH v5 1/4] riscv: Move kernel mapping to vmalloc zone
From: Alex Ghiti @ 2020-07-23 5:21 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Palmer Dabbelt
Cc: aou, linux-mm, Anup Patel, linux-kernel, Atish Patra, paulus,
zong.li, Paul Walmsley, linux-riscv, linuxppc-dev
In-Reply-To: <54af168083aee9dbda1b531227521a26b77ba2c8.camel@kernel.crashing.org>
Hi Benjamin,
Le 7/21/20 à 7:11 PM, Benjamin Herrenschmidt a écrit :
> On Tue, 2020-07-21 at 14:36 -0400, Alex Ghiti wrote:
>>>> I guess I don't understand why this is necessary at all.
>>>> Specifically: why
>>>> can't we just relocate the kernel within the linear map? That would
>>>> let the
>>>> bootloader put the kernel wherever it wants, modulo the physical
>>>> memory size we
>>>> support. We'd need to handle the regions that are coupled to the
>>>> kernel's
>>>> execution address, but we could just put them in an explicit memory
>>>> region
>>>> which is what we should probably be doing anyway.
>>>
>>> Virtual relocation in the linear mapping requires to move the kernel
>>> physically too. Zong implemented this physical move in its KASLR RFC
>>> patchset, which is cumbersome since finding an available physical spot
>>> is harder than just selecting a virtual range in the vmalloc range.
>>>
>>> In addition, having the kernel mapping in the linear mapping prevents
>>> the use of hugepage for the linear mapping resulting in performance loss
>>> (at least for the GB that encompasses the kernel).
>>>
>>> Why do you find this "ugly" ? The vmalloc region is just a bunch of
>>> available virtual addresses to whatever purpose we want, and as noted by
>>> Zong, arm64 uses the same scheme.
>
> I don't get it :-)
>
> At least on powerpc we move the kernel in the linear mapping and it
> works fine with huge pages, what is your problem there ? You rely on
> punching small-page size holes in there ?
>
ARCH_HAS_STRICT_KERNEL_RWX prevents the use of a hugepage for the kernel
mapping in the direct mapping, as it sets different permissions on
different parts of the kernel (data, text, etc.).
> At least in the old days, there were a number of assumptions that
> the kernel text/data/bss resides in the linear mapping.
>
> If you change that you need to ensure that it's still physically
> contiguous and you'll have to tweak __va and __pa, which might induce
> extra overhead.
>
Yes that's done in this patch and indeed there is an overhead to those
functions.
> Cheers,
> Ben.
>
>
Thanks,
Alex
^ permalink raw reply
* Re: [v4 2/5] KVM: PPC: Book3S HV: track the state GFNs associated with secure VMs
From: Bharata B Rao @ 2020-07-23 4:48 UTC (permalink / raw)
To: Ram Pai
Cc: ldufour, cclaudio, kvm-ppc, sathnaga, aneesh.kumar, sukadev,
linuxppc-dev, bauerman, david
In-Reply-To: <1594972827-13928-3-git-send-email-linuxram@us.ibm.com>
On Fri, Jul 17, 2020 at 01:00:24AM -0700, Ram Pai wrote:
> During the life of SVM, its GFNs transition through normal, secure and
> shared states. Since the kernel does not track GFNs that are shared, it
> is not possible to disambiguate a shared GFN from a GFN whose PFN has
> not yet been migrated to a secure-PFN. Also it is not possible to
> disambiguate a secure-GFN from a GFN whose GFN has been pagedout from
> the ultravisor.
>
> The ability to identify the state of a GFN is needed to skip migration
> of its PFN to secure-PFN during ESM transition.
>
> The code is re-organized to track the states of a GFN as explained
> below.
>
> ************************************************************************
> 1. States of a GFN
> ---------------
> The GFN can be in one of the following states.
>
> (a) Secure - The GFN is secure. The GFN is associated with
> a Secure VM, the contents of the GFN is not accessible
> to the Hypervisor. This GFN can be backed by a secure-PFN,
> or can be backed by a normal-PFN with contents encrypted.
> The former is true when the GFN is paged-in into the
> ultravisor. The latter is true when the GFN is paged-out
> of the ultravisor.
>
> (b) Shared - The GFN is shared. The GFN is associated with a
> secure VM. The contents of the GFN is accessible to
> Hypervisor. This GFN is backed by a normal-PFN and its
> content is un-encrypted.
>
> (c) Normal - The GFN is normal. The GFN is associated with
> a normal VM. The contents of the GFN is accessible to
> the Hypervisor. Its content is never encrypted.
>
> 2. States of a VM.
> ---------------
>
> (a) Normal VM: A VM whose contents are always accessible to
> the hypervisor. All its GFNs are normal-GFNs.
>
> (b) Secure VM: A VM whose contents are not accessible to the
> hypervisor without the VM's consent. Its GFNs are
> either Shared-GFN or Secure-GFNs.
>
> (c) Transient VM: A Normal VM that is transitioning to secure VM.
> The transition starts on successful return of
> H_SVM_INIT_START, and ends on successful return
> of H_SVM_INIT_DONE. This transient VM, can have GFNs
> in any of the three states; i.e Secure-GFN, Shared-GFN,
> and Normal-GFN. The VM never executes in this state
> in supervisor-mode.
>
> 3. Memory slot State.
> ------------------
> The state of a memory slot mirrors the state of the
> VM the memory slot is associated with.
>
> 4. VM State transition.
> --------------------
>
> A VM always starts in Normal Mode.
>
> H_SVM_INIT_START moves the VM into transient state. During this
> time the Ultravisor may request some of its GFNs to be shared or
> secured. So its GFNs can be in one of the three GFN states.
>
> H_SVM_INIT_DONE moves the VM entirely from transient state to
> secure-state. At this point any left-over normal-GFNs are
> transitioned to Secure-GFN.
>
> H_SVM_INIT_ABORT moves the transient VM back to normal VM.
> All its GFNs are moved to Normal-GFNs.
>
> UV_TERMINATE transitions the secure-VM back to normal-VM. All
> the secure-GFN and shared-GFNs are transitioned to normal-GFN.
> Note: The contents of the normal-GFN is undefined at this point.
>
> 5. GFN state implementation:
> -------------------------
>
> Secure GFN is associated with a secure-PFN; also called uvmem_pfn,
> when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag
> set, and contains the value of the secure-PFN.
> It is associated with a normal-PFN; also called mem_pfn, when
> the GFN is pagedout. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set.
> The value of the normal-PFN is not tracked.
>
> Shared GFN is associated with a normal-PFN. Its pfn[] has
> KVMPPC_UVMEM_SHARED_PFN flag set. The value of the normal-PFN
> is not tracked.
>
> Normal GFN is associated with normal-PFN. Its pfn[] has
> no flag set. The value of the normal-PFN is not tracked.
>
> 6. Life cycle of a GFN
> --------------------
> --------------------------------------------------------------
> | | Share | Unshare | SVM |H_SVM_INIT_DONE|
> | |operation |operation | abort/ | |
> | | | | terminate | |
> -------------------------------------------------------------
> | | | | | |
> | Secure | Shared | Secure |Normal |Secure |
> | | | | | |
> | Shared | Shared | Secure |Normal |Shared |
> | | | | | |
> | Normal | Shared | Secure |Normal |Secure |
> --------------------------------------------------------------
>
> 7. Life cycle of a VM
> --------------------
> --------------------------------------------------------------------
> | | start | H_SVM_ |H_SVM_ |H_SVM_ |UV_SVM_ |
> | | VM |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE |
> | | | | | | |
> --------- ----------------------------------------------------------
> | | | | | | |
> | Normal | Normal | Transient|Error |Error |Normal |
> | | | | | | |
> | Secure | Error | Error |Error |Error |Normal |
> | | | | | | |
> |Transient| N/A | Error |Secure |Normal |Normal |
> --------------------------------------------------------------------
>
> ************************************************************************
>
> Cc: Paul Mackerras <paulus@ozlabs.org>
> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> Cc: Michael Ellerman <mpe@ellerman.id.au>
> Cc: Bharata B Rao <bharata@linux.ibm.com>
> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
> Cc: Laurent Dufour <ldufour@linux.ibm.com>
> Cc: Thiago Jung Bauermann <bauerman@linux.ibm.com>
> Cc: David Gibson <david@gibson.dropbear.id.au>
> Cc: Claudio Carvalho <cclaudio@linux.ibm.com>
> Cc: kvm-ppc@vger.kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Reviewed-by: Thiago Jung Bauermann <bauerman@linux.ibm.com>
> Signed-off-by: Ram Pai <linuxram@us.ibm.com>
> ---
> arch/powerpc/kvm/book3s_hv_uvmem.c | 187 +++++++++++++++++++++++++++++++++----
> 1 file changed, 168 insertions(+), 19 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
> index 0baa293..df2e272 100644
> --- a/arch/powerpc/kvm/book3s_hv_uvmem.c
> +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
> @@ -98,7 +98,127 @@
> static unsigned long *kvmppc_uvmem_bitmap;
> static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
>
> -#define KVMPPC_UVMEM_PFN (1UL << 63)
> +/*
> + * States of a GFN
> + * ---------------
> + * The GFN can be in one of the following states.
> + *
> + * (a) Secure - The GFN is secure. The GFN is associated with
> + * a Secure VM, the contents of the GFN is not accessible
> + * to the Hypervisor. This GFN can be backed by a secure-PFN,
> + * or can be backed by a normal-PFN with contents encrypted.
> + * The former is true when the GFN is paged-in into the
> + * ultravisor. The latter is true when the GFN is paged-out
> + * of the ultravisor.
> + *
> + * (b) Shared - The GFN is shared. The GFN is associated with a
> + * a secure VM. The contents of the GFN is accessible to
> + * Hypervisor. This GFN is backed by a normal-PFN and its
> + * content is un-encrypted.
> + *
> + * (c) Normal - The GFN is a normal. The GFN is associated with
> + * a normal VM. The contents of the GFN is accesible to
> + * the Hypervisor. Its content is never encrypted.
> + *
> + * States of a VM.
> + * ---------------
> + *
> + * Normal VM: A VM whose contents are always accessible to
> + * the hypervisor. All its GFNs are normal-GFNs.
> + *
> + * Secure VM: A VM whose contents are not accessible to the
> + * hypervisor without the VM's consent. Its GFNs are
> + * either Shared-GFN or Secure-GFNs.
> + *
> + * Transient VM: A Normal VM that is transitioning to secure VM.
> + * The transition starts on successful return of
> + * H_SVM_INIT_START, and ends on successful return
> + * of H_SVM_INIT_DONE. This transient VM, can have GFNs
> + * in any of the three states; i.e Secure-GFN, Shared-GFN,
> + * and Normal-GFN. The VM never executes in this state
> + * in supervisor-mode.
> + *
> + * Memory slot State.
> + * -----------------------------
> + * The state of a memory slot mirrors the state of the
> + * VM the memory slot is associated with.
> + *
> + * VM State transition.
> + * --------------------
> + *
> + * A VM always starts in Normal Mode.
> + *
> + * H_SVM_INIT_START moves the VM into transient state. During this
> + * time the Ultravisor may request some of its GFNs to be shared or
> + * secured. So its GFNs can be in one of the three GFN states.
> + *
> + * H_SVM_INIT_DONE moves the VM entirely from transient state to
> + * secure-state. At this point any left-over normal-GFNs are
> + * transitioned to Secure-GFN.
> + *
> + * H_SVM_INIT_ABORT moves the transient VM back to normal VM.
> + * All its GFNs are moved to Normal-GFNs.
> + *
> + * UV_TERMINATE transitions the secure-VM back to normal-VM. All
> + * the secure-GFN and shared-GFNs are tranistioned to normal-GFN
> + * Note: The contents of the normal-GFN is undefined at this point.
> + *
> + * GFN state implementation:
> + * -------------------------
> + *
> + * Secure GFN is associated with a secure-PFN; also called uvmem_pfn,
> + * when the GFN is paged-in. Its pfn[] has KVMPPC_GFN_UVMEM_PFN flag
> + * set, and contains the value of the secure-PFN.
> + * It is associated with a normal-PFN; also called mem_pfn, when
> + * the GFN is pagedout. Its pfn[] has KVMPPC_GFN_MEM_PFN flag set.
> + * The value of the normal-PFN is not tracked.
> + *
> + * Shared GFN is associated with a normal-PFN. Its pfn[] has
> + * KVMPPC_UVMEM_SHARED_PFN flag set. The value of the normal-PFN
> + * is not tracked.
> + *
> + * Normal GFN is associated with normal-PFN. Its pfn[] has
> + * no flag set. The value of the normal-PFN is not tracked.
> + *
> + * Life cycle of a GFN
> + * --------------------
> + *
> + * --------------------------------------------------------------
> + * | | Share | Unshare | SVM |H_SVM_INIT_DONE|
> + * | |operation |operation | abort/ | |
> + * | | | | terminate | |
> + * -------------------------------------------------------------
> + * | | | | | |
> + * | Secure | Shared | Secure |Normal |Secure |
> + * | | | | | |
> + * | Shared | Shared | Secure |Normal |Shared |
> + * | | | | | |
> + * | Normal | Shared | Secure |Normal |Secure |
> + * --------------------------------------------------------------
> + *
> + * Life cycle of a VM
> + * --------------------
> + *
> + * --------------------------------------------------------------------
> + * | | start | H_SVM_ |H_SVM_ |H_SVM_ |UV_SVM_ |
> + * | | VM |INIT_START|INIT_DONE|INIT_ABORT |TERMINATE |
> + * | | | | | | |
> + * --------- ----------------------------------------------------------
> + * | | | | | | |
> + * | Normal | Normal | Transient|Error |Error |Normal |
> + * | | | | | | |
> + * | Secure | Error | Error |Error |Error |Normal |
> + * | | | | | | |
> + * |Transient| N/A | Error |Secure |Normal |Normal |
> + * --------------------------------------------------------------------
> + */
> +
> +#define KVMPPC_GFN_UVMEM_PFN (1UL << 63)
> +#define KVMPPC_GFN_MEM_PFN (1UL << 62)
> +#define KVMPPC_GFN_SHARED (1UL << 61)
> +#define KVMPPC_GFN_SECURE (KVMPPC_GFN_UVMEM_PFN | KVMPPC_GFN_MEM_PFN)
> +#define KVMPPC_GFN_FLAG_MASK (KVMPPC_GFN_SECURE | KVMPPC_GFN_SHARED)
> +#define KVMPPC_GFN_PFN_MASK (~KVMPPC_GFN_FLAG_MASK)
>
> struct kvmppc_uvmem_slot {
> struct list_head list;
> @@ -106,11 +226,11 @@ struct kvmppc_uvmem_slot {
> unsigned long base_pfn;
> unsigned long *pfns;
> };
> -
> struct kvmppc_uvmem_page_pvt {
> struct kvm *kvm;
> unsigned long gpa;
> bool skip_page_out;
> + bool remove_gfn;
> };
>
> bool kvmppc_uvmem_available(void)
> @@ -163,8 +283,8 @@ void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
> mutex_unlock(&kvm->arch.uvmem_lock);
> }
>
> -static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
> - struct kvm *kvm)
> +static void kvmppc_mark_gfn(unsigned long gfn, struct kvm *kvm,
> + unsigned long flag, unsigned long uvmem_pfn)
> {
> struct kvmppc_uvmem_slot *p;
>
> @@ -172,24 +292,41 @@ static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
> if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
> unsigned long index = gfn - p->base_pfn;
>
> - p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN;
> + if (flag == KVMPPC_GFN_UVMEM_PFN)
> + p->pfns[index] = uvmem_pfn | flag;
> + else
> + p->pfns[index] = flag;
> return;
> }
> }
> }
>
> -static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm)
> +/* mark the GFN as secure-GFN associated with @uvmem pfn device-PFN. */
> +static void kvmppc_gfn_secure_uvmem_pfn(unsigned long gfn,
> + unsigned long uvmem_pfn, struct kvm *kvm)
> {
> - struct kvmppc_uvmem_slot *p;
> + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_UVMEM_PFN, uvmem_pfn);
> +}
>
> - list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
> - if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
> - p->pfns[gfn - p->base_pfn] = 0;
> - return;
> - }
> - }
> +/* mark the GFN as secure-GFN associated with a memory-PFN. */
> +static void kvmppc_gfn_secure_mem_pfn(unsigned long gfn, struct kvm *kvm)
> +{
> + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_MEM_PFN, 0);
> +}
> +
> +/* mark the GFN as a shared GFN. */
> +static void kvmppc_gfn_shared(unsigned long gfn, struct kvm *kvm)
> +{
> + kvmppc_mark_gfn(gfn, kvm, KVMPPC_GFN_SHARED, 0);
> +}
> +
> +/* mark the GFN as a non-existent GFN. */
> +static void kvmppc_gfn_remove(unsigned long gfn, struct kvm *kvm)
> +{
> + kvmppc_mark_gfn(gfn, kvm, 0, 0);
> }
>
> +/* return true, if the GFN is a secure-GFN backed by a secure-PFN */
> static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
> unsigned long *uvmem_pfn)
> {
> @@ -199,10 +336,10 @@ static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
> if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
> unsigned long index = gfn - p->base_pfn;
>
> - if (p->pfns[index] & KVMPPC_UVMEM_PFN) {
> + if (p->pfns[index] & KVMPPC_GFN_UVMEM_PFN) {
> if (uvmem_pfn)
> *uvmem_pfn = p->pfns[index] &
> - ~KVMPPC_UVMEM_PFN;
> + KVMPPC_GFN_PFN_MASK;
> return true;
> } else
> return false;
> @@ -353,6 +490,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
>
> mutex_lock(&kvm->arch.uvmem_lock);
> if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
> + kvmppc_gfn_remove(gfn, kvm);
> mutex_unlock(&kvm->arch.uvmem_lock);
> continue;
> }
> @@ -360,6 +498,7 @@ void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
> uvmem_page = pfn_to_page(uvmem_pfn);
> pvt = uvmem_page->zone_device_data;
> pvt->skip_page_out = skip_page_out;
> + pvt->remove_gfn = true;
> mutex_unlock(&kvm->arch.uvmem_lock);
>
> pfn = gfn_to_pfn(kvm, gfn);
> @@ -429,7 +568,7 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
> goto out_clear;
>
> uvmem_pfn = bit + pfn_first;
> - kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
> + kvmppc_gfn_secure_uvmem_pfn(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
>
> pvt->gpa = gpa;
> pvt->kvm = kvm;
> @@ -524,6 +663,7 @@ static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
> uvmem_page = pfn_to_page(uvmem_pfn);
> pvt = uvmem_page->zone_device_data;
> pvt->skip_page_out = true;
> + pvt->remove_gfn = false;
> }
>
> retry:
> @@ -537,12 +677,16 @@ static unsigned long kvmppc_share_page(struct kvm *kvm, unsigned long gpa,
> uvmem_page = pfn_to_page(uvmem_pfn);
> pvt = uvmem_page->zone_device_data;
> pvt->skip_page_out = true;
> + pvt->remove_gfn = false;
This is the case of making an already secure page a shared page.
A comment as to why remove_gfn is set to false here would help.
Also isn't it by default false? Is there a situation where it starts
out by default false, becomes true later and you are required to
explicitly mark it false here?
Otherwise, Reviewed-by: Bharata B Rao <bharata@linux.ibm.com>
Regards,
Bharata.
^ permalink raw reply
* [PATCH] ASoC: fsl_esai: add IRQF_SHARED for devm_request_irq
From: Shengjiu Wang @ 2020-07-23 4:00 UTC (permalink / raw)
To: timur, nicoleotsuka, Xiubo.Lee, festevam, broonie, perex, tiwai,
alsa-devel
Cc: linuxppc-dev, linux-kernel
ESAI interfaces may share the same interrupt line with EDMA on
some platforms (e.g. i.MX8QXP, i.MX8QM).
Add the IRQF_SHARED flag to allow sharing the irq among several
devices.
Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
Signed-off-by: Viorel Suman <viorel.suman@nxp.com>
---
sound/soc/fsl/fsl_esai.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sound/soc/fsl/fsl_esai.c b/sound/soc/fsl/fsl_esai.c
index b8fbd7ba94af..4ae36099ae82 100644
--- a/sound/soc/fsl/fsl_esai.c
+++ b/sound/soc/fsl/fsl_esai.c
@@ -1012,7 +1012,7 @@ static int fsl_esai_probe(struct platform_device *pdev)
if (irq < 0)
return irq;
- ret = devm_request_irq(&pdev->dev, irq, esai_isr, 0,
+ ret = devm_request_irq(&pdev->dev, irq, esai_isr, IRQF_SHARED,
esai_priv->name, esai_priv);
if (ret) {
dev_err(&pdev->dev, "failed to claim irq %u\n", irq);
--
2.27.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox