* [PATCH v4 06/19] arm64: insn: Add N immediate encoding
From: Marc Zyngier @ 2018-01-04 18:43 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20180104184334.16571-1-marc.zyngier@arm.com>
We're missing a way to generate the encoding of the N immediate,
which is only a single bit used in a number of instructions that take
an immediate.
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
arch/arm64/include/asm/insn.h | 1 +
arch/arm64/kernel/insn.c | 4 ++++
2 files changed, 5 insertions(+)
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 4214c38d016b..21fffdd290a3 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -70,6 +70,7 @@ enum aarch64_insn_imm_type {
AARCH64_INSN_IMM_6,
AARCH64_INSN_IMM_S,
AARCH64_INSN_IMM_R,
+ AARCH64_INSN_IMM_N,
AARCH64_INSN_IMM_MAX
};
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 2718a77da165..7e432662d454 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -343,6 +343,10 @@ static int __kprobes aarch64_get_imm_shift_mask(enum aarch64_insn_imm_type type,
mask = BIT(6) - 1;
shift = 16;
break;
+ case AARCH64_INSN_IMM_N:
+ mask = 1;
+ shift = 22;
+ break;
default:
return -EINVAL;
}
--
2.14.2
^ permalink raw reply related
* [PATCH v4 05/19] arm64: alternatives: Add dynamic patching feature
From: Marc Zyngier @ 2018-01-04 18:43 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20180104184334.16571-1-marc.zyngier@arm.com>
We've so far relied on a patching infrastructure that only gave us
a single alternative, without any way to finely control what gets
patched. For a single feature, this is an all or nothing thing.
It would be interesting to have a more fine grained way of patching
the kernel though, where we could dynamically tune the code that gets
injected.
In order to achieve this, let's introduce a new form of alternative
that is associated with a callback. This callback gets the instruction
sequence number and the old instruction as a parameter, and returns
the new instruction. This callback is always called, as the patching
decision is now done at runtime (not patching is equivalent to returning
the same instruction).
Patching with a callback is declared with the new ALTERNATIVE_CB
and alternative_cb directives:
asm volatile(ALTERNATIVE_CB("mov %0, #0\n", callback)
: "r" (v));
or
alternative_cb callback
mov x0, #0
alternative_cb_end
where callback is the C function computing the alternative.
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
arch/arm64/include/asm/alternative.h | 36 ++++++++++++++++++++++---
arch/arm64/include/asm/alternative_types.h | 4 +++
arch/arm64/kernel/alternative.c | 43 ++++++++++++++++++++++--------
3 files changed, 68 insertions(+), 15 deletions(-)
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 395befde7595..04f66f6173fc 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -18,10 +18,14 @@
void __init apply_alternatives_all(void);
void apply_alternatives(void *start, size_t length);
-#define ALTINSTR_ENTRY(feature) \
+#define ALTINSTR_ENTRY(feature,cb) \
" .align " __stringify(ALTINSTR_ALIGN) "\n" \
" .word 661b - .\n" /* label */ \
+ " .if " __stringify(cb) " == 0\n" \
" .word 663f - .\n" /* new instruction */ \
+ " .else\n" \
+ " .word " __stringify(cb) "- .\n" /* callback */ \
+ " .endif\n" \
" .hword " __stringify(feature) "\n" /* feature bit */ \
" .byte 662b-661b\n" /* source len */ \
" .byte 664f-663f\n" /* replacement len */
@@ -39,15 +43,18 @@ void apply_alternatives(void *start, size_t length);
* but most assemblers die if insn1 or insn2 have a .inst. This should
* be fixed in a binutils release posterior to 2.25.51.0.2 (anything
* containing commit 4e4d08cf7399b606 or c1baaddf8861).
+ *
+ * Alternatives with callbacks do not generate replacement instructions.
*/
-#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \
+#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled, cb) \
".if "__stringify(cfg_enabled)" == 1\n" \
"661:\n\t" \
oldinstr "\n" \
"662:\n" \
".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(feature) \
+ ALTINSTR_ENTRY(feature,cb) \
".popsection\n" \
+ " .if " __stringify(cb) " == 0\n" \
".pushsection .altinstr_replacement, \"a\"\n" \
"663:\n\t" \
newinstr "\n" \
@@ -55,11 +62,17 @@ void apply_alternatives(void *start, size_t length);
".popsection\n\t" \
".org . - (664b-663b) + (662b-661b)\n\t" \
".org . - (662b-661b) + (664b-663b)\n" \
+ ".else\n\t" \
+ "663:\n\t" \
+ "664:\n\t" \
+ ".endif\n" \
".endif\n"
#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) \
- __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
+ __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg), 0)
+#define ALTERNATIVE_CB(oldinstr, cb) \
+ __ALTERNATIVE_CFG(oldinstr, "NOT_AN_INSTRUCTION", ARM64_NCAPS, 1, cb)
#else
#include <asm/assembler.h>
@@ -127,6 +140,14 @@ void apply_alternatives(void *start, size_t length);
661:
.endm
+.macro alternative_cb cb
+ .set .Lasm_alt_mode, 0
+ .pushsection .altinstructions, "a"
+ altinstruction_entry 661f, \cb, ARM64_NCAPS, 662f-661f, 0
+ .popsection
+661:
+.endm
+
/*
* Provide the other half of the alternative code sequence.
*/
@@ -152,6 +173,13 @@ void apply_alternatives(void *start, size_t length);
.org . - (662b-661b) + (664b-663b)
.endm
+/*
+ * Callback-based alternative epilogue
+ */
+.macro alternative_cb_end
+662:
+.endm
+
/*
* Provides a trivial alternative or default sequence consisting solely
* of NOPs. The number of NOPs is chosen automatically to match the
diff --git a/arch/arm64/include/asm/alternative_types.h b/arch/arm64/include/asm/alternative_types.h
index 26cf76167f2d..e400b9061957 100644
--- a/arch/arm64/include/asm/alternative_types.h
+++ b/arch/arm64/include/asm/alternative_types.h
@@ -2,6 +2,10 @@
#ifndef __ASM_ALTERNATIVE_TYPES_H
#define __ASM_ALTERNATIVE_TYPES_H
+struct alt_instr;
+typedef void (*alternative_cb_t)(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst);
+
struct alt_instr {
s32 orig_offset; /* offset to original instruction */
s32 alt_offset; /* offset to replacement instruction */
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 6dd0a3a3e5c9..0f52627fbb29 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -105,32 +105,53 @@ static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnp
return insn;
}
+static void patch_alternative(struct alt_instr *alt,
+ __le32 *origptr, __le32 *updptr, int nr_inst)
+{
+ __le32 *replptr;
+ int i;
+
+ replptr = ALT_REPL_PTR(alt);
+ for (i = 0; i < nr_inst; i++) {
+ u32 insn;
+
+ insn = get_alt_insn(alt, origptr + i, replptr + i);
+ updptr[i] = cpu_to_le32(insn);
+ }
+}
+
static void __apply_alternatives(void *alt_region, bool use_linear_alias)
{
struct alt_instr *alt;
struct alt_region *region = alt_region;
- __le32 *origptr, *replptr, *updptr;
+ __le32 *origptr, *updptr;
+ alternative_cb_t alt_cb;
for (alt = region->begin; alt < region->end; alt++) {
- u32 insn;
- int i, nr_inst;
+ int nr_inst;
- if (!cpus_have_cap(alt->cpufeature))
+ /* Use ARM64_NCAPS as an unconditional patch */
+ if (alt->cpufeature < ARM64_NCAPS &&
+ !cpus_have_cap(alt->cpufeature))
continue;
- BUG_ON(alt->alt_len != alt->orig_len);
+ if (alt->cpufeature == ARM64_NCAPS)
+ BUG_ON(alt->alt_len != 0);
+ else
+ BUG_ON(alt->alt_len != alt->orig_len);
pr_info_once("patching kernel code\n");
origptr = ALT_ORIG_PTR(alt);
- replptr = ALT_REPL_PTR(alt);
updptr = use_linear_alias ? lm_alias(origptr) : origptr;
- nr_inst = alt->alt_len / sizeof(insn);
+ nr_inst = alt->orig_len / AARCH64_INSN_SIZE;
- for (i = 0; i < nr_inst; i++) {
- insn = get_alt_insn(alt, origptr + i, replptr + i);
- updptr[i] = cpu_to_le32(insn);
- }
+ if (alt->cpufeature < ARM64_NCAPS)
+ alt_cb = patch_alternative;
+ else
+ alt_cb = ALT_REPL_PTR(alt);
+
+ alt_cb(alt, origptr, updptr, nr_inst);
flush_icache_range((uintptr_t)origptr,
(uintptr_t)(origptr + nr_inst));
--
2.14.2
^ permalink raw reply related
* [PATCH v4 04/19] arm64: alternatives: Enforce alignment of struct alt_instr
From: Marc Zyngier @ 2018-01-04 18:43 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20180104184334.16571-1-marc.zyngier@arm.com>
We're playing a dangerous game with struct alt_instr, as we produce
it using assembly tricks, but parse them using the C structure.
We just assume that the respective alignments of the two will
be the same.
But as we add more fields to this structure, the alignment requirements
of the structure may change, and lead to all kinds of funky bugs.
To solve this, let's move the definition of struct alt_instr to its
own file, and use this to generate the alignment constraint from
asm-offsets.c. The various macros are then patched to take the
alignment into account.
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
arch/arm64/include/asm/alternative.h | 13 +++++--------
arch/arm64/include/asm/alternative_types.h | 13 +++++++++++++
arch/arm64/kernel/asm-offsets.c | 4 ++++
3 files changed, 22 insertions(+), 8 deletions(-)
create mode 100644 arch/arm64/include/asm/alternative_types.h
diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h
index 4a85c6952a22..395befde7595 100644
--- a/arch/arm64/include/asm/alternative.h
+++ b/arch/arm64/include/asm/alternative.h
@@ -2,28 +2,24 @@
#ifndef __ASM_ALTERNATIVE_H
#define __ASM_ALTERNATIVE_H
+#include <asm/asm-offsets.h>
#include <asm/cpucaps.h>
#include <asm/insn.h>
#ifndef __ASSEMBLY__
+#include <asm/alternative_types.h>
+
#include <linux/init.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <linux/stringify.h>
-struct alt_instr {
- s32 orig_offset; /* offset to original instruction */
- s32 alt_offset; /* offset to replacement instruction */
- u16 cpufeature; /* cpufeature bit set for replacement */
- u8 orig_len; /* size of original instruction(s) */
- u8 alt_len; /* size of new instruction(s), <= orig_len */
-};
-
void __init apply_alternatives_all(void);
void apply_alternatives(void *start, size_t length);
#define ALTINSTR_ENTRY(feature) \
+ " .align " __stringify(ALTINSTR_ALIGN) "\n" \
" .word 661b - .\n" /* label */ \
" .word 663f - .\n" /* new instruction */ \
" .hword " __stringify(feature) "\n" /* feature bit */ \
@@ -69,6 +65,7 @@ void apply_alternatives(void *start, size_t length);
#include <asm/assembler.h>
.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
+ .align ALTINSTR_ALIGN
.word \orig_offset - .
.word \alt_offset - .
.hword \feature
diff --git a/arch/arm64/include/asm/alternative_types.h b/arch/arm64/include/asm/alternative_types.h
new file mode 100644
index 000000000000..26cf76167f2d
--- /dev/null
+++ b/arch/arm64/include/asm/alternative_types.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ALTERNATIVE_TYPES_H
+#define __ASM_ALTERNATIVE_TYPES_H
+
+struct alt_instr {
+ s32 orig_offset; /* offset to original instruction */
+ s32 alt_offset; /* offset to replacement instruction */
+ u16 cpufeature; /* cpufeature bit set for replacement */
+ u8 orig_len; /* size of original instruction(s) */
+ u8 alt_len; /* size of new instruction(s), <= orig_len */
+};
+
+#endif
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 5ab8841af382..f00666341ae2 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -25,6 +25,7 @@
#include <linux/dma-mapping.h>
#include <linux/kvm_host.h>
#include <linux/suspend.h>
+#include <asm/alternative_types.h>
#include <asm/cpufeature.h>
#include <asm/thread_info.h>
#include <asm/memory.h>
@@ -151,5 +152,8 @@ int main(void)
DEFINE(HIBERN_PBE_ADDR, offsetof(struct pbe, address));
DEFINE(HIBERN_PBE_NEXT, offsetof(struct pbe, next));
DEFINE(ARM64_FTR_SYSVAL, offsetof(struct arm64_ftr_reg, sys_val));
+ BLANK();
+ DEFINE(ALTINSTR_ALIGN, (63 - __builtin_clzl(__alignof__(struct alt_instr))));
+
return 0;
}
--
2.14.2
^ permalink raw reply related
* [PATCH v4 03/19] arm64: asm-offsets: Remove potential circular dependency
From: Marc Zyngier @ 2018-01-04 18:43 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20180104184334.16571-1-marc.zyngier@arm.com>
So far, we've been lucky enough that none of the include files
that asm-offsets.c requires include asm-offsets.h. This is
about to change, and would introduce a nasty circular dependency...
Let's now guard the inclusion of asm-offsets.h so that it never
gets pulled from asm-offsets.c. The same issue exists between
bounds.c and include/generated/bounds.h, and is worked around
by using the existing guard symbol.
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
arch/arm64/include/asm/asm-offsets.h | 2 ++
arch/arm64/kernel/asm-offsets.c | 2 ++
2 files changed, 4 insertions(+)
diff --git a/arch/arm64/include/asm/asm-offsets.h b/arch/arm64/include/asm/asm-offsets.h
index d370ee36a182..7d6531a81eb3 100644
--- a/arch/arm64/include/asm/asm-offsets.h
+++ b/arch/arm64/include/asm/asm-offsets.h
@@ -1 +1,3 @@
+#if !defined(__GENERATING_ASM_OFFSETS_H) && !defined(__GENERATING_BOUNDS_H)
#include <generated/asm-offsets.h>
+#endif
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 742887330101..5ab8841af382 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -18,6 +18,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#define __GENERATING_ASM_OFFSETS_H 1
+
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
--
2.14.2
^ permalink raw reply related
* [PATCH v4 02/19] arm64: asm-offsets: Remove unused definitions
From: Marc Zyngier @ 2018-01-04 18:43 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20180104184334.16571-1-marc.zyngier@arm.com>
asm-offsets.h contains a number of definitions that are not used
at all, and in some cases conflict with other definitions (such as
NSEC_PER_SEC).
Spring clean-up time.
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
arch/arm64/kernel/asm-offsets.c | 5 -----
1 file changed, 5 deletions(-)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 7e8be0c22ce0..742887330101 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -83,10 +83,6 @@ int main(void)
DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm));
DEFINE(VMA_VM_FLAGS, offsetof(struct vm_area_struct, vm_flags));
BLANK();
- DEFINE(VM_EXEC, VM_EXEC);
- BLANK();
- DEFINE(PAGE_SZ, PAGE_SIZE);
- BLANK();
DEFINE(__DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL);
DEFINE(__DMA_TO_DEVICE, DMA_TO_DEVICE);
DEFINE(__DMA_FROM_DEVICE, DMA_FROM_DEVICE);
@@ -98,7 +94,6 @@ int main(void)
DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
- DEFINE(NSEC_PER_SEC, NSEC_PER_SEC);
BLANK();
DEFINE(VDSO_CS_CYCLE_LAST, offsetof(struct vdso_data, cs_cycle_last));
DEFINE(VDSO_RAW_TIME_SEC, offsetof(struct vdso_data, raw_time_sec));
--
2.14.2
^ permalink raw reply related
* [PATCH v4 01/19] arm64: asm-offsets: Avoid clashing DMA definitions
From: Marc Zyngier @ 2018-01-04 18:43 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20180104184334.16571-1-marc.zyngier@arm.com>
asm-offsets.h contains a few DMA related definitions that have
the exact same name as the enum members they are derived from.
While this is not a problem so far, it will become an issue if
both asm-offsets.h and include/linux/dma-direction.h are pulled
by the same file.
Let's sidestep the issue by renaming the asm-offsets.h constants.
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
arch/arm64/kernel/asm-offsets.c | 6 +++---
arch/arm64/mm/cache.S | 4 ++--
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 71bf088f1e4b..7e8be0c22ce0 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -87,9 +87,9 @@ int main(void)
BLANK();
DEFINE(PAGE_SZ, PAGE_SIZE);
BLANK();
- DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL);
- DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
- DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE);
+ DEFINE(__DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL);
+ DEFINE(__DMA_TO_DEVICE, DMA_TO_DEVICE);
+ DEFINE(__DMA_FROM_DEVICE, DMA_FROM_DEVICE);
BLANK();
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 7f1dbe962cf5..c1336be085eb 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -205,7 +205,7 @@ ENDPIPROC(__dma_flush_area)
* - dir - DMA direction
*/
ENTRY(__dma_map_area)
- cmp w2, #DMA_FROM_DEVICE
+ cmp w2, #__DMA_FROM_DEVICE
b.eq __dma_inv_area
b __dma_clean_area
ENDPIPROC(__dma_map_area)
@@ -217,7 +217,7 @@ ENDPIPROC(__dma_map_area)
* - dir - DMA direction
*/
ENTRY(__dma_unmap_area)
- cmp w2, #DMA_TO_DEVICE
+ cmp w2, #__DMA_TO_DEVICE
b.ne __dma_inv_area
ret
ENDPIPROC(__dma_unmap_area)
--
2.14.2
^ permalink raw reply related
* [PATCH v4 00/19] KVM/arm64: Randomise EL2 mappings
From: Marc Zyngier @ 2018-01-04 18:43 UTC (permalink / raw)
To: linux-arm-kernel
Whilst KVM benefits from the kernel randomisation via KASLR, there is
no additional randomisation when the kernel is running at EL1, as we
directly use a fixed offset from the linear mapping. This is not
necessarily a problem, but we could do a bit better by independently
randomizing the HYP placement.
This series proposes to randomise the offset by inserting a few random
bits between the MSB of the RAM linear mapping and the top of the HYP
VA (VA_BITS - 2). That's not a lot of random bits (on my Mustang, I
get 13 bits), but that's better than nothing.
In order to achieve this, we need to be able to patch dynamic values
in the kernel text. This results in a bunch of changes to the
alternative framework, the insn library, and a few more hacks in KVM
itself (we get a new way to map the GIC at EL2). This series used to
depend on a number of cleanups in asm-offsets, which is not the case
anymore. I'm still including them as I think they are still pretty
useful.
This has been tested on the FVP model, Seattle (both 39 and 48bit VA),
Mustang and Thunder-X. I've also done a sanity check on 32bit (which
is only impacted by the HYP IO VA stuff).
Thanks,
M.
* From v3:
- Reworked the alternative code to leave the actual patching to
the callback function. This should allow for more flexibility
should someone or something require it
- Now detects underflows in the IOVA allocator
- Moved the VA patching code to va_layout.c
* From v2:
- Fixed a crapload of bugs in the immediate generation patch
I now have a test harness for it, making sure it generates the
same thing as GAS...
- Fixed a bug in the asm-offsets.h exclusion patch
- Reworked the alternative_cb code to be nicer and avoid generating
pointless nops
* From v1:
- Now works correctly with KASLR
- Dropped the callback field from alt_instr, and reuse one of the
existing fields to store an offset to the callback
- Fix HYP teardown path (depends on fixes previously posted)
- Dropped the VA offset macros
Marc Zyngier (19):
arm64: asm-offsets: Avoid clashing DMA definitions
arm64: asm-offsets: Remove unused definitions
arm64: asm-offsets: Remove potential circular dependency
arm64: alternatives: Enforce alignment of struct alt_instr
arm64: alternatives: Add dynamic patching feature
arm64: insn: Add N immediate encoding
arm64: insn: Add encoder for bitwise operations using literals
arm64: KVM: Dynamically patch the kernel/hyp VA mask
arm64: cpufeatures: Drop the ARM64_HYP_OFFSET_LOW feature flag
KVM: arm/arm64: Do not use kern_hyp_va() with kvm_vgic_global_state
KVM: arm/arm64: Demote HYP VA range display to being a debug feature
KVM: arm/arm64: Move ioremap calls to create_hyp_io_mappings
KVM: arm/arm64: Keep GICv2 HYP VAs in kvm_vgic_global_state
KVM: arm/arm64: Move HYP IO VAs to the "idmap" range
arm64; insn: Add encoder for the EXTR instruction
arm64: insn: Allow ADD/SUB (immediate) with LSL #12
arm64: KVM: Dynamically compute the HYP VA mask
arm64: KVM: Introduce EL2 VA randomisation
arm64: Update the KVM memory map documentation
Documentation/arm64/memory.txt | 8 +-
arch/arm/include/asm/kvm_hyp.h | 6 +
arch/arm/include/asm/kvm_mmu.h | 4 +-
arch/arm64/include/asm/alternative.h | 49 ++++++--
arch/arm64/include/asm/alternative_types.h | 17 +++
arch/arm64/include/asm/asm-offsets.h | 2 +
arch/arm64/include/asm/cpucaps.h | 2 +-
arch/arm64/include/asm/insn.h | 16 +++
arch/arm64/include/asm/kvm_hyp.h | 9 ++
arch/arm64/include/asm/kvm_mmu.h | 57 ++++-----
arch/arm64/kernel/alternative.c | 43 +++++--
arch/arm64/kernel/asm-offsets.c | 17 +--
arch/arm64/kernel/cpufeature.c | 19 ---
arch/arm64/kernel/insn.c | 190 ++++++++++++++++++++++++++++-
arch/arm64/kvm/Makefile | 2 +-
arch/arm64/kvm/va_layout.c | 144 ++++++++++++++++++++++
arch/arm64/mm/cache.S | 4 +-
include/kvm/arm_vgic.h | 12 +-
virt/kvm/arm/hyp/vgic-v2-sr.c | 12 +-
virt/kvm/arm/mmu.c | 95 +++++++++++----
virt/kvm/arm/vgic/vgic-init.c | 6 -
virt/kvm/arm/vgic/vgic-v2.c | 40 ++----
22 files changed, 589 insertions(+), 165 deletions(-)
create mode 100644 arch/arm64/include/asm/alternative_types.h
create mode 100644 arch/arm64/kvm/va_layout.c
--
2.14.2
^ permalink raw reply
* [kernel-hardening] [PATCH] arm: Always use REFCOUNT_FULL
From: Kees Cook @ 2018-01-04 18:42 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20180104183504.GQ28752@n2100.armlinux.org.uk>
On Thu, Jan 4, 2018 at 10:35 AM, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
> On Thu, Jan 04, 2018 at 10:32:46AM -0800, Kees Cook wrote:
>> On Thu, Jan 4, 2018 at 4:28 AM, Jinbum Park <jinb.park7@gmail.com> wrote:
>> > arm prefers to use REFCOUNT_FULL by default.
>> > This enables it for arm.
>> >
>> > Signed-off-by: Jinbum Park <jinb.park7@gmail.com>
>>
>> Acked-by: Kees Cook <keescook@chromium.org>
>
> I'd help if there was some kind of explanation about this. Not
> everyone knows what REFCOUNT_FULL is.
>
> Also, why does "arm" "prefer" to use this? Where does the preference
> come from - and why is it a preference but being enforced by the
> Kconfig ?
This came from discussions with Will Deacon (and others) during the
Linux Security Summit. The arm64 side of this is in commit
4adcec1164de ("arm64: Always use REFCOUNT_FULL"). AIUI, Will said he
didn't want the specialized "fast but technically incomplete"
refcounting as seen with x86's fast refcount infrastructure, but
rather to keep refcounts always fully protected by default because no
one could point to real-world performance impacts with REFCOUNT_FULL
vs unprotected atomic_t infrastructure.
I'm fine leaving this choice up to the end user, but I think it makes
sense to be always-on. If that's not okay, maybe make it default-y for
arm32, and still let people turn it off if they want?
-Kees
--
Kees Cook
Pixel Security
^ permalink raw reply
* [PATCH v5 1/2] PCI: mediatek: Clear IRQ status after IRQ dispatched to avoid reentry
From: Lorenzo Pieralisi @ 2018-01-04 18:40 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1514336394-17747-2-git-send-email-honghui.zhang@mediatek.com>
[+Marc]
On Wed, Dec 27, 2017 at 08:59:53AM +0800, honghui.zhang at mediatek.com wrote:
> From: Honghui Zhang <honghui.zhang@mediatek.com>
>
> There maybe a same IRQ reentry scenario after IRQ received in current
> IRQ handle flow:
> EP device PCIe host driver EP driver
> 1. issue an IRQ
> 2. received IRQ
> 3. clear IRQ status
> 4. dispatch IRQ
> 5. clear IRQ source
> The IRQ status was not successfully cleared at step 2 since the IRQ
> source was not cleared yet. So the PCIe host driver may receive the
> same IRQ after step 5. Then there's an IRQ reentry occurred.
> Even worse, if the reentry IRQ was not an IRQ that EP driver expected,
> it may not handle the IRQ. Then we may run into the infinite loop from
> step 2 to step 4.
> Clear the IRQ status after IRQ have been dispatched to avoid the IRQ
> reentry.
> This patch also fix another INTx IRQ issue by initialize the iterate
> before the loop. If an INTx IRQ re-occurred while we are dispatching
> the INTx IRQ, then iterate may start from PCI_NUM_INTX + INTX_SHIFT
> instead of INTX_SHIFT for the second time entering the
> for_each_set_bit_from() loop.
This looks like two different issues that should be fixed with two
patches.
> Signed-off-by: Honghui Zhang <honghui.zhang@mediatek.com>
> Acked-by: Ryder Lee <ryder.lee@mediatek.com>
> ---
> drivers/pci/host/pcie-mediatek.c | 11 ++++++-----
> 1 file changed, 6 insertions(+), 5 deletions(-)
For the sake of uniformity, I first want to understand why this
driver does not call:
chained_irq_enter/exit()
in the primary handler (mtk_pcie_intr_handler()).
With the GIC as a primary interrupt controller we have not
even figured out how current code can actually work without
calling the chained_* API.
I want to come up with a consistent handling of IRQ domains for
all host bridges and any discrepancy should be explained.
> diff --git a/drivers/pci/host/pcie-mediatek.c b/drivers/pci/host/pcie-mediatek.c
> index db93efd..fc29a9a 100644
> --- a/drivers/pci/host/pcie-mediatek.c
> +++ b/drivers/pci/host/pcie-mediatek.c
> @@ -601,15 +601,16 @@ static irqreturn_t mtk_pcie_intr_handler(int irq, void *data)
> struct mtk_pcie_port *port = (struct mtk_pcie_port *)data;
> unsigned long status;
> u32 virq;
> - u32 bit = INTX_SHIFT;
> + u32 bit;
>
> while ((status = readl(port->base + PCIE_INT_STATUS)) & INTX_MASK) {
> + bit = INTX_SHIFT;
> for_each_set_bit_from(bit, &status, PCI_NUM_INTX + INTX_SHIFT) {
> - /* Clear the INTx */
> - writel(1 << bit, port->base + PCIE_INT_STATUS);
> virq = irq_find_mapping(port->irq_domain,
> bit - INTX_SHIFT);
> generic_handle_irq(virq);
> + /* Clear the INTx */
> + writel(1 << bit, port->base + PCIE_INT_STATUS);
I think that these masking/acking should actually be done through
the irq_chip hooks (see for instance pci-ftpci100.c) - that would
make this kind of bugs much easier to prevent (because the IRQ
layer does the sequencing for you).
Marc (CC'ed) has a more comprehensive view on this than me - I would
like to get to a point where all host bridges uses a consistent
approach for chained IRQ handling and I hope this bug fix can be
a starting point.
Thanks,
Lorenzo
> }
> }
>
> @@ -619,10 +620,10 @@ static irqreturn_t mtk_pcie_intr_handler(int irq, void *data)
>
> while ((imsi_status = readl(port->base + PCIE_IMSI_STATUS))) {
> for_each_set_bit(bit, &imsi_status, MTK_MSI_IRQS_NUM) {
> - /* Clear the MSI */
> - writel(1 << bit, port->base + PCIE_IMSI_STATUS);
> virq = irq_find_mapping(port->msi_domain, bit);
> generic_handle_irq(virq);
> + /* Clear the MSI */
> + writel(1 << bit, port->base + PCIE_IMSI_STATUS);
> }
> }
> /* Clear MSI interrupt status */
> --
> 2.6.4
>
^ permalink raw reply
* [PATCH V2] ARM: imx_v6_v7_defconfig: select the CONFIG_CPUFREQ_DT
From: Anson Huang @ 2018-01-04 18:39 UTC (permalink / raw)
To: linux-arm-kernel
Select CONFIG_CPUFREQ_DT by default to enable
cpu-freq driver for i.MX7D.
The remaining changes are generated by "make savedefconfig".
Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
---
changes since V1:
redo the patch based on Shawn's for-next branch.
arch/arm/configs/imx_v6_v7_defconfig | 10 +---------
1 file changed, 1 insertion(+), 9 deletions(-)
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index 29cd1ac..885db90 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -58,6 +58,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_POWERSAVE=y
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPUFREQ_DT=y
CONFIG_ARM_IMX6Q_CPUFREQ=y
CONFIG_CPU_IDLE=y
CONFIG_VFP=y
@@ -318,14 +319,6 @@ CONFIG_USB_CONFIGFS_F_MIDI=y
CONFIG_USB_CONFIGFS_F_HID=y
CONFIG_USB_CONFIGFS_F_UVC=y
CONFIG_USB_CONFIGFS_F_PRINTER=y
-CONFIG_USB_ZERO=m
-CONFIG_USB_AUDIO=m
-CONFIG_USB_ETH=m
-CONFIG_USB_G_NCM=m
-CONFIG_USB_GADGETFS=m
-CONFIG_USB_FUNCTIONFS=m
-CONFIG_USB_MASS_STORAGE=m
-CONFIG_USB_G_SERIAL=m
CONFIG_MMC=y
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
@@ -350,7 +343,6 @@ CONFIG_RTC_DRV_PCF8563=y
CONFIG_RTC_DRV_M41T80=y
CONFIG_RTC_DRV_MC13XXX=y
CONFIG_RTC_DRV_MXC=y
-CONFIG_RTC_DRV_MXC_V2=y
CONFIG_RTC_DRV_SNVS=y
CONFIG_DMADEVICES=y
CONFIG_FSL_EDMA=y
--
1.9.1
^ permalink raw reply related
* [kernel-hardening] [PATCH] arm: Always use REFCOUNT_FULL
From: Russell King - ARM Linux @ 2018-01-04 18:35 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <CAGXu5j+YsB1FWvZHEjnixU8RMpk-DLDYLXXmxfQjsU_gy64ZDQ@mail.gmail.com>
On Thu, Jan 04, 2018 at 10:32:46AM -0800, Kees Cook wrote:
> On Thu, Jan 4, 2018 at 4:28 AM, Jinbum Park <jinb.park7@gmail.com> wrote:
> > arm prefers to use REFCOUNT_FULL by default.
> > This enables it for arm.
> >
> > Signed-off-by: Jinbum Park <jinb.park7@gmail.com>
>
> Acked-by: Kees Cook <keescook@chromium.org>
It'd help if there was some kind of explanation about this. Not
everyone knows what REFCOUNT_FULL is.
Also, why does "arm" "prefer" to use this? Where does the preference
come from - and why is it a preference but being enforced by the
Kconfig ?
--
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 8.8Mbps down 630kbps up
According to speedtest.net: 8.21Mbps down 510kbps up
^ permalink raw reply
* [PATCH 01/11] arm64: use RET instruction for exiting the trampoline
From: Ard Biesheuvel @ 2018-01-04 18:35 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20180104183158.GM13436@arm.com>
On 4 January 2018 at 18:31, Will Deacon <will.deacon@arm.com> wrote:
> Hi Ard,
>
> On Thu, Jan 04, 2018 at 04:24:22PM +0000, Ard Biesheuvel wrote:
>> On 4 January 2018 at 15:08, Will Deacon <will.deacon@arm.com> wrote:
>> > Speculation attacks against the entry trampoline can potentially resteer
>> > the speculative instruction stream through the indirect branch and into
>> > arbitrary gadgets within the kernel.
>> >
>> > This patch defends against these attacks by forcing a misprediction
>> > through the return stack: a dummy BL instruction loads an entry into
>> > the stack, so that the predicted program flow of the subsequent RET
>> > instruction is to a branch-to-self instruction which is finally resolved
>> > as a branch to the kernel vectors with speculation suppressed.
>> >
>> > Signed-off-by: Will Deacon <will.deacon@arm.com>
>> > ---
>> > arch/arm64/kernel/entry.S | 5 ++++-
>> > 1 file changed, 4 insertions(+), 1 deletion(-)
>> >
>> > diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
>> > index 031392ee5f47..b9feb587294d 100644
>> > --- a/arch/arm64/kernel/entry.S
>> > +++ b/arch/arm64/kernel/entry.S
>> > @@ -1029,6 +1029,9 @@ alternative_else_nop_endif
>> > .if \regsize == 64
>> > msr tpidrro_el0, x30 // Restored in kernel_ventry
>> > .endif
>> > + bl 2f
>> > + b .
>> > +2:
>>
>> This deserves a comment, I guess?
>
> Yeah, I suppose ;) I'll lift something out of the commit message.
>
>> Also, is deliberately unbalancing the return stack likely to cause
>> performance problems, e.g., in libc hot paths?
>
> I don't think so, because it remains balanced after this code. We push an
> entry on with the BL and pop it with the RET; the rest of the return stack
> remains unchanged.
Ah, of course. For some reason, I had it in my mind that the failed
prediction affects the state of the return stack but that doesn't make
sense.
> That said, I'm also not sure what we could do differently
> here!
>
> Will
^ permalink raw reply
* [kernel-hardening] [PATCH] arm: Always use REFCOUNT_FULL
From: Kees Cook @ 2018-01-04 18:32 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20180104122801.GA3038@pjb1027-Latitude-E5410>
On Thu, Jan 4, 2018 at 4:28 AM, Jinbum Park <jinb.park7@gmail.com> wrote:
> arm prefers to use REFCOUNT_FULL by default.
> This enables it for arm.
>
> Signed-off-by: Jinbum Park <jinb.park7@gmail.com>
Acked-by: Kees Cook <keescook@chromium.org>
-Kees
> ---
> arch/arm/Kconfig | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index 3d349b4..ec80270 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -100,6 +100,7 @@ config ARM
> select OLD_SIGACTION
> select OLD_SIGSUSPEND3
> select PERF_USE_VMALLOC
> + select REFCOUNT_FULL
> select RTC_LIB
> select SYS_SUPPORTS_APM_EMULATION
> # Above selects are sorted alphabetically; please add new ones
> --
> 1.9.1
>
--
Kees Cook
Pixel Security
^ permalink raw reply
* [PATCH 01/11] arm64: use RET instruction for exiting the trampoline
From: Will Deacon @ 2018-01-04 18:31 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <CAKv+Gu-D763a5OouvmQ_6WfCV-dLgtyHtfT0B1hmrxDX5knD0Q@mail.gmail.com>
Hi Ard,
On Thu, Jan 04, 2018 at 04:24:22PM +0000, Ard Biesheuvel wrote:
> On 4 January 2018 at 15:08, Will Deacon <will.deacon@arm.com> wrote:
> > Speculation attacks against the entry trampoline can potentially resteer
> > the speculative instruction stream through the indirect branch and into
> > arbitrary gadgets within the kernel.
> >
> > This patch defends against these attacks by forcing a misprediction
> > through the return stack: a dummy BL instruction loads an entry into
> > the stack, so that the predicted program flow of the subsequent RET
> > instruction is to a branch-to-self instruction which is finally resolved
> > as a branch to the kernel vectors with speculation suppressed.
> >
> > Signed-off-by: Will Deacon <will.deacon@arm.com>
> > ---
> > arch/arm64/kernel/entry.S | 5 ++++-
> > 1 file changed, 4 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> > index 031392ee5f47..b9feb587294d 100644
> > --- a/arch/arm64/kernel/entry.S
> > +++ b/arch/arm64/kernel/entry.S
> > @@ -1029,6 +1029,9 @@ alternative_else_nop_endif
> > .if \regsize == 64
> > msr tpidrro_el0, x30 // Restored in kernel_ventry
> > .endif
> > + bl 2f
> > + b .
> > +2:
>
> This deserves a comment, I guess?
Yeah, I suppose ;) I'll lift something out of the commit message.
> Also, is deliberately unbalancing the return stack likely to cause
> performance problems, e.g., in libc hot paths?
I don't think so, because it remains balanced after this code. We push an
entry on with the BL and pop it with the RET; the rest of the return stack
remains unchanged. That said, I'm also not sure what we could do differently
here!
Will
^ permalink raw reply
* [PATCH v3 00/20] arm64: Unmap the kernel whilst running in userspace (KPTI)
From: Florian Fainelli @ 2018-01-04 18:23 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20180104065018.GA22792@kroah.com>
On 01/03/2018 10:50 PM, Greg Kroah-Hartman wrote:
> On Wed, Jan 03, 2018 at 09:17:26PM -0800, Florian Fainelli wrote:
>> On 12/11/2017 09:59 AM, Catalin Marinas wrote:
>>> On Wed, Dec 06, 2017 at 12:35:19PM +0000, Will Deacon wrote:
>>>> Patches are also pushed here:
>>>>
>>>> git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git kpti
>>>>
>>>> Feedback and testing welcome. At this point, I'd like to start thinking
>>>> about getting this merged for 4.16.
>>>
>>> For the record, the fixed up version was pushed by Will here:
>>>
>>> git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git kpti
>>>
>>> and I queued it for 4.16 in the arm64 for-next/core branch (same tree as
>>> above).
>>
>> Greg proposed the x86/KPTI patches for the stable-4.9.75 queue, is there
>> a plan to get the ARM64/KPTI patches backported towards stable trees as
>> well?
>
> Stable tree patches have to get into Linus's tree first before I can do
> anything :)
>
> Anyway, once that happens, yes, there is a plan, but it's a bit
> "different", and I'll talk about it once these are merged.
Great, thanks! Bonus question, if someone is using any of the affected
devices in AArch32, should we be expecting to see ARM/Linux changes as
well, that is, is there a plan to come up with a kpti implementation for
ARM?
--
Florian
^ permalink raw reply
* v4.14.9 on ARMv5: OK with gcc 4.8/5.4, NOK with gcc 7.2
From: Andrea Adami @ 2018-01-04 18:19 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20180104171637.GP28752@n2100.armlinux.org.uk>
On Thu, Jan 4, 2018 at 6:16 PM, Russell King - ARM Linux
<linux@armlinux.org.uk> wrote:
> On Thu, Jan 04, 2018 at 06:02:38PM +0100, Thomas Petazzoni wrote:
>> Hello,
>>
>> I'm pretty sure it has already been reported, but just in case: I'm
>> testing v4.14.9 on an ARMv5 platform (Kirkwood-based OpenBlocks A7).
>> mvebu_v5_defconfig boots fine when built with gcc 4.8 (good old
>> Sourcery toolchain) or gcc 5.4 (some old Linaro toolchain), but doesn't
>> boot at all when built with gcc 7.2 (Fedora toolchain). Even when
>> configured and booted with earlyprintk, the kernel doesn't say anything.
>>
>> The toolchain being used is the default ARM toolchain provided by
>> Fedora 27. It says:
>>
>> gcc version 7.2.1 20170915 (Red Hat Cross 7.2.1-1) (GCC)
>> GNU ld version 2.29.1-1.fc27
>>
>> So it's quite bleeding edge.
>>
>> Let me know if this has already been fixed, or if I should test a
>> patch. I have not tested 4.15-rc or next at this point, assuming that
>> if a fix had been made, it would have been backported to 4.14-stable.
>
> It's a known issue -
>
> http://archive.armlinux.org.uk/lurker/message/20171018.095713.a56a717b.en.html
>
> but so far no patches have been forthcoming for the kernel, because
> (I guess) it's deemed to be a gcc bug.
>
> It's one of those discussions that just seemed to die without any
> real conclusion.
>
> --
> RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line in suburbia: sync at 8.8Mbps down 630kbps up
> According to speedtest.net: 8.21Mbps down 510kbps up
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
I am happy to report the gcc bug has been fixed in trunk.
I am, however, dubious about their claim that this should also fix gcc6,
since we have no known issues with gcc6.
"
This should also fix
incorrect generation of ldrd/strd with unaligned accesses that could
previously have occurred on ARMv5e where all such operations must be
64-bit aligned."
https://github.com/gcc-mirror/gcc/commit/f59996b56aaa1c1d62a16cbb4010775b624cbde0
In OpenEmbedded we backported the patch a while ago, see the
discussion about qemu booting where real hw doesn't:
http://lists.openembedded.org/pipermail/openembedded-core/2017-November/144045.html
Regards
Andrea
^ permalink raw reply
* [PATCH] arm64: fpsimd: Fix state leakage when migrating after sigreturn
From: Catalin Marinas @ 2018-01-04 18:19 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <1513362878-8787-1-git-send-email-Dave.Martin@arm.com>
On Fri, Dec 15, 2017 at 06:34:38PM +0000, Dave P Martin wrote:
> When refactoring the sigreturn code to handle SVE, I changed the
> sigreturn implementation to store the new FPSIMD state from the
> user sigframe into task_struct before reloading the state into the
> CPU regs. This makes it easier to convert the data for SVE when
> needed.
>
> However, it turns out that the fpsimd_state structure passed into
> fpsimd_update_current_state is not fully initialised, so assigning
> the structure as a whole corrupts current->thread.fpsimd_state.cpu
> with uninitialised data.
>
> This means that if the garbage data written to .cpu happens to be a
> valid cpu number, and the task is subsequently migrated to the cpu
> identified by the that number, and then tries to enter userspace,
> the CPU FPSIMD regs will be assumed to be correct for the task and
> not reloaded as they should be. This can result in returning to
> userspace with the FPSIMD registers containing data that is stale or
> that belongs to another task or to the kernel.
>
> Knowingly handing around a kernel structure that is incompletely
> initialised with user data is a potential source of mistakes,
> especially across source file boundaries. To help avoid a repeat
> of this issue, this patch adapts the relevant internal API to hand
> around the user-accessible subset only: struct user_fpsimd_state.
>
> To avoid future surprises, this patch also converts all uses of
> struct fpsimd_state that really only access the user subset, to use
> struct user_fpsimd_state. A few missing consts are added to
> function prototypes for good measure.
>
> Thanks to Will for spotting the cause of the bug here.
>
> Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
> Fixes: 8cd969d28fd2 ("arm64/sve: Signal handling support")
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
I've queued this patch for 4.16 but do we actually need "Fixes:" here?
AFAICT, mainline already has the fix, the latest being a45448313706
"arm64: fpsimd: Fix copying of FP state from signal frame into task
struct" in -rc4 (for-next/core is based on -rc3).
--
Catalin
^ permalink raw reply
* [PATCH v2 0/8] ARM: sun9i: SMP support with Multi-Cluster Power Management
From: Lorenzo Pieralisi @ 2018-01-04 18:04 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <20180104145838.qk5sbtg3vjg33txt@flea.lan>
On Thu, Jan 04, 2018 at 03:58:38PM +0100, Maxime Ripard wrote:
> On Thu, Jan 04, 2018 at 10:37:46PM +0800, Chen-Yu Tsai wrote:
> > This is v2 of my sun9i SMP support with MCPM series which was started
> > over two years ago [1]. We've tried to implement PSCI for both the A80
> > and A83T. Results were not promising. The issue is that these two chips
> > have a broken security extensions implementation. If a specific bit is
> > not burned in its e-fuse, most if not all security protections don't
> > work [2]. Even worse, non-secure access to the GIC become secure. This
> > requires a crazy workaround in the GIC driver which probably doesn't work
> > in all cases [3].
> >
> > Nicolas mentioned that the MCPM framework is likely overkill in our
> > case [4]. However the framework does provide cluster/core state tracking
> > and proper sequencing of cache related operations. We could rework
> > the code to use standard smp_ops, but I would like to actually get
> > a working version in first.
> >
> > Much of the sunxi-specific MCPM code is derived from Allwinner code and
> > documentation, with some references to the other MCPM implementations,
> > as well as the Cortex's Technical Reference Manuals for the power
> > sequencing info.
> >
> > One major difference compared to other platforms is we currently do not
> > have a standalone PMU or other embedded firmware to do the actually power
> > sequencing. All power/reset control is done by the kernel. Nicolas
> > mentioned that a new optional callback should be added in cases where the
> > kernel has to do the actual power down [5]. For now however I'm using a
> > dedicated single thread workqueue. CPU and cluster power off work is
> > queued from the .{cpu,cluster}_powerdown_prepare callbacks. This solution
> > is somewhat heavy, as I have a total of 10 static work structs. It might
> > also be a bit racy, as nothing prevents the system from bringing a core
> > back before the asynchronous work shuts it down. This would likely
> > happen under a heavily loaded system with a scheduler that brings cores
> > in and out of the system frequently. In simple use-cases it performs OK.
>
> It all looks sane to me
> Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>
It does not to me, sorry. You do not need MCPM (and workqueues) to
do SMP bring-up.
Nico explained why, just do it:
commit 905cdf9dda5d ("ARM: hisi/hip04: remove the MCPM overhead")
Lorenzo
^ permalink raw reply
* [PATCH] arm: asid: Do not replace active_asids if already 0
From: Catalin Marinas @ 2018-01-04 18:04 UTC (permalink / raw)
To: linux-arm-kernel
Under some uncommon timing conditions, a generation check and
xchg(active_asids, A1) in check_and_switch_context() on P1 can race with
an ASID roll-over on P2. If P2 has not seen the update to
active_asids[P1], it can re-allocate A1 to a new task T2 on P2. P1 ends
up waiting on the spinlock since the xchg() returned 0 while P2 can go
through a second ASID roll-over with (T2,A1,G2) active on P2. This
roll-over copies active_asids[P1] == A1,G1 into reserved_asids[P1] and
active_asids[P2] == A1,G2 into reserved_asids[P2]. A subsequent
scheduling of T1 on P1 and T2 on P2 would match reserved_asids and get
their generation bumped to G3:
P1                                      P2
--                                      --
TTBR0.BADDR = T0
TTBR0.ASID = A0
asid_generation = G1
check_and_switch_context(T1,A1,G1)
  generation match
                                        check_and_switch_context(T2,A0,G0)
                                          new_context()
                                            ASID roll-over
                                            asid_generation = G2
                                            flush_context()
                                              active_asids[P1] = 0
                                              asid_map[A1] = 0
                                              reserved_asids[P1] = A0,G0
  xchg(active_asids, A1)
    active_asids[P1] = A1,G1
    xchg returns 0
  spin_lock_irqsave()
                                            allocated ASID (T2,A1,G2)
                                            asid_map[A1] = 1
                                          active_asids[P2] = A1,G2
                                        ...
                                        check_and_switch_context(T3,A0,G0)
                                          new_context()
                                            ASID roll-over
                                            asid_generation = G3
                                            flush_context()
                                              active_asids[P1] = 0
                                              asid_map[A1] = 1
                                              reserved_asids[P1] = A1,G1
                                              reserved_asids[P2] = A1,G2
                                            allocated ASID (T3,A2,G3)
                                            asid_map[A2] = 1
                                          active_asids[P2] = A2,G3
  new_context()
    check_update_reserved_asid(A1,G1)
      matches reserved_asids[P1]
      reserved_asids[P1] = A1,G3
  updated T1 ASID to (T1,A1,G3)
                                        check_and_switch_context(T2,A1,G2)
                                          new_context()
                                            check_update_reserved_asid(A1,G2)
                                              matches reserved_asids[P2]
                                              reserved_asids[P2] = A1,G3
                                          updated T2 ASID to (T2,A1,G3)
At this point, we have two tasks, T1 and T2 both using ASID A1 with the
latest generation G3. Any of them is allowed to be scheduled on the
other CPU leading to two different tasks with the same ASID on the same
CPU.
This patch changes the xchg to cmpxchg so that the active_asids is only
updated if non-zero to avoid a race with an ASID roll-over on a
different CPU.
Cc: Russell King <linux@armlinux.org.uk>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
---
This is similar to the arm64 patch [1], with the difference that non-relaxed
(cmp)xchg is used as per the existing code.
[1] http://lkml.kernel.org/r/20180104111721.33834-1-catalin.marinas at arm.com
arch/arm/mm/context.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index c8c8b9ed02e0..3208319d72e7 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -241,7 +241,7 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
{
unsigned long flags;
unsigned int cpu = smp_processor_id();
- u64 asid;
+ u64 asid, old_active_asid;
if (unlikely(mm->context.vmalloc_seq != init_mm.context.vmalloc_seq))
__check_vmalloc_seq(mm);
@@ -254,8 +254,17 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
cpu_set_reserved_ttbr0();
asid = atomic64_read(&mm->context.id);
- if (!((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS)
- && atomic64_xchg(&per_cpu(active_asids, cpu), asid))
+
+ /*
+ * If our active_asids is zero, we are racing with an ASID roll-over
+ * on a different CPU, so skip the update (using cmpxchg if non-zero)
+ * and take the slow path.
+ */
+ old_active_asid = atomic64_read(&per_cpu(active_asids, cpu));
+ if (old_active_asid &&
+ !((asid ^ atomic64_read(&asid_generation)) >> ASID_BITS) &&
+ atomic64_cmpxchg(&per_cpu(active_asids, cpu),
+ old_active_asid, asid))
goto switch_mm_fastpath;
raw_spin_lock_irqsave(&cpu_asid_lock, flags);
^ permalink raw reply related
* [PATCH v5 7/9] arm64: Topology, rename cluster_id
From: Jeremy Linton @ 2018-01-04 18:00 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <e59f197d-eff9-9429-6e3b-519aeac354fc@huawei.com>
Hi,
On 01/03/2018 09:59 PM, Xiongfeng Wang wrote:
>
>
> On 2018/1/4 1:32, Jeremy Linton wrote:
>> Hi,
>>
>> On 01/03/2018 08:29 AM, Sudeep Holla wrote:
>>>
>>> On 02/01/18 02:29, Xiongfeng Wang wrote:
>>>> Hi,
>>>>
>>>> On 2017/12/18 20:42, Morten Rasmussen wrote:
>>>>> On Fri, Dec 15, 2017 at 10:36:35AM -0600, Jeremy Linton wrote:
>>>>>> Hi,
>>>>>>
>>>>>> On 12/13/2017 12:02 PM, Lorenzo Pieralisi wrote:
>>>>>>> [+Morten, Dietmar]
>>>>>>>
>>>>>>> $SUBJECT should be:
>>>>>>>
>>>>>>> arm64: topology: rename cluster_id
>>>>>>
>>>> [cut]
>>>>>>
>>>> I think we still need the information describing which cores are in one
>>>> cluster. Many arm64 chips have the architecture core/cluster/socket. Cores
>>>> in one cluster may share a same L2 cache. That information can be used to
>>>> build the sched_domain. If we put cores in one cluster in one sched_domain,
>>>> the performance will be better.(please see kernel/sched/topology.c:1197,
>>>> cpu_coregroup_mask() uses 'core_sibling' to build a multi-core
>>>> sched_domain).
>>>
>>> We get all the cache information from DT/ACPI PPTT(mainly topology) and now
>>> even the geometry. So ideally, the sharing information must come from that.
>>> Any other solution might end up in conflict if DT/PPTT and that mismatch.
>>>
>>>> So I think we still need variable to record which cores are in one
>>>> sched_domain for future use.
>>>
>>> I tend to say no, at-least not as is.
>>>
>>
>> Well, either way, with DynamiQ (and a55/a75) the cores have private L2's, which means that the cluster sharing is happening at what is then the L3 level. So, the code I had in earlier versions would have needed tweaks to deal with that anyway.
>>
>> IMHO, if we want to detect this kind of sharing for future scheduling domains, it should probably be done independent of PPTT/DT/MIPDR by picking out shared cache levels from struct cacheinfo *. Which makes that change unrelated to the basic population of cacheinfo and cpu_topology in this patchset.
>>
> I think we need to build scheduling domains not only on the cache-sharing information,
> but also some other information, such as which cores use the same cache coherent interconnect
> (I don't know the detail, I just guess)
>
> I think PPTT is used to report the cores topology, which cores are more related to each other.
> They may share the same cache, or use the same CCI, or are physically near to each other.
> I think we should use this information to build MC(multi-cores) scheduling domains.
>
> Or maybe we can just discard the MC scheduling domain and handle this scheduling-domain-building
> task to the NUMA subsystem entirely, I don't know if it is proper.
For the immediate future what I would like is a way to identify where in
the PPTT topology the NUMA domains begin (rather than assuming socket,
which is the current plan). That allows the manufacturers of systems
(with, say, MCM based topologies) to dictate at which level in the
cpu/cache topology they want to start describing the topology with the
SLIT/SRAT tables. I think that moves us in the direction you are
indicating while still leaving the door open for something like a
cluster level scheduling domain (based on cores sharing caches) or a
split LLC domain (also based on cores sharing caches) that happens to be
on die...
^ permalink raw reply
* [PATCH v5 6/9] ACPI/PPTT: Add topology parsing code
From: Jeremy Linton @ 2018-01-04 17:50 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <020801d38528$0ed8c670$2c8a5350$@codeaurora.org>
Hi,
On 01/04/2018 12:48 AM, vkilari at codeaurora.org wrote:
> Hi Jeremy
>
>> -----Original Message-----
>> From: linux-arm-kernel
> [mailto:linux-arm-kernel-bounces at lists.infradead.org]
>> On Behalf Of Jeremy Linton
>> Sent: Wednesday, January 3, 2018 10:28 PM
>> To: vkilari at codeaurora.org
>> Cc: 'Mark Rutland' <mark.rutland@arm.com>; Jonathan.Zhang at cavium.com;
>> Jayachandran.Nair at cavium.com; 'Lorenzo Pieralisi'
>> <lorenzo.pieralisi@arm.com>; austinwc at codeaurora.org; 'Linux PM' <linux-
>> pm at vger.kernel.org>; jhugo at codeaurora.org; 'Catalin Marinas'
>> <catalin.marinas@arm.com>; 'Sudeep Holla' <sudeep.holla@arm.com>; 'Will
>> Deacon' <will.deacon@arm.com>; 'Linux Kernel Mailing List' <linux-
>> kernel at vger.kernel.org>; wangxiongfeng2 at huawei.com; 'ACPI Devel Maling
>> List' <linux-acpi@vger.kernel.org>; 'Viresh Kumar'
> <viresh.kumar@linaro.org>;
>> 'Rafael J. Wysocki' <rjw@rjwysocki.net>; 'Hanjun Guo'
>> <hanjun.guo@linaro.org>; 'Greg Kroah-Hartman'
>> <gregkh@linuxfoundation.org>; 'Rafael J. Wysocki' <rafael@kernel.org>; 'Al
>> Stone' <ahs3@redhat.com>; linux-arm-kernel at lists.infradead.org; 'Len
> Brown'
>> <lenb@kernel.org>
>> Subject: Re: [PATCH v5 6/9] ACPI/PPTT: Add topology parsing code
>>
>> Hi,
>>
>> On 01/03/2018 02:49 AM, vkilari at codeaurora.org wrote:
>>> Hi Jeremy,
>>>
>>> Sorry, I don't have your previous patch emails to reply on right
>>> patch context.
>>> So commenting on top of this patch.
>>>
>>> AFAIU, the PPTT v5 patches still rely on CLIDR_EL1 register to know
>>> the type of Caches enabled/available on the platform. With PPTT, it
>>> should not rely on architecture registers. There can be platforms
>>> which can report cache availability in PPTT but not in architecture
>>> registers.
>>>
>>> The following code snippet shows usage of CLIDR_EL1
>>>
>>> In arch/arm64/kernel/cacheinfo.c
>>>
>>> static inline enum cache_type get_cache_type(int level) {
>>> u64 clidr;
>>>
>>> if (level > MAX_CACHE_LEVEL)
>>> return CACHE_TYPE_NOCACHE;
>>> clidr = read_sysreg(clidr_el1);
>>> return CLIDR_CTYPE(clidr, level); }
>>>
>>> static int __populate_cache_leaves(unsigned int cpu) {
>>> unsigned int level, idx;
>>> enum cache_type type;
>>> struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
>>> struct cacheinfo *this_leaf = this_cpu_ci->info_list;
>>>
>>> for (idx = 0, level = 1; level <= this_cpu_ci->num_levels &&
>>> idx < this_cpu_ci->num_leaves; idx++, level++) {
>>> type = get_cache_type(level);
>>> if (type == CACHE_TYPE_SEPARATE) {
>>> ci_leaf_init(this_leaf++, CACHE_TYPE_DATA,
> level);
>>> ci_leaf_init(this_leaf++, CACHE_TYPE_INST,
> level);
>>> } else {
>>> ci_leaf_init(this_leaf++, type, level);
>>> }
>>> }
>>> return 0;
>>> }
>>>
>>> In populate_cache_leaves() the cache type is read from CLIDR_EL1
> register.
>>> If CLIDR_EL1 reports CACHE_TYPE_NOCACHE for a particular level then
>>> sysfs entry /sys/devices/system/cpu/cpu0/index<n>/type is not created
>>> and hence userspace tools like lstopo will not report this cache
>>> level.
>>
>>
>> This sounds suspiciously like one of things tweaked between v4->v5. If you
> look
>> at update_cache_properties() in patch 2/9, you will see that we only
>> update/find NOCACHE nodes and convert them to UNIFIED when all the
>> attributes in the node are supplied.
>>
>> This means that if the node has an incomplete set of attributes we won't
> update
>> it. Can you verify that you have all those attributes set for nodes which
> aren't
>> being described by the hardware?
>
> Thanks for pointing out.
> Why do we need to check for set of attributes and decide it as UNIFIED
> cache.?
> We can get cache type from attributes bits[3:2] if cache type valid flag is
> set
> irrespective of other attributes. If cache type valid flag is not set then
> we can assume
> it as NOCACHE type as neither architecture register nor in PPTT has valid
> cache type.
To answer the first question, in a strict sense we don't need to check
any of the attributes in order to override the cache type. That said,
initially I was going to trigger the override only when important
attributes were set to assure that we weren't exporting meaningless
nodes into sysfs. Then while picking which attributes I considered
important, I came to the conclusion that it was simply better to assure
that they were all set for nodes entirely generated by the PPTT. AKA, I
don't want to see L3 cache nodes with their size or associativity unset,
it's better in that case that they remain hidden.
Regarding the cache type valid bit: the code is written with the assumption
that it is overriding probed values (despite that not being true at the
moment for arm64) in the spirit of the standard. This informs/restricts
how the code works because we aren't simply generating the entire
cacheinfo directly from PPTT walks. Instead we are merging the PPTT
information with anything previously probed, meaning we need a way to
match existing cacheinfo structures with PPTT nodes.
So, the logic finding/matching an existing probed cache node requires
that the cache type is valid because the cache level, and type is used
as the match key. If the PPTT cache node doesn't have the cache type
valid set, then the match logic won't find the node, and the PPTT code
won't make any updates. That may also be what you're seeing. Basically
what is happening is that cacheinfo NOCACHE nodes that happen to match
valid PPTT UNIFIED nodes, can have their cache types overridden, but
only if we determine the remainder of the PPTT node has sufficient
information that we aren't exporting cacheinfo structures without useful
information. Currently, the only time this can happen is for nodes which
are entirely PPTT generated, so I think it's fair that the PPTT contain enough
information to make those nodes useful.
Thanks,
>
>>
>> Thanks,
>>
>>
>>>
>>> Regards
>>> Vijay
>>>
>>>> -----Original Message-----
>>>> From: linux-arm-kernel
>>> [mailto:linux-arm-kernel-bounces at lists.infradead.org]
>>>> On Behalf Of Rafael J. Wysocki
>>>> Sent: Thursday, December 14, 2017 4:40 AM
>>>> To: Jeremy Linton <jeremy.linton@arm.com>
>>>> Cc: Mark Rutland <mark.rutland@arm.com>; Jonathan.Zhang at cavium.com;
>>>> Jayachandran.Nair at cavium.com; Lorenzo Pieralisi
>>>> <lorenzo.pieralisi@arm.com>; Catalin Marinas
>>>> <catalin.marinas@arm.com>; Rafael J. Wysocki <rafael@kernel.org>;
>>>> jhugo at codeaurora.org; Will Deacon <will.deacon@arm.com>; Linux PM
>> <linux-pm@vger.kernel.org>; Rafael J.
>>>> Wysocki <rjw@rjwysocki.net>; Greg Kroah-Hartman
>>>> <gregkh@linuxfoundation.org>; Linux Kernel Mailing List <linux-
>>>> kernel at vger.kernel.org>; ACPI Devel Maling List
>>> <linux-acpi@vger.kernel.org>;
>>>> Viresh Kumar <viresh.kumar@linaro.org>; Hanjun Guo
>>>> <hanjun.guo@linaro.org>; Al Stone <ahs3@redhat.com>; Sudeep Holla
>>>> <sudeep.holla@arm.com>; austinwc at codeaurora.org;
>>>> wangxiongfeng2 at huawei.com; linux-arm-kernel at lists.infradead.org; Len
>>>> Brown <lenb@kernel.org>
>>>> Subject: Re: [PATCH v5 6/9] ACPI/PPTT: Add topology parsing code
>>>>
>>>> On Thu, Dec 14, 2017 at 12:06 AM, Jeremy Linton
>>>> <jeremy.linton@arm.com>
>>>> wrote:
>>>>> Hi,
>>>>>
>>>>>
>>>>> On 12/13/2017 04:28 PM, Rafael J. Wysocki wrote:
>>>>>>
>>>>>> On Wed, Dec 13, 2017 at 6:38 PM, Lorenzo Pieralisi
>>>>>> <lorenzo.pieralisi@arm.com> wrote:
>>>>>>>
>>>>>>> On Tue, Dec 12, 2017 at 10:13:08AM -0600, Jeremy Linton wrote:
>>>>>>>>
>>>>>>>> Hi,
>>>>>>>>
>>>>>>>> First, thanks for taking a look at this.
>>>>>>>>
>>>>>>>> On 12/11/2017 07:12 PM, Rafael J. Wysocki wrote:
>>>>>>>>>
>>>>>>>>> On Friday, December 1, 2017 11:23:27 PM CET Jeremy Linton wrote:
>>>>>>>>>>
>>>>>>>>>> The PPTT can be used to determine the groupings of CPU's at
>>>>>>>>>> given levels in the system. Lets add a few routines to the PPTT
>>>>>>>>>> parsing code to return a unique id for each unique level in the
>>>>>>>>>> processor hierarchy. This can then be matched to build
>>>>>>>>>> thread/core/cluster/die/package/etc mappings for each
>>>>>>>>>> processing element in the system.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Jeremy Linton <jeremy.linton@arm.com>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> Why can't this be folded into patch [2/9]?
>>>>>>>>
>>>>>>>>
>>>>>>>> It can, and I will be happy squash it.
>>>>>>>>
>>>>>>>> It was requested that the topology portion of the parser be split
>>>>>>>> out back in v3.
>>>>>>>>
>>>>>>>> https://www.spinics.net/lists/linux-acpi/msg78487.html
>>>>>>>
>>>>>>>
>>>>>>> I asked to split cache/topology since I am not familiar with cache
>>>>>>> code and Sudeep - who looks after the cache code - won't be able
>>>>>>> to review this series in time for v4.16.
>>>>>>
>>>>>>
>>>>>> OK, so why do we need it in 4.16?
>>>>>
>>>>>
>>>>> I think its more case of as soon as possible. That is because there
>>>>> are machines where the topology is completely incorrect due to
>>>>> assumptions the kernel makes based on registers that aren't defined
>>>>> for that purpose (say describing which cores are in a physical
>>>>> socket, or LLC's attached to interconnects or memory controllers).
>>>>>
>>>>> This incorrect topology information is reported to things like the
>>>>> kernel scheduler, which then makes poor scheduling decisions
>>>>> resulting in sub-optimal system performance.
>>>>>
>>>>> This patchset (and ACPI 6.2) clears up a lot of those problems.
>>>>
>>>> As long as the ACPI tables are as expected that is, I suppose?
>>>>
>>>> Anyway, fair enough, but I don't want to rush it in.
>>>>
>>>> Thanks,
>>>> Rafael
>>>>
>>>> _______________________________________________
>>>> linux-arm-kernel mailing list
>>>> linux-arm-kernel at lists.infradead.org
>>>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>>>
>>
>>
>> _______________________________________________
>> linux-arm-kernel mailing list
>> linux-arm-kernel at lists.infradead.org
>> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
>
^ permalink raw reply
* [PATCH v4 6/7] ARM: davinci: convert to common clock framework
From: David Lechner @ 2018-01-04 17:50 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <e36250ba-3f76-71d9-206f-475738d19e36@ti.com>
On 1/4/18 6:39 AM, Sekhar Nori wrote:
> On Monday 01 January 2018 05:09 AM, David Lechner wrote:
>> This converts all of arch/arm/mach-davinci to the common clock framework.
>> The clock drivers from clock.c and psc.c have been moved to drivers/clk,
>> so these files are removed.
>>
>> There is one subtle change in the clock trees. AUX, BPDIV and OSCDIV
>> clocks now have "ref_clk" as a parent instead of the PLL clock. These
>> clocks are part of the PLL's MMIO block, but they bypass the PLL and
>> therefore it makes more sense to have "ref_clk" as their parent since
>> "ref_clk" is the input clock of the PLL.
>>
>> CONFIG_DAVINCI_RESET_CLOCKS is removed since the common clock framework
>> takes care of disabling unused clocks.
>>
>> Known issue: This breaks CPU frequency scaling on da850.
>
> This functionality needs to be restored as part of this series since we
> cannot commit anything with regressions.
>
Do you have a suggestion on how to accomplish this? I don't have a board
for testing, so I don't have a way of knowing if my changes will work or
not.
>>
>> Also, the order of #includes is cleaned up in files while we are touching
>> this code.
>>
>> Signed-off-by: David Lechner <david@lechnology.com>
>
> This is a pretty huge patch again and I hope it can be broken down.
> Ideally one per SoC converted and then the unused code removal.
>
Will do.
^ permalink raw reply
* [PATCH v4 5/7] clk: Introduce davinci clocks
From: David Lechner @ 2018-01-04 17:47 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <d0165dc8-c23c-c939-d1b7-f10be3b10553@ti.com>
On 1/4/18 6:43 AM, Sekhar Nori wrote:
> Hi David,
>
> On Monday 01 January 2018 05:09 AM, David Lechner wrote:
>> + /* TODO: old davinci clocks for da850 set MDCTL_FORCE bit for sata and
>> + * dsp here. Is this really needed?
>> + */
>
> The commit that introduced this flag suggests so.
>
> commit aad70de20fc69970a3080e7e8f02b54a4a3fe3e6
> Author: Sekhar Nori <nsekhar@ti.com>
> AuthorDate: Wed Jul 6 06:01:22 2011 +0000
> Commit: Sekhar Nori <nsekhar@ti.com>
> CommitDate: Fri Jul 8 11:10:09 2011 +0530
>
> davinci: enable forced transitions on PSC
>
> Some DaVinci modules like the SATA on DA850
> need forced module state transitions.
>
> Define a "force" flag which can be passed to
> the PSC config function to enable it to make
> forced transitions.
>
> Forced transitions shouldn't normally be attempted,
> unless the TRM explicitly specifies its usage.
>
> ChangeLog:
> v2:
> Modified to take care of the fact that
> davinci_psc_config() now takes the flags
> directly.
>
> Signed-off-by: Sekhar Nori <nsekhar@ti.com>
>
> I can check without that flag again, but I do recall it being needed.
>
OK, I will add it back. I need to add some other flags as well anyway.
^ permalink raw reply
* [PATCH v4 5/7] clk: Introduce davinci clocks
From: David Lechner @ 2018-01-04 17:46 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <521d77e1-1375-7ef0-4ef5-d8a3401d1b73@ti.com>
On 1/4/18 6:28 AM, Sekhar Nori wrote:
> On Wednesday 03 January 2018 03:01 AM, David Lechner wrote:
>> Forgot to cc linux-clk, so doing that now...
>>
>>
>> On 12/31/2017 05:39 PM, David Lechner wrote:
>>> This introduces new drivers for arch/arm/mach-davinci. The code is based
>>> on the clock drivers from there and adapted to use the common clock
>>> framework.
>>>
>>> Signed-off-by: David Lechner <david@lechnology.com>
>>> ---
>>>   drivers/clk/Makefile                      |   1 +
>>>   drivers/clk/davinci/Makefile              |   3 +
>>>   drivers/clk/davinci/da8xx-cfgchip-clk.c   | 380
>>> ++++++++++++++++++++++++++++++
>>>   drivers/clk/davinci/pll.c                 | 333
>>> ++++++++++++++++++++++++++
>>>   drivers/clk/davinci/psc.c                 | 217 +++++++++++++++++
>>>   include/linux/clk/davinci.h               |  46 ++++
>>>   include/linux/platform_data/davinci_clk.h |  25 ++
>>>   7 files changed, 1005 insertions(+)
>
> This is a pretty huge patch and I think each of cfgchip, pll and PSC
> clocks deserve a patch of their own.
Will do.
>
> On the PLL patch, please describe how the PLL implementation on DaVinci
> is different from Keystone, so no reuse is really possible. Similarly
> for the PSC patch (no non-DT support in keystone etc).
OK.
>
>>> diff --git a/drivers/clk/davinci/psc.c b/drivers/clk/davinci/psc.c
>>> new file mode 100644
>>> index 0000000..8ae85ee
>>> --- /dev/null
>>> +++ b/drivers/clk/davinci/psc.c
>>> @@ -0,0 +1,217 @@
>
>>> +static void psc_config(struct davinci_psc_clk *psc,
>>> +?????????????? enum davinci_psc_state next_state)
>>> +{
>>> +??? u32 epcpr, ptcmd, pdstat, pdctl, mdstat, mdctl, ptstat;
>>> +
>>> +??? mdctl = readl(psc->base + MDCTL + 4 * psc->lpsc);
>>> +??? mdctl &= ~MDSTAT_STATE_MASK;
>>> +??? mdctl |= next_state;
>>> +??? /* TODO: old davinci clocks for da850 set MDCTL_FORCE bit for
>>> sata and
>>> +???? * dsp here. Is this really needed?
>>> +???? */
>>> +??? writel(mdctl, psc->base + MDCTL + 4 * psc->lpsc);
>>> +
>>> +??? pdstat = readl(psc->base + PDSTAT + 4 * psc->pd);
>>> +??? if ((pdstat & PDSTAT_STATE_MASK) == 0) {
>>> +??????? pdctl = readl(psc->base + PDSTAT + 4 * psc->pd);
>>> +??????? pdctl |= PDCTL_NEXT;
>>> +??????? writel(pdctl, psc->base + PDSTAT + 4 * psc->pd);
>>> +
>>> +??????? ptcmd = BIT(psc->pd);
>>> +??????? writel(ptcmd, psc->base + PTCMD);
>>> +
>>> +??????? do {
>>> +??????????? epcpr = __raw_readl(psc->base + EPCPR);
>>> +??????? } while (!(epcpr & BIT(psc->pd)));
>>> +
>>> +??????? pdctl = __raw_readl(psc->base + PDCTL + 4 * psc->pd);
>>> +??????? pdctl |= PDCTL_EPCGOOD;
>>> +??????? __raw_writel(pdctl, psc->base + PDCTL + 4 * psc->pd);
>
> Can we shift to regmap here too? Then the polling loops like above can
> be converted to regmap_read_poll_timeout() too like you have done elsewhere.
>
I'll give it a try.
^ permalink raw reply
* [PATCH v4 3/7] ARM: davinci: fix duplicate clocks
From: David Lechner @ 2018-01-04 17:44 UTC (permalink / raw)
To: linux-arm-kernel
In-Reply-To: <e3beaac9-c583-b181-38a8-d0aa6db44045@ti.com>
On 1/4/18 5:12 AM, Sekhar Nori wrote:
> On Monday 01 January 2018 05:09 AM, David Lechner wrote:
>> There are a number of clocks that were duplicated because they are used by
>> more than one device. It is no longer necessary to do this since we are
>> explicitly calling clk_register_clkdev() for each clock. In da830.c, some
>> clocks were using the same LPSC, which would cause problems with reference
>> counting, so these are combined into one clock each. In da850.c the
>> duplicate clocks had already been fixed by creating dummy child clocks, so
>> these clocks are removed.
>>
>> Signed-off-by: David Lechner <david@lechnology.com>
>
> If we do end up keeping 2/7, this should be done before that - to avoid
> retouching code that was just introduced.
>
FWIW, this can't be done before because it will cause broken linked
lists in the davinci clocks. But, as I mentioned already, I am going to
try a different approach, so this patch will go away completely.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox