* [PATCH v4 18/23] powerpc/32: Remove verification of MSR_PR on syscall in the ASM entry
From: Christophe Leroy @ 2021-01-25 14:48 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin,
msuchanek
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1611585031.git.christophe.leroy@csgroup.eu>
system_call_exception() checks MSR_PR and BUGs if a syscall
is issued from kernel mode.
No need to handle it anymore from the ASM entry code.
null_syscall reduction 2 cycles (348 => 346 cycles)
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/kernel/entry_32.S | 30 ------------------------------
arch/powerpc/kernel/head_32.h | 3 ---
arch/powerpc/kernel/head_booke.h | 3 ---
3 files changed, 36 deletions(-)
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index ce5fdb23ed7c..9922a04650f7 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -416,36 +416,6 @@ ret_from_kernel_thread:
li r3,0
b ret_from_syscall
- /*
- * System call was called from kernel. We get here with SRR1 in r9.
- * Mark the exception as recoverable once we have retrieved SRR0,
- * trap a warning and return ENOSYS with CR[SO] set.
- */
- .globl ret_from_kernel_syscall
-ret_from_kernel_syscall:
- mfspr r9, SPRN_SRR0
- mfspr r10, SPRN_SRR1
-#if !defined(CONFIG_4xx) && !defined(CONFIG_BOOKE)
- LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_IR|MSR_DR))
- mtmsr r11
-#endif
-
-0: trap
- EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
-
- li r3, ENOSYS
- crset so
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
- mtspr SPRN_NRI, r0
-#endif
- mtspr SPRN_SRR0, r9
- mtspr SPRN_SRR1, r10
- rfi
-#ifdef CONFIG_40x
- b . /* Prevent prefetch past rfi */
-#endif
-_ASM_NOKPROBE_SYMBOL(ret_from_kernel_syscall)
-
/*
* Top-level page fault handling.
* This is in assembler because if do_page_fault tells us that
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index c2aa0d8f1f63..c0de4acbe3f8 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -118,8 +118,6 @@
.macro SYSCALL_ENTRY trapno
mfspr r9, SPRN_SRR1
mfspr r10, SPRN_SRR0
- andi. r11, r9, MSR_PR
- beq- 99f
LOAD_REG_IMMEDIATE(r11, MSR_KERNEL) /* can take exceptions */
lis r12, 1f@h
ori r12, r12, 1f@l
@@ -176,7 +174,6 @@
3:
#endif
b transfer_to_syscall /* jump to handler */
-99: b ret_from_kernel_syscall
.endm
.macro save_dar_dsisr_on_stack reg1, reg2, sp
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index faff094b650e..7af84e1e717b 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -106,10 +106,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
#endif
mfspr r9, SPRN_SRR1
BOOKE_CLEAR_BTB(r11)
- andi. r11, r9, MSR_PR
lwz r11, TASK_STACK - THREAD(r10)
rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
- beq- 99f
ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE)
stw r12, _CCR(r11) /* save various registers */
mflr r12
@@ -157,7 +155,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
3:
b transfer_to_syscall /* jump to handler */
-99: b ret_from_kernel_syscall
.endm
/* To handle the additional exception priority levels on 40x and Book-E
--
2.25.0
^ permalink raw reply related
* [PATCH v4 19/23] powerpc/syscall: Avoid stack frame in likely part of system_call_exception()
From: Christophe Leroy @ 2021-01-25 14:48 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin,
msuchanek
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1611585031.git.christophe.leroy@csgroup.eu>
When r3 is not modified, reload it from regs->orig_r3 to free
volatile registers. This avoids a stack frame for the likely part
of system_call_exception()
Before the patch:
c000b4d4 <system_call_exception>:
c000b4d4: 7c 08 02 a6 mflr r0
c000b4d8: 94 21 ff e0 stwu r1,-32(r1)
c000b4dc: 93 e1 00 1c stw r31,28(r1)
c000b4e0: 90 01 00 24 stw r0,36(r1)
c000b4e4: 90 6a 00 88 stw r3,136(r10)
c000b4e8: 81 6a 00 84 lwz r11,132(r10)
c000b4ec: 69 6b 00 02 xori r11,r11,2
c000b4f0: 55 6b ff fe rlwinm r11,r11,31,31,31
c000b4f4: 0f 0b 00 00 twnei r11,0
c000b4f8: 81 6a 00 a0 lwz r11,160(r10)
c000b4fc: 55 6b 07 fe clrlwi r11,r11,31
c000b500: 0f 0b 00 00 twnei r11,0
c000b504: 7c 0c 42 e6 mftb r0
c000b508: 83 e2 00 08 lwz r31,8(r2)
c000b50c: 81 82 00 28 lwz r12,40(r2)
c000b510: 90 02 00 24 stw r0,36(r2)
c000b514: 7d 8c f8 50 subf r12,r12,r31
c000b518: 7c 0c 02 14 add r0,r12,r0
c000b51c: 90 02 00 08 stw r0,8(r2)
c000b520: 7c 10 13 a6 mtspr 80,r0
c000b524: 81 62 00 70 lwz r11,112(r2)
c000b528: 71 60 86 91 andi. r0,r11,34449
c000b52c: 40 82 00 34 bne c000b560 <system_call_exception+0x8c>
c000b530: 2b 89 01 b6 cmplwi cr7,r9,438
c000b534: 41 9d 00 64 bgt cr7,c000b598 <system_call_exception+0xc4>
c000b538: 3d 40 c0 5c lis r10,-16292
c000b53c: 55 29 10 3a rlwinm r9,r9,2,0,29
c000b540: 39 4a 41 e8 addi r10,r10,16872
c000b544: 80 01 00 24 lwz r0,36(r1)
c000b548: 7d 2a 48 2e lwzx r9,r10,r9
c000b54c: 7c 08 03 a6 mtlr r0
c000b550: 7d 29 03 a6 mtctr r9
c000b554: 83 e1 00 1c lwz r31,28(r1)
c000b558: 38 21 00 20 addi r1,r1,32
c000b55c: 4e 80 04 20 bctr
After the patch:
c000b4d4 <system_call_exception>:
c000b4d4: 81 6a 00 84 lwz r11,132(r10)
c000b4d8: 90 6a 00 88 stw r3,136(r10)
c000b4dc: 69 6b 00 02 xori r11,r11,2
c000b4e0: 55 6b ff fe rlwinm r11,r11,31,31,31
c000b4e4: 0f 0b 00 00 twnei r11,0
c000b4e8: 80 6a 00 a0 lwz r3,160(r10)
c000b4ec: 54 63 07 fe clrlwi r3,r3,31
c000b4f0: 0f 03 00 00 twnei r3,0
c000b4f4: 7d 6c 42 e6 mftb r11
c000b4f8: 81 82 00 08 lwz r12,8(r2)
c000b4fc: 80 02 00 28 lwz r0,40(r2)
c000b500: 91 62 00 24 stw r11,36(r2)
c000b504: 7c 00 60 50 subf r0,r0,r12
c000b508: 7d 60 5a 14 add r11,r0,r11
c000b50c: 91 62 00 08 stw r11,8(r2)
c000b510: 7c 10 13 a6 mtspr 80,r0
c000b514: 80 62 00 70 lwz r3,112(r2)
c000b518: 70 6b 86 91 andi. r11,r3,34449
c000b51c: 40 82 00 28 bne c000b544 <system_call_exception+0x70>
c000b520: 2b 89 01 b6 cmplwi cr7,r9,438
c000b524: 41 9d 00 84 bgt cr7,c000b5a8 <system_call_exception+0xd4>
c000b528: 80 6a 00 88 lwz r3,136(r10)
c000b52c: 3d 40 c0 5c lis r10,-16292
c000b530: 55 29 10 3a rlwinm r9,r9,2,0,29
c000b534: 39 4a 41 e4 addi r10,r10,16868
c000b538: 7d 2a 48 2e lwzx r9,r10,r9
c000b53c: 7d 29 03 a6 mtctr r9
c000b540: 4e 80 04 20 bctr
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/kernel/syscall.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index a3510fa4e641..476909b11051 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -115,6 +115,9 @@ notrace long system_call_exception(long r3, long r4, long r5,
return regs->gpr[3];
}
return -ENOSYS;
+ } else {
+ /* Restore r3 from orig_gpr3 to free up a volatile reg */
+ r3 = regs->orig_gpr3;
}
/* May be faster to do array_index_nospec? */
--
2.25.0
^ permalink raw reply related
* [PATCH v4 20/23] powerpc/syscall: Do not check unsupported scv vector on PPC32
From: Christophe Leroy @ 2021-01-25 14:48 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin,
msuchanek
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1611585031.git.christophe.leroy@csgroup.eu>
Only PPC64 has scv. No need to check the 0x7ff0 trap on PPC32.
And ignore the scv parameter in syscall_exit_prepare (Save 14 cycles
346 => 332 cycles)
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/kernel/entry_32.S | 1 -
arch/powerpc/kernel/syscall.c | 7 +++++--
2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 9922a04650f7..6ae9c7bcb06c 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -343,7 +343,6 @@ transfer_to_syscall:
ret_from_syscall:
addi r4,r1,STACK_FRAME_OVERHEAD
- li r5,0
bl syscall_exit_prepare
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
/* If the process has its own DBCR0 value, load it up. The internal
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 476909b11051..30f8a397a522 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -86,7 +86,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
local_irq_enable();
if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
- if (unlikely(regs->trap == 0x7ff0)) {
+ if (IS_ENABLED(CONFIG_PPC64) && unlikely(regs->trap == 0x7ff0)) {
/* Unsupported scv vector */
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
return regs->gpr[3];
@@ -109,7 +109,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
r8 = regs->gpr[8];
} else if (unlikely(r0 >= NR_syscalls)) {
- if (unlikely(regs->trap == 0x7ff0)) {
+ if (IS_ENABLED(CONFIG_PPC64) && unlikely(regs->trap == 0x7ff0)) {
/* Unsupported scv vector */
_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
return regs->gpr[3];
@@ -187,6 +187,9 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
unsigned long ti_flags;
unsigned long ret = 0;
+ if (IS_ENABLED(CONFIG_PPC32))
+ scv = 0;
+
CT_WARN_ON(ct_state() == CONTEXT_USER);
kuap_check();
--
2.25.0
^ permalink raw reply related
* [PATCH v4 21/23] powerpc/syscall: Remove FULL_REGS verification in system_call_exception
From: Christophe Leroy @ 2021-01-25 14:48 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin,
msuchanek
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1611585031.git.christophe.leroy@csgroup.eu>
For book3s/64, FULL_REGS() is 'true' at all time, so the test voids.
For others, non volatile registers are saved inconditionally.
So the verification is pointless.
Should one fail to do it, it would anyway be caught by the
CHECK_FULL_REGS() in copy_thread() as we have removed the
special versions ppc_fork() and friends.
null_syscall benchmark reduction 4 cycles (332 => 328 cycles)
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/kernel/syscall.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 30f8a397a522..a40775daa88b 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -42,7 +42,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
BUG_ON(!(regs->msr & MSR_RI));
BUG_ON(!(regs->msr & MSR_PR));
- BUG_ON(!FULL_REGS(regs));
BUG_ON(arch_irq_disabled_regs(regs));
#ifdef CONFIG_PPC_PKEY
--
2.25.0
^ permalink raw reply related
* [PATCH v4 22/23] powerpc/syscall: Optimise checks in beginning of system_call_exception()
From: Christophe Leroy @ 2021-01-25 14:48 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin,
msuchanek
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1611585031.git.christophe.leroy@csgroup.eu>
Combine all tests of regs->msr into a single logical one.
Before the patch:
0: 81 6a 00 84 lwz r11,132(r10)
4: 90 6a 00 88 stw r3,136(r10)
8: 69 60 00 02 xori r0,r11,2
c: 54 00 ff fe rlwinm r0,r0,31,31,31
10: 0f 00 00 00 twnei r0,0
14: 69 63 40 00 xori r3,r11,16384
18: 54 63 97 fe rlwinm r3,r3,18,31,31
1c: 0f 03 00 00 twnei r3,0
20: 69 6b 80 00 xori r11,r11,32768
24: 55 6b 8f fe rlwinm r11,r11,17,31,31
28: 0f 0b 00 00 twnei r11,0
After the patch:
0: 81 6a 00 84 lwz r11,132(r10)
4: 90 6a 00 88 stw r3,136(r10)
8: 7d 6b 58 f8 not r11,r11
c: 71 6b c0 02 andi. r11,r11,49154
10: 0f 0b 00 00 twnei r11,0
6 cycles less on powerpc 8xx (328 => 322 cycles).
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/kernel/syscall.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index a40775daa88b..47ae55f94d1c 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -28,6 +28,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
unsigned long r0, struct pt_regs *regs)
{
syscall_fn f;
+ unsigned long expected_msr;
regs->orig_gpr3 = r3;
@@ -39,10 +40,13 @@ notrace long system_call_exception(long r3, long r4, long r5,
trace_hardirqs_off(); /* finish reconciling */
+ expected_msr = MSR_PR;
if (!IS_ENABLED(CONFIG_BOOKE) && !IS_ENABLED(CONFIG_40x))
- BUG_ON(!(regs->msr & MSR_RI));
- BUG_ON(!(regs->msr & MSR_PR));
- BUG_ON(arch_irq_disabled_regs(regs));
+ expected_msr |= MSR_RI;
+ if (IS_ENABLED(CONFIG_PPC32))
+ expected_msr |= MSR_EE;
+ BUG_ON((regs->msr & expected_msr) ^ expected_msr);
+ BUG_ON(IS_ENABLED(CONFIG_PPC64) && arch_irq_disabled_regs(regs));
#ifdef CONFIG_PPC_PKEY
if (mmu_has_feature(MMU_FTR_PKEY)) {
--
2.25.0
^ permalink raw reply related
* [PATCH v4 23/23] powerpc/syscall: Avoid storing 'current' in another pointer
From: Christophe Leroy @ 2021-01-25 14:48 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras, Michael Ellerman, npiggin,
msuchanek
Cc: linuxppc-dev, linux-kernel
In-Reply-To: <cover.1611585031.git.christophe.leroy@csgroup.eu>
By saving the pointer pointing to thread_info.flags, gcc copies r2
in a non-volatile register.
We know 'current' doesn't change, so avoid that intermediaite pointer.
Reduces null_syscall benchmark by 2 cycles (322 => 320 cycles)
On PPC64, gcc seems to know that 'current' is not changing, and it keeps
it in a non volatile register to avoid multiple read of 'current' in paca.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
---
arch/powerpc/kernel/syscall.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 47ae55f94d1c..72e0b18b88d8 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -186,7 +186,6 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
struct pt_regs *regs,
long scv)
{
- unsigned long *ti_flagsp = ¤t_thread_info()->flags;
unsigned long ti_flags;
unsigned long ret = 0;
@@ -202,7 +201,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
/* Check whether the syscall is issued inside a restartable sequence */
rseq_syscall(regs);
- ti_flags = *ti_flagsp;
+ ti_flags = current_thread_info()->flags;
if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) {
if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
@@ -216,7 +215,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
ret = _TIF_RESTOREALL;
else
regs->gpr[3] = r3;
- clear_bits(_TIF_PERSYSCALL_MASK, ti_flagsp);
+ clear_bits(_TIF_PERSYSCALL_MASK, ¤t_thread_info()->flags);
} else {
regs->gpr[3] = r3;
}
@@ -228,7 +227,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
again:
local_irq_disable();
- ti_flags = READ_ONCE(*ti_flagsp);
+ ti_flags = READ_ONCE(current_thread_info()->flags);
while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) {
local_irq_enable();
if (ti_flags & _TIF_NEED_RESCHED) {
@@ -244,7 +243,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3,
do_notify_resume(regs, ti_flags);
}
local_irq_disable();
- ti_flags = READ_ONCE(*ti_flagsp);
+ ti_flags = READ_ONCE(current_thread_info()->flags);
}
if (IS_ENABLED(CONFIG_PPC_BOOK3S) && IS_ENABLED(CONFIG_PPC_FPU)) {
--
2.25.0
^ permalink raw reply related
* Re: [PATCH] PCI: dwc: layerscape: convert to builtin_platform_driver()
From: Lorenzo Pieralisi @ 2021-01-25 16:50 UTC (permalink / raw)
To: Michael Walle
Cc: Rob Herring, Saravana Kannan, Roy Zang, PCI, linux-kernel,
Minghuan Lian, Mingkai Hu, Greg Kroah-Hartman, Bjorn Helgaas,
linuxppc-dev, linux-arm-kernel
In-Reply-To: <c3e35b90e173b15870a859fd7001a712@walle.cc>
On Wed, Jan 20, 2021 at 08:28:36PM +0100, Michael Walle wrote:
> [RESEND, fat-fingered the buttons of my mail client and converted
> all CCs to BCCs :(]
>
> Am 2021-01-20 20:02, schrieb Saravana Kannan:
> > On Wed, Jan 20, 2021 at 6:24 AM Rob Herring <robh@kernel.org> wrote:
> > >
> > > On Wed, Jan 20, 2021 at 4:53 AM Michael Walle <michael@walle.cc>
> > > wrote:
> > > >
> > > > fw_devlink will defer the probe until all suppliers are ready. We can't
> > > > use builtin_platform_driver_probe() because it doesn't retry after probe
> > > > deferral. Convert it to builtin_platform_driver().
> > >
> > > If builtin_platform_driver_probe() doesn't work with fw_devlink, then
> > > shouldn't it be fixed or removed?
> >
> > I was actually thinking about this too. The problem with fixing
> > builtin_platform_driver_probe() to behave like
> > builtin_platform_driver() is that these probe functions could be
> > marked with __init. But there are also only 20 instances of
> > builtin_platform_driver_probe() in the kernel:
> > $ git grep ^builtin_platform_driver_probe | wc -l
> > 20
> >
> > So it might be easier to just fix them to not use
> > builtin_platform_driver_probe().
> >
> > Michael,
> >
> > Any chance you'd be willing to help me by converting all these to
> > builtin_platform_driver() and delete builtin_platform_driver_probe()?
>
> If it just moving the probe function to the _driver struct and
> remove the __init annotations. I could look into that.
Can I drop this patch then ?
Thanks,
Lorenzo
^ permalink raw reply
* Re: [PATCH] PCI: dwc: layerscape: convert to builtin_platform_driver()
From: Saravana Kannan @ 2021-01-25 18:58 UTC (permalink / raw)
To: Lorenzo Pieralisi
Cc: Rob Herring, Roy Zang, PCI, LKML, Minghuan Lian, Michael Walle,
Mingkai Hu, Greg Kroah-Hartman, Bjorn Helgaas, linuxppc-dev,
linux-arm-kernel
In-Reply-To: <20210125165041.GA5979@e121166-lin.cambridge.arm.com>
On Mon, Jan 25, 2021 at 8:50 AM Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
>
> On Wed, Jan 20, 2021 at 08:28:36PM +0100, Michael Walle wrote:
> > [RESEND, fat-fingered the buttons of my mail client and converted
> > all CCs to BCCs :(]
> >
> > Am 2021-01-20 20:02, schrieb Saravana Kannan:
> > > On Wed, Jan 20, 2021 at 6:24 AM Rob Herring <robh@kernel.org> wrote:
> > > >
> > > > On Wed, Jan 20, 2021 at 4:53 AM Michael Walle <michael@walle.cc>
> > > > wrote:
> > > > >
> > > > > fw_devlink will defer the probe until all suppliers are ready. We can't
> > > > > use builtin_platform_driver_probe() because it doesn't retry after probe
> > > > > deferral. Convert it to builtin_platform_driver().
> > > >
> > > > If builtin_platform_driver_probe() doesn't work with fw_devlink, then
> > > > shouldn't it be fixed or removed?
> > >
> > > I was actually thinking about this too. The problem with fixing
> > > builtin_platform_driver_probe() to behave like
> > > builtin_platform_driver() is that these probe functions could be
> > > marked with __init. But there are also only 20 instances of
> > > builtin_platform_driver_probe() in the kernel:
> > > $ git grep ^builtin_platform_driver_probe | wc -l
> > > 20
> > >
> > > So it might be easier to just fix them to not use
> > > builtin_platform_driver_probe().
> > >
> > > Michael,
> > >
> > > Any chance you'd be willing to help me by converting all these to
> > > builtin_platform_driver() and delete builtin_platform_driver_probe()?
> >
> > If it just moving the probe function to the _driver struct and
> > remove the __init annotations. I could look into that.
>
> Can I drop this patch then ?
No, please pick it up. Michael and I were talking about doing similar
changes for other drivers.
-Saravana
^ permalink raw reply
* Re: [PATCH] PCI: dwc: layerscape: convert to builtin_platform_driver()
From: Michael Walle @ 2021-01-25 19:44 UTC (permalink / raw)
To: Saravana Kannan
Cc: Rob Herring, Lorenzo Pieralisi, Roy Zang, PCI, LKML,
Minghuan Lian, Mingkai Hu, Greg Kroah-Hartman, Bjorn Helgaas,
linuxppc-dev, linux-arm-kernel
In-Reply-To: <CAGETcx-P-MxM+49XdUGBmg5YgnHS=fmz8uewywXvLSFKj=MqRQ@mail.gmail.com>
Am 2021-01-25 19:58, schrieb Saravana Kannan:
> On Mon, Jan 25, 2021 at 8:50 AM Lorenzo Pieralisi
> <lorenzo.pieralisi@arm.com> wrote:
>>
>> On Wed, Jan 20, 2021 at 08:28:36PM +0100, Michael Walle wrote:
>> > [RESEND, fat-fingered the buttons of my mail client and converted
>> > all CCs to BCCs :(]
>> >
>> > Am 2021-01-20 20:02, schrieb Saravana Kannan:
>> > > On Wed, Jan 20, 2021 at 6:24 AM Rob Herring <robh@kernel.org> wrote:
>> > > >
>> > > > On Wed, Jan 20, 2021 at 4:53 AM Michael Walle <michael@walle.cc>
>> > > > wrote:
>> > > > >
>> > > > > fw_devlink will defer the probe until all suppliers are ready. We can't
>> > > > > use builtin_platform_driver_probe() because it doesn't retry after probe
>> > > > > deferral. Convert it to builtin_platform_driver().
>> > > >
>> > > > If builtin_platform_driver_probe() doesn't work with fw_devlink, then
>> > > > shouldn't it be fixed or removed?
>> > >
>> > > I was actually thinking about this too. The problem with fixing
>> > > builtin_platform_driver_probe() to behave like
>> > > builtin_platform_driver() is that these probe functions could be
>> > > marked with __init. But there are also only 20 instances of
>> > > builtin_platform_driver_probe() in the kernel:
>> > > $ git grep ^builtin_platform_driver_probe | wc -l
>> > > 20
>> > >
>> > > So it might be easier to just fix them to not use
>> > > builtin_platform_driver_probe().
>> > >
>> > > Michael,
>> > >
>> > > Any chance you'd be willing to help me by converting all these to
>> > > builtin_platform_driver() and delete builtin_platform_driver_probe()?
>> >
>> > If it just moving the probe function to the _driver struct and
>> > remove the __init annotations. I could look into that.
>>
>> Can I drop this patch then ?
>
> No, please pick it up. Michael and I were talking about doing similar
> changes for other drivers.
Yes please, I was just about to answer, but Saravana beat me.
-michael
^ permalink raw reply
* Re: [PATCH] PCI: dwc: layerscape: convert to builtin_platform_driver()
From: Michael Walle @ 2021-01-25 19:49 UTC (permalink / raw)
To: Geert Uytterhoeven
Cc: Roy Zang, Lorenzo Pieralisi, Saravana Kannan, PCI, LKML,
Minghuan Lian, linux-arm-kernel, Greg Kroah-Hartman,
Bjorn Helgaas, linuxppc-dev, Mingkai Hu
In-Reply-To: <CAMuHMdWvFej-6vkaLM44t7eX2LpkDSXu4_7VH-X-3XRueXTO=A@mail.gmail.com>
Am 2021-01-21 12:01, schrieb Geert Uytterhoeven:
> Hi Saravana,
>
> On Thu, Jan 21, 2021 at 1:05 AM Saravana Kannan <saravanak@google.com>
> wrote:
>> On Wed, Jan 20, 2021 at 3:53 PM Michael Walle <michael@walle.cc>
>> wrote:
>> > Am 2021-01-20 20:47, schrieb Saravana Kannan:
>> > > On Wed, Jan 20, 2021 at 11:28 AM Michael Walle <michael@walle.cc>
>> > > wrote:
>> > >>
>> > >> [RESEND, fat-fingered the buttons of my mail client and converted
>> > >> all CCs to BCCs :(]
>> > >>
>> > >> Am 2021-01-20 20:02, schrieb Saravana Kannan:
>> > >> > On Wed, Jan 20, 2021 at 6:24 AM Rob Herring <robh@kernel.org> wrote:
>> > >> >>
>> > >> >> On Wed, Jan 20, 2021 at 4:53 AM Michael Walle <michael@walle.cc>
>> > >> >> wrote:
>> > >> >> >
>> > >> >> > fw_devlink will defer the probe until all suppliers are ready. We can't
>> > >> >> > use builtin_platform_driver_probe() because it doesn't retry after probe
>> > >> >> > deferral. Convert it to builtin_platform_driver().
>> > >> >>
>> > >> >> If builtin_platform_driver_probe() doesn't work with fw_devlink, then
>> > >> >> shouldn't it be fixed or removed?
>> > >> >
>> > >> > I was actually thinking about this too. The problem with fixing
>> > >> > builtin_platform_driver_probe() to behave like
>> > >> > builtin_platform_driver() is that these probe functions could be
>> > >> > marked with __init. But there are also only 20 instances of
>> > >> > builtin_platform_driver_probe() in the kernel:
>> > >> > $ git grep ^builtin_platform_driver_probe | wc -l
>> > >> > 20
>> > >> >
>> > >> > So it might be easier to just fix them to not use
>> > >> > builtin_platform_driver_probe().
>> > >> >
>> > >> > Michael,
>> > >> >
>> > >> > Any chance you'd be willing to help me by converting all these to
>> > >> > builtin_platform_driver() and delete builtin_platform_driver_probe()?
>> > >>
>> > >> If it just moving the probe function to the _driver struct and
>> > >> remove the __init annotations. I could look into that.
>> > >
>> > > Yup. That's pretty much it AFAICT.
>> > >
>> > > builtin_platform_driver_probe() also makes sure the driver doesn't ask
>> > > for async probe, etc. But I doubt anyone is actually setting async
>> > > flags and still using builtin_platform_driver_probe().
>> >
>> > Hasn't module_platform_driver_probe() the same problem? And there
>> > are ~80 drivers which uses that.
>>
>> Yeah. The biggest problem with all of these is the __init markers.
>> Maybe some familiar with coccinelle can help?
>
> And dropping them will increase memory usage.
Although I do have the changes for the builtin_platform_driver_probe()
ready, I don't think it makes much sense to send these unless we agree
on the increased memory footprint. While there are just a few
builtin_platform_driver_probe() and memory increase _might_ be
negligible, there are many more module_platform_driver_probe().
-michael
^ permalink raw reply
* Re: [PATCH] PCI: dwc: layerscape: convert to builtin_platform_driver()
From: Saravana Kannan @ 2021-01-25 22:41 UTC (permalink / raw)
To: Michael Walle
Cc: Roy Zang, Lorenzo Pieralisi, PCI, LKML, Minghuan Lian,
Geert Uytterhoeven, linux-arm-kernel, Greg Kroah-Hartman,
Bjorn Helgaas, linuxppc-dev, Mingkai Hu
In-Reply-To: <a24391e62b107040435766fff52bdd31@walle.cc>
On Mon, Jan 25, 2021 at 11:49 AM Michael Walle <michael@walle.cc> wrote:
>
> Am 2021-01-21 12:01, schrieb Geert Uytterhoeven:
> > Hi Saravana,
> >
> > On Thu, Jan 21, 2021 at 1:05 AM Saravana Kannan <saravanak@google.com>
> > wrote:
> >> On Wed, Jan 20, 2021 at 3:53 PM Michael Walle <michael@walle.cc>
> >> wrote:
> >> > Am 2021-01-20 20:47, schrieb Saravana Kannan:
> >> > > On Wed, Jan 20, 2021 at 11:28 AM Michael Walle <michael@walle.cc>
> >> > > wrote:
> >> > >>
> >> > >> [RESEND, fat-fingered the buttons of my mail client and converted
> >> > >> all CCs to BCCs :(]
> >> > >>
> >> > >> Am 2021-01-20 20:02, schrieb Saravana Kannan:
> >> > >> > On Wed, Jan 20, 2021 at 6:24 AM Rob Herring <robh@kernel.org> wrote:
> >> > >> >>
> >> > >> >> On Wed, Jan 20, 2021 at 4:53 AM Michael Walle <michael@walle.cc>
> >> > >> >> wrote:
> >> > >> >> >
> >> > >> >> > fw_devlink will defer the probe until all suppliers are ready. We can't
> >> > >> >> > use builtin_platform_driver_probe() because it doesn't retry after probe
> >> > >> >> > deferral. Convert it to builtin_platform_driver().
> >> > >> >>
> >> > >> >> If builtin_platform_driver_probe() doesn't work with fw_devlink, then
> >> > >> >> shouldn't it be fixed or removed?
> >> > >> >
> >> > >> > I was actually thinking about this too. The problem with fixing
> >> > >> > builtin_platform_driver_probe() to behave like
> >> > >> > builtin_platform_driver() is that these probe functions could be
> >> > >> > marked with __init. But there are also only 20 instances of
> >> > >> > builtin_platform_driver_probe() in the kernel:
> >> > >> > $ git grep ^builtin_platform_driver_probe | wc -l
> >> > >> > 20
> >> > >> >
> >> > >> > So it might be easier to just fix them to not use
> >> > >> > builtin_platform_driver_probe().
> >> > >> >
> >> > >> > Michael,
> >> > >> >
> >> > >> > Any chance you'd be willing to help me by converting all these to
> >> > >> > builtin_platform_driver() and delete builtin_platform_driver_probe()?
> >> > >>
> >> > >> If it just moving the probe function to the _driver struct and
> >> > >> remove the __init annotations. I could look into that.
> >> > >
> >> > > Yup. That's pretty much it AFAICT.
> >> > >
> >> > > builtin_platform_driver_probe() also makes sure the driver doesn't ask
> >> > > for async probe, etc. But I doubt anyone is actually setting async
> >> > > flags and still using builtin_platform_driver_probe().
> >> >
> >> > Hasn't module_platform_driver_probe() the same problem? And there
> >> > are ~80 drivers which uses that.
> >>
> >> Yeah. The biggest problem with all of these is the __init markers.
> >> Maybe some familiar with coccinelle can help?
> >
> > And dropping them will increase memory usage.
>
> Although I do have the changes for the builtin_platform_driver_probe()
> ready, I don't think it makes much sense to send these unless we agree
> on the increased memory footprint. While there are just a few
> builtin_platform_driver_probe() and memory increase _might_ be
> negligible, there are many more module_platform_driver_probe().
While it's good to drop code that'll not be used past kernel init, the
module_platform_driver_probe() is going even more extreme. It doesn't
even allow deferred probe (well before kernel init is done). I don't
think that behavior is right and that's why we should delete it. Also,
I doubt if any of these probe functions even take up 4KB of memory.
-Saravana
^ permalink raw reply
* Re: [PATCH v2 2/2] memblock: do not start bottom-up allocations with kernel_end
From: Thiago Jung Bauermann @ 2021-01-26 0:30 UTC (permalink / raw)
To: Mike Rapoport
Cc: riel, kernel-team, Ram Pai, linux-kernel, guro, linux-mm,
Satheesh Rajendran, Konrad Rzeszutek Wilk, iamjoonsoo.kim, mhocko,
linuxppc-dev, Andrew Morton
In-Reply-To: <20210124073421.GG6332@kernel.org>
Mike Rapoport <rppt@kernel.org> writes:
> On Sat, Jan 23, 2021 at 06:09:11PM -0800, Andrew Morton wrote:
>> On Fri, 22 Jan 2021 01:37:14 -0300 Thiago Jung Bauermann <bauerman@linux.ibm.com> wrote:
>>
>> > Mike Rapoport <rppt@kernel.org> writes:
>> >
>> > > > Signed-off-by: Roman Gushchin <guro@fb.com>
>> > >
>> > > Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
>> >
>> > I've seen a couple of spurious triggers of the WARN_ONCE() removed by this
>> > patch. This happens on some ppc64le bare metal (powernv) server machines with
>> > CONFIG_SWIOTLB=y and crashkernel=4G, as described in a candidate patch I posted
>> > to solve this issue in a different way:
>> >
>> > https://lore.kernel.org/linuxppc-dev/20201218062103.76102-1-bauerman@linux.ibm.com/
>> >
>> > Since this patch solves that problem, is it possible to include it in the next
>> > feasible v5.11-rcX, with the following tag?
>>
>> We could do this,
Thanks!
>> if we're confident that this patch doesn't depend on
>> [1/2] "mm: cma: allocate cma areas bottom-up"? I think it is...
>
> A think it does not depend on cma bottom-up allocation, it's rather the other
> way around: without this CMA bottom-up allocation could fail with KASLR
> enabled.
I agree. Conceptually, this could have been patch 1 in this series.
> Still, this patch may need updates to the way x86 does early reservations:
>
> https://lore.kernel.org/lkml/20210115083255.12744-1-rppt@kernel.org
Ah, I wasn't aware of this. Thanks for fixing those issues. That series
seems to be well accepted.
>> > Fixes: 8fabc623238e ("powerpc: Ensure that swiotlb buffer is allocated from low memory")
>>
>> I added that.
Thanks!
--
Thiago Jung Bauermann
IBM Linux Technology Center
^ permalink raw reply
* Re: [PATCH] powerpc/mm: Limit allocation of SWIOTLB on server machines
From: Thiago Jung Bauermann @ 2021-01-26 1:02 UTC (permalink / raw)
To: Konrad Rzeszutek Wilk
Cc: linuxppc-dev, Satheesh Rajendran, Ram Pai, linux-kernel
In-Reply-To: <20210121155448.GA97019@fedora>
Konrad Rzeszutek Wilk <konrad@darnok.org> writes:
> On Fri, Jan 08, 2021 at 09:27:01PM -0300, Thiago Jung Bauermann wrote:
>>
>> Ram Pai <linuxram@us.ibm.com> writes:
>>
>> > On Wed, Dec 23, 2020 at 09:06:01PM -0300, Thiago Jung Bauermann wrote:
>> >>
>> >> Hi Ram,
>> >>
>> >> Thanks for reviewing this patch.
>> >>
>> >> Ram Pai <linuxram@us.ibm.com> writes:
>> >>
>> >> > On Fri, Dec 18, 2020 at 03:21:03AM -0300, Thiago Jung Bauermann wrote:
>> >> >> On server-class POWER machines, we don't need the SWIOTLB unless we're a
>> >> >> secure VM. Nevertheless, if CONFIG_SWIOTLB is enabled we unconditionally
>> >> >> allocate it.
>> >> >>
>> >> >> In most cases this is harmless, but on a few machine configurations (e.g.,
>> >> >> POWER9 powernv systems with 4 GB area reserved for crashdump kernel) it can
>> >> >> happen that memblock can't find a 64 MB chunk of memory for the SWIOTLB and
>> >> >> fails with a scary-looking WARN_ONCE:
>> >> >>
>> >> >> ------------[ cut here ]------------
>> >> >> memblock: bottom-up allocation failed, memory hotremove may be affected
>> >> >> WARNING: CPU: 0 PID: 0 at mm/memblock.c:332 memblock_find_in_range_node+0x328/0x340
>> >> >> Modules linked in:
>> >> >> CPU: 0 PID: 0 Comm: swapper Not tainted 5.10.0-rc2-orig+ #6
>> >> >> NIP: c000000000442f38 LR: c000000000442f34 CTR: c0000000001e0080
>> >> >> REGS: c000000001def900 TRAP: 0700 Not tainted (5.10.0-rc2-orig+)
>> >> >> MSR: 9000000002021033 <SF,HV,VEC,ME,IR,DR,RI,LE> CR: 28022222 XER: 20040000
>> >> >> CFAR: c00000000014b7b4 IRQMASK: 1
>> >> >> GPR00: c000000000442f34 c000000001defba0 c000000001deff00 0000000000000047
>> >> >> GPR04: 00000000ffff7fff c000000001def828 c000000001def820 0000000000000000
>> >> >> GPR08: 0000001ffc3e0000 c000000001b75478 c000000001b75478 0000000000000001
>> >> >> GPR12: 0000000000002000 c000000002030000 0000000000000000 0000000000000000
>> >> >> GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000002030000
>> >> >> GPR20: 0000000000000000 0000000000010000 0000000000010000 c000000001defc10
>> >> >> GPR24: c000000001defc08 c000000001c91868 c000000001defc18 c000000001c91890
>> >> >> GPR28: 0000000000000000 ffffffffffffffff 0000000004000000 00000000ffffffff
>> >> >> NIP [c000000000442f38] memblock_find_in_range_node+0x328/0x340
>> >> >> LR [c000000000442f34] memblock_find_in_range_node+0x324/0x340
>> >> >> Call Trace:
>> >> >> [c000000001defba0] [c000000000442f34] memblock_find_in_range_node+0x324/0x340 (unreliable)
>> >> >> [c000000001defc90] [c0000000015ac088] memblock_alloc_range_nid+0xec/0x1b0
>> >> >> [c000000001defd40] [c0000000015ac1f8] memblock_alloc_internal+0xac/0x110
>> >> >> [c000000001defda0] [c0000000015ac4d0] memblock_alloc_try_nid+0x94/0xcc
>> >> >> [c000000001defe30] [c00000000159c3c8] swiotlb_init+0x78/0x104
>> >> >> [c000000001defea0] [c00000000158378c] mem_init+0x4c/0x98
>> >> >> [c000000001defec0] [c00000000157457c] start_kernel+0x714/0xac8
>> >> >> [c000000001deff90] [c00000000000d244] start_here_common+0x1c/0x58
>> >> >> Instruction dump:
>> >> >> 2c230000 4182ffd4 ea610088 ea810090 4bfffe84 39200001 3d42fff4 3c62ff60
>> >> >> 3863c560 992a8bfc 4bd0881d 60000000 <0fe00000> ea610088 4bfffd94 60000000
>> >> >> random: get_random_bytes called from __warn+0x128/0x184 with crng_init=0
>> >> >> ---[ end trace 0000000000000000 ]---
>> >> >> software IO TLB: Cannot allocate buffer
>> >> >>
>> >> >> Unless this is a secure VM the message can actually be ignored, because the
>> >> >> SWIOTLB isn't needed. Therefore, let's avoid the SWIOTLB in those cases.
>> >> >
>> >> > The above warn_on is conveying a genuine warning. Should it be silenced?
>> >>
>> >> Not sure I understand your point. This patch doesn't silence the
>> >> warning, it avoids the problem it is warning about.
>> >
>> > Sorry, I should have explained it better. My point is...
>> >
>> > If CONFIG_SWIOTLB is enabled, it means that the kernel is
>> > promising the bounce buffering capability. I know, currently we
>> > do not have any kernel subsystems that use bounce buffers on
>> > non-secure-pseries-kernel or powernv-kernel. But that does not
>> > mean, there wont be any. In case there is such a third-party
>> > module needing bounce buffering, it wont be able to operate,
>> > because of the proposed change in your patch.
>> >
>> > Is that a good thing or a bad thing, I do not know. I will let
>> > the experts opine.
>>
>> Ping? Does anyone else has an opinion on this? The other option I can
>> think of is changing the crashkernel code to not reserve so much memory
>> below 4 GB. Other people are considering this option, but it's not
>> planned for the near future.
>
> That seems a more suitable solution regardless, but there is always
> the danger of not being enough or being too big.
>
> There was some autocrashkernel allocation patches going around
> for x86 and ARM that perhaps could be re-used?
Do you mean this?
https://lore.kernel.org/lkml/20201118232431.21832-1-saeed.mirzamohammadi@oracle.com/
I agree it would be great to have crashkernel=auto upstream.
>> Also, there's a patch currently in linux-next which removes the scary
>> warning because of unrelated reasons:
>>
>> https://lore.kernel.org/lkml/20201217201214.3414100-2-guro@fb.com
>>
>> So assuming that the patch above goes in and keeping the assumption that
>> the swiotlb won't be needed in the powernv machines where I've seen the
>> warning happen, we can just leave things as they are now.
>
> If that solves the problem, then that is OK.
Awesome! Today Mike Rapoport mentioned that the patch above uncovers a
few issues with late memory reservations on on x86 that he is fixing:
https://lore.kernel.org/lkml/20210115083255.12744-1-rppt@kernel.org/
So that would be the last piece of the puzzle.
--
Thiago Jung Bauermann
IBM Linux Technology Center
^ permalink raw reply
* [PATCH v2] tpm: ibmvtpm: fix error return code in tpm_ibmvtpm_probe()
From: Stefan Berger @ 2021-01-26 1:47 UTC (permalink / raw)
To: jarkko, linux-integrity
Cc: Wang Hai, linux-kernel, Hulk Robot, paulus, linuxppc-dev,
Stefan Berger
From: Stefan Berger <stefanb@linux.ibm.com>
Return error code -ETIMEDOUT rather than '0' when waiting for the
rtce_buf to be set has timed out.
Fixes: d8d74ea3c002 ("tpm: ibmvtpm: Wait for buffer to be set before proceeding")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Wang Hai <wanghai38@huawei.com>
Signed-off-by: Stefan Berger <stefanb@linux.ibm.com>
---
drivers/char/tpm/tpm_ibmvtpm.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index 994385bf37c0..813eb2cac0ce 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -687,6 +687,7 @@ static int tpm_ibmvtpm_probe(struct vio_dev *vio_dev,
ibmvtpm->rtce_buf != NULL,
HZ)) {
dev_err(dev, "CRQ response timed out\n");
+ rc = -ETIMEDOUT;
goto init_irq_cleanup;
}
--
2.25.4
^ permalink raw reply related
* [PATCH v11 00/13] huge vmalloc mappings
From: Nicholas Piggin @ 2021-01-26 4:44 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, Ding Tianhong, linux-kernel, Nicholas Piggin,
Christoph Hellwig, Jonathan Cameron, Rick Edgecombe, linuxppc-dev
I think I ended up implementing all Christoph's comments because
they turned out better in the end. Cleanups coming in another
series though.
Thanks,
Nick
Since v10:
- Fixed code style, most > 80 colums, tweak patch titles, etc [thanks Christoph]
- Made huge vmalloc code and data structure compile away if unselected
[Christoph]
- Archs only have to provide arch_vmap_p?d_supported for levels they
implement [Christoph]
Since v9:
- Fixed intermediate build breakage on x86-32 !PAE [thanks Ding]
- Fixed small page fallback case vm_struct double-free [thanks Ding]
Since v8:
- Fixed nommu compile.
- Added Kconfig option help text
- Added VM_NOHUGE which should help archs implement it [suggested by Rick]
Since v7:
- Rebase, added some acks, compile fix
- Removed "order=" from vmallocinfo, it's a bit confusing (nr_pages
is in small page size for compatibility).
- Added arch_vmap_pmd_supported() test before starting to allocate
the large page, rather than only testing it when doing the map, to
avoid unsupported configs trying to allocate huge pages for no
reason.
Since v6:
- Fixed a false positive warning introduced in patch 2, found by
kbuild test robot.
Since v5:
- Split arch changes out better and make the constant folding work
- Avoid most of the 80 column wrap, fix a reference to lib/ioremap.c
- Fix compile error on some archs
Since v4:
- Fixed an off-by-page-order bug in v4
- Several minor cleanups.
- Added page order to /proc/vmallocinfo
- Added hugepage to alloc_large_system_hage output.
- Made an architecture config option, powerpc only for now.
Since v3:
- Fixed an off-by-one bug in a loop
- Fix !CONFIG_HAVE_ARCH_HUGE_VMAP build fail
Nicholas Piggin (13):
mm/vmalloc: fix HUGE_VMAP regression by enabling huge pages in
vmalloc_to_page
mm: apply_to_pte_range warn and fail if a large pte is encountered
mm/vmalloc: rename vmap_*_range vmap_pages_*_range
mm/ioremap: rename ioremap_*_range to vmap_*_range
mm: HUGE_VMAP arch support cleanup
powerpc: inline huge vmap supported functions
arm64: inline huge vmap supported functions
x86: inline huge vmap supported functions
mm/vmalloc: provide fallback arch huge vmap support functions
mm: Move vmap_range from mm/ioremap.c to mm/vmalloc.c
mm/vmalloc: add vmap_range_noflush variant
mm/vmalloc: Hugepage vmalloc mappings
powerpc/64s/radix: Enable huge vmalloc mappings
.../admin-guide/kernel-parameters.txt | 2 +
arch/Kconfig | 11 +
arch/arm64/include/asm/vmalloc.h | 24 +
arch/arm64/mm/mmu.c | 26 -
arch/powerpc/Kconfig | 1 +
arch/powerpc/include/asm/vmalloc.h | 20 +
arch/powerpc/kernel/module.c | 21 +-
arch/powerpc/mm/book3s64/radix_pgtable.c | 21 -
arch/x86/include/asm/vmalloc.h | 20 +
arch/x86/mm/ioremap.c | 19 -
arch/x86/mm/pgtable.c | 13 -
include/linux/io.h | 9 -
include/linux/vmalloc.h | 46 ++
init/main.c | 1 -
mm/ioremap.c | 225 +-------
mm/memory.c | 66 ++-
mm/page_alloc.c | 5 +-
mm/vmalloc.c | 484 +++++++++++++++---
18 files changed, 614 insertions(+), 400 deletions(-)
--
2.23.0
^ permalink raw reply
* [PATCH v11 01/13] mm/vmalloc: fix HUGE_VMAP regression by enabling huge pages in vmalloc_to_page
From: Nicholas Piggin @ 2021-01-26 4:44 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, Ding Tianhong, linux-kernel, Nicholas Piggin,
Christoph Hellwig, Jonathan Cameron, Rick Edgecombe, linuxppc-dev,
Christoph Hellwig
In-Reply-To: <20210126044510.2491820-1-npiggin@gmail.com>
vmalloc_to_page returns NULL for addresses mapped by larger pages[*].
Whether or not a vmap is huge depends on the architecture details,
alignments, boot options, etc., which the caller can not be expected
to know. Therefore HUGE_VMAP is a regression for vmalloc_to_page.
This change teaches vmalloc_to_page about larger pages, and returns
the struct page that corresponds to the offset within the large page.
This makes the API agnostic to mapping implementation details.
[*] As explained by commit 029c54b095995 ("mm/vmalloc.c: huge-vmap:
fail gracefully on unexpected huge vmap mappings")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
mm/vmalloc.c | 41 ++++++++++++++++++++++++++---------------
1 file changed, 26 insertions(+), 15 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index e6f352bf0498..62372f9e0167 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -34,7 +34,7 @@
#include <linux/bitops.h>
#include <linux/rbtree_augmented.h>
#include <linux/overflow.h>
-
+#include <linux/pgtable.h>
#include <linux/uaccess.h>
#include <asm/tlbflush.h>
#include <asm/shmparam.h>
@@ -343,7 +343,9 @@ int is_vmalloc_or_module_addr(const void *x)
}
/*
- * Walk a vmap address to the struct page it maps.
+ * Walk a vmap address to the struct page it maps. Huge vmap mappings will
+ * return the tail page that corresponds to the base page address, which
+ * matches small vmap mappings.
*/
struct page *vmalloc_to_page(const void *vmalloc_addr)
{
@@ -363,25 +365,33 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
if (pgd_none(*pgd))
return NULL;
+ if (WARN_ON_ONCE(pgd_leaf(*pgd)))
+ return NULL; /* XXX: no allowance for huge pgd */
+ if (WARN_ON_ONCE(pgd_bad(*pgd)))
+ return NULL;
+
p4d = p4d_offset(pgd, addr);
if (p4d_none(*p4d))
return NULL;
- pud = pud_offset(p4d, addr);
+ if (p4d_leaf(*p4d))
+ return p4d_page(*p4d) + ((addr & ~P4D_MASK) >> PAGE_SHIFT);
+ if (WARN_ON_ONCE(p4d_bad(*p4d)))
+ return NULL;
- /*
- * Don't dereference bad PUD or PMD (below) entries. This will also
- * identify huge mappings, which we may encounter on architectures
- * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be
- * identified as vmalloc addresses by is_vmalloc_addr(), but are
- * not [unambiguously] associated with a struct page, so there is
- * no correct value to return for them.
- */
- WARN_ON_ONCE(pud_bad(*pud));
- if (pud_none(*pud) || pud_bad(*pud))
+ pud = pud_offset(p4d, addr);
+ if (pud_none(*pud))
+ return NULL;
+ if (pud_leaf(*pud))
+ return pud_page(*pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
+ if (WARN_ON_ONCE(pud_bad(*pud)))
return NULL;
+
pmd = pmd_offset(pud, addr);
- WARN_ON_ONCE(pmd_bad(*pmd));
- if (pmd_none(*pmd) || pmd_bad(*pmd))
+ if (pmd_none(*pmd))
+ return NULL;
+ if (pmd_leaf(*pmd))
+ return pmd_page(*pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
+ if (WARN_ON_ONCE(pmd_bad(*pmd)))
return NULL;
ptep = pte_offset_map(pmd, addr);
@@ -389,6 +399,7 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
if (pte_present(pte))
page = pte_page(pte);
pte_unmap(ptep);
+
return page;
}
EXPORT_SYMBOL(vmalloc_to_page);
--
2.23.0
^ permalink raw reply related
* [PATCH v11 02/13] mm: apply_to_pte_range warn and fail if a large pte is encountered
From: Nicholas Piggin @ 2021-01-26 4:44 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, Ding Tianhong, linux-kernel, Nicholas Piggin,
Christoph Hellwig, Jonathan Cameron, Rick Edgecombe, linuxppc-dev,
Christoph Hellwig
In-Reply-To: <20210126044510.2491820-1-npiggin@gmail.com>
apply_to_pte_range might mistake a large pte for bad, or treat it as a
page table, resulting in a crash or corruption. Add a test to warn and
return error if large entries are found.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
mm/memory.c | 66 +++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 49 insertions(+), 17 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index feff48e1465a..672e39a72788 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2440,13 +2440,21 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
}
do {
next = pmd_addr_end(addr, end);
- if (create || !pmd_none_or_clear_bad(pmd)) {
- err = apply_to_pte_range(mm, pmd, addr, next, fn, data,
- create, mask);
- if (err)
- break;
+ if (pmd_none(*pmd) && !create)
+ continue;
+ if (WARN_ON_ONCE(pmd_leaf(*pmd)))
+ return -EINVAL;
+ if (!pmd_none(*pmd) && WARN_ON_ONCE(pmd_bad(*pmd))) {
+ if (!create)
+ continue;
+ pmd_clear_bad(pmd);
}
+ err = apply_to_pte_range(mm, pmd, addr, next,
+ fn, data, create, mask);
+ if (err)
+ break;
} while (pmd++, addr = next, addr != end);
+
return err;
}
@@ -2468,13 +2476,21 @@ static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
}
do {
next = pud_addr_end(addr, end);
- if (create || !pud_none_or_clear_bad(pud)) {
- err = apply_to_pmd_range(mm, pud, addr, next, fn, data,
- create, mask);
- if (err)
- break;
+ if (pud_none(*pud) && !create)
+ continue;
+ if (WARN_ON_ONCE(pud_leaf(*pud)))
+ return -EINVAL;
+ if (!pud_none(*pud) && WARN_ON_ONCE(pud_bad(*pud))) {
+ if (!create)
+ continue;
+ pud_clear_bad(pud);
}
+ err = apply_to_pmd_range(mm, pud, addr, next,
+ fn, data, create, mask);
+ if (err)
+ break;
} while (pud++, addr = next, addr != end);
+
return err;
}
@@ -2496,13 +2512,21 @@ static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
}
do {
next = p4d_addr_end(addr, end);
- if (create || !p4d_none_or_clear_bad(p4d)) {
- err = apply_to_pud_range(mm, p4d, addr, next, fn, data,
- create, mask);
- if (err)
- break;
+ if (p4d_none(*p4d) && !create)
+ continue;
+ if (WARN_ON_ONCE(p4d_leaf(*p4d)))
+ return -EINVAL;
+ if (!p4d_none(*p4d) && WARN_ON_ONCE(p4d_bad(*p4d))) {
+ if (!create)
+ continue;
+ p4d_clear_bad(p4d);
}
+ err = apply_to_pud_range(mm, p4d, addr, next,
+ fn, data, create, mask);
+ if (err)
+ break;
} while (p4d++, addr = next, addr != end);
+
return err;
}
@@ -2522,9 +2546,17 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr,
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, end);
- if (!create && pgd_none_or_clear_bad(pgd))
+ if (pgd_none(*pgd) && !create)
continue;
- err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create, &mask);
+ if (WARN_ON_ONCE(pgd_leaf(*pgd)))
+ return -EINVAL;
+ if (!pgd_none(*pgd) && WARN_ON_ONCE(pgd_bad(*pgd))) {
+ if (!create)
+ continue;
+ pgd_clear_bad(pgd);
+ }
+ err = apply_to_p4d_range(mm, pgd, addr, next,
+ fn, data, create, &mask);
if (err)
break;
} while (pgd++, addr = next, addr != end);
--
2.23.0
^ permalink raw reply related
* [PATCH v11 03/13] mm/vmalloc: rename vmap_*_range vmap_pages_*_range
From: Nicholas Piggin @ 2021-01-26 4:45 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, Ding Tianhong, linux-kernel, Nicholas Piggin,
Christoph Hellwig, Jonathan Cameron, Rick Edgecombe, linuxppc-dev,
Christoph Hellwig
In-Reply-To: <20210126044510.2491820-1-npiggin@gmail.com>
The vmalloc mapper operates on a struct page * array rather than a
linear physical address, re-name it to make this distinction clear.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
mm/vmalloc.c | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 62372f9e0167..7f2f36116980 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -189,7 +189,7 @@ void unmap_kernel_range_noflush(unsigned long start, unsigned long size)
arch_sync_kernel_mappings(start, end);
}
-static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
+static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr,
pgtbl_mod_mask *mask)
{
@@ -217,7 +217,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr,
return 0;
}
-static int vmap_pmd_range(pud_t *pud, unsigned long addr,
+static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr,
pgtbl_mod_mask *mask)
{
@@ -229,13 +229,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr,
return -ENOMEM;
do {
next = pmd_addr_end(addr, end);
- if (vmap_pte_range(pmd, addr, next, prot, pages, nr, mask))
+ if (vmap_pages_pte_range(pmd, addr, next, prot, pages, nr, mask))
return -ENOMEM;
} while (pmd++, addr = next, addr != end);
return 0;
}
-static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
+static int vmap_pages_pud_range(p4d_t *p4d, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr,
pgtbl_mod_mask *mask)
{
@@ -247,13 +247,13 @@ static int vmap_pud_range(p4d_t *p4d, unsigned long addr,
return -ENOMEM;
do {
next = pud_addr_end(addr, end);
- if (vmap_pmd_range(pud, addr, next, prot, pages, nr, mask))
+ if (vmap_pages_pmd_range(pud, addr, next, prot, pages, nr, mask))
return -ENOMEM;
} while (pud++, addr = next, addr != end);
return 0;
}
-static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
+static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr,
unsigned long end, pgprot_t prot, struct page **pages, int *nr,
pgtbl_mod_mask *mask)
{
@@ -265,7 +265,7 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr,
return -ENOMEM;
do {
next = p4d_addr_end(addr, end);
- if (vmap_pud_range(p4d, addr, next, prot, pages, nr, mask))
+ if (vmap_pages_pud_range(p4d, addr, next, prot, pages, nr, mask))
return -ENOMEM;
} while (p4d++, addr = next, addr != end);
return 0;
@@ -306,7 +306,7 @@ int map_kernel_range_noflush(unsigned long addr, unsigned long size,
next = pgd_addr_end(addr, end);
if (pgd_bad(*pgd))
mask |= PGTBL_PGD_MODIFIED;
- err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
+ err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask);
if (err)
return err;
} while (pgd++, addr = next, addr != end);
--
2.23.0
^ permalink raw reply related
* [PATCH v11 04/13] mm/ioremap: rename ioremap_*_range to vmap_*_range
From: Nicholas Piggin @ 2021-01-26 4:45 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, Ding Tianhong, linux-kernel, Nicholas Piggin,
Christoph Hellwig, Jonathan Cameron, Rick Edgecombe, linuxppc-dev
In-Reply-To: <20210126044510.2491820-1-npiggin@gmail.com>
This will be used as a generic kernel virtual mapping function, so
re-name it in preparation.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
mm/ioremap.c | 64 +++++++++++++++++++++++++++-------------------------
1 file changed, 33 insertions(+), 31 deletions(-)
diff --git a/mm/ioremap.c b/mm/ioremap.c
index 5fa1ab41d152..3f4d36f9745a 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -61,9 +61,9 @@ static inline int ioremap_pud_enabled(void) { return 0; }
static inline int ioremap_pmd_enabled(void) { return 0; }
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
-static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot,
- pgtbl_mod_mask *mask)
+static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ pgtbl_mod_mask *mask)
{
pte_t *pte;
u64 pfn;
@@ -81,9 +81,8 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr,
return 0;
}
-static int ioremap_try_huge_pmd(pmd_t *pmd, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr,
- pgprot_t prot)
+static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot)
{
if (!ioremap_pmd_enabled())
return 0;
@@ -103,9 +102,9 @@ static int ioremap_try_huge_pmd(pmd_t *pmd, unsigned long addr,
return pmd_set_huge(pmd, phys_addr, prot);
}
-static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot,
- pgtbl_mod_mask *mask)
+static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ pgtbl_mod_mask *mask)
{
pmd_t *pmd;
unsigned long next;
@@ -116,20 +115,19 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
do {
next = pmd_addr_end(addr, end);
- if (ioremap_try_huge_pmd(pmd, addr, next, phys_addr, prot)) {
+ if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot)) {
*mask |= PGTBL_PMD_MODIFIED;
continue;
}
- if (ioremap_pte_range(pmd, addr, next, phys_addr, prot, mask))
+ if (vmap_pte_range(pmd, addr, next, phys_addr, prot, mask))
return -ENOMEM;
} while (pmd++, phys_addr += (next - addr), addr = next, addr != end);
return 0;
}
-static int ioremap_try_huge_pud(pud_t *pud, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr,
- pgprot_t prot)
+static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot)
{
if (!ioremap_pud_enabled())
return 0;
@@ -149,9 +147,9 @@ static int ioremap_try_huge_pud(pud_t *pud, unsigned long addr,
return pud_set_huge(pud, phys_addr, prot);
}
-static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot,
- pgtbl_mod_mask *mask)
+static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ pgtbl_mod_mask *mask)
{
pud_t *pud;
unsigned long next;
@@ -162,20 +160,19 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
do {
next = pud_addr_end(addr, end);
- if (ioremap_try_huge_pud(pud, addr, next, phys_addr, prot)) {
+ if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot)) {
*mask |= PGTBL_PUD_MODIFIED;
continue;
}
- if (ioremap_pmd_range(pud, addr, next, phys_addr, prot, mask))
+ if (vmap_pmd_range(pud, addr, next, phys_addr, prot, mask))
return -ENOMEM;
} while (pud++, phys_addr += (next - addr), addr = next, addr != end);
return 0;
}
-static int ioremap_try_huge_p4d(p4d_t *p4d, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr,
- pgprot_t prot)
+static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot)
{
if (!ioremap_p4d_enabled())
return 0;
@@ -195,9 +192,9 @@ static int ioremap_try_huge_p4d(p4d_t *p4d, unsigned long addr,
return p4d_set_huge(p4d, phys_addr, prot);
}
-static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot,
- pgtbl_mod_mask *mask)
+static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ pgtbl_mod_mask *mask)
{
p4d_t *p4d;
unsigned long next;
@@ -208,19 +205,19 @@ static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr,
do {
next = p4d_addr_end(addr, end);
- if (ioremap_try_huge_p4d(p4d, addr, next, phys_addr, prot)) {
+ if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot)) {
*mask |= PGTBL_P4D_MODIFIED;
continue;
}
- if (ioremap_pud_range(p4d, addr, next, phys_addr, prot, mask))
+ if (vmap_pud_range(p4d, addr, next, phys_addr, prot, mask))
return -ENOMEM;
} while (p4d++, phys_addr += (next - addr), addr = next, addr != end);
return 0;
}
-int ioremap_page_range(unsigned long addr,
- unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+static int vmap_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot)
{
pgd_t *pgd;
unsigned long start;
@@ -235,8 +232,7 @@ int ioremap_page_range(unsigned long addr,
pgd = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, end);
- err = ioremap_p4d_range(pgd, addr, next, phys_addr, prot,
- &mask);
+ err = vmap_p4d_range(pgd, addr, next, phys_addr, prot, &mask);
if (err)
break;
} while (pgd++, phys_addr += (next - addr), addr = next, addr != end);
@@ -249,6 +245,12 @@ int ioremap_page_range(unsigned long addr,
return err;
}
+int ioremap_page_range(unsigned long addr,
+ unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
+{
+ return vmap_range(addr, end, phys_addr, prot);
+}
+
#ifdef CONFIG_GENERIC_IOREMAP
void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot)
{
--
2.23.0
^ permalink raw reply related
* [PATCH v11 05/13] mm: HUGE_VMAP arch support cleanup
From: Nicholas Piggin @ 2021-01-26 4:45 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, x86, H. Peter Anvin, Will Deacon, Catalin Marinas,
Ding Tianhong, linux-kernel, Nicholas Piggin, Christoph Hellwig,
Ingo Molnar, Borislav Petkov, Jonathan Cameron, Thomas Gleixner,
Rick Edgecombe, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20210126044510.2491820-1-npiggin@gmail.com>
This changes the awkward approach where architectures provide init
functions to determine which levels they can provide large mappings for,
to one where the arch is queried for each call.
This removes code and indirection, and allows constant-folding of dead
code for unsupported levels.
This also adds a prot argument to the arch query. This is unused
currently but could help with some architectures (e.g., some powerpc
processors can't map uncacheable memory with large pages).
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: x86@kernel.org
Cc: "H. Peter Anvin" <hpa@zytor.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com> [arm64]
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/arm64/include/asm/vmalloc.h | 8 ++
arch/arm64/mm/mmu.c | 10 +--
arch/powerpc/include/asm/vmalloc.h | 8 ++
arch/powerpc/mm/book3s64/radix_pgtable.c | 8 +-
arch/x86/include/asm/vmalloc.h | 7 ++
arch/x86/mm/ioremap.c | 12 +--
include/linux/io.h | 9 ---
include/linux/vmalloc.h | 6 ++
init/main.c | 1 -
mm/ioremap.c | 94 ++++++++++--------------
10 files changed, 85 insertions(+), 78 deletions(-)
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
index 2ca708ab9b20..597b40405319 100644
--- a/arch/arm64/include/asm/vmalloc.h
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -1,4 +1,12 @@
#ifndef _ASM_ARM64_VMALLOC_H
#define _ASM_ARM64_VMALLOC_H
+#include <asm/page.h>
+
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+bool arch_vmap_p4d_supported(pgprot_t prot);
+bool arch_vmap_pud_supported(pgprot_t prot);
+bool arch_vmap_pmd_supported(pgprot_t prot);
+#endif
+
#endif /* _ASM_ARM64_VMALLOC_H */
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index ae0c3d023824..1613d290cbd1 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1313,12 +1313,12 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
return dt_virt;
}
-int __init arch_ioremap_p4d_supported(void)
+bool arch_vmap_p4d_supported(pgprot_t prot)
{
- return 0;
+ return false;
}
-int __init arch_ioremap_pud_supported(void)
+bool arch_vmap_pud_supported(pgprot_t prot)
{
/*
* Only 4k granule supports level 1 block mappings.
@@ -1328,9 +1328,9 @@ int __init arch_ioremap_pud_supported(void)
!IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
}
-int __init arch_ioremap_pmd_supported(void)
+bool arch_vmap_pmd_supported(pgprot_t prot)
{
- /* See arch_ioremap_pud_supported() */
+ /* See arch_vmap_pud_supported() */
return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
}
diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h
index b992dfaaa161..105abb73f075 100644
--- a/arch/powerpc/include/asm/vmalloc.h
+++ b/arch/powerpc/include/asm/vmalloc.h
@@ -1,4 +1,12 @@
#ifndef _ASM_POWERPC_VMALLOC_H
#define _ASM_POWERPC_VMALLOC_H
+#include <asm/page.h>
+
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+bool arch_vmap_p4d_supported(pgprot_t prot);
+bool arch_vmap_pud_supported(pgprot_t prot);
+bool arch_vmap_pmd_supported(pgprot_t prot);
+#endif
+
#endif /* _ASM_POWERPC_VMALLOC_H */
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 98f0b243c1ab..743807fc210f 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -1082,13 +1082,13 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
set_pte_at(mm, addr, ptep, pte);
}
-int __init arch_ioremap_pud_supported(void)
+bool arch_vmap_pud_supported(pgprot_t prot)
{
/* HPT does not cope with large pages in the vmalloc area */
return radix_enabled();
}
-int __init arch_ioremap_pmd_supported(void)
+bool arch_vmap_pmd_supported(pgprot_t prot)
{
return radix_enabled();
}
@@ -1182,7 +1182,7 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
return 1;
}
-int __init arch_ioremap_p4d_supported(void)
+bool arch_vmap_p4d_supported(pgprot_t prot)
{
- return 0;
+ return false;
}
diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h
index 29837740b520..094ea2b565f3 100644
--- a/arch/x86/include/asm/vmalloc.h
+++ b/arch/x86/include/asm/vmalloc.h
@@ -1,6 +1,13 @@
#ifndef _ASM_X86_VMALLOC_H
#define _ASM_X86_VMALLOC_H
+#include <asm/page.h>
#include <asm/pgtable_areas.h>
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+bool arch_vmap_p4d_supported(pgprot_t prot);
+bool arch_vmap_pud_supported(pgprot_t prot);
+bool arch_vmap_pmd_supported(pgprot_t prot);
+#endif
+
#endif /* _ASM_X86_VMALLOC_H */
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 9e5ccc56f8e0..fbaf0c447986 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -481,24 +481,26 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
-int __init arch_ioremap_p4d_supported(void)
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+bool arch_vmap_p4d_supported(pgprot_t prot)
{
- return 0;
+ return false;
}
-int __init arch_ioremap_pud_supported(void)
+bool arch_vmap_pud_supported(pgprot_t prot)
{
#ifdef CONFIG_X86_64
return boot_cpu_has(X86_FEATURE_GBPAGES);
#else
- return 0;
+ return false;
#endif
}
-int __init arch_ioremap_pmd_supported(void)
+bool arch_vmap_pmd_supported(pgprot_t prot)
{
return boot_cpu_has(X86_FEATURE_PSE);
}
+#endif
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
diff --git a/include/linux/io.h b/include/linux/io.h
index 8394c56babc2..f1effd4d7a3c 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -31,15 +31,6 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end,
}
#endif
-#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-void __init ioremap_huge_init(void);
-int arch_ioremap_p4d_supported(void);
-int arch_ioremap_pud_supported(void);
-int arch_ioremap_pmd_supported(void);
-#else
-static inline void ioremap_huge_init(void) { }
-#endif
-
/*
* Managed iomap interface
*/
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 80c0181c411d..00bd62bd701e 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -83,6 +83,12 @@ struct vmap_area {
};
};
+#ifndef CONFIG_HAVE_ARCH_HUGE_VMAP
+static inline bool arch_vmap_p4d_supported(pgprot_t prot) { return false; }
+static inline bool arch_vmap_pud_supported(pgprot_t prot) { return false; }
+static inline bool arch_vmap_pmd_supported(pgprot_t prot) { return false; }
+#endif
+
/*
* Highlevel APIs for driver use
*/
diff --git a/init/main.c b/init/main.c
index c68d784376ca..bf9389e5b2e4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -834,7 +834,6 @@ static void __init mm_init(void)
pgtable_init();
debug_objects_mem_init();
vmalloc_init();
- ioremap_huge_init();
/* Should be run before the first non-init thread is created */
init_espfix_bsp();
/* Should be run after espfix64 is set up. */
diff --git a/mm/ioremap.c b/mm/ioremap.c
index 3f4d36f9745a..3264d0203785 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -16,49 +16,16 @@
#include "pgalloc-track.h"
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-static int __read_mostly ioremap_p4d_capable;
-static int __read_mostly ioremap_pud_capable;
-static int __read_mostly ioremap_pmd_capable;
-static int __read_mostly ioremap_huge_disabled;
+static bool __ro_after_init iomap_max_page_shift = PAGE_SHIFT;
static int __init set_nohugeiomap(char *str)
{
- ioremap_huge_disabled = 1;
+ iomap_max_page_shift = P4D_SHIFT;
return 0;
}
early_param("nohugeiomap", set_nohugeiomap);
-
-void __init ioremap_huge_init(void)
-{
- if (!ioremap_huge_disabled) {
- if (arch_ioremap_p4d_supported())
- ioremap_p4d_capable = 1;
- if (arch_ioremap_pud_supported())
- ioremap_pud_capable = 1;
- if (arch_ioremap_pmd_supported())
- ioremap_pmd_capable = 1;
- }
-}
-
-static inline int ioremap_p4d_enabled(void)
-{
- return ioremap_p4d_capable;
-}
-
-static inline int ioremap_pud_enabled(void)
-{
- return ioremap_pud_capable;
-}
-
-static inline int ioremap_pmd_enabled(void)
-{
- return ioremap_pmd_capable;
-}
-
-#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
-static inline int ioremap_p4d_enabled(void) { return 0; }
-static inline int ioremap_pud_enabled(void) { return 0; }
-static inline int ioremap_pmd_enabled(void) { return 0; }
+#else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
+static const bool iomap_max_page_shift = PAGE_SHIFT;
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
@@ -82,9 +49,13 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
}
static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot)
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
{
- if (!ioremap_pmd_enabled())
+ if (max_page_shift < PMD_SHIFT)
+ return 0;
+
+ if (!arch_vmap_pmd_supported(prot))
return 0;
if ((end - addr) != PMD_SIZE)
@@ -104,7 +75,7 @@ static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot,
- pgtbl_mod_mask *mask)
+ unsigned int max_page_shift, pgtbl_mod_mask *mask)
{
pmd_t *pmd;
unsigned long next;
@@ -115,7 +86,8 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
do {
next = pmd_addr_end(addr, end);
- if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot)) {
+ if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot,
+ max_page_shift)) {
*mask |= PGTBL_PMD_MODIFIED;
continue;
}
@@ -127,9 +99,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
}
static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot)
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
{
- if (!ioremap_pud_enabled())
+ if (max_page_shift < PUD_SHIFT)
+ return 0;
+
+ if (!arch_vmap_pud_supported(prot))
return 0;
if ((end - addr) != PUD_SIZE)
@@ -149,7 +125,7 @@ static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end,
static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot,
- pgtbl_mod_mask *mask)
+ unsigned int max_page_shift, pgtbl_mod_mask *mask)
{
pud_t *pud;
unsigned long next;
@@ -160,21 +136,27 @@ static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
do {
next = pud_addr_end(addr, end);
- if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot)) {
+ if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot,
+ max_page_shift)) {
*mask |= PGTBL_PUD_MODIFIED;
continue;
}
- if (vmap_pmd_range(pud, addr, next, phys_addr, prot, mask))
+ if (vmap_pmd_range(pud, addr, next, phys_addr, prot,
+ max_page_shift, mask))
return -ENOMEM;
} while (pud++, phys_addr += (next - addr), addr = next, addr != end);
return 0;
}
static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot)
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
{
- if (!ioremap_p4d_enabled())
+ if (max_page_shift < P4D_SHIFT)
+ return 0;
+
+ if (!arch_vmap_p4d_supported(prot))
return 0;
if ((end - addr) != P4D_SIZE)
@@ -194,7 +176,7 @@ static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end,
static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot,
- pgtbl_mod_mask *mask)
+ unsigned int max_page_shift, pgtbl_mod_mask *mask)
{
p4d_t *p4d;
unsigned long next;
@@ -205,19 +187,22 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
do {
next = p4d_addr_end(addr, end);
- if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot)) {
+ if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot,
+ max_page_shift)) {
*mask |= PGTBL_P4D_MODIFIED;
continue;
}
- if (vmap_pud_range(p4d, addr, next, phys_addr, prot, mask))
+ if (vmap_pud_range(p4d, addr, next, phys_addr, prot,
+ max_page_shift, mask))
return -ENOMEM;
} while (p4d++, phys_addr += (next - addr), addr = next, addr != end);
return 0;
}
static int vmap_range(unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot)
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
{
pgd_t *pgd;
unsigned long start;
@@ -232,7 +217,8 @@ static int vmap_range(unsigned long addr, unsigned long end,
pgd = pgd_offset_k(addr);
do {
next = pgd_addr_end(addr, end);
- err = vmap_p4d_range(pgd, addr, next, phys_addr, prot, &mask);
+ err = vmap_p4d_range(pgd, addr, next, phys_addr, prot,
+ max_page_shift, &mask);
if (err)
break;
} while (pgd++, phys_addr += (next - addr), addr = next, addr != end);
@@ -248,7 +234,7 @@ static int vmap_range(unsigned long addr, unsigned long end,
int ioremap_page_range(unsigned long addr,
unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
- return vmap_range(addr, end, phys_addr, prot);
+ return vmap_range(addr, end, phys_addr, prot, iomap_max_page_shift);
}
#ifdef CONFIG_GENERIC_IOREMAP
--
2.23.0
^ permalink raw reply related
* [PATCH v11 06/13] powerpc: inline huge vmap supported functions
From: Nicholas Piggin @ 2021-01-26 4:45 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, Ding Tianhong, linux-kernel, Nicholas Piggin,
Christoph Hellwig, Jonathan Cameron, Rick Edgecombe, linuxppc-dev
In-Reply-To: <20210126044510.2491820-1-npiggin@gmail.com>
This allows unsupported levels to be constant folded away, and so
p4d_free_pud_page can be removed because it's no longer linked to.
Cc: linuxppc-dev@lists.ozlabs.org
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/vmalloc.h | 19 ++++++++++++++++---
arch/powerpc/mm/book3s64/radix_pgtable.c | 21 ---------------------
2 files changed, 16 insertions(+), 24 deletions(-)
diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h
index 105abb73f075..3f0c153befb0 100644
--- a/arch/powerpc/include/asm/vmalloc.h
+++ b/arch/powerpc/include/asm/vmalloc.h
@@ -1,12 +1,25 @@
#ifndef _ASM_POWERPC_VMALLOC_H
#define _ASM_POWERPC_VMALLOC_H
+#include <asm/mmu.h>
#include <asm/page.h>
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-bool arch_vmap_p4d_supported(pgprot_t prot);
-bool arch_vmap_pud_supported(pgprot_t prot);
-bool arch_vmap_pmd_supported(pgprot_t prot);
+static inline bool arch_vmap_p4d_supported(pgprot_t prot)
+{
+ return false;
+}
+
+static inline bool arch_vmap_pud_supported(pgprot_t prot)
+{
+ /* HPT does not cope with large pages in the vmalloc area */
+ return radix_enabled();
+}
+
+static inline bool arch_vmap_pmd_supported(pgprot_t prot)
+{
+ return radix_enabled();
+}
#endif
#endif /* _ASM_POWERPC_VMALLOC_H */
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 743807fc210f..8da62afccee5 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -1082,22 +1082,6 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma,
set_pte_at(mm, addr, ptep, pte);
}
-bool arch_vmap_pud_supported(pgprot_t prot)
-{
- /* HPT does not cope with large pages in the vmalloc area */
- return radix_enabled();
-}
-
-bool arch_vmap_pmd_supported(pgprot_t prot)
-{
- return radix_enabled();
-}
-
-int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
-{
- return 0;
-}
-
int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
{
pte_t *ptep = (pte_t *)pud;
@@ -1181,8 +1165,3 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
return 1;
}
-
-bool arch_vmap_p4d_supported(pgprot_t prot)
-{
- return false;
-}
--
2.23.0
^ permalink raw reply related
* [PATCH v11 07/13] arm64: inline huge vmap supported functions
From: Nicholas Piggin @ 2021-01-26 4:45 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, Will Deacon, Catalin Marinas, Ding Tianhong,
linux-kernel, Nicholas Piggin, Christoph Hellwig,
Jonathan Cameron, Rick Edgecombe, linuxppc-dev, linux-arm-kernel
In-Reply-To: <20210126044510.2491820-1-npiggin@gmail.com>
This allows unsupported levels to be constant folded away, and so
p4d_free_pud_page can be removed because it's no longer linked to.
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/arm64/include/asm/vmalloc.h | 23 ++++++++++++++++++++---
arch/arm64/mm/mmu.c | 26 --------------------------
2 files changed, 20 insertions(+), 29 deletions(-)
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
index 597b40405319..fc9a12d6cc1a 100644
--- a/arch/arm64/include/asm/vmalloc.h
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -4,9 +4,26 @@
#include <asm/page.h>
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-bool arch_vmap_p4d_supported(pgprot_t prot);
-bool arch_vmap_pud_supported(pgprot_t prot);
-bool arch_vmap_pmd_supported(pgprot_t prot);
+static inline bool arch_vmap_p4d_supported(pgprot_t prot)
+{
+ return false;
+}
+
+static inline bool arch_vmap_pud_supported(pgprot_t prot)
+{
+ /*
+ * Only 4k granule supports level 1 block mappings.
+ * SW table walks can't handle removal of intermediate entries.
+ */
+ return IS_ENABLED(CONFIG_ARM64_4K_PAGES) &&
+ !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
+}
+
+static inline bool arch_vmap_pmd_supported(pgprot_t prot)
+{
+ /* See arch_vmap_pud_supported() */
+ return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
+}
#endif
#endif /* _ASM_ARM64_VMALLOC_H */
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 1613d290cbd1..ab9ba7c36dae 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1313,27 +1313,6 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
return dt_virt;
}
-bool arch_vmap_p4d_supported(pgprot_t prot)
-{
- return false;
-}
-
-bool arch_vmap_pud_supported(pgprot_t prot)
-{
- /*
- * Only 4k granule supports level 1 block mappings.
- * SW table walks can't handle removal of intermediate entries.
- */
- return IS_ENABLED(CONFIG_ARM64_4K_PAGES) &&
- !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
-}
-
-bool arch_vmap_pmd_supported(pgprot_t prot)
-{
- /* See arch_vmap_pud_supported() */
- return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
-}
-
int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot)
{
pud_t new_pud = pfn_pud(__phys_to_pfn(phys), mk_pud_sect_prot(prot));
@@ -1425,11 +1404,6 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
return 1;
}
-int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
-{
- return 0; /* Don't attempt a block mapping */
-}
-
#ifdef CONFIG_MEMORY_HOTPLUG
static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size)
{
--
2.23.0
^ permalink raw reply related
* [PATCH v11 08/13] x86: inline huge vmap supported functions
From: Nicholas Piggin @ 2021-01-26 4:45 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, x86, H. Peter Anvin, Ding Tianhong, linux-kernel,
Nicholas Piggin, Christoph Hellwig, Ingo Molnar, Borislav Petkov,
Jonathan Cameron, Thomas Gleixner, Rick Edgecombe, linuxppc-dev
In-Reply-To: <20210126044510.2491820-1-npiggin@gmail.com>
This allows unsupported levels to be constant folded away, and so
p4d_free_pud_page can be removed because it's no longer linked to.
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: x86@kernel.org
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/x86/include/asm/vmalloc.h | 22 +++++++++++++++++++---
arch/x86/mm/ioremap.c | 21 ---------------------
arch/x86/mm/pgtable.c | 13 -------------
3 files changed, 19 insertions(+), 37 deletions(-)
diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h
index 094ea2b565f3..e714b00fc0ca 100644
--- a/arch/x86/include/asm/vmalloc.h
+++ b/arch/x86/include/asm/vmalloc.h
@@ -1,13 +1,29 @@
#ifndef _ASM_X86_VMALLOC_H
#define _ASM_X86_VMALLOC_H
+#include <asm/cpufeature.h>
#include <asm/page.h>
#include <asm/pgtable_areas.h>
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-bool arch_vmap_p4d_supported(pgprot_t prot);
-bool arch_vmap_pud_supported(pgprot_t prot);
-bool arch_vmap_pmd_supported(pgprot_t prot);
+static inline bool arch_vmap_p4d_supported(pgprot_t prot)
+{
+ return false;
+}
+
+static inline bool arch_vmap_pud_supported(pgprot_t prot)
+{
+#ifdef CONFIG_X86_64
+ return boot_cpu_has(X86_FEATURE_GBPAGES);
+#else
+ return false;
+#endif
+}
+
+static inline bool arch_vmap_pmd_supported(pgprot_t prot)
+{
+ return boot_cpu_has(X86_FEATURE_PSE);
+}
#endif
#endif /* _ASM_X86_VMALLOC_H */
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index fbaf0c447986..12c686c65ea9 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -481,27 +481,6 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
-#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-bool arch_vmap_p4d_supported(pgprot_t prot)
-{
- return false;
-}
-
-bool arch_vmap_pud_supported(pgprot_t prot)
-{
-#ifdef CONFIG_X86_64
- return boot_cpu_has(X86_FEATURE_GBPAGES);
-#else
- return false;
-#endif
-}
-
-bool arch_vmap_pmd_supported(pgprot_t prot)
-{
- return boot_cpu_has(X86_FEATURE_PSE);
-}
-#endif
-
/*
* Convert a physical pointer to a virtual kernel pointer for /dev/mem
* access
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index f6a9e2e36642..d27cf69e811d 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -780,14 +780,6 @@ int pmd_clear_huge(pmd_t *pmd)
return 0;
}
-/*
- * Until we support 512GB pages, skip them in the vmap area.
- */
-int p4d_free_pud_page(p4d_t *p4d, unsigned long addr)
-{
- return 0;
-}
-
#ifdef CONFIG_X86_64
/**
* pud_free_pmd_page - Clear pud entry and free pmd page.
@@ -861,11 +853,6 @@ int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
#else /* !CONFIG_X86_64 */
-int pud_free_pmd_page(pud_t *pud, unsigned long addr)
-{
- return pud_none(*pud);
-}
-
/*
* Disable free page handling on x86-PAE. This assures that ioremap()
* does not update sync'd pmd entries. See vmalloc_sync_one().
--
2.23.0
^ permalink raw reply related
* [PATCH v11 09/13] mm/vmalloc: provide fallback arch huge vmap support functions
From: Nicholas Piggin @ 2021-01-26 4:45 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, Ding Tianhong, linux-kernel, Nicholas Piggin,
Christoph Hellwig, Jonathan Cameron, Rick Edgecombe, linuxppc-dev
In-Reply-To: <20210126044510.2491820-1-npiggin@gmail.com>
If an architecture doesn't support a particular page table level as
a huge vmap page size then allow it to skip defining the support
query function.
Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/arm64/include/asm/vmalloc.h | 7 +++----
arch/powerpc/include/asm/vmalloc.h | 7 +++----
arch/x86/include/asm/vmalloc.h | 13 +++++--------
include/linux/vmalloc.h | 24 ++++++++++++++++++++----
4 files changed, 31 insertions(+), 20 deletions(-)
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
index fc9a12d6cc1a..7a22aeea9bb5 100644
--- a/arch/arm64/include/asm/vmalloc.h
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -4,11 +4,8 @@
#include <asm/page.h>
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-static inline bool arch_vmap_p4d_supported(pgprot_t prot)
-{
- return false;
-}
+#define arch_vmap_pud_supported arch_vmap_pud_supported
static inline bool arch_vmap_pud_supported(pgprot_t prot)
{
/*
@@ -19,11 +16,13 @@ static inline bool arch_vmap_pud_supported(pgprot_t prot)
!IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
}
+#define arch_vmap_pmd_supported arch_vmap_pmd_supported
static inline bool arch_vmap_pmd_supported(pgprot_t prot)
{
/* See arch_vmap_pud_supported() */
return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS);
}
+
#endif
#endif /* _ASM_ARM64_VMALLOC_H */
diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h
index 3f0c153befb0..4c69ece52a31 100644
--- a/arch/powerpc/include/asm/vmalloc.h
+++ b/arch/powerpc/include/asm/vmalloc.h
@@ -5,21 +5,20 @@
#include <asm/page.h>
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-static inline bool arch_vmap_p4d_supported(pgprot_t prot)
-{
- return false;
-}
+#define arch_vmap_pud_supported arch_vmap_pud_supported
static inline bool arch_vmap_pud_supported(pgprot_t prot)
{
/* HPT does not cope with large pages in the vmalloc area */
return radix_enabled();
}
+#define arch_vmap_pmd_supported arch_vmap_pmd_supported
static inline bool arch_vmap_pmd_supported(pgprot_t prot)
{
return radix_enabled();
}
+
#endif
#endif /* _ASM_POWERPC_VMALLOC_H */
diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h
index e714b00fc0ca..49ce331f3ac6 100644
--- a/arch/x86/include/asm/vmalloc.h
+++ b/arch/x86/include/asm/vmalloc.h
@@ -6,24 +6,21 @@
#include <asm/pgtable_areas.h>
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-static inline bool arch_vmap_p4d_supported(pgprot_t prot)
-{
- return false;
-}
+#ifdef CONFIG_X86_64
+#define arch_vmap_pud_supported arch_vmap_pud_supported
static inline bool arch_vmap_pud_supported(pgprot_t prot)
{
-#ifdef CONFIG_X86_64
return boot_cpu_has(X86_FEATURE_GBPAGES);
-#else
- return false;
-#endif
}
+#endif
+#define arch_vmap_pmd_supported arch_vmap_pmd_supported
static inline bool arch_vmap_pmd_supported(pgprot_t prot)
{
return boot_cpu_has(X86_FEATURE_PSE);
}
+
#endif
#endif /* _ASM_X86_VMALLOC_H */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 00bd62bd701e..9f7b8b00101b 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -83,10 +83,26 @@ struct vmap_area {
};
};
-#ifndef CONFIG_HAVE_ARCH_HUGE_VMAP
-static inline bool arch_vmap_p4d_supported(pgprot_t prot) { return false; }
-static inline bool arch_vmap_pud_supported(pgprot_t prot) { return false; }
-static inline bool arch_vmap_pmd_supported(pgprot_t prot) { return false; }
+/* archs that select HAVE_ARCH_HUGE_VMAP should override one or more of these */
+#ifndef arch_vmap_p4d_supported
+static inline bool arch_vmap_p4d_supported(pgprot_t prot)
+{
+ return false;
+}
+#endif
+
+#ifndef arch_vmap_pud_supported
+static inline bool arch_vmap_pud_supported(pgprot_t prot)
+{
+ return false;
+}
+#endif
+
+#ifndef arch_vmap_pmd_supported
+static inline bool arch_vmap_pmd_supported(pgprot_t prot)
+{
+ return false;
+}
#endif
/*
--
2.23.0
^ permalink raw reply related
* [PATCH v11 10/13] mm: Move vmap_range from mm/ioremap.c to mm/vmalloc.c
From: Nicholas Piggin @ 2021-01-26 4:45 UTC (permalink / raw)
To: linux-mm, Andrew Morton
Cc: linux-arch, Ding Tianhong, linux-kernel, Nicholas Piggin,
Christoph Hellwig, Jonathan Cameron, Rick Edgecombe, linuxppc-dev,
Christoph Hellwig
In-Reply-To: <20210126044510.2491820-1-npiggin@gmail.com>
This is a generic kernel virtual memory mapper, not specific to ioremap.
Code is unchanged other than making vmap_range non-static.
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
include/linux/vmalloc.h | 3 +
mm/ioremap.c | 203 ----------------------------------------
mm/vmalloc.c | 202 +++++++++++++++++++++++++++++++++++++++
3 files changed, 205 insertions(+), 203 deletions(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 9f7b8b00101b..99ea72d547dc 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -194,6 +194,9 @@ extern struct vm_struct *remove_vm_area(const void *addr);
extern struct vm_struct *find_vm_area(const void *addr);
#ifdef CONFIG_MMU
+int vmap_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift);
extern int map_kernel_range_noflush(unsigned long start, unsigned long size,
pgprot_t prot, struct page **pages);
int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot,
diff --git a/mm/ioremap.c b/mm/ioremap.c
index 3264d0203785..d1dcc7e744ac 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -28,209 +28,6 @@ early_param("nohugeiomap", set_nohugeiomap);
static const bool iomap_max_page_shift = PAGE_SHIFT;
#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
-static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- pgtbl_mod_mask *mask)
-{
- pte_t *pte;
- u64 pfn;
-
- pfn = phys_addr >> PAGE_SHIFT;
- pte = pte_alloc_kernel_track(pmd, addr, mask);
- if (!pte)
- return -ENOMEM;
- do {
- BUG_ON(!pte_none(*pte));
- set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
- pfn++;
- } while (pte++, addr += PAGE_SIZE, addr != end);
- *mask |= PGTBL_PTE_MODIFIED;
- return 0;
-}
-
-static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift)
-{
- if (max_page_shift < PMD_SHIFT)
- return 0;
-
- if (!arch_vmap_pmd_supported(prot))
- return 0;
-
- if ((end - addr) != PMD_SIZE)
- return 0;
-
- if (!IS_ALIGNED(addr, PMD_SIZE))
- return 0;
-
- if (!IS_ALIGNED(phys_addr, PMD_SIZE))
- return 0;
-
- if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr))
- return 0;
-
- return pmd_set_huge(pmd, phys_addr, prot);
-}
-
-static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift, pgtbl_mod_mask *mask)
-{
- pmd_t *pmd;
- unsigned long next;
-
- pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
- if (!pmd)
- return -ENOMEM;
- do {
- next = pmd_addr_end(addr, end);
-
- if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot,
- max_page_shift)) {
- *mask |= PGTBL_PMD_MODIFIED;
- continue;
- }
-
- if (vmap_pte_range(pmd, addr, next, phys_addr, prot, mask))
- return -ENOMEM;
- } while (pmd++, phys_addr += (next - addr), addr = next, addr != end);
- return 0;
-}
-
-static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift)
-{
- if (max_page_shift < PUD_SHIFT)
- return 0;
-
- if (!arch_vmap_pud_supported(prot))
- return 0;
-
- if ((end - addr) != PUD_SIZE)
- return 0;
-
- if (!IS_ALIGNED(addr, PUD_SIZE))
- return 0;
-
- if (!IS_ALIGNED(phys_addr, PUD_SIZE))
- return 0;
-
- if (pud_present(*pud) && !pud_free_pmd_page(pud, addr))
- return 0;
-
- return pud_set_huge(pud, phys_addr, prot);
-}
-
-static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift, pgtbl_mod_mask *mask)
-{
- pud_t *pud;
- unsigned long next;
-
- pud = pud_alloc_track(&init_mm, p4d, addr, mask);
- if (!pud)
- return -ENOMEM;
- do {
- next = pud_addr_end(addr, end);
-
- if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot,
- max_page_shift)) {
- *mask |= PGTBL_PUD_MODIFIED;
- continue;
- }
-
- if (vmap_pmd_range(pud, addr, next, phys_addr, prot,
- max_page_shift, mask))
- return -ENOMEM;
- } while (pud++, phys_addr += (next - addr), addr = next, addr != end);
- return 0;
-}
-
-static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift)
-{
- if (max_page_shift < P4D_SHIFT)
- return 0;
-
- if (!arch_vmap_p4d_supported(prot))
- return 0;
-
- if ((end - addr) != P4D_SIZE)
- return 0;
-
- if (!IS_ALIGNED(addr, P4D_SIZE))
- return 0;
-
- if (!IS_ALIGNED(phys_addr, P4D_SIZE))
- return 0;
-
- if (p4d_present(*p4d) && !p4d_free_pud_page(p4d, addr))
- return 0;
-
- return p4d_set_huge(p4d, phys_addr, prot);
-}
-
-static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift, pgtbl_mod_mask *mask)
-{
- p4d_t *p4d;
- unsigned long next;
-
- p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
- if (!p4d)
- return -ENOMEM;
- do {
- next = p4d_addr_end(addr, end);
-
- if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot,
- max_page_shift)) {
- *mask |= PGTBL_P4D_MODIFIED;
- continue;
- }
-
- if (vmap_pud_range(p4d, addr, next, phys_addr, prot,
- max_page_shift, mask))
- return -ENOMEM;
- } while (p4d++, phys_addr += (next - addr), addr = next, addr != end);
- return 0;
-}
-
-static int vmap_range(unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift)
-{
- pgd_t *pgd;
- unsigned long start;
- unsigned long next;
- int err;
- pgtbl_mod_mask mask = 0;
-
- might_sleep();
- BUG_ON(addr >= end);
-
- start = addr;
- pgd = pgd_offset_k(addr);
- do {
- next = pgd_addr_end(addr, end);
- err = vmap_p4d_range(pgd, addr, next, phys_addr, prot,
- max_page_shift, &mask);
- if (err)
- break;
- } while (pgd++, phys_addr += (next - addr), addr = next, addr != end);
-
- flush_cache_vmap(start, end);
-
- if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
- arch_sync_kernel_mappings(start, end);
-
- return err;
-}
-
int ioremap_page_range(unsigned long addr,
unsigned long end, phys_addr_t phys_addr, pgprot_t prot)
{
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 7f2f36116980..f043386bb51d 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -68,6 +68,208 @@ static void free_work(struct work_struct *w)
}
/*** Page table manipulation functions ***/
+static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ pgtbl_mod_mask *mask)
+{
+ pte_t *pte;
+ u64 pfn;
+
+ pfn = phys_addr >> PAGE_SHIFT;
+ pte = pte_alloc_kernel_track(pmd, addr, mask);
+ if (!pte)
+ return -ENOMEM;
+ do {
+ BUG_ON(!pte_none(*pte));
+ set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot));
+ pfn++;
+ } while (pte++, addr += PAGE_SIZE, addr != end);
+ *mask |= PGTBL_PTE_MODIFIED;
+ return 0;
+}
+
+static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
+{
+ if (max_page_shift < PMD_SHIFT)
+ return 0;
+
+ if (!arch_vmap_pmd_supported(prot))
+ return 0;
+
+ if ((end - addr) != PMD_SIZE)
+ return 0;
+
+ if (!IS_ALIGNED(addr, PMD_SIZE))
+ return 0;
+
+ if (!IS_ALIGNED(phys_addr, PMD_SIZE))
+ return 0;
+
+ if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr))
+ return 0;
+
+ return pmd_set_huge(pmd, phys_addr, prot);
+}
+
+static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift, pgtbl_mod_mask *mask)
+{
+ pmd_t *pmd;
+ unsigned long next;
+
+ pmd = pmd_alloc_track(&init_mm, pud, addr, mask);
+ if (!pmd)
+ return -ENOMEM;
+ do {
+ next = pmd_addr_end(addr, end);
+
+ if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot,
+ max_page_shift)) {
+ *mask |= PGTBL_PMD_MODIFIED;
+ continue;
+ }
+
+ if (vmap_pte_range(pmd, addr, next, phys_addr, prot, mask))
+ return -ENOMEM;
+ } while (pmd++, phys_addr += (next - addr), addr = next, addr != end);
+ return 0;
+}
+
+static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
+{
+ if (max_page_shift < PUD_SHIFT)
+ return 0;
+
+ if (!arch_vmap_pud_supported(prot))
+ return 0;
+
+ if ((end - addr) != PUD_SIZE)
+ return 0;
+
+ if (!IS_ALIGNED(addr, PUD_SIZE))
+ return 0;
+
+ if (!IS_ALIGNED(phys_addr, PUD_SIZE))
+ return 0;
+
+ if (pud_present(*pud) && !pud_free_pmd_page(pud, addr))
+ return 0;
+
+ return pud_set_huge(pud, phys_addr, prot);
+}
+
+static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift, pgtbl_mod_mask *mask)
+{
+ pud_t *pud;
+ unsigned long next;
+
+ pud = pud_alloc_track(&init_mm, p4d, addr, mask);
+ if (!pud)
+ return -ENOMEM;
+ do {
+ next = pud_addr_end(addr, end);
+
+ if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot,
+ max_page_shift)) {
+ *mask |= PGTBL_PUD_MODIFIED;
+ continue;
+ }
+
+ if (vmap_pmd_range(pud, addr, next, phys_addr, prot,
+ max_page_shift, mask))
+ return -ENOMEM;
+ } while (pud++, phys_addr += (next - addr), addr = next, addr != end);
+ return 0;
+}
+
+static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
+{
+ if (max_page_shift < P4D_SHIFT)
+ return 0;
+
+ if (!arch_vmap_p4d_supported(prot))
+ return 0;
+
+ if ((end - addr) != P4D_SIZE)
+ return 0;
+
+ if (!IS_ALIGNED(addr, P4D_SIZE))
+ return 0;
+
+ if (!IS_ALIGNED(phys_addr, P4D_SIZE))
+ return 0;
+
+ if (p4d_present(*p4d) && !p4d_free_pud_page(p4d, addr))
+ return 0;
+
+ return p4d_set_huge(p4d, phys_addr, prot);
+}
+
+static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift, pgtbl_mod_mask *mask)
+{
+ p4d_t *p4d;
+ unsigned long next;
+
+ p4d = p4d_alloc_track(&init_mm, pgd, addr, mask);
+ if (!p4d)
+ return -ENOMEM;
+ do {
+ next = p4d_addr_end(addr, end);
+
+ if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot,
+ max_page_shift)) {
+ *mask |= PGTBL_P4D_MODIFIED;
+ continue;
+ }
+
+ if (vmap_pud_range(p4d, addr, next, phys_addr, prot,
+ max_page_shift, mask))
+ return -ENOMEM;
+ } while (p4d++, phys_addr += (next - addr), addr = next, addr != end);
+ return 0;
+}
+
+int vmap_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot,
+ unsigned int max_page_shift)
+{
+ pgd_t *pgd;
+ unsigned long start;
+ unsigned long next;
+ int err;
+ pgtbl_mod_mask mask = 0;
+
+ might_sleep();
+ BUG_ON(addr >= end);
+
+ start = addr;
+ pgd = pgd_offset_k(addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ err = vmap_p4d_range(pgd, addr, next, phys_addr, prot,
+ max_page_shift, &mask);
+ if (err)
+ break;
+ } while (pgd++, phys_addr += (next - addr), addr = next, addr != end);
+
+ flush_cache_vmap(start, end);
+
+ if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
+ arch_sync_kernel_mappings(start, end);
+
+ return err;
+}
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pgtbl_mod_mask *mask)
--
2.23.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox