* [PATCH 1/4] mm: Add optional close() to struct vm_special_mapping
@ 2024-08-07 12:41 Michael Ellerman
2024-08-07 12:41 ` [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap() Michael Ellerman
` (4 more replies)
0 siblings, 5 replies; 25+ messages in thread
From: Michael Ellerman @ 2024-08-07 12:41 UTC (permalink / raw)
To: linux-mm
Cc: linuxppc-dev, npiggin, linux-kernel, christophe.leroy, jeffxu,
jeffxu, oliver.sang, Liam.Howlett, akpm, torvalds, pedro.falcato
Add an optional close() callback to struct vm_special_mapping. It will
be used, by powerpc at least, to handle unmapping of the VDSO.
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
include/linux/mm_types.h | 2 ++
mm/mmap.c | 3 +++
2 files changed, 5 insertions(+)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 485424979254..ef32d87a3adc 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1313,6 +1313,8 @@ struct vm_special_mapping {
int (*mremap)(const struct vm_special_mapping *sm,
struct vm_area_struct *new_vma);
+ void (*close)(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma);
};
enum tlb_flush_reason {
diff --git a/mm/mmap.c b/mm/mmap.c
index d0dfc85b209b..24bd6aa9155c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3624,6 +3624,9 @@ static vm_fault_t special_mapping_fault(struct vm_fault *vmf);
*/
static void special_mapping_close(struct vm_area_struct *vma)
{
+ const struct vm_special_mapping *sm = vma->vm_private_data;
+ if (sm->close)
+ sm->close(sm, vma);
}
static const char *special_mapping_name(struct vm_area_struct *vma)
--
2.45.2
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-07 12:41 [PATCH 1/4] mm: Add optional close() to struct vm_special_mapping Michael Ellerman
@ 2024-08-07 12:41 ` Michael Ellerman
2024-08-07 15:33 ` David Hildenbrand
2024-08-07 15:43 ` Jeff Xu
2024-08-07 12:41 ` [PATCH 3/4] mm: Remove arch_unmap() Michael Ellerman
` (3 subsequent siblings)
4 siblings, 2 replies; 25+ messages in thread
From: Michael Ellerman @ 2024-08-07 12:41 UTC (permalink / raw)
To: linux-mm
Cc: linuxppc-dev, npiggin, linux-kernel, christophe.leroy, jeffxu,
jeffxu, oliver.sang, Liam.Howlett, akpm, torvalds, pedro.falcato
Add a close() callback to the VDSO special mapping to handle unmapping
of the VDSO. That will make it possible to remove the arch_unmap() hook
entirely in a subsequent patch.
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
arch/powerpc/include/asm/mmu_context.h | 4 ----
arch/powerpc/kernel/vdso.c | 17 +++++++++++++++++
2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 37bffa0f7918..9b8c1555744e 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -263,10 +263,6 @@ extern void arch_exit_mmap(struct mm_struct *mm);
static inline void arch_unmap(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
- unsigned long vdso_base = (unsigned long)mm->context.vdso;
-
- if (start <= vdso_base && vdso_base < end)
- mm->context.vdso = NULL;
}
#ifdef CONFIG_PPC_MEM_KEYS
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 7a2ff9010f17..220a76cae7c1 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -81,6 +81,21 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str
return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start);
}
+static void vdso_close(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
+{
+ struct mm_struct *mm = vma->vm_mm;
+
+ /*
+ * close() is called for munmap() but also for mremap(). In the mremap()
+ * case the vdso pointer has already been updated by the mremap() hook
+ * above, so it must not be set to NULL here.
+ */
+ if (vma->vm_start != (unsigned long)mm->context.vdso)
+ return;
+
+ mm->context.vdso = NULL;
+}
+
static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
struct vm_area_struct *vma, struct vm_fault *vmf);
@@ -92,11 +107,13 @@ static struct vm_special_mapping vvar_spec __ro_after_init = {
static struct vm_special_mapping vdso32_spec __ro_after_init = {
.name = "[vdso]",
.mremap = vdso32_mremap,
+ .close = vdso_close,
};
static struct vm_special_mapping vdso64_spec __ro_after_init = {
.name = "[vdso]",
.mremap = vdso64_mremap,
+ .close = vdso_close,
};
#ifdef CONFIG_TIME_NS
--
2.45.2
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 3/4] mm: Remove arch_unmap()
2024-08-07 12:41 [PATCH 1/4] mm: Add optional close() to struct vm_special_mapping Michael Ellerman
2024-08-07 12:41 ` [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap() Michael Ellerman
@ 2024-08-07 12:41 ` Michael Ellerman
2024-08-07 15:33 ` David Hildenbrand
2024-08-07 17:42 ` Thomas Gleixner
2024-08-07 12:41 ` [PATCH 4/4] powerpc/vdso: Refactor error handling Michael Ellerman
` (2 subsequent siblings)
4 siblings, 2 replies; 25+ messages in thread
From: Michael Ellerman @ 2024-08-07 12:41 UTC (permalink / raw)
To: linux-mm
Cc: linuxppc-dev, npiggin, linux-kernel, christophe.leroy, jeffxu,
jeffxu, oliver.sang, Liam.Howlett, akpm, torvalds, pedro.falcato
Now that powerpc no longer uses arch_unmap() to handle VDSO unmapping,
there are no meaningful implementions left. Drop support for it
entirely, and update comments which refer to it.
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
arch/powerpc/include/asm/mmu_context.h | 5 -----
arch/x86/include/asm/mmu_context.h | 5 -----
include/asm-generic/mm_hooks.h | 11 +++--------
mm/mmap.c | 12 +++---------
4 files changed, 6 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 9b8c1555744e..a334a1368848 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -260,11 +260,6 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
extern void arch_exit_mmap(struct mm_struct *mm);
-static inline void arch_unmap(struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
-}
-
#ifdef CONFIG_PPC_MEM_KEYS
bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
bool execute, bool foreign);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8dac45a2c7fc..80f2a3187aa6 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -232,11 +232,6 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
}
#endif
-static inline void arch_unmap(struct mm_struct *mm, unsigned long start,
- unsigned long end)
-{
-}
-
/*
* We only want to enforce protection keys on the current process
* because we effectively have no access to PKRU for other
diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h
index 4dbb177d1150..6eea3b3c1e65 100644
--- a/include/asm-generic/mm_hooks.h
+++ b/include/asm-generic/mm_hooks.h
@@ -1,8 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Define generic no-op hooks for arch_dup_mmap, arch_exit_mmap
- * and arch_unmap to be included in asm-FOO/mmu_context.h for any
- * arch FOO which doesn't need to hook these.
+ * Define generic no-op hooks for arch_dup_mmap and arch_exit_mmap
+ * to be included in asm-FOO/mmu_context.h for any arch FOO which
+ * doesn't need to hook these.
*/
#ifndef _ASM_GENERIC_MM_HOOKS_H
#define _ASM_GENERIC_MM_HOOKS_H
@@ -17,11 +17,6 @@ static inline void arch_exit_mmap(struct mm_struct *mm)
{
}
-static inline void arch_unmap(struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
-}
-
static inline bool arch_vma_access_permitted(struct vm_area_struct *vma,
bool write, bool execute, bool foreign)
{
diff --git a/mm/mmap.c b/mm/mmap.c
index 24bd6aa9155c..adaaf1ef197a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2789,7 +2789,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
*
* This function takes a @mas that is either pointing to the previous VMA or set
* to MA_START and sets it up to remove the mapping(s). The @len will be
- * aligned and any arch_unmap work will be preformed.
+ * aligned.
*
* Return: 0 on success and drops the lock if so directed, error and leaves the
* lock held otherwise.
@@ -2809,16 +2809,12 @@ int do_vmi_munmap(struct vma_iterator *vmi, struct mm_struct *mm,
return -EINVAL;
/*
- * Check if memory is sealed before arch_unmap.
- * Prevent unmapping a sealed VMA.
+ * Check if memory is sealed, prevent unmapping a sealed VMA.
* can_modify_mm assumes we have acquired the lock on MM.
*/
if (unlikely(!can_modify_mm(mm, start, end)))
return -EPERM;
- /* arch_unmap() might do unmaps itself. */
- arch_unmap(mm, start, end);
-
/* Find the first overlapping VMA */
vma = vma_find(vmi, end);
if (!vma) {
@@ -3232,14 +3228,12 @@ int do_vma_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
struct mm_struct *mm = vma->vm_mm;
/*
- * Check if memory is sealed before arch_unmap.
- * Prevent unmapping a sealed VMA.
+ * Check if memory is sealed, prevent unmapping a sealed VMA.
* can_modify_mm assumes we have acquired the lock on MM.
*/
if (unlikely(!can_modify_mm(mm, start, end)))
return -EPERM;
- arch_unmap(mm, start, end);
return do_vmi_align_munmap(vmi, vma, mm, start, end, uf, unlock);
}
--
2.45.2
^ permalink raw reply related [flat|nested] 25+ messages in thread
* [PATCH 4/4] powerpc/vdso: Refactor error handling
2024-08-07 12:41 [PATCH 1/4] mm: Add optional close() to struct vm_special_mapping Michael Ellerman
2024-08-07 12:41 ` [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap() Michael Ellerman
2024-08-07 12:41 ` [PATCH 3/4] mm: Remove arch_unmap() Michael Ellerman
@ 2024-08-07 12:41 ` Michael Ellerman
2024-08-07 15:32 ` [PATCH 1/4] mm: Add optional close() to struct vm_special_mapping David Hildenbrand
2024-08-07 15:52 ` Liam R. Howlett
4 siblings, 0 replies; 25+ messages in thread
From: Michael Ellerman @ 2024-08-07 12:41 UTC (permalink / raw)
To: linux-mm
Cc: linuxppc-dev, npiggin, linux-kernel, christophe.leroy, jeffxu,
jeffxu, oliver.sang, Liam.Howlett, akpm, torvalds, pedro.falcato
Linus noticed that the error handling in __arch_setup_additional_pages()
fails to clear the mm VDSO pointer if _install_special_mapping()
fails. In practice there should be no actual bug, because if there's an
error the VDSO pointer is cleared later in arch_setup_additional_pages().
However it's no longer necessary to set the pointer before installing
the mapping. Commit c1bab64360e6 ("powerpc/vdso: Move to
_install_special_mapping() and remove arch_vma_name()") reworked the
code so that the VMA name comes from the vm_special_mapping.name, rather
than relying on arch_vma_name().
So rework the code to only set the VDSO pointer once the mappings have
been installed correctly, and remove the stale comment.
Depends-on: c1bab64360e6 ("powerpc/vdso: Move to _install_special_mapping() and remove arch_vma_name()")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
arch/powerpc/kernel/vdso.c | 18 +++++++-----------
1 file changed, 7 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 220a76cae7c1..ee4b9d676cff 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -214,13 +214,6 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int
/* Add required alignment. */
vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT);
- /*
- * Put vDSO base into mm struct. We need to do this before calling
- * install_special_mapping or the perf counter mmap tracking code
- * will fail to recognise it as a vDSO.
- */
- mm->context.vdso = (void __user *)vdso_base + vvar_size;
-
vma = _install_special_mapping(mm, vdso_base, vvar_size,
VM_READ | VM_MAYREAD | VM_IO |
VM_DONTDUMP | VM_PFNMAP, &vvar_spec);
@@ -240,10 +233,15 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int
vma = _install_special_mapping(mm, vdso_base + vvar_size, vdso_size,
VM_READ | VM_EXEC | VM_MAYREAD |
VM_MAYWRITE | VM_MAYEXEC, vdso_spec);
- if (IS_ERR(vma))
+ if (IS_ERR(vma)) {
do_munmap(mm, vdso_base, vvar_size, NULL);
+ return PTR_ERR(vma);
+ }
- return PTR_ERR_OR_ZERO(vma);
+ // Now that the mappings are in place, set the mm VDSO pointer
+ mm->context.vdso = (void __user *)vdso_base + vvar_size;
+
+ return 0;
}
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
@@ -257,8 +255,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
return -EINTR;
rc = __arch_setup_additional_pages(bprm, uses_interp);
- if (rc)
- mm->context.vdso = NULL;
mmap_write_unlock(mm);
return rc;
--
2.45.2
^ permalink raw reply related [flat|nested] 25+ messages in thread
* Re: [PATCH 1/4] mm: Add optional close() to struct vm_special_mapping
2024-08-07 12:41 [PATCH 1/4] mm: Add optional close() to struct vm_special_mapping Michael Ellerman
` (2 preceding siblings ...)
2024-08-07 12:41 ` [PATCH 4/4] powerpc/vdso: Refactor error handling Michael Ellerman
@ 2024-08-07 15:32 ` David Hildenbrand
2024-08-12 8:23 ` Michael Ellerman
2024-08-07 15:52 ` Liam R. Howlett
4 siblings, 1 reply; 25+ messages in thread
From: David Hildenbrand @ 2024-08-07 15:32 UTC (permalink / raw)
To: Michael Ellerman, linux-mm
Cc: linuxppc-dev, npiggin, linux-kernel, christophe.leroy, jeffxu,
jeffxu, oliver.sang, Liam.Howlett, akpm, torvalds, pedro.falcato
On 07.08.24 14:41, Michael Ellerman wrote:
> Add an optional close() callback to struct vm_special_mapping. It will
> be used, by powerpc at least, to handle unmapping of the VDSO.
>
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
> include/linux/mm_types.h | 2 ++
> mm/mmap.c | 3 +++
> 2 files changed, 5 insertions(+)
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 485424979254..ef32d87a3adc 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -1313,6 +1313,8 @@ struct vm_special_mapping {
>
> int (*mremap)(const struct vm_special_mapping *sm,
> struct vm_area_struct *new_vma);
> + void (*close)(const struct vm_special_mapping *sm,
> + struct vm_area_struct *vma);
> };
>
> enum tlb_flush_reason {
> diff --git a/mm/mmap.c b/mm/mmap.c
> index d0dfc85b209b..24bd6aa9155c 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -3624,6 +3624,9 @@ static vm_fault_t special_mapping_fault(struct vm_fault *vmf);
> */
> static void special_mapping_close(struct vm_area_struct *vma)
> {
> + const struct vm_special_mapping *sm = vma->vm_private_data;
I'm old-fashioned, I enjoy an empty line here ;)
> + if (sm->close)
> + sm->close(sm, vma);
Reviewed-by: David Hildenbrand <david@redhat.com>
--
Cheers,
David / dhildenb
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-07 12:41 ` [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap() Michael Ellerman
@ 2024-08-07 15:33 ` David Hildenbrand
2024-08-07 15:43 ` Jeff Xu
1 sibling, 0 replies; 25+ messages in thread
From: David Hildenbrand @ 2024-08-07 15:33 UTC (permalink / raw)
To: Michael Ellerman, linux-mm
Cc: linuxppc-dev, npiggin, linux-kernel, christophe.leroy, jeffxu,
jeffxu, oliver.sang, Liam.Howlett, akpm, torvalds, pedro.falcato
On 07.08.24 14:41, Michael Ellerman wrote:
> Add a close() callback to the VDSO special mapping to handle unmapping
> of the VDSO. That will make it possible to remove the arch_unmap() hook
> entirely in a subsequent patch.
>
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
Reviewed-by: David Hildenbrand <david@redhat.com>
--
Cheers,
David / dhildenb
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 3/4] mm: Remove arch_unmap()
2024-08-07 12:41 ` [PATCH 3/4] mm: Remove arch_unmap() Michael Ellerman
@ 2024-08-07 15:33 ` David Hildenbrand
2024-08-07 17:42 ` Thomas Gleixner
1 sibling, 0 replies; 25+ messages in thread
From: David Hildenbrand @ 2024-08-07 15:33 UTC (permalink / raw)
To: Michael Ellerman, linux-mm
Cc: linuxppc-dev, npiggin, linux-kernel, christophe.leroy, jeffxu,
jeffxu, oliver.sang, Liam.Howlett, akpm, torvalds, pedro.falcato
On 07.08.24 14:41, Michael Ellerman wrote:
> Now that powerpc no longer uses arch_unmap() to handle VDSO unmapping,
> there are no meaningful implementions left. Drop support for it
> entirely, and update comments which refer to it.
>
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
Acked-by: David Hildenbrand <david@redhat.com>
--
Cheers,
David / dhildenb
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-07 12:41 ` [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap() Michael Ellerman
2024-08-07 15:33 ` David Hildenbrand
@ 2024-08-07 15:43 ` Jeff Xu
2024-08-07 15:56 ` Liam R. Howlett
1 sibling, 1 reply; 25+ messages in thread
From: Jeff Xu @ 2024-08-07 15:43 UTC (permalink / raw)
To: Michael Ellerman
Cc: Kees Cook, jeffxu, linuxppc-dev, npiggin, linux-kernel,
christophe.leroy, linux-mm, oliver.sang, Liam.Howlett, akpm,
torvalds, pedro.falcato
On Wed, Aug 7, 2024 at 5:41 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
>
> Add a close() callback to the VDSO special mapping to handle unmapping
> of the VDSO. That will make it possible to remove the arch_unmap() hook
> entirely in a subsequent patch.
>
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
> arch/powerpc/include/asm/mmu_context.h | 4 ----
> arch/powerpc/kernel/vdso.c | 17 +++++++++++++++++
> 2 files changed, 17 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> index 37bffa0f7918..9b8c1555744e 100644
> --- a/arch/powerpc/include/asm/mmu_context.h
> +++ b/arch/powerpc/include/asm/mmu_context.h
> @@ -263,10 +263,6 @@ extern void arch_exit_mmap(struct mm_struct *mm);
> static inline void arch_unmap(struct mm_struct *mm,
> unsigned long start, unsigned long end)
> {
> - unsigned long vdso_base = (unsigned long)mm->context.vdso;
> -
> - if (start <= vdso_base && vdso_base < end)
> - mm->context.vdso = NULL;
> }
>
> #ifdef CONFIG_PPC_MEM_KEYS
> diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
> index 7a2ff9010f17..220a76cae7c1 100644
> --- a/arch/powerpc/kernel/vdso.c
> +++ b/arch/powerpc/kernel/vdso.c
> @@ -81,6 +81,21 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str
> return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start);
> }
>
> +static void vdso_close(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
> +{
> + struct mm_struct *mm = vma->vm_mm;
> +
> + /*
> + * close() is called for munmap() but also for mremap(). In the mremap()
> + * case the vdso pointer has already been updated by the mremap() hook
> + * above, so it must not be set to NULL here.
> + */
> + if (vma->vm_start != (unsigned long)mm->context.vdso)
> + return;
> +
> + mm->context.vdso = NULL;
> +}
> +
> static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
> struct vm_area_struct *vma, struct vm_fault *vmf);
>
> @@ -92,11 +107,13 @@ static struct vm_special_mapping vvar_spec __ro_after_init = {
> static struct vm_special_mapping vdso32_spec __ro_after_init = {
> .name = "[vdso]",
> .mremap = vdso32_mremap,
> + .close = vdso_close,
IIUC, only CHECKPOINT_RESTORE requires this, and
CHECKPOINT_RESTORE is in init/Kconfig, with default N
Can we add #ifdef CONFIG_CHECKPOINT_RESTORE here ?
Thanks
Best regards,
-Jeff
> };
>
> static struct vm_special_mapping vdso64_spec __ro_after_init = {
> .name = "[vdso]",
> .mremap = vdso64_mremap,
> + .close = vdso_close,
> };
>
> #ifdef CONFIG_TIME_NS
> --
> 2.45.2
>
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/4] mm: Add optional close() to struct vm_special_mapping
2024-08-07 12:41 [PATCH 1/4] mm: Add optional close() to struct vm_special_mapping Michael Ellerman
` (3 preceding siblings ...)
2024-08-07 15:32 ` [PATCH 1/4] mm: Add optional close() to struct vm_special_mapping David Hildenbrand
@ 2024-08-07 15:52 ` Liam R. Howlett
2024-08-12 8:22 ` Michael Ellerman
4 siblings, 1 reply; 25+ messages in thread
From: Liam R. Howlett @ 2024-08-07 15:52 UTC (permalink / raw)
To: Michael Ellerman
Cc: jeffxu, linuxppc-dev, linux-kernel, christophe.leroy, jeffxu,
linux-mm, oliver.sang, npiggin, akpm, torvalds, pedro.falcato
* Michael Ellerman <mpe@ellerman.id.au> [240807 08:41]:
> Add an optional close() callback to struct vm_special_mapping. It will
> be used, by powerpc at least, to handle unmapping of the VDSO.
>
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> ---
> include/linux/mm_types.h | 2 ++
> mm/mmap.c | 3 +++
> 2 files changed, 5 insertions(+)
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 485424979254..ef32d87a3adc 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -1313,6 +1313,8 @@ struct vm_special_mapping {
>
> int (*mremap)(const struct vm_special_mapping *sm,
> struct vm_area_struct *new_vma);
nit: missing new line?
> + void (*close)(const struct vm_special_mapping *sm,
> + struct vm_area_struct *vma);
> };
>
> enum tlb_flush_reason {
> diff --git a/mm/mmap.c b/mm/mmap.c
> index d0dfc85b209b..24bd6aa9155c 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -3624,6 +3624,9 @@ static vm_fault_t special_mapping_fault(struct vm_fault *vmf);
> */
The above comment should probably be expanded to explain what this is
about, or removed.
> static void special_mapping_close(struct vm_area_struct *vma)
> {
> + const struct vm_special_mapping *sm = vma->vm_private_data;
> + if (sm->close)
> + sm->close(sm, vma);
Right now we have the same sort of situation for mremap calls on
special: we have a call to the specific vma mremap() function.
However, every single one of the vdso mremap() calls that I see:
s390, riscv, powerppc, parisc, loongarch, arm64, arm
seems to do the same thing, except ppc which verifies the size is okay
before doing the same thing.
So, are we missing an opportunity to avoid every arch having the same
implementation here (that will evolve into random bugs existing in some
archs for years before someone realises the cloned code wasn't fixed)?
Do we already have a fix in ppc for the size checking that doesn't exist
in the other archs in the case of mremap?
That is, if it's a special mapping that has the same start as the vdso,
can't all platforms do the same thing and set it to NULL and avoid every
platform cloning the same function?
Since this deals with mm_context_t, which is per-platform data, I think
the easiest way to make this more generic is to make a
generic_vdso_close() and set it in specific vmas on a per-platform
basis. At least then we can use the same close function across multiple
platforms and make this less error prone to cloned code not receiving
fixes.
...
Thanks,
Liam
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-07 15:43 ` Jeff Xu
@ 2024-08-07 15:56 ` Liam R. Howlett
2024-08-07 16:36 ` Jeff Xu
0 siblings, 1 reply; 25+ messages in thread
From: Liam R. Howlett @ 2024-08-07 15:56 UTC (permalink / raw)
To: Jeff Xu
Cc: Kees Cook, jeffxu, linuxppc-dev, christophe.leroy, linux-kernel,
linux-mm, oliver.sang, npiggin, akpm, torvalds, pedro.falcato
* Jeff Xu <jeffxu@google.com> [240807 11:44]:
> On Wed, Aug 7, 2024 at 5:41 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> >
> > Add a close() callback to the VDSO special mapping to handle unmapping
> > of the VDSO. That will make it possible to remove the arch_unmap() hook
> > entirely in a subsequent patch.
> >
> > Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> > Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> > ---
> > arch/powerpc/include/asm/mmu_context.h | 4 ----
> > arch/powerpc/kernel/vdso.c | 17 +++++++++++++++++
> > 2 files changed, 17 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> > index 37bffa0f7918..9b8c1555744e 100644
> > --- a/arch/powerpc/include/asm/mmu_context.h
> > +++ b/arch/powerpc/include/asm/mmu_context.h
> > @@ -263,10 +263,6 @@ extern void arch_exit_mmap(struct mm_struct *mm);
> > static inline void arch_unmap(struct mm_struct *mm,
> > unsigned long start, unsigned long end)
> > {
> > - unsigned long vdso_base = (unsigned long)mm->context.vdso;
> > -
> > - if (start <= vdso_base && vdso_base < end)
> > - mm->context.vdso = NULL;
> > }
> >
> > #ifdef CONFIG_PPC_MEM_KEYS
> > diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
> > index 7a2ff9010f17..220a76cae7c1 100644
> > --- a/arch/powerpc/kernel/vdso.c
> > +++ b/arch/powerpc/kernel/vdso.c
> > @@ -81,6 +81,21 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str
> > return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start);
> > }
> >
> > +static void vdso_close(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
> > +{
> > + struct mm_struct *mm = vma->vm_mm;
> > +
> > + /*
> > + * close() is called for munmap() but also for mremap(). In the mremap()
> > + * case the vdso pointer has already been updated by the mremap() hook
> > + * above, so it must not be set to NULL here.
> > + */
> > + if (vma->vm_start != (unsigned long)mm->context.vdso)
> > + return;
> > +
> > + mm->context.vdso = NULL;
> > +}
> > +
> > static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
> > struct vm_area_struct *vma, struct vm_fault *vmf);
> >
> > @@ -92,11 +107,13 @@ static struct vm_special_mapping vvar_spec __ro_after_init = {
> > static struct vm_special_mapping vdso32_spec __ro_after_init = {
> > .name = "[vdso]",
> > .mremap = vdso32_mremap,
> > + .close = vdso_close,
> IIUC, only CHECKPOINT_RESTORE requires this, and
> CHECKPOINT_RESTORE is in init/Kconfig, with default N
>
> Can we add #ifdef CONFIG_CHECKPOINT_RESTORE here ?
>
No, these can be unmapped and it needs to be cleaned up. Valgrind is
one application that is known to unmap the vdso and runs into issues on
platforms that do not handle the removal correctly.
Thanks,
Liam
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-07 15:56 ` Liam R. Howlett
@ 2024-08-07 16:36 ` Jeff Xu
2024-08-07 17:11 ` Liam R. Howlett
0 siblings, 1 reply; 25+ messages in thread
From: Jeff Xu @ 2024-08-07 16:36 UTC (permalink / raw)
To: Liam R. Howlett, Jeff Xu, Michael Ellerman, linux-mm,
linuxppc-dev, torvalds, akpm, christophe.leroy, jeffxu,
linux-kernel, npiggin, oliver.sang, pedro.falcato, Kees Cook
On Wed, Aug 7, 2024 at 8:56 AM Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
>
> * Jeff Xu <jeffxu@google.com> [240807 11:44]:
> > On Wed, Aug 7, 2024 at 5:41 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> > >
> > > Add a close() callback to the VDSO special mapping to handle unmapping
> > > of the VDSO. That will make it possible to remove the arch_unmap() hook
> > > entirely in a subsequent patch.
> > >
> > > Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> > > Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> > > ---
> > > arch/powerpc/include/asm/mmu_context.h | 4 ----
> > > arch/powerpc/kernel/vdso.c | 17 +++++++++++++++++
> > > 2 files changed, 17 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> > > index 37bffa0f7918..9b8c1555744e 100644
> > > --- a/arch/powerpc/include/asm/mmu_context.h
> > > +++ b/arch/powerpc/include/asm/mmu_context.h
> > > @@ -263,10 +263,6 @@ extern void arch_exit_mmap(struct mm_struct *mm);
> > > static inline void arch_unmap(struct mm_struct *mm,
> > > unsigned long start, unsigned long end)
> > > {
> > > - unsigned long vdso_base = (unsigned long)mm->context.vdso;
> > > -
> > > - if (start <= vdso_base && vdso_base < end)
> > > - mm->context.vdso = NULL;
> > > }
> > >
> > > #ifdef CONFIG_PPC_MEM_KEYS
> > > diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
> > > index 7a2ff9010f17..220a76cae7c1 100644
> > > --- a/arch/powerpc/kernel/vdso.c
> > > +++ b/arch/powerpc/kernel/vdso.c
> > > @@ -81,6 +81,21 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str
> > > return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start);
> > > }
> > >
> > > +static void vdso_close(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
> > > +{
> > > + struct mm_struct *mm = vma->vm_mm;
> > > +
> > > + /*
> > > + * close() is called for munmap() but also for mremap(). In the mremap()
> > > + * case the vdso pointer has already been updated by the mremap() hook
> > > + * above, so it must not be set to NULL here.
> > > + */
> > > + if (vma->vm_start != (unsigned long)mm->context.vdso)
> > > + return;
> > > +
> > > + mm->context.vdso = NULL;
> > > +}
> > > +
> > > static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
> > > struct vm_area_struct *vma, struct vm_fault *vmf);
> > >
> > > @@ -92,11 +107,13 @@ static struct vm_special_mapping vvar_spec __ro_after_init = {
> > > static struct vm_special_mapping vdso32_spec __ro_after_init = {
> > > .name = "[vdso]",
> > > .mremap = vdso32_mremap,
> > > + .close = vdso_close,
> > IIUC, only CHECKPOINT_RESTORE requires this, and
> > CHECKPOINT_RESTORE is in init/Kconfig, with default N
> >
> > Can we add #ifdef CONFIG_CHECKPOINT_RESTORE here ?
> >
>
> No, these can be unmapped and it needs to be cleaned up. Valgrind is
> one application that is known to unmap the vdso and runs into issues on
> platforms that do not handle the removal correctly.
>
Maybe Valgrind needs that exactly for checkpoint restore ? [1]
"CRIU fails to restore applications that have unmapped the vDSO
segment. One such
application is Valgrind."
Usually when the kernel accepts new functionality, the patch needs to
state the user case.
The only user case I found for .mremap and .close so far is the CRIU case.
[1] https://github.com/checkpoint-restore/criu/issues/488
Thanks
-Jeff
> Thanks,
> Liam
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-07 16:36 ` Jeff Xu
@ 2024-08-07 17:11 ` Liam R. Howlett
2024-08-07 20:11 ` Jeff Xu
0 siblings, 1 reply; 25+ messages in thread
From: Liam R. Howlett @ 2024-08-07 17:11 UTC (permalink / raw)
To: Jeff Xu
Cc: Kees Cook, jeffxu, linuxppc-dev, christophe.leroy, linux-kernel,
linux-mm, oliver.sang, npiggin, akpm, torvalds, pedro.falcato
* Jeff Xu <jeffxu@google.com> [240807 12:37]:
> On Wed, Aug 7, 2024 at 8:56 AM Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> >
> > * Jeff Xu <jeffxu@google.com> [240807 11:44]:
> > > On Wed, Aug 7, 2024 at 5:41 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> > > >
> > > > Add a close() callback to the VDSO special mapping to handle unmapping
> > > > of the VDSO. That will make it possible to remove the arch_unmap() hook
> > > > entirely in a subsequent patch.
> > > >
> > > > Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> > > > Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> > > > ---
> > > > arch/powerpc/include/asm/mmu_context.h | 4 ----
> > > > arch/powerpc/kernel/vdso.c | 17 +++++++++++++++++
> > > > 2 files changed, 17 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> > > > index 37bffa0f7918..9b8c1555744e 100644
> > > > --- a/arch/powerpc/include/asm/mmu_context.h
> > > > +++ b/arch/powerpc/include/asm/mmu_context.h
> > > > @@ -263,10 +263,6 @@ extern void arch_exit_mmap(struct mm_struct *mm);
> > > > static inline void arch_unmap(struct mm_struct *mm,
> > > > unsigned long start, unsigned long end)
> > > > {
> > > > - unsigned long vdso_base = (unsigned long)mm->context.vdso;
> > > > -
> > > > - if (start <= vdso_base && vdso_base < end)
> > > > - mm->context.vdso = NULL;
> > > > }
> > > >
> > > > #ifdef CONFIG_PPC_MEM_KEYS
> > > > diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
> > > > index 7a2ff9010f17..220a76cae7c1 100644
> > > > --- a/arch/powerpc/kernel/vdso.c
> > > > +++ b/arch/powerpc/kernel/vdso.c
> > > > @@ -81,6 +81,21 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str
> > > > return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start);
> > > > }
> > > >
> > > > +static void vdso_close(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
> > > > +{
> > > > + struct mm_struct *mm = vma->vm_mm;
> > > > +
> > > > + /*
> > > > + * close() is called for munmap() but also for mremap(). In the mremap()
> > > > + * case the vdso pointer has already been updated by the mremap() hook
> > > > + * above, so it must not be set to NULL here.
> > > > + */
> > > > + if (vma->vm_start != (unsigned long)mm->context.vdso)
> > > > + return;
> > > > +
> > > > + mm->context.vdso = NULL;
> > > > +}
> > > > +
> > > > static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
> > > > struct vm_area_struct *vma, struct vm_fault *vmf);
> > > >
> > > > @@ -92,11 +107,13 @@ static struct vm_special_mapping vvar_spec __ro_after_init = {
> > > > static struct vm_special_mapping vdso32_spec __ro_after_init = {
> > > > .name = "[vdso]",
> > > > .mremap = vdso32_mremap,
> > > > + .close = vdso_close,
> > > IIUC, only CHECKPOINT_RESTORE requires this, and
> > > CHECKPOINT_RESTORE is in init/Kconfig, with default N
> > >
> > > Can we add #ifdef CONFIG_CHECKPOINT_RESTORE here ?
> > >
> >
> > No, these can be unmapped and it needs to be cleaned up. Valgrind is
> > one application that is known to unmap the vdso and runs into issues on
> > platforms that do not handle the removal correctly.
> >
> Maybe Valgrind needs that exactly for checkpoint restore ? [1]
Maybe, and maybe there are 100 other applications unmapping the vdso for
some other reason?
>
> "CRIU fails to restore applications that have unmapped the vDSO
> segment. One such
> application is Valgrind."
>
> Usually when the kernel accepts new functionality, the patch needs to
> state the user case.
This isn't new functionality, the arch_unmap() existed before and this
is moving the functionality. We can't just disable it because we assume
we know it's only used once.
I had planned to do this sort of patch set in a follow up to my patch
set [1], so I was deep into looking at this before the mseal()
regression - which I expected to happen and have been advocating to
avoid extra walks... This would be fixed by my patch set by reducing the
walk count.
> The only user case I found for .mremap and .close so far is the CRIU case.
>
In fact, this is broken on other archs for valgrind since they unmap the
vdso and then crash [2]. There has been a fix in the works for a while,
which I stepped in during the patch set [1], which can be seen here
[3]. In the discussion, the issue with Valgrind is raised and a generic
solution to solve for more than just ppc is discussed. The solution
avoids crashing if vdso is unmapped and that seems like the sane
direction of this work.
We also have users unmapping vdsos here [4] too.
Why would we leave a dangling pointer in the mm struct based on a
compile flag?
[1] https://lore.kernel.org/linux-mm/20240717200709.1552558-18-Liam.Howlett@oracle.com/
[2] https://lore.kernel.org/lkml/87imd5h5kb.fsf@mpe.ellerman.id.au/
[3] https://lore.kernel.org/all/20210611180242.711399-17-dima@arista.com/
[4] https://github.com/insanitybit/void-ship
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 3/4] mm: Remove arch_unmap()
2024-08-07 12:41 ` [PATCH 3/4] mm: Remove arch_unmap() Michael Ellerman
2024-08-07 15:33 ` David Hildenbrand
@ 2024-08-07 17:42 ` Thomas Gleixner
1 sibling, 0 replies; 25+ messages in thread
From: Thomas Gleixner @ 2024-08-07 17:42 UTC (permalink / raw)
To: Michael Ellerman, linux-mm
Cc: linuxppc-dev, npiggin, linux-kernel, christophe.leroy, jeffxu,
jeffxu, oliver.sang, Liam.Howlett, akpm, torvalds, pedro.falcato
On Wed, Aug 07 2024 at 22:41, Michael Ellerman wrote:
> Now that powerpc no longer uses arch_unmap() to handle VDSO unmapping,
> there are no meaningful implementions left. Drop support for it
> entirely, and update comments which refer to it.
>
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-07 17:11 ` Liam R. Howlett
@ 2024-08-07 20:11 ` Jeff Xu
2024-08-07 23:20 ` Liam R. Howlett
0 siblings, 1 reply; 25+ messages in thread
From: Jeff Xu @ 2024-08-07 20:11 UTC (permalink / raw)
To: Liam R. Howlett, Jeff Xu, Michael Ellerman, linux-mm,
linuxppc-dev, torvalds, akpm, christophe.leroy, jeffxu,
linux-kernel, npiggin, oliver.sang, pedro.falcato, Kees Cook
On Wed, Aug 7, 2024 at 10:11 AM Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
>
> * Jeff Xu <jeffxu@google.com> [240807 12:37]:
> > On Wed, Aug 7, 2024 at 8:56 AM Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> > >
> > > * Jeff Xu <jeffxu@google.com> [240807 11:44]:
> > > > On Wed, Aug 7, 2024 at 5:41 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> > > > >
> > > > > Add a close() callback to the VDSO special mapping to handle unmapping
> > > > > of the VDSO. That will make it possible to remove the arch_unmap() hook
> > > > > entirely in a subsequent patch.
> > > > >
> > > > > Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> > > > > Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> > > > > ---
> > > > > arch/powerpc/include/asm/mmu_context.h | 4 ----
> > > > > arch/powerpc/kernel/vdso.c | 17 +++++++++++++++++
> > > > > 2 files changed, 17 insertions(+), 4 deletions(-)
> > > > >
> > > > > diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> > > > > index 37bffa0f7918..9b8c1555744e 100644
> > > > > --- a/arch/powerpc/include/asm/mmu_context.h
> > > > > +++ b/arch/powerpc/include/asm/mmu_context.h
> > > > > @@ -263,10 +263,6 @@ extern void arch_exit_mmap(struct mm_struct *mm);
> > > > > static inline void arch_unmap(struct mm_struct *mm,
> > > > > unsigned long start, unsigned long end)
> > > > > {
> > > > > - unsigned long vdso_base = (unsigned long)mm->context.vdso;
> > > > > -
> > > > > - if (start <= vdso_base && vdso_base < end)
> > > > > - mm->context.vdso = NULL;
> > > > > }
> > > > >
> > > > > #ifdef CONFIG_PPC_MEM_KEYS
> > > > > diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
> > > > > index 7a2ff9010f17..220a76cae7c1 100644
> > > > > --- a/arch/powerpc/kernel/vdso.c
> > > > > +++ b/arch/powerpc/kernel/vdso.c
> > > > > @@ -81,6 +81,21 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str
> > > > > return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start);
> > > > > }
> > > > >
> > > > > +static void vdso_close(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
> > > > > +{
> > > > > + struct mm_struct *mm = vma->vm_mm;
> > > > > +
> > > > > + /*
> > > > > + * close() is called for munmap() but also for mremap(). In the mremap()
> > > > > + * case the vdso pointer has already been updated by the mremap() hook
> > > > > + * above, so it must not be set to NULL here.
> > > > > + */
> > > > > + if (vma->vm_start != (unsigned long)mm->context.vdso)
> > > > > + return;
> > > > > +
> > > > > + mm->context.vdso = NULL;
> > > > > +}
> > > > > +
> > > > > static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
> > > > > struct vm_area_struct *vma, struct vm_fault *vmf);
> > > > >
> > > > > @@ -92,11 +107,13 @@ static struct vm_special_mapping vvar_spec __ro_after_init = {
> > > > > static struct vm_special_mapping vdso32_spec __ro_after_init = {
> > > > > .name = "[vdso]",
> > > > > .mremap = vdso32_mremap,
> > > > > + .close = vdso_close,
> > > > IIUC, only CHECKPOINT_RESTORE requires this, and
> > > > CHECKPOINT_RESTORE is in init/Kconfig, with default N
> > > >
> > > > Can we add #ifdef CONFIG_CHECKPOINT_RESTORE here ?
> > > >
> > >
> > > No, these can be unmapped and it needs to be cleaned up. Valgrind is
> > > one application that is known to unmap the vdso and runs into issues on
> > > platforms that do not handle the removal correctly.
> > >
> > Maybe Valgrind needs that exactly for checkpoint restore ? [1]
>
> Maybe, and maybe there are 100 other applications unmapping the vdso for
> some other reason?
>
When PPC added arch_munamp, it was for CRIU, wasn't it ? That was the original
intention.
Maybe there are more apps that have started unmapping vdso, it might
be interesting
to know those use cases before widely opening this without kernel config.
> >
> > "CRIU fails to restore applications that have unmapped the vDSO
> > segment. One such
> > application is Valgrind."
> >
> > Usually when the kernel accepts new functionality, the patch needs to
> > state the user case.
>
> This isn't new functionality, the arch_unmap() existed before and this
> is moving the functionality. We can't just disable it because we assume
> we know it's only used once.
>
> I had planned to do this sort of patch set in a follow up to my patch
> set [1], so I was deep into looking at this before the mseal()
> regression - which I expected to happen and have been advocating to
> avoid extra walks... This would be fixed by my patch set by reducing the
> walk count.
>
I would rather leave mseal() in-loop check discussion to the other
email thread :-)
> > The only user case I found for .mremap and .close so far is the CRIU case.
> >
>
> In fact, this is broken on other archs for valgrind since they unmap the
> vdso and then crash [2]. There has been a fix in the works for a while,
> which I stepped in during the patch set [1], which can be seen here
> [3]. In the discussion, the issue with Valgrind is raised and a generic
> solution to solve for more than just ppc is discussed. The solution
> avoids crashing if vdso is unmapped and that seems like the sane
> direction of this work.
>
> We also have users unmapping vdsos here [4] too.
This is a strange code. If the use case about clock_gettime is legit, then
kerne can provide an option to not unload vdso during elf loading.
>
> Why would we leave a dangling pointer in the mm struct based on a
> compile flag?
>
> [1] https://lore.kernel.org/linux-mm/20240717200709.1552558-18-Liam.Howlett@oracle.com/
> [2] https://lore.kernel.org/lkml/87imd5h5kb.fsf@mpe.ellerman.id.au/
> [3] https://lore.kernel.org/all/20210611180242.711399-17-dima@arista.com/
> [4] https://github.com/insanitybit/void-ship
>
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-07 20:11 ` Jeff Xu
@ 2024-08-07 23:20 ` Liam R. Howlett
2024-08-08 3:21 ` Linus Torvalds
0 siblings, 1 reply; 25+ messages in thread
From: Liam R. Howlett @ 2024-08-07 23:20 UTC (permalink / raw)
To: Jeff Xu
Cc: Kees Cook, jeffxu, linuxppc-dev, christophe.leroy, linux-kernel,
linux-mm, oliver.sang, npiggin, akpm, torvalds, pedro.falcato
* Jeff Xu <jeffxu@google.com> [240807 16:12]:
> On Wed, Aug 7, 2024 at 10:11 AM Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> >
> > * Jeff Xu <jeffxu@google.com> [240807 12:37]:
> > > On Wed, Aug 7, 2024 at 8:56 AM Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> > > >
> > > > * Jeff Xu <jeffxu@google.com> [240807 11:44]:
> > > > > On Wed, Aug 7, 2024 at 5:41 AM Michael Ellerman <mpe@ellerman.id.au> wrote:
> > > > > >
> > > > > > Add a close() callback to the VDSO special mapping to handle unmapping
> > > > > > of the VDSO. That will make it possible to remove the arch_unmap() hook
> > > > > > entirely in a subsequent patch.
> > > > > >
> > > > > > Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> > > > > > Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
> > > > > > ---
> > > > > > arch/powerpc/include/asm/mmu_context.h | 4 ----
> > > > > > arch/powerpc/kernel/vdso.c | 17 +++++++++++++++++
> > > > > > 2 files changed, 17 insertions(+), 4 deletions(-)
> > > > > >
> > > > > > diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
> > > > > > index 37bffa0f7918..9b8c1555744e 100644
> > > > > > --- a/arch/powerpc/include/asm/mmu_context.h
> > > > > > +++ b/arch/powerpc/include/asm/mmu_context.h
> > > > > > @@ -263,10 +263,6 @@ extern void arch_exit_mmap(struct mm_struct *mm);
> > > > > > static inline void arch_unmap(struct mm_struct *mm,
> > > > > > unsigned long start, unsigned long end)
> > > > > > {
> > > > > > - unsigned long vdso_base = (unsigned long)mm->context.vdso;
> > > > > > -
> > > > > > - if (start <= vdso_base && vdso_base < end)
> > > > > > - mm->context.vdso = NULL;
> > > > > > }
> > > > > >
> > > > > > #ifdef CONFIG_PPC_MEM_KEYS
> > > > > > diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
> > > > > > index 7a2ff9010f17..220a76cae7c1 100644
> > > > > > --- a/arch/powerpc/kernel/vdso.c
> > > > > > +++ b/arch/powerpc/kernel/vdso.c
> > > > > > @@ -81,6 +81,21 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str
> > > > > > return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start);
> > > > > > }
> > > > > >
> > > > > > +static void vdso_close(const struct vm_special_mapping *sm, struct vm_area_struct *vma)
> > > > > > +{
> > > > > > + struct mm_struct *mm = vma->vm_mm;
> > > > > > +
> > > > > > + /*
> > > > > > + * close() is called for munmap() but also for mremap(). In the mremap()
> > > > > > + * case the vdso pointer has already been updated by the mremap() hook
> > > > > > + * above, so it must not be set to NULL here.
> > > > > > + */
> > > > > > + if (vma->vm_start != (unsigned long)mm->context.vdso)
> > > > > > + return;
> > > > > > +
> > > > > > + mm->context.vdso = NULL;
> > > > > > +}
> > > > > > +
> > > > > > static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
> > > > > > struct vm_area_struct *vma, struct vm_fault *vmf);
> > > > > >
> > > > > > @@ -92,11 +107,13 @@ static struct vm_special_mapping vvar_spec __ro_after_init = {
> > > > > > static struct vm_special_mapping vdso32_spec __ro_after_init = {
> > > > > > .name = "[vdso]",
> > > > > > .mremap = vdso32_mremap,
> > > > > > + .close = vdso_close,
> > > > > IIUC, only CHECKPOINT_RESTORE requires this, and
> > > > > CHECKPOINT_RESTORE is in init/Kconfig, with default N
> > > > >
> > > > > Can we add #ifdef CONFIG_CHECKPOINT_RESTORE here ?
> > > > >
> > > >
> > > > No, these can be unmapped and it needs to be cleaned up. Valgrind is
> > > > one application that is known to unmap the vdso and runs into issues on
> > > > platforms that do not handle the removal correctly.
> > > >
> > > Maybe Valgrind needs that exactly for checkpoint restore ? [1]
> >
> > Maybe, and maybe there are 100 other applications unmapping the vdso for
> > some other reason?
> >
> When PPC added arch_munamp, it was for CRIU, wasn't it ? That was the original
> intention.
>
> Maybe there are more apps that have started unmapping vdso, it might
> be interesting
> to know those use cases before widely opening this without kernel config.
>
> > >
> > > "CRIU fails to restore applications that have unmapped the vDSO
> > > segment. One such
> > > application is Valgrind."
> > >
> > > Usually when the kernel accepts new functionality, the patch needs to
> > > state the user case.
> >
...
> > > The only user case I found for .mremap and .close so far is the CRIU case.
> > >
> >
> > In fact, this is broken on other archs for valgrind since they unmap the
> > vdso and then crash [2]. There has been a fix in the works for a while,
> > which I stepped in during the patch set [1], which can be seen here
> > [3]. In the discussion, the issue with Valgrind is raised and a generic
> > solution to solve for more than just ppc is discussed. The solution
> > avoids crashing if vdso is unmapped and that seems like the sane
> > direction of this work.
> >
> > We also have users unmapping vdsos here [4] too.
> This is a strange code. If the use case about clock_gettime is legit, then
> kerne can provide an option to not unload vdso during elf loading.
Yes, I would not want to do this - but there are people doing strange
(to put it politely) things that we did not intend.
>
> >
> > Why would we leave a dangling pointer in the mm struct based on a
> > compile flag?
Okay, I'm going to try one more time here. You are suggesting to have a
conf flag to leave the vdso pointer unchanged when it is unmapped.
Having the close behind the conf option will not prevent it from being
unmapped or mapped over, so what you are suggesting is have a
configuration option that leaves a pointer, mm->context.vdso, to be
unsafe if it is unmapped if you disable checkpoint restore.
This is also not what userspace sees today, so you are changing user
visible behaviour based on a configuration option because you think, but
are not sure, that checkpoint restore is the only user.
Or did I miss something?
Thanks,
Liam
> >
> > [1] https://lore.kernel.org/linux-mm/20240717200709.1552558-18-Liam.Howlett@oracle.com/
> > [2] https://lore.kernel.org/lkml/87imd5h5kb.fsf@mpe.ellerman.id.au/
> > [3] https://lore.kernel.org/all/20210611180242.711399-17-dima@arista.com/
> > [4] https://github.com/insanitybit/void-ship
> >
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-07 23:20 ` Liam R. Howlett
@ 2024-08-08 3:21 ` Linus Torvalds
2024-08-08 3:36 ` Jeff Xu
` (2 more replies)
0 siblings, 3 replies; 25+ messages in thread
From: Linus Torvalds @ 2024-08-08 3:21 UTC (permalink / raw)
To: Liam R. Howlett, Jeff Xu, Michael Ellerman, linux-mm,
linuxppc-dev, torvalds, akpm, christophe.leroy, jeffxu,
linux-kernel, npiggin, oliver.sang, pedro.falcato, Kees Cook
On Wed, 7 Aug 2024 at 16:20, Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
>
> Okay, I'm going to try one more time here. You are suggesting to have a
> conf flag to leave the vdso pointer unchanged when it is unmapped.
> Having the close behind the conf option will not prevent it from being
> unmapped or mapped over, so what you are suggesting is have a
> configuration option that leaves a pointer, mm->context.vdso, to be
> unsafe if it is unmapped if you disable checkpoint restore.
We definitely do not want that kind of complexity. It makes the kernel
just more complicated to have to deal with both cases.
That said, I don't love how special powerpc is here.
What we could do is to is
- stop calling these things "special mappings", and just admit that
it's for different vdso mappings and nothing else (for some odd reason
arm and nios2 calls it a "kuser helper" rather than vdso, but it's the
exact same thing)
- don't do this whole indirect function pointer thing with mremap and
close at all, and just do this all unapologetically and for all
architectures in the generic VM layer together with "if (vma->vm_start
== mm->context.vdso)" etc.
that would get rid of the conceptual complexity of having different
architectures doing different things (and the unnecessary overhead of
having an indirect function pointer that just points to one single
thing).
But I think the current "clean up the existing mess" is probably the
less invasive one over "make the existing mess be explicitly about
vdso and avoid unnecessary per-architecture differences".
If people want to, we can do the unification (and stop pretending the
"special mappings" could be something else) later.
Linus
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-08 3:21 ` Linus Torvalds
@ 2024-08-08 3:36 ` Jeff Xu
2024-08-08 18:08 ` Liam R. Howlett
2024-08-08 4:18 ` Jeff Xu
2024-08-08 16:15 ` Liam R. Howlett
2 siblings, 1 reply; 25+ messages in thread
From: Jeff Xu @ 2024-08-08 3:36 UTC (permalink / raw)
To: Linus Torvalds
Cc: Kees Cook, jeffxu, npiggin, linux-kernel, christophe.leroy,
linux-mm, oliver.sang, Liam R. Howlett, akpm, linuxppc-dev,
pedro.falcato
On Wed, Aug 7, 2024 at 8:21 PM Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> On Wed, 7 Aug 2024 at 16:20, Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> >
> > Okay, I'm going to try one more time here. You are suggesting to have a
> > conf flag to leave the vdso pointer unchanged when it is unmapped.
> > Having the close behind the conf option will not prevent it from being
> > unmapped or mapped over, so what you are suggesting is have a
> > configuration option that leaves a pointer, mm->context.vdso, to be
> > unsafe if it is unmapped if you disable checkpoint restore.
>
This is a new point that I didn't realize before, if we are going to handle
unmap vdso safely, yes, this is a bugfix that should be applied everywhere
for all arch, without CHECKPOINT_RESTORE config.
Do we need to worry about mmap(fixed) ? which can have the same effect
as mremap.
> We definitely do not want that kind of complexity. It makes the kernel
> just more complicated to have to deal with both cases.
>
> That said, I don't love how special powerpc is here.
>
> What we could do is to is
>
> - stop calling these things "special mappings", and just admit that
> it's for different vdso mappings and nothing else (for some odd reason
> arm and nios2 calls it a "kuser helper" rather than vdso, but it's the
> exact same thing)
>
> - don't do this whole indirect function pointer thing with mremap and
> close at all, and just do this all unapologetically and for all
> architectures in the generic VM layer together with "if (vma->vm_start
> == mm->context.vdso)" etc.
>
> that would get rid of the conceptual complexity of having different
> architectures doing different things (and the unnecessary overhead of
> having an indirect function pointer that just points to one single
> thing).
>
> But I think the current "clean up the existing mess" is probably the
> less invasive one over "make the existing mess be explicitly about
> vdso and avoid unnecessary per-architecture differences".
>
> If people want to, we can do the unification (and stop pretending the
> "special mappings" could be something else) later.
>
> Linus
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-08 3:21 ` Linus Torvalds
2024-08-08 3:36 ` Jeff Xu
@ 2024-08-08 4:18 ` Jeff Xu
2024-08-08 16:15 ` Liam R. Howlett
2 siblings, 0 replies; 25+ messages in thread
From: Jeff Xu @ 2024-08-08 4:18 UTC (permalink / raw)
To: Linus Torvalds
Cc: Kees Cook, jeffxu, npiggin, linux-kernel, christophe.leroy,
linux-mm, oliver.sang, Liam R. Howlett, akpm, linuxppc-dev,
pedro.falcato
On Wed, Aug 7, 2024 at 8:21 PM Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> On Wed, 7 Aug 2024 at 16:20, Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> >
> > Okay, I'm going to try one more time here. You are suggesting to have a
> > conf flag to leave the vdso pointer unchanged when it is unmapped.
> > Having the close behind the conf option will not prevent it from being
> > unmapped or mapped over, so what you are suggesting is have a
> > configuration option that leaves a pointer, mm->context.vdso, to be
> > unsafe if it is unmapped if you disable checkpoint restore.
>
> We definitely do not want that kind of complexity. It makes the kernel
> just more complicated to have to deal with both cases.
>
> That said, I don't love how special powerpc is here.
>
> What we could do is to is
>
> - stop calling these things "special mappings", and just admit that
> it's for different vdso mappings and nothing else (for some odd reason
> arm and nios2 calls it a "kuser helper" rather than vdso, but it's the
> exact same thing)
>
> - don't do this whole indirect function pointer thing with mremap and
> close at all, and just do this all unapologetically and for all
> architectures in the generic VM layer together with "if (vma->vm_start
> == mm->context.vdso)" etc.
>
> that would get rid of the conceptual complexity of having different
> architectures doing different things (and the unnecessary overhead of
> having an indirect function pointer that just points to one single
> thing).
>
> But I think the current "clean up the existing mess" is probably the
> less invasive one over "make the existing mess be explicitly about
> vdso and avoid unnecessary per-architecture differences".
>
> If people want to, we can do the unification (and stop pretending the
> "special mappings" could be something else) later.
OK. Now this is clear to me (at last).
Treating vdso mapping, (maybe all the special mappings) as normal
mapping and handling mmap/mremap/munmap properly from userspace will
indeed make things clear conceptually. I'm OK with doing this later
since it is a big change.
Thanks
-Jeff
>
> Linus
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-08 3:21 ` Linus Torvalds
2024-08-08 3:36 ` Jeff Xu
2024-08-08 4:18 ` Jeff Xu
@ 2024-08-08 16:15 ` Liam R. Howlett
2 siblings, 0 replies; 25+ messages in thread
From: Liam R. Howlett @ 2024-08-08 16:15 UTC (permalink / raw)
To: Linus Torvalds
Cc: Kees Cook, jeffxu, linux-kernel, christophe.leroy, Jeff Xu,
linux-mm, oliver.sang, npiggin, akpm, linuxppc-dev, pedro.falcato
* Linus Torvalds <torvalds@linux-foundation.org> [240807 23:21]:
> On Wed, 7 Aug 2024 at 16:20, Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> >
...
>
> That said, I don't love how special powerpc is here.
I think more (all?) archs should be doing something like ppc when the
vdso is removed. If someone removes the vdso, then the speed up
provided should just go away and the function calls shouldn't try to use
the quick look up and crash.
I view this as another 'caching of a vma pointer' issue that isn't
cleaned up when the vma goes away.
>
> What we could do is to is
>
> - stop calling these things "special mappings", and just admit that
> it's for different vdso mappings and nothing else (for some odd reason
> arm and nios2 calls it a "kuser helper" rather than vdso, but it's the
> exact same thing)
But isn't it a special mapping? We don't allow for merging of the vma,
the mlock handling has some odd behaviour with this vma, and there is
the comment in mm/internal.h's mlock_vma_folio about ignoring these
special vmas in a race.
There is also some other 'special mapping' of vvars too? I haven't
looked deeply into this yet as my investigation was preempted by
vacation.
>
> - don't do this whole indirect function pointer thing with mremap and
> close at all, and just do this all unapologetically and for all
> architectures in the generic VM layer together with "if (vma->vm_start
> == mm->context.vdso)" etc.
>
> that would get rid of the conceptual complexity of having different
> architectures doing different things (and the unnecessary overhead of
> having an indirect function pointer that just points to one single
> thing).
>
> But I think the current "clean up the existing mess" is probably the
> less invasive one over "make the existing mess be explicitly about
> vdso and avoid unnecessary per-architecture differences".
Okay, sure.
>
> If people want to, we can do the unification (and stop pretending the
> "special mappings" could be something else) later.
>
I was planning to use the regular vma vm_ops to jump into the 'special
unmap code' and then do all the checks there. IOW, keep the vma flagged
as VM_SPECIAL and call the special_mapping_whatever() function as a
regular vmops for, say, ->remove_vma() or ->mremap(). Keeping the flag
means all the race avoidance/locking/merging works the same as it does
today.
What I am trying to avoid is another arch_get_unmapped_area() scenario
where a bug exists for a decade in some versions of the cloned code.
Thanks,
Liam
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-08 3:36 ` Jeff Xu
@ 2024-08-08 18:08 ` Liam R. Howlett
2024-08-08 18:36 ` Jeff Xu
0 siblings, 1 reply; 25+ messages in thread
From: Liam R. Howlett @ 2024-08-08 18:08 UTC (permalink / raw)
To: Jeff Xu
Cc: Kees Cook, jeffxu, linuxppc-dev, christophe.leroy, linux-kernel,
linux-mm, oliver.sang, npiggin, akpm, Linus Torvalds,
pedro.falcato
* Jeff Xu <jeffxu@google.com> [240807 23:37]:
> On Wed, Aug 7, 2024 at 8:21 PM Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
> >
> > On Wed, 7 Aug 2024 at 16:20, Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> > >
> > > Okay, I'm going to try one more time here. You are suggesting to have a
> > > conf flag to leave the vdso pointer unchanged when it is unmapped.
> > > Having the close behind the conf option will not prevent it from being
> > > unmapped or mapped over, so what you are suggesting is have a
> > > configuration option that leaves a pointer, mm->context.vdso, to be
> > > unsafe if it is unmapped if you disable checkpoint restore.
> >
> This is a new point that I didn't realize before, if we are going to handle
> unmap vdso safely, yes, this is a bugfix that should be applied everywhere
> for all arch, without CHECKPOINT_RESTORE config.
>
> Do we need to worry about mmap(fixed) ? which can have the same effect
> as mremap.
Yes, but it should be handled by vm_ops->close() when MAP_FIXED unmaps
the vdso. Note that you cannot MAP_FIXED over half of the vma as the
vm_ops->may_split() is special_mapping_split(), which just returns
-EINVAL.
Thanks,
Liam
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-08 18:08 ` Liam R. Howlett
@ 2024-08-08 18:36 ` Jeff Xu
2024-08-08 18:46 ` Liam R. Howlett
0 siblings, 1 reply; 25+ messages in thread
From: Jeff Xu @ 2024-08-08 18:36 UTC (permalink / raw)
To: Liam R. Howlett, Jeff Xu, Linus Torvalds, Michael Ellerman,
linux-mm, linuxppc-dev, akpm, christophe.leroy, jeffxu,
linux-kernel, npiggin, oliver.sang, pedro.falcato, Kees Cook
On Thu, Aug 8, 2024 at 11:08 AM Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
>
> * Jeff Xu <jeffxu@google.com> [240807 23:37]:
> > On Wed, Aug 7, 2024 at 8:21 PM Linus Torvalds
> > <torvalds@linux-foundation.org> wrote:
> > >
> > > On Wed, 7 Aug 2024 at 16:20, Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> > > >
> > > > Okay, I'm going to try one more time here. You are suggesting to have a
> > > > conf flag to leave the vdso pointer unchanged when it is unmapped.
> > > > Having the close behind the conf option will not prevent it from being
> > > > unmapped or mapped over, so what you are suggesting is have a
> > > > configuration option that leaves a pointer, mm->context.vdso, to be
> > > > unsafe if it is unmapped if you disable checkpoint restore.
> > >
> > This is a new point that I didn't realize before, if we are going to handle
> > unmap vdso safely, yes, this is a bugfix that should be applied everywhere
> > for all arch, without CHECKPOINT_RESTORE config.
> >
> > Do we need to worry about mmap(fixed) ? which can have the same effect
> > as mremap.
>
> Yes, but it should be handled by vm_ops->close() when MAP_FIXED unmaps
> the vdso. Note that you cannot MAP_FIXED over half of the vma as the
> vm_ops->may_split() is special_mapping_split(), which just returns
> -EINVAL.
>
The may_split() failure logic is specific to vm_special_mapping, right ?
Do we still need to keep vm_special_mapping struct , if we are going to
treat special vma as normal vma ?
> Thanks,
> Liam
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-08 18:36 ` Jeff Xu
@ 2024-08-08 18:46 ` Liam R. Howlett
2024-08-08 18:52 ` Jeff Xu
0 siblings, 1 reply; 25+ messages in thread
From: Liam R. Howlett @ 2024-08-08 18:46 UTC (permalink / raw)
To: Jeff Xu
Cc: Kees Cook, jeffxu, linuxppc-dev, christophe.leroy, linux-kernel,
linux-mm, oliver.sang, npiggin, akpm, Linus Torvalds,
pedro.falcato
* Jeff Xu <jeffxu@google.com> [240808 14:37]:
> On Thu, Aug 8, 2024 at 11:08 AM Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> >
> > * Jeff Xu <jeffxu@google.com> [240807 23:37]:
> > > On Wed, Aug 7, 2024 at 8:21 PM Linus Torvalds
> > > <torvalds@linux-foundation.org> wrote:
> > > >
> > > > On Wed, 7 Aug 2024 at 16:20, Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> > > > >
> > > > > Okay, I'm going to try one more time here. You are suggesting to have a
> > > > > conf flag to leave the vdso pointer unchanged when it is unmapped.
> > > > > Having the close behind the conf option will not prevent it from being
> > > > > unmapped or mapped over, so what you are suggesting is have a
> > > > > configuration option that leaves a pointer, mm->context.vdso, to be
> > > > > unsafe if it is unmapped if you disable checkpoint restore.
> > > >
> > > This is a new point that I didn't realize before, if we are going to handle
> > > unmap vdso safely, yes, this is a bugfix that should be applied everywhere
> > > for all arch, without CHECKPOINT_RESTORE config.
> > >
> > > Do we need to worry about mmap(fixed) ? which can have the same effect
> > > as mremap.
> >
> > Yes, but it should be handled by vm_ops->close() when MAP_FIXED unmaps
> > the vdso. Note that you cannot MAP_FIXED over half of the vma as the
> > vm_ops->may_split() is special_mapping_split(), which just returns
> > -EINVAL.
> >
> The may_split() failure logic is specific to vm_special_mapping, right ?
Not really, it's just what exists for these vmas vm_ops struct. It's
called on every vma for every split in __split_vma().
>
> Do we still need to keep vm_special_mapping struct , if we are going to
> treat special vma as normal vma ?
No, just set the vm_ops may_split to something that returns -EINVAL.
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap()
2024-08-08 18:46 ` Liam R. Howlett
@ 2024-08-08 18:52 ` Jeff Xu
0 siblings, 0 replies; 25+ messages in thread
From: Jeff Xu @ 2024-08-08 18:52 UTC (permalink / raw)
To: Liam R. Howlett, Jeff Xu, Linus Torvalds, Michael Ellerman,
linux-mm, linuxppc-dev, akpm, christophe.leroy, jeffxu,
linux-kernel, npiggin, oliver.sang, pedro.falcato, Kees Cook
On Thu, Aug 8, 2024 at 11:46 AM Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
>
> * Jeff Xu <jeffxu@google.com> [240808 14:37]:
> > On Thu, Aug 8, 2024 at 11:08 AM Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> > >
> > > * Jeff Xu <jeffxu@google.com> [240807 23:37]:
> > > > On Wed, Aug 7, 2024 at 8:21 PM Linus Torvalds
> > > > <torvalds@linux-foundation.org> wrote:
> > > > >
> > > > > On Wed, 7 Aug 2024 at 16:20, Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
> > > > > >
> > > > > > Okay, I'm going to try one more time here. You are suggesting to have a
> > > > > > conf flag to leave the vdso pointer unchanged when it is unmapped.
> > > > > > Having the close behind the conf option will not prevent it from being
> > > > > > unmapped or mapped over, so what you are suggesting is have a
> > > > > > configuration option that leaves a pointer, mm->context.vdso, to be
> > > > > > unsafe if it is unmapped if you disable checkpoint restore.
> > > > >
> > > > This is a new point that I didn't realize before, if we are going to handle
> > > > unmap vdso safely, yes, this is a bugfix that should be applied everywhere
> > > > for all arch, without CHECKPOINT_RESTORE config.
> > > >
> > > > Do we need to worry about mmap(fixed) ? which can have the same effect
> > > > as mremap.
> > >
> > > Yes, but it should be handled by vm_ops->close() when MAP_FIXED unmaps
> > > the vdso. Note that you cannot MAP_FIXED over half of the vma as the
> > > vm_ops->may_split() is special_mapping_split(), which just returns
> > > -EINVAL.
> > >
> > The may_split() failure logic is specific to vm_special_mapping, right ?
>
> Not really, it's just what exists for these vmas vm_ops struct. It's
> called on every vma for every split in __split_vma().
>
> >
> > Do we still need to keep vm_special_mapping struct , if we are going to
> > treat special vma as normal vma ?
>
> No, just set the vm_ops may_split to something that returns -EINVAL.
>
OK, that makes sense.
Thanks
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/4] mm: Add optional close() to struct vm_special_mapping
2024-08-07 15:52 ` Liam R. Howlett
@ 2024-08-12 8:22 ` Michael Ellerman
0 siblings, 0 replies; 25+ messages in thread
From: Michael Ellerman @ 2024-08-12 8:22 UTC (permalink / raw)
To: Liam R. Howlett
Cc: linux-mm, linuxppc-dev, torvalds, akpm, christophe.leroy, jeffxu,
jeffxu, linux-kernel, npiggin, oliver.sang, pedro.falcato
"Liam R. Howlett" <Liam.Howlett@oracle.com> writes:
> * Michael Ellerman <mpe@ellerman.id.au> [240807 08:41]:
>> Add an optional close() callback to struct vm_special_mapping. It will
>> be used, by powerpc at least, to handle unmapping of the VDSO.
>>
>> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
>> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
>> ---
>> include/linux/mm_types.h | 2 ++
>> mm/mmap.c | 3 +++
>> 2 files changed, 5 insertions(+)
>>
>> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
>> index 485424979254..ef32d87a3adc 100644
>> --- a/include/linux/mm_types.h
>> +++ b/include/linux/mm_types.h
>> @@ -1313,6 +1313,8 @@ struct vm_special_mapping {
>>
>> int (*mremap)(const struct vm_special_mapping *sm,
>> struct vm_area_struct *new_vma);
>
> nit: missing new line?
Ack.
>> + void (*close)(const struct vm_special_mapping *sm,
>> + struct vm_area_struct *vma);
>> };
>>
>> enum tlb_flush_reason {
>> diff --git a/mm/mmap.c b/mm/mmap.c
>> index d0dfc85b209b..24bd6aa9155c 100644
>> --- a/mm/mmap.c
>> +++ b/mm/mmap.c
>> @@ -3624,6 +3624,9 @@ static vm_fault_t special_mapping_fault(struct vm_fault *vmf);
>> */
>
> The above comment should probably be expanded to explain what this is
> about, or removed.
I expanded it slightly, happy for others to wordsmith it further.
>> static void special_mapping_close(struct vm_area_struct *vma)
>> {
>> + const struct vm_special_mapping *sm = vma->vm_private_data;
>> + if (sm->close)
>> + sm->close(sm, vma);
>
> Right now we have the same sort of situation for mremap calls on
> special: we have a call to the specific vma mremap() function.
> ...
> So, are we missing an opportunity to avoid every arch having the same
> implementation here (that will evolve into random bugs existing in some
> archs for years before someone realises the cloned code wasn't fixed)?
> Do we already have a fix in ppc for the size checking that doesn't exist
> in the other archs in the case of mremap?
I took this as more of a meta comment/rant :)
Yes I agree the implementation should eventually be generic, but this series
is just about moving the existing powerpc behaviour from arch_unmap()
into this hook.
cheers
^ permalink raw reply [flat|nested] 25+ messages in thread
* Re: [PATCH 1/4] mm: Add optional close() to struct vm_special_mapping
2024-08-07 15:32 ` [PATCH 1/4] mm: Add optional close() to struct vm_special_mapping David Hildenbrand
@ 2024-08-12 8:23 ` Michael Ellerman
0 siblings, 0 replies; 25+ messages in thread
From: Michael Ellerman @ 2024-08-12 8:23 UTC (permalink / raw)
To: David Hildenbrand, linux-mm
Cc: linuxppc-dev, torvalds, akpm, christophe.leroy, jeffxu, jeffxu,
Liam.Howlett, linux-kernel, npiggin, oliver.sang, pedro.falcato
David Hildenbrand <david@redhat.com> writes:
> On 07.08.24 14:41, Michael Ellerman wrote:
>> Add an optional close() callback to struct vm_special_mapping. It will
>> be used, by powerpc at least, to handle unmapping of the VDSO.
>>
>> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
>> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
>> ---
>> include/linux/mm_types.h | 2 ++
>> mm/mmap.c | 3 +++
>> 2 files changed, 5 insertions(+)
>>
>> diff --git a/mm/mmap.c b/mm/mmap.c
>> index d0dfc85b209b..24bd6aa9155c 100644
>> --- a/mm/mmap.c
>> +++ b/mm/mmap.c
>> @@ -3624,6 +3624,9 @@ static vm_fault_t special_mapping_fault(struct vm_fault *vmf);
>> */
>> static void special_mapping_close(struct vm_area_struct *vma)
>> {
>> + const struct vm_special_mapping *sm = vma->vm_private_data;
>
> I'm old-fashioned, I enjoy an empty line here ;)
Ack.
>> + if (sm->close)
>> + sm->close(sm, vma);
>
> Reviewed-by: David Hildenbrand <david@redhat.com>
Thanks.
cheers
^ permalink raw reply [flat|nested] 25+ messages in thread
end of thread, other threads:[~2024-08-12 8:23 UTC | newest]
Thread overview: 25+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-08-07 12:41 [PATCH 1/4] mm: Add optional close() to struct vm_special_mapping Michael Ellerman
2024-08-07 12:41 ` [PATCH 2/4] powerpc/mm: Handle VDSO unmapping via close() rather than arch_unmap() Michael Ellerman
2024-08-07 15:33 ` David Hildenbrand
2024-08-07 15:43 ` Jeff Xu
2024-08-07 15:56 ` Liam R. Howlett
2024-08-07 16:36 ` Jeff Xu
2024-08-07 17:11 ` Liam R. Howlett
2024-08-07 20:11 ` Jeff Xu
2024-08-07 23:20 ` Liam R. Howlett
2024-08-08 3:21 ` Linus Torvalds
2024-08-08 3:36 ` Jeff Xu
2024-08-08 18:08 ` Liam R. Howlett
2024-08-08 18:36 ` Jeff Xu
2024-08-08 18:46 ` Liam R. Howlett
2024-08-08 18:52 ` Jeff Xu
2024-08-08 4:18 ` Jeff Xu
2024-08-08 16:15 ` Liam R. Howlett
2024-08-07 12:41 ` [PATCH 3/4] mm: Remove arch_unmap() Michael Ellerman
2024-08-07 15:33 ` David Hildenbrand
2024-08-07 17:42 ` Thomas Gleixner
2024-08-07 12:41 ` [PATCH 4/4] powerpc/vdso: Refactor error handling Michael Ellerman
2024-08-07 15:32 ` [PATCH 1/4] mm: Add optional close() to struct vm_special_mapping David Hildenbrand
2024-08-12 8:23 ` Michael Ellerman
2024-08-07 15:52 ` Liam R. Howlett
2024-08-12 8:22 ` Michael Ellerman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).