* [PATCH rfc -next 01/10] mm: add a generic VMA lock-based page fault handler
2023-07-13 9:53 [PATCH rfc -next 00/10] mm: convert to generic VMA lock-based page fault Kefeng Wang
@ 2023-07-13 9:53 ` Kefeng Wang
2023-07-13 16:15 ` Matthew Wilcox
2023-07-13 9:53 ` [PATCH rfc -next 02/10] x86: mm: use try_vma_locked_page_fault() Kefeng Wang
` (8 subsequent siblings)
9 siblings, 1 reply; 17+ messages in thread
From: Kefeng Wang @ 2023-07-13 9:53 UTC (permalink / raw)
To: linux-mm, Andrew Morton, surenb
Cc: Kefeng Wang, x86, loongarch, Peter Zijlstra, Catalin Marinas,
Dave Hansen, WANG Xuerui, Will Deacon, Alexander Gordeev,
linux-s390, Huacai Chen, Russell King, Ingo Molnar,
Gerald Schaefer, Christian Borntraeger, Albert Ou, Vasily Gorbik,
Heiko Carstens, Nicholas Piggin, Borislav Petkov, Andy Lutomirski,
Paul Walmsley, Thomas Gleixner, linux-arm-kernel, linux-kernel,
linux-riscv, Palmer Dabbelt, Sven Schnelle, linuxppc-dev
More and more architectures enable ARCH_SUPPORTS_PER_VMA_LOCK, e.g. x86,
arm64, powerpc, s390 and riscv, and their implementations are very similar,
which results in duplicated code. Add a generic VMA lock-based page fault
handler to eliminate the duplication; this also makes it easy to support
the feature on new architectures.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
include/linux/mm.h | 28 ++++++++++++++++++++++++++++
mm/memory.c | 42 ++++++++++++++++++++++++++++++++++++++++++
2 files changed, 70 insertions(+)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c7886784832b..cba1b7b19c9d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -633,6 +633,15 @@ static inline void vma_numab_state_init(struct vm_area_struct *vma) {}
static inline void vma_numab_state_free(struct vm_area_struct *vma) {}
#endif /* CONFIG_NUMA_BALANCING */
+struct vm_locked_fault {
+ struct mm_struct *mm;
+ unsigned long address;
+ unsigned int fault_flags;
+ unsigned long vm_flags;
+ struct pt_regs *regs;
+ unsigned long fault_code;
+};
+
#ifdef CONFIG_PER_VMA_LOCK
/*
* Try to read-lock a vma. The function is allowed to occasionally yield false
@@ -733,6 +742,19 @@ static inline void assert_fault_locked(struct vm_fault *vmf)
struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
unsigned long address);
+#define VM_LOCKED_FAULT_INIT(_name, _mm, _address, _fault_flags, _vm_flags, _regs, _fault_code) \
+ _name.mm = _mm; \
+ _name.address = _address; \
+ _name.fault_flags = _fault_flags; \
+ _name.vm_flags = _vm_flags; \
+ _name.regs = _regs; \
+ _name.fault_code = _fault_code
+
+int __weak arch_vma_check_access(struct vm_area_struct *vma,
+ struct vm_locked_fault *vmlf);
+
+int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret);
+
#else /* CONFIG_PER_VMA_LOCK */
static inline bool vma_start_read(struct vm_area_struct *vma)
@@ -742,6 +764,12 @@ static inline void vma_start_write(struct vm_area_struct *vma) {}
static inline void vma_assert_write_locked(struct vm_area_struct *vma) {}
static inline void vma_mark_detached(struct vm_area_struct *vma,
bool detached) {}
+#define VM_LOCKED_FAULT_INIT(_name, _mm, _address, _fault_flags, _vm_flags, _regs, _fault_code)
+static inline int try_vma_locked_page_fault(struct vm_locked_fault *vmlf,
+ vm_fault_t *ret)
+{
+ return -EINVAL;
+}
static inline void release_fault_lock(struct vm_fault *vmf)
{
diff --git a/mm/memory.c b/mm/memory.c
index ad790394963a..d3f5d1270e7a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5449,6 +5449,48 @@ struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
count_vm_vma_lock_event(VMA_LOCK_ABORT);
return NULL;
}
+
+int __weak arch_vma_check_access(struct vm_area_struct *vma,
+ struct vm_locked_fault *vmlf)
+{
+ if (!(vma->vm_flags & vmlf->vm_flags))
+ return -EINVAL;
+ return 0;
+}
+
+int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret)
+{
+ struct vm_area_struct *vma;
+ vm_fault_t fault;
+
+ if (!(vmlf->fault_flags & FAULT_FLAG_USER))
+ return -EINVAL;
+
+ vma = lock_vma_under_rcu(vmlf->mm, vmlf->address);
+ if (!vma)
+ return -EINVAL;
+
+ if (arch_vma_check_access(vma, vmlf)) {
+ vma_end_read(vma);
+ return -EINVAL;
+ }
+
+ fault = handle_mm_fault(vma, vmlf->address,
+ vmlf->fault_flags | FAULT_FLAG_VMA_LOCK,
+ vmlf->regs);
+ *ret = fault;
+
+ if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
+ vma_end_read(vma);
+
+ if ((fault & VM_FAULT_RETRY))
+ count_vm_vma_lock_event(VMA_LOCK_RETRY);
+ else
+ count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+
+ return 0;
+}
+
#endif /* CONFIG_PER_VMA_LOCK */
#ifndef __PAGETABLE_P4D_FOLDED
--
2.27.0
* Re: [PATCH rfc -next 01/10] mm: add a generic VMA lock-based page fault handler
2023-07-13 9:53 ` [PATCH rfc -next 01/10] mm: add a generic VMA lock-based page fault handler Kefeng Wang
@ 2023-07-13 16:15 ` Matthew Wilcox
2023-07-13 20:12 ` Suren Baghdasaryan
0 siblings, 1 reply; 17+ messages in thread
From: Matthew Wilcox @ 2023-07-13 16:15 UTC (permalink / raw)
To: Kefeng Wang
Cc: x86, loongarch, Peter Zijlstra, Catalin Marinas, Dave Hansen,
linux-mm, Alexander Gordeev, Will Deacon, WANG Xuerui, linux-s390,
Huacai Chen, Russell King, Ingo Molnar, Gerald Schaefer,
Christian Borntraeger, Albert Ou, Vasily Gorbik, Heiko Carstens,
Nicholas Piggin, Borislav Petkov, Andy Lutomirski, Paul Walmsley,
Thomas Gleixner, surenb, linux-arm-kernel, linux-kernel,
linux-riscv, Palmer Dabbelt, Sven Schnelle
> +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret)
> +{
> + struct vm_area_struct *vma;
> + vm_fault_t fault;
On Thu, Jul 13, 2023 at 05:53:29PM +0800, Kefeng Wang wrote:
> +#define VM_LOCKED_FAULT_INIT(_name, _mm, _address, _fault_flags, _vm_flags, _regs, _fault_code) \
> + _name.mm = _mm; \
> + _name.address = _address; \
> + _name.fault_flags = _fault_flags; \
> + _name.vm_flags = _vm_flags; \
> + _name.regs = _regs; \
> + _name.fault_code = _fault_code
More consolidated code is a good idea; no question. But I don't think
this is the right way to do it.
> +int __weak arch_vma_check_access(struct vm_area_struct *vma,
> + struct vm_locked_fault *vmlf);
This should be:
#ifndef vma_check_access
bool vma_check_access(struct vm_area_struct *vma, )
{
return (vma->vm_flags & vm_flags) == 0;
}
#endif
and then arches which want to do something different can just define
vma_check_access.
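For illustration only, that #ifndef pattern might end up looking roughly like
this (the exact name, signature and return polarity here are assumptions made
for the sketch, not something this thread has settled on):

/* generic code: fallback used only if the arch did not provide its own */
#ifndef vma_check_access
static inline bool vma_check_access(struct vm_area_struct *vma,
				    unsigned long vm_flags)
{
	/* true when the VMA permits the requested access */
	return vma->vm_flags & vm_flags;
}
#endif

/* arch code: an arch that needs the hardware fault code overrides it */
static inline bool vma_check_access(struct vm_area_struct *vma,
				    unsigned long error_code)
{
	return !access_error(error_code, vma);
}
#define vma_check_access vma_check_access

The generic fault path would then call vma_check_access() without caring
which definition it got.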
> +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret)
> +{
> + struct vm_area_struct *vma;
> + vm_fault_t fault;
Declaring the vmf in this function and then copying it back is just wrong.
We need to declare vm_fault_t earlier (in the arch fault handler) and
pass it in. I don't think that creating struct vm_locked_fault is the
right idea either.
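One possible reading of that, sketched purely for illustration (the exact
signature and the initialised fields below are assumptions, not what this
series currently does): the arch fault handler declares the struct vm_fault
and the vm_fault_t itself and only passes them down, e.g.

	/* in the arch fault handler */
	struct vm_fault vmf = {
		.real_address	= address,
		.flags		= flags | FAULT_FLAG_VMA_LOCK,
	};
	vm_fault_t fault;

	/*
	 * hypothetical signature: a nonzero return means the VMA-locked
	 * path could not be used and we fall back to the mmap_lock path
	 */
	if (try_vma_locked_page_fault(&vmf, &fault))
		goto lock_mmap;
	if (!(fault & VM_FAULT_RETRY))
		goto done;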
> + if (!(vmlf->fault_flags & FAULT_FLAG_USER))
> + return -EINVAL;
> +
> + vma = lock_vma_under_rcu(vmlf->mm, vmlf->address);
> + if (!vma)
> + return -EINVAL;
> +
> + if (arch_vma_check_access(vma, vmlf)) {
> + vma_end_read(vma);
> + return -EINVAL;
> + }
> +
> + fault = handle_mm_fault(vma, vmlf->address,
> + vmlf->fault_flags | FAULT_FLAG_VMA_LOCK,
> + vmlf->regs);
> + *ret = fault;
> +
> + if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
> + vma_end_read(vma);
> +
> + if ((fault & VM_FAULT_RETRY))
> + count_vm_vma_lock_event(VMA_LOCK_RETRY);
> + else
> + count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> +
> + return 0;
> +}
> +
> #endif /* CONFIG_PER_VMA_LOCK */
>
> #ifndef __PAGETABLE_P4D_FOLDED
> --
> 2.27.0
>
>
* Re: [PATCH rfc -next 01/10] mm: add a generic VMA lock-based page fault handler
2023-07-13 16:15 ` Matthew Wilcox
@ 2023-07-13 20:12 ` Suren Baghdasaryan
2023-07-14 1:52 ` Kefeng Wang
0 siblings, 1 reply; 17+ messages in thread
From: Suren Baghdasaryan @ 2023-07-13 20:12 UTC (permalink / raw)
To: Matthew Wilcox
Cc: Kefeng Wang, x86, loongarch, Peter Zijlstra, Catalin Marinas,
Dave Hansen, linux-mm, Alexander Gordeev, Will Deacon,
WANG Xuerui, linux-s390, Huacai Chen, Russell King, Ingo Molnar,
Gerald Schaefer, Christian Borntraeger, Albert Ou, Vasily Gorbik,
Heiko Carstens, Nicholas Piggin, Borislav Petkov, Andy Lutomirski,
Paul Walmsley, Thomas Gleixner, linux-arm-kernel, linux-kernel,
linux-riscv, Palmer Dabbelt, Sven Schnelle, Andrew Morton,
linuxppc-dev
On Thu, Jul 13, 2023 at 9:15 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> > +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret)
> > +{
> > + struct vm_area_struct *vma;
> > + vm_fault_t fault;
>
>
> On Thu, Jul 13, 2023 at 05:53:29PM +0800, Kefeng Wang wrote:
> > +#define VM_LOCKED_FAULT_INIT(_name, _mm, _address, _fault_flags, _vm_flags, _regs, _fault_code) \
> > + _name.mm = _mm; \
> > + _name.address = _address; \
> > + _name.fault_flags = _fault_flags; \
> > + _name.vm_flags = _vm_flags; \
> > + _name.regs = _regs; \
> > + _name.fault_code = _fault_code
>
> More consolidated code is a good idea; no question. But I don't think
> this is the right way to do it.
>
> > +int __weak arch_vma_check_access(struct vm_area_struct *vma,
> > + struct vm_locked_fault *vmlf);
>
> This should be:
>
> #ifndef vma_check_access
> bool vma_check_access(struct vm_area_struct *vma, )
> {
> return (vma->vm_flags & vm_flags) == 0;
> }
> #endif
>
> and then arches which want to do something different can just define
> vma_check_access.
>
> > +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret)
> > +{
> > + struct vm_area_struct *vma;
> > + vm_fault_t fault;
>
> Declaring the vmf in this function and then copying it back is just wrong.
> We need to declare vm_fault_t earlier (in the arch fault handler) and
> pass it in.
Did you mean to say "we need to declare vmf (struct vm_fault) earlier
(in the arch fault handler) and pass it in." ?
> I don't think that creating struct vm_locked_fault is the
> right idea either.
>
> > + if (!(vmlf->fault_flags & FAULT_FLAG_USER))
> > + return -EINVAL;
> > +
> > + vma = lock_vma_under_rcu(vmlf->mm, vmlf->address);
> > + if (!vma)
> > + return -EINVAL;
> > +
> > + if (arch_vma_check_access(vma, vmlf)) {
> > + vma_end_read(vma);
> > + return -EINVAL;
> > + }
> > +
> > + fault = handle_mm_fault(vma, vmlf->address,
> > + vmlf->fault_flags | FAULT_FLAG_VMA_LOCK,
> > + vmlf->regs);
> > + *ret = fault;
> > +
> > + if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
> > + vma_end_read(vma);
> > +
> > + if ((fault & VM_FAULT_RETRY))
> > + count_vm_vma_lock_event(VMA_LOCK_RETRY);
> > + else
> > + count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
> > +
> > + return 0;
> > +}
> > +
> > #endif /* CONFIG_PER_VMA_LOCK */
> >
> > #ifndef __PAGETABLE_P4D_FOLDED
> > --
> > 2.27.0
> >
> >
* Re: [PATCH rfc -next 01/10] mm: add a generic VMA lock-based page fault handler
2023-07-13 20:12 ` Suren Baghdasaryan
@ 2023-07-14 1:52 ` Kefeng Wang
2023-07-15 1:54 ` Kefeng Wang
0 siblings, 1 reply; 17+ messages in thread
From: Kefeng Wang @ 2023-07-14 1:52 UTC (permalink / raw)
To: Suren Baghdasaryan, Matthew Wilcox
Cc: x86, loongarch, Peter Zijlstra, Catalin Marinas, Dave Hansen,
linux-mm, Alexander Gordeev, Will Deacon, WANG Xuerui, linux-s390,
Huacai Chen, Russell King, Ingo Molnar, Gerald Schaefer,
Christian Borntraeger, Albert Ou, Vasily Gorbik, Heiko Carstens,
Nicholas Piggin, Borislav Petkov, Andy Lutomirski, Paul Walmsley,
Thomas Gleixner, linux-arm-kernel, linux-kernel, linux-riscv,
Palmer Dabbelt, Sven Schnelle, Andrew Morton, linuxppc-dev
On 2023/7/14 4:12, Suren Baghdasaryan wrote:
> On Thu, Jul 13, 2023 at 9:15 AM Matthew Wilcox <willy@infradead.org> wrote:
>>
>>> +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret)
>>> +{
>>> + struct vm_area_struct *vma;
>>> + vm_fault_t fault;
>>
>>
>> On Thu, Jul 13, 2023 at 05:53:29PM +0800, Kefeng Wang wrote:
>>> +#define VM_LOCKED_FAULT_INIT(_name, _mm, _address, _fault_flags, _vm_flags, _regs, _fault_code) \
>>> + _name.mm = _mm; \
>>> + _name.address = _address; \
>>> + _name.fault_flags = _fault_flags; \
>>> + _name.vm_flags = _vm_flags; \
>>> + _name.regs = _regs; \
>>> + _name.fault_code = _fault_code
>>
>> More consolidated code is a good idea; no question. But I don't think
>> this is the right way to do it.
I agree it is not good enough, but the arch vma access checks have
different implementations: some use vm flags, some need the fault code and
regs, and some use both :(
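For reference, the checks being removed later in this series diverge roughly
like this (trimmed from the respective hunks, for illustration):

	/* arm64: only needs the vm_flags derived from the ESR */
	if (!(vma->vm_flags & vm_flags)) {
		vma_end_read(vma);
		goto lock_mmap;
	}

	/* x86: needs the hardware error code */
	if (unlikely(access_error(error_code, vma))) {
		vma_end_read(vma);
		goto lock_mmap;
	}

	/* powerpc: needs both the fault code and the regs (is_write and
	 * is_exec are derived from error_code and TRAP(regs))
	 */
	if (unlikely(access_pkey_error(is_write, is_exec,
				       (error_code & DSISR_KEYFAULT), vma))) {
		vma_end_read(vma);
		goto lock_mmap;
	}
	if (unlikely(access_error(is_write, is_exec, vma))) {
		vma_end_read(vma);
		goto lock_mmap;
	}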
>>
>>> +int __weak arch_vma_check_access(struct vm_area_struct *vma,
>>> + struct vm_locked_fault *vmlf);
>>
>> This should be:
>>
>> #ifndef vma_check_access
>> bool vma_check_access(struct vm_area_struct *vma, )
>> {
>> return (vma->vm_flags & vm_flags) == 0;
>> }
>> #endif
>>
>> and then arches which want to do something different can just define
>> vma_check_access.
Ok, I can convert it to this approach.
>>
>>> +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret)
>>> +{
>>> + struct vm_area_struct *vma;
>>> + vm_fault_t fault;
>>
>> Declaring the vmf in this function and then copying it back is just wrong.
>> We need to declare vm_fault_t earlier (in the arch fault handler) and
>> pass it in.
Actually I pass the vm_fault_t *ret in from the arch fault handler; we
could use *ret directly instead of a new local variable, so there is no copy.
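Concretely that would just mean dropping the local and writing through the
caller-provided pointer, i.e. (a sketch of this RFC's helper with only that
change applied):

int try_vma_locked_page_fault(struct vm_locked_fault *vmlf, vm_fault_t *ret)
{
	struct vm_area_struct *vma;

	if (!(vmlf->fault_flags & FAULT_FLAG_USER))
		return -EINVAL;

	vma = lock_vma_under_rcu(vmlf->mm, vmlf->address);
	if (!vma)
		return -EINVAL;

	if (arch_vma_check_access(vma, vmlf)) {
		vma_end_read(vma);
		return -EINVAL;
	}

	/* store the result straight into the arch handler's variable */
	*ret = handle_mm_fault(vma, vmlf->address,
			       vmlf->fault_flags | FAULT_FLAG_VMA_LOCK,
			       vmlf->regs);

	if (!(*ret & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
		vma_end_read(vma);

	if (*ret & VM_FAULT_RETRY)
		count_vm_vma_lock_event(VMA_LOCK_RETRY);
	else
		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);

	return 0;
}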
>
> Did you mean to say "we need to declare vmf (struct vm_fault) earlier
> (in the arch fault handler) and pass it in." ?
>
>> I don't think that creating struct vm_locked_fault is the
>> right idea either.
As mentioned above for the vma access check, the function needs many
arguments, so a new struct looked like the better option. Is there a better
solution, or any suggestion?
Thanks.
>>
>>> + if (!(vmlf->fault_flags & FAULT_FLAG_USER))
>>> + return -EINVAL;
>>> +
>>> + vma = lock_vma_under_rcu(vmlf->mm, vmlf->address);
>>> + if (!vma)
>>> + return -EINVAL;
>>> +
>>> + if (arch_vma_check_access(vma, vmlf)) {
>>> + vma_end_read(vma);
>>> + return -EINVAL;
>>> + }
>>> +
>>> + fault = handle_mm_fault(vma, vmlf->address,
>>> + vmlf->fault_flags | FAULT_FLAG_VMA_LOCK,
>>> + vmlf->regs);
>>> + *ret = fault;
>>> +
>>> + if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
>>> + vma_end_read(vma);
>>> +
>>> + if ((fault & VM_FAULT_RETRY))
>>> + count_vm_vma_lock_event(VMA_LOCK_RETRY);
>>> + else
>>> + count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> #endif /* CONFIG_PER_VMA_LOCK */
>>>
>>> #ifndef __PAGETABLE_P4D_FOLDED
>>> --
>>> 2.27.0
>>>
>>>
* Re: [PATCH rfc -next 01/10] mm: add a generic VMA lock-based page fault handler
2023-07-14 1:52 ` Kefeng Wang
@ 2023-07-15 1:54 ` Kefeng Wang
0 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-07-15 1:54 UTC (permalink / raw)
To: Suren Baghdasaryan, Matthew Wilcox
Cc: x86, loongarch, Peter Zijlstra, Catalin Marinas, Dave Hansen,
linux-mm, Alexander Gordeev, Will Deacon, WANG Xuerui, linux-s390,
Huacai Chen, Russell King, Ingo Molnar, Gerald Schaefer,
Christian Borntraeger, Albert Ou, Vasily Gorbik, Heiko Carstens,
Nicholas Piggin, Borislav Petkov, Andy Lutomirski, Paul Walmsley,
Thomas Gleixner, linux-arm-kernel, linux-kernel, linux-riscv,
Palmer Dabbelt, Sven Schnelle, Andrew Morton, linuxppc-dev
On 2023/7/14 9:52, Kefeng Wang wrote:
>
>
> On 2023/7/14 4:12, Suren Baghdasaryan wrote:
>> On Thu, Jul 13, 2023 at 9:15 AM Matthew Wilcox <willy@infradead.org>
>> wrote:
>>>
>>>> +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf,
>>>> vm_fault_t *ret)
>>>> +{
>>>> + struct vm_area_struct *vma;
>>>> + vm_fault_t fault;
>>>
>>>
>>> On Thu, Jul 13, 2023 at 05:53:29PM +0800, Kefeng Wang wrote:
>>>> +#define VM_LOCKED_FAULT_INIT(_name, _mm, _address, _fault_flags,
>>>> _vm_flags, _regs, _fault_code) \
>>>> + _name.mm = _mm; \
>>>> + _name.address = _address; \
>>>> + _name.fault_flags = _fault_flags; \
>>>> + _name.vm_flags = _vm_flags; \
>>>> + _name.regs = _regs; \
>>>> + _name.fault_code = _fault_code
>>>
>>> More consolidated code is a good idea; no question. But I don't think
>>> this is the right way to do it.
>
> I agree it is not good enough, but the arch vma access checks have
> different implementations: some use vm flags, some need the fault code and
> regs, and some use both :(
>
>>>
>>>> +int __weak arch_vma_check_access(struct vm_area_struct *vma,
>>>> + struct vm_locked_fault *vmlf);
>>>
>>> This should be:
>>>
>>> #ifndef vma_check_access
>>> bool vma_check_access(struct vm_area_struct *vma, )
>>> {
>>> return (vma->vm_flags & vm_flags) == 0;
>>> }
>>> #endif
>>>
>>> and then arches which want to do something different can just define
>>> vma_check_access.
>
> Ok, I can convert it to this approach.
>
>>>
>>>> +int try_vma_locked_page_fault(struct vm_locked_fault *vmlf,
>>>> vm_fault_t *ret)
>>>> +{
>>>> + struct vm_area_struct *vma;
>>>> + vm_fault_t fault;
>>>
>>> Declaring the vmf in this function and then copying it back is just
>>> wrong.
>>> We need to declare vm_fault_t earlier (in the arch fault handler) and
>>> pass it in.
>
> Actually I pass the vm_fault_t *ret in from the arch fault handler; we
> could use *ret directly instead of a new local variable, so there is no copy.
>>
>> Did you mean to say "we need to declare vmf (struct vm_fault) earlier
>> (in the arch fault handler) and pass it in." ?
After rechecking the code, I think Matthew's idea is to 'declare vmf (struct
vm_fault) earlier' as Suren said, not vm_fault_t, right? I will try
this, thanks.
>>
>>> I don't think that creating struct vm_locked_fault is the
>>> right idea either.
>
> As mentioned above for the vma access check, the function needs many
> arguments, so a new struct looked like the better option. Is there a better
> solution, or any suggestion?
>
> Thanks.
>
* [PATCH rfc -next 02/10] x86: mm: use try_vma_locked_page_fault()
2023-07-13 9:53 [PATCH rfc -next 00/10] mm: convert to generic VMA lock-based page fault Kefeng Wang
2023-07-13 9:53 ` [PATCH rfc -next 01/10] mm: add a generic VMA lock-based page fault handler Kefeng Wang
@ 2023-07-13 9:53 ` Kefeng Wang
2023-07-13 9:53 ` [PATCH rfc -next 03/10] arm64: " Kefeng Wang
` (7 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-07-13 9:53 UTC (permalink / raw)
To: linux-mm, Andrew Morton, surenb
Cc: Kefeng Wang, x86, loongarch, Peter Zijlstra, Catalin Marinas,
Dave Hansen, WANG Xuerui, Will Deacon, Alexander Gordeev,
linux-s390, Huacai Chen, Russell King, Ingo Molnar,
Gerald Schaefer, Christian Borntraeger, Albert Ou, Vasily Gorbik,
Heiko Carstens, Nicholas Piggin, Borislav Petkov, Andy Lutomirski,
Paul Walmsley, Thomas Gleixner, linux-arm-kernel, linux-kernel,
linux-riscv, Palmer Dabbelt, Sven Schnelle, linuxppc-dev
Use the new try_vma_locked_page_fault() helper to simplify the code.
No functional change intended.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/x86/mm/fault.c | 39 +++++++++++++++------------------------
1 file changed, 15 insertions(+), 24 deletions(-)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 56b4f9faf8c4..3f3b8b0a87de 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1213,6 +1213,16 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
}
NOKPROBE_SYMBOL(do_kern_addr_fault);
+#ifdef CONFIG_PER_VMA_LOCK
+int arch_vma_check_access(struct vm_area_struct *vma,
+ struct vm_locked_fault *vmlf)
+{
+ if (unlikely(access_error(vmlf->fault_code, vma)))
+ return -EINVAL;
+ return 0;
+}
+#endif
+
/*
* Handle faults in the user portion of the address space. Nothing in here
* should check X86_PF_USER without a specific justification: for almost
@@ -1231,6 +1241,7 @@ void do_user_addr_fault(struct pt_regs *regs,
struct mm_struct *mm;
vm_fault_t fault;
unsigned int flags = FAULT_FLAG_DEFAULT;
+ struct vm_locked_fault vmlf;
tsk = current;
mm = tsk->mm;
@@ -1328,27 +1339,11 @@ void do_user_addr_fault(struct pt_regs *regs,
}
#endif
-#ifdef CONFIG_PER_VMA_LOCK
- if (!(flags & FAULT_FLAG_USER))
- goto lock_mmap;
-
- vma = lock_vma_under_rcu(mm, address);
- if (!vma)
- goto lock_mmap;
-
- if (unlikely(access_error(error_code, vma))) {
- vma_end_read(vma);
- goto lock_mmap;
- }
- fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
- if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
- vma_end_read(vma);
-
- if (!(fault & VM_FAULT_RETRY)) {
- count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ VM_LOCKED_FAULT_INIT(vmlf, mm, address, flags, 0, regs, error_code);
+ if (try_vma_locked_page_fault(&vmlf, &fault))
+ goto retry;
+ else if (!(fault | VM_FAULT_RETRY))
goto done;
- }
- count_vm_vma_lock_event(VMA_LOCK_RETRY);
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
@@ -1358,8 +1353,6 @@ void do_user_addr_fault(struct pt_regs *regs,
ARCH_DEFAULT_PKEY);
return;
}
-lock_mmap:
-#endif /* CONFIG_PER_VMA_LOCK */
retry:
vma = lock_mm_and_find_vma(mm, address, regs);
@@ -1419,9 +1412,7 @@ void do_user_addr_fault(struct pt_regs *regs,
}
mmap_read_unlock(mm);
-#ifdef CONFIG_PER_VMA_LOCK
done:
-#endif
if (likely(!(fault & VM_FAULT_ERROR)))
return;
--
2.27.0
* [PATCH rfc -next 03/10] arm64: mm: use try_vma_locked_page_fault()
2023-07-13 9:53 [PATCH rfc -next 00/10] mm: convert to generic VMA lock-based page fault Kefeng Wang
2023-07-13 9:53 ` [PATCH rfc -next 01/10] mm: add a generic VMA lock-based page fault handler Kefeng Wang
2023-07-13 9:53 ` [PATCH rfc -next 02/10] x86: mm: use try_vma_locked_page_fault() Kefeng Wang
@ 2023-07-13 9:53 ` Kefeng Wang
2023-07-13 9:53 ` [PATCH rfc -next 04/10] s390: " Kefeng Wang
` (6 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-07-13 9:53 UTC (permalink / raw)
To: linux-mm, Andrew Morton, surenb
Cc: Kefeng Wang, x86, loongarch, Peter Zijlstra, Catalin Marinas,
Dave Hansen, WANG Xuerui, Will Deacon, Alexander Gordeev,
linux-s390, Huacai Chen, Russell King, Ingo Molnar,
Gerald Schaefer, Christian Borntraeger, Albert Ou, Vasily Gorbik,
Heiko Carstens, Nicholas Piggin, Borislav Petkov, Andy Lutomirski,
Paul Walmsley, Thomas Gleixner, linux-arm-kernel, linux-kernel,
linux-riscv, Palmer Dabbelt, Sven Schnelle, linuxppc-dev
Use the new try_vma_locked_page_fault() helper to simplify the code.
No functional change intended.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/arm64/mm/fault.c | 28 +++++-----------------------
1 file changed, 5 insertions(+), 23 deletions(-)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index b8c80f7b8a5f..614bb53fc1bc 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -537,6 +537,7 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
unsigned int mm_flags = FAULT_FLAG_DEFAULT;
unsigned long addr = untagged_addr(far);
struct vm_area_struct *vma;
+ struct vm_locked_fault vmlf;
if (kprobe_page_fault(regs, esr))
return 0;
@@ -587,27 +588,11 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
-#ifdef CONFIG_PER_VMA_LOCK
- if (!(mm_flags & FAULT_FLAG_USER))
- goto lock_mmap;
-
- vma = lock_vma_under_rcu(mm, addr);
- if (!vma)
- goto lock_mmap;
-
- if (!(vma->vm_flags & vm_flags)) {
- vma_end_read(vma);
- goto lock_mmap;
- }
- fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
- if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
- vma_end_read(vma);
-
- if (!(fault & VM_FAULT_RETRY)) {
- count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ VM_LOCKED_FAULT_INIT(vmlf, mm, addr, mm_flags, vm_flags, regs, esr);
+ if (try_vma_locked_page_fault(&vmlf, &fault))
+ goto retry;
+ else if (!(fault | VM_FAULT_RETRY))
goto done;
- }
- count_vm_vma_lock_event(VMA_LOCK_RETRY);
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
@@ -615,9 +600,6 @@ static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
goto no_context;
return 0;
}
-lock_mmap:
-#endif /* CONFIG_PER_VMA_LOCK */
-
retry:
vma = lock_mm_and_find_vma(mm, addr, regs);
if (unlikely(!vma)) {
--
2.27.0
* [PATCH rfc -next 04/10] s390: mm: use try_vma_locked_page_fault()
2023-07-13 9:53 [PATCH rfc -next 00/10] mm: convert to generic VMA lock-based page fault Kefeng Wang
` (2 preceding siblings ...)
2023-07-13 9:53 ` [PATCH rfc -next 03/10] arm64: " Kefeng Wang
@ 2023-07-13 9:53 ` Kefeng Wang
2023-07-13 9:53 ` [PATCH rfc -next 05/10] powerpc: " Kefeng Wang
` (5 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-07-13 9:53 UTC (permalink / raw)
To: linux-mm, Andrew Morton, surenb
Cc: Kefeng Wang, x86, loongarch, Peter Zijlstra, Catalin Marinas,
Dave Hansen, WANG Xuerui, Will Deacon, Alexander Gordeev,
linux-s390, Huacai Chen, Russell King, Ingo Molnar,
Gerald Schaefer, Christian Borntraeger, Albert Ou, Vasily Gorbik,
Heiko Carstens, Nicholas Piggin, Borislav Petkov, Andy Lutomirski,
Paul Walmsley, Thomas Gleixner, linux-arm-kernel, linux-kernel,
linux-riscv, Palmer Dabbelt, Sven Schnelle, linuxppc-dev
Use the new try_vma_locked_page_fault() helper to simplify the code.
No functional change intended.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/s390/mm/fault.c | 23 ++++++-----------------
1 file changed, 6 insertions(+), 17 deletions(-)
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 40a71063949b..97e511690352 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -362,6 +362,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
struct task_struct *tsk;
struct mm_struct *mm;
struct vm_area_struct *vma;
+ struct vm_locked_fault vmlf;
enum fault_type type;
unsigned long address;
unsigned int flags;
@@ -407,31 +408,19 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
access = VM_WRITE;
if (access == VM_WRITE)
flags |= FAULT_FLAG_WRITE;
-#ifdef CONFIG_PER_VMA_LOCK
- if (!(flags & FAULT_FLAG_USER))
- goto lock_mmap;
- vma = lock_vma_under_rcu(mm, address);
- if (!vma)
- goto lock_mmap;
- if (!(vma->vm_flags & access)) {
- vma_end_read(vma);
+
+ VM_LOCKED_FAULT_INIT(vmlf, mm, address, flags, access, regs, 0);
+ if (try_vma_locked_page_fault(&vmlf, &fault))
goto lock_mmap;
- }
- fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
- if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
- vma_end_read(vma);
- if (!(fault & VM_FAULT_RETRY)) {
- count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ else if (!(fault | VM_FAULT_RETRY))
goto out;
- }
- count_vm_vma_lock_event(VMA_LOCK_RETRY);
+
/* Quick path to respond to signals */
if (fault_signal_pending(fault, regs)) {
fault = VM_FAULT_SIGNAL;
goto out;
}
lock_mmap:
-#endif /* CONFIG_PER_VMA_LOCK */
mmap_read_lock(mm);
gmap = NULL;
--
2.27.0
* [PATCH rfc -next 05/10] powerpc: mm: use try_vma_locked_page_fault()
2023-07-13 9:53 [PATCH rfc -next 00/10] mm: convert to generic VMA lock-based page fault Kefeng Wang
` (3 preceding siblings ...)
2023-07-13 9:53 ` [PATCH rfc -next 04/10] s390: " Kefeng Wang
@ 2023-07-13 9:53 ` Kefeng Wang
2023-07-13 9:53 ` [PATCH rfc -next 06/10] riscv: " Kefeng Wang
` (4 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-07-13 9:53 UTC (permalink / raw)
To: linux-mm, Andrew Morton, surenb
Cc: Kefeng Wang, x86, loongarch, Peter Zijlstra, Catalin Marinas,
Dave Hansen, WANG Xuerui, Will Deacon, Alexander Gordeev,
linux-s390, Huacai Chen, Russell King, Ingo Molnar,
Gerald Schaefer, Christian Borntraeger, Albert Ou, Vasily Gorbik,
Heiko Carstens, Nicholas Piggin, Borislav Petkov, Andy Lutomirski,
Paul Walmsley, Thomas Gleixner, linux-arm-kernel, linux-kernel,
linux-riscv, Palmer Dabbelt, Sven Schnelle, linuxppc-dev
Use the new try_vma_locked_page_fault() helper to simplify the code.
No functional change intended.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/powerpc/mm/fault.c | 54 +++++++++++++++++------------------------
1 file changed, 22 insertions(+), 32 deletions(-)
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 82954d0e6906..dd4832a3cf10 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -391,6 +391,23 @@ static int page_fault_is_bad(unsigned long err)
#define page_fault_is_bad(__err) ((__err) & DSISR_BAD_FAULT_32S)
#endif
+#ifdef CONFIG_PER_VMA_LOCK
+int arch_vma_check_access(struct vm_area_struct *vma,
+ struct vm_locked_fault *vmlf)
+{
+ int is_exec = TRAP(vmlf->regs) == INTERRUPT_INST_STORAGE;
+ int is_write = page_fault_is_write(vmlf->fault_code);
+
+ if (unlikely(access_pkey_error(is_write, is_exec,
+ (vmlf->fault_code & DSISR_KEYFAULT), vma)))
+ return -EINVAL;
+
+ if (unlikely(access_error(is_write, is_exec, vma)))
+ return -EINVAL;
+ return 0;
+}
+#endif
+
/*
* For 600- and 800-family processors, the error_code parameter is DSISR
* for a data fault, SRR1 for an instruction fault.
@@ -413,6 +430,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
int is_write = page_fault_is_write(error_code);
vm_fault_t fault, major = 0;
bool kprobe_fault = kprobe_page_fault(regs, 11);
+ struct vm_locked_fault vmlf;
if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
return 0;
@@ -469,41 +487,15 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
if (is_exec)
flags |= FAULT_FLAG_INSTRUCTION;
-#ifdef CONFIG_PER_VMA_LOCK
- if (!(flags & FAULT_FLAG_USER))
- goto lock_mmap;
-
- vma = lock_vma_under_rcu(mm, address);
- if (!vma)
- goto lock_mmap;
-
- if (unlikely(access_pkey_error(is_write, is_exec,
- (error_code & DSISR_KEYFAULT), vma))) {
- vma_end_read(vma);
- goto lock_mmap;
- }
-
- if (unlikely(access_error(is_write, is_exec, vma))) {
- vma_end_read(vma);
- goto lock_mmap;
- }
-
- fault = handle_mm_fault(vma, address, flags | FAULT_FLAG_VMA_LOCK, regs);
- if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
- vma_end_read(vma);
-
- if (!(fault & VM_FAULT_RETRY)) {
- count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ VM_LOCKED_FAULT_INIT(vmlf, mm, address, flags, 0, regs, error_code);
+ if (try_vma_locked_page_fault(&vmlf, &fault))
+ goto retry;
+ else if (!(fault | VM_FAULT_RETRY))
goto done;
- }
- count_vm_vma_lock_event(VMA_LOCK_RETRY);
if (fault_signal_pending(fault, regs))
return user_mode(regs) ? 0 : SIGBUS;
-lock_mmap:
-#endif /* CONFIG_PER_VMA_LOCK */
-
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -552,9 +544,7 @@ static int ___do_page_fault(struct pt_regs *regs, unsigned long address,
mmap_read_unlock(current->mm);
-#ifdef CONFIG_PER_VMA_LOCK
done:
-#endif
if (unlikely(fault & VM_FAULT_ERROR))
return mm_fault_error(regs, address, fault);
--
2.27.0
* [PATCH rfc -next 06/10] riscv: mm: use try_vma_locked_page_fault()
2023-07-13 9:53 [PATCH rfc -next 00/10] mm: convert to generic VMA lock-based page fault Kefeng Wang
` (4 preceding siblings ...)
2023-07-13 9:53 ` [PATCH rfc -next 05/10] powerpc: " Kefeng Wang
@ 2023-07-13 9:53 ` Kefeng Wang
2023-07-13 9:53 ` [PATCH rfc -next 07/10] ARM: mm: try VMA lock-based page fault handling first Kefeng Wang
` (3 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-07-13 9:53 UTC (permalink / raw)
To: linux-mm, Andrew Morton, surenb
Cc: Kefeng Wang, x86, loongarch, Peter Zijlstra, Catalin Marinas,
Dave Hansen, WANG Xuerui, Will Deacon, Alexander Gordeev,
linux-s390, Huacai Chen, Russell King, Ingo Molnar,
Gerald Schaefer, Christian Borntraeger, Albert Ou, Vasily Gorbik,
Heiko Carstens, Nicholas Piggin, Borislav Petkov, Andy Lutomirski,
Paul Walmsley, Thomas Gleixner, linux-arm-kernel, linux-kernel,
linux-riscv, Palmer Dabbelt, Sven Schnelle, linuxppc-dev
Use the new try_vma_locked_page_fault() helper to simplify the code.
No functional change intended.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/riscv/mm/fault.c | 38 +++++++++++++++-----------------------
1 file changed, 15 insertions(+), 23 deletions(-)
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 6ea2cce4cc17..13bc60370b5c 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -215,6 +215,16 @@ static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
return false;
}
+#ifdef CONFIG_PER_VMA_LOCK
+int arch_vma_check_access(struct vm_area_struct *vma,
+ struct vm_locked_fault *vmlf)
+{
+ if (unlikely(access_error(vmlf->fault_code, vma)))
+ return -EINVAL;
+ return 0;
+}
+#endif
+
/*
* This routine handles page faults. It determines the address and the
* problem, and then passes it off to one of the appropriate routines.
@@ -228,6 +238,7 @@ void handle_page_fault(struct pt_regs *regs)
unsigned int flags = FAULT_FLAG_DEFAULT;
int code = SEGV_MAPERR;
vm_fault_t fault;
+ struct vm_locked_fault vmlf;
cause = regs->cause;
addr = regs->badaddr;
@@ -283,35 +294,18 @@ void handle_page_fault(struct pt_regs *regs)
flags |= FAULT_FLAG_WRITE;
else if (cause == EXC_INST_PAGE_FAULT)
flags |= FAULT_FLAG_INSTRUCTION;
-#ifdef CONFIG_PER_VMA_LOCK
- if (!(flags & FAULT_FLAG_USER))
- goto lock_mmap;
- vma = lock_vma_under_rcu(mm, addr);
- if (!vma)
- goto lock_mmap;
-
- if (unlikely(access_error(cause, vma))) {
- vma_end_read(vma);
- goto lock_mmap;
- }
-
- fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
- vma_end_read(vma);
-
- if (!(fault & VM_FAULT_RETRY)) {
- count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ VM_LOCKED_FAULT_INIT(vmlf, mm, addr, flags, 0, regs, cause);
+ if (try_vma_locked_page_fault(&vmlf, &fault))
+ goto retry;
+ else if (!(fault | VM_FAULT_RETRY))
goto done;
- }
- count_vm_vma_lock_event(VMA_LOCK_RETRY);
if (fault_signal_pending(fault, regs)) {
if (!user_mode(regs))
no_context(regs, addr);
return;
}
-lock_mmap:
-#endif /* CONFIG_PER_VMA_LOCK */
retry:
vma = lock_mm_and_find_vma(mm, addr, regs);
@@ -368,9 +362,7 @@ void handle_page_fault(struct pt_regs *regs)
mmap_read_unlock(mm);
-#ifdef CONFIG_PER_VMA_LOCK
done:
-#endif
if (unlikely(fault & VM_FAULT_ERROR)) {
tsk->thread.bad_cause = cause;
mm_fault_error(regs, addr, fault);
--
2.27.0
* [PATCH rfc -next 07/10] ARM: mm: try VMA lock-based page fault handling first
2023-07-13 9:53 [PATCH rfc -next 00/10] mm: convert to generic VMA lock-based page fault Kefeng Wang
` (5 preceding siblings ...)
2023-07-13 9:53 ` [PATCH rfc -next 06/10] riscv: " Kefeng Wang
@ 2023-07-13 9:53 ` Kefeng Wang
2023-07-13 9:53 ` [PATCH rfc -next 08/10] loongarch: mm: cleanup __do_page_fault() Kefeng Wang
` (2 subsequent siblings)
9 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-07-13 9:53 UTC (permalink / raw)
To: linux-mm, Andrew Morton, surenb
Cc: Kefeng Wang, x86, loongarch, Peter Zijlstra, Catalin Marinas,
Dave Hansen, WANG Xuerui, Will Deacon, Alexander Gordeev,
linux-s390, Huacai Chen, Russell King, Ingo Molnar,
Gerald Schaefer, Christian Borntraeger, Albert Ou, Vasily Gorbik,
Heiko Carstens, Nicholas Piggin, Borislav Petkov, Andy Lutomirski,
Paul Walmsley, Thomas Gleixner, linux-arm-kernel, linux-kernel,
linux-riscv, Palmer Dabbelt, Sven Schnelle, linuxppc-dev
Attempt VMA lock-based page fault handling first, and fall back
to the existing mmap_lock-based handling if that fails.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/arm/Kconfig | 1 +
arch/arm/mm/fault.c | 15 ++++++++++++++-
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 1a6a6eb48a15..8b6d4507ccee 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -34,6 +34,7 @@ config ARM
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
+ select ARCH_SUPPORTS_PER_VMA_LOCK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_MEMTEST
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index fef62e4a9edd..c44b83841e36 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -244,6 +244,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
vm_fault_t fault;
unsigned int flags = FAULT_FLAG_DEFAULT;
unsigned long vm_flags = VM_ACCESS_FLAGS;
+ struct vm_locked_fault vmlf;
if (kprobe_page_fault(regs, fsr))
return 0;
@@ -278,6 +279,18 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+ VM_LOCKED_FAULT_INIT(vmlf, mm, addr, flags, vm_flags, regs, fsr);
+ if (try_vma_locked_page_fault(&vmlf, &fault))
+ goto retry;
+ else if (!(fault | VM_FAULT_RETRY))
+ goto done;
+
+ if (fault_signal_pending(fault, regs)) {
+ if (!user_mode(regs))
+ goto no_context;
+ return 0;
+ }
+
retry:
vma = lock_mm_and_find_vma(mm, addr, regs);
if (unlikely(!vma)) {
@@ -316,7 +329,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
}
mmap_read_unlock(mm);
-
+done:
/*
* Handle the "normal" case first - VM_FAULT_MAJOR
*/
--
2.27.0
* [PATCH rfc -next 08/10] loongarch: mm: cleanup __do_page_fault()
2023-07-13 9:53 [PATCH rfc -next 00/10] mm: convert to generic VMA lock-based page fault Kefeng Wang
` (6 preceding siblings ...)
2023-07-13 9:53 ` [PATCH rfc -next 07/10] ARM: mm: try VMA lock-based page fault handling first Kefeng Wang
@ 2023-07-13 9:53 ` Kefeng Wang
2023-07-13 9:53 ` [PATCH rfc -next 09/10] loongarch: mm: add access_error() helper Kefeng Wang
2023-07-13 9:53 ` [PATCH rfc -next 10/10] loongarch: mm: try VMA lock-based page fault handling first Kefeng Wang
9 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-07-13 9:53 UTC (permalink / raw)
To: linux-mm, Andrew Morton, surenb
Cc: Kefeng Wang, x86, loongarch, Peter Zijlstra, Catalin Marinas,
Dave Hansen, WANG Xuerui, Will Deacon, Alexander Gordeev,
linux-s390, Huacai Chen, Russell King, Ingo Molnar,
Gerald Schaefer, Christian Borntraeger, Albert Ou, Vasily Gorbik,
Heiko Carstens, Nicholas Piggin, Borislav Petkov, Andy Lutomirski,
Paul Walmsley, Thomas Gleixner, linux-arm-kernel, linux-kernel,
linux-riscv, Palmer Dabbelt, Sven Schnelle, linuxppc-dev
Clean up __do_page_fault() by reusing the bad_area_nosemaphore and
bad_area labels.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/loongarch/mm/fault.c | 36 +++++++++++-------------------------
1 file changed, 11 insertions(+), 25 deletions(-)
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index da5b6d518cdb..03d06ee184da 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -151,18 +151,15 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
if (!user_mode(regs))
no_context(regs, address);
else
- do_sigsegv(regs, write, address, si_code);
- return;
+ goto bad_area_nosemaphore;
}
/*
* If we're in an interrupt or have no user
* context, we must not take the fault..
*/
- if (faulthandler_disabled() || !mm) {
- do_sigsegv(regs, write, address, si_code);
- return;
- }
+ if (faulthandler_disabled() || !mm)
+ goto bad_area_nosemaphore;
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
@@ -172,23 +169,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
vma = lock_mm_and_find_vma(mm, address, regs);
if (unlikely(!vma))
goto bad_area_nosemaphore;
- goto good_area;
-
-/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
- */
-bad_area:
- mmap_read_unlock(mm);
-bad_area_nosemaphore:
- do_sigsegv(regs, write, address, si_code);
- return;
-/*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
-good_area:
si_code = SEGV_ACCERR;
if (write) {
@@ -229,14 +210,15 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
*/
goto retry;
}
+
+ mmap_read_unlock(mm);
+
if (unlikely(fault & VM_FAULT_ERROR)) {
- mmap_read_unlock(mm);
if (fault & VM_FAULT_OOM) {
do_out_of_memory(regs, address);
return;
} else if (fault & VM_FAULT_SIGSEGV) {
- do_sigsegv(regs, write, address, si_code);
- return;
+ goto bad_area_nosemaphore;
} else if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
do_sigbus(regs, write, address, si_code);
return;
@@ -244,7 +226,11 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
BUG();
}
+ return;
+bad_area:
mmap_read_unlock(mm);
+bad_area_nosemaphore:
+ do_sigsegv(regs, write, address, si_code);
}
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
--
2.27.0
* [PATCH rfc -next 09/10] loongarch: mm: add access_error() helper
2023-07-13 9:53 [PATCH rfc -next 00/10] mm: convert to generic VMA lock-based page fault Kefeng Wang
` (7 preceding siblings ...)
2023-07-13 9:53 ` [PATCH rfc -next 08/10] loongarch: mm: cleanup __do_page_fault() Kefeng Wang
@ 2023-07-13 9:53 ` Kefeng Wang
2023-07-13 9:53 ` [PATCH rfc -next 10/10] loongarch: mm: try VMA lock-based page fault handling first Kefeng Wang
9 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-07-13 9:53 UTC (permalink / raw)
To: linux-mm, Andrew Morton, surenb
Cc: Kefeng Wang, x86, loongarch, Peter Zijlstra, Catalin Marinas,
Dave Hansen, WANG Xuerui, Will Deacon, Alexander Gordeev,
linux-s390, Huacai Chen, Russell King, Ingo Molnar,
Gerald Schaefer, Christian Borntraeger, Albert Ou, Vasily Gorbik,
Heiko Carstens, Nicholas Piggin, Borislav Petkov, Andy Lutomirski,
Paul Walmsley, Thomas Gleixner, linux-arm-kernel, linux-kernel,
linux-riscv, Palmer Dabbelt, Sven Schnelle, linuxppc-dev
Add access_error() to check whether the vma is accessible or not; it
will be used in __do_page_fault() and later by the VMA lock-based
page fault handling.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/loongarch/mm/fault.c | 30 ++++++++++++++++++++----------
1 file changed, 20 insertions(+), 10 deletions(-)
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index 03d06ee184da..cde2ea0119fa 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -120,6 +120,22 @@ static void __kprobes do_sigsegv(struct pt_regs *regs,
force_sig_fault(SIGSEGV, si_code, (void __user *)address);
}
+static inline bool access_error(unsigned int flags, struct pt_regs *regs,
+ unsigned long addr, struct vm_area_struct *vma)
+{
+ if (flags & FAULT_FLAG_WRITE) {
+ if (!(vma->vm_flags & VM_WRITE))
+ return true;
+ } else {
+ if (!(vma->vm_flags & VM_READ) && addr != exception_era(regs))
+ return true;
+ if (!(vma->vm_flags & VM_EXEC) && addr == exception_era(regs))
+ return true;
+ }
+
+ return false;
+}
+
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
@@ -163,6 +179,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+ if (write)
+ flags |= FAULT_FLAG_WRITE;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
retry:
@@ -172,16 +190,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
si_code = SEGV_ACCERR;
- if (write) {
- flags |= FAULT_FLAG_WRITE;
- if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
- } else {
- if (!(vma->vm_flags & VM_READ) && address != exception_era(regs))
- goto bad_area;
- if (!(vma->vm_flags & VM_EXEC) && address == exception_era(regs))
- goto bad_area;
- }
+ if (access_error(flags, regs, vma))
+ goto bad_area;
/*
* If for any reason at all we couldn't handle the fault,
--
2.27.0
* [PATCH rfc -next 10/10] loongarch: mm: try VMA lock-based page fault handling first
2023-07-13 9:53 [PATCH rfc -next 00/10] mm: convert to generic VMA lock-based page fault Kefeng Wang
` (8 preceding siblings ...)
2023-07-13 9:53 ` [PATCH rfc -next 09/10] loongarch: mm: add access_error() helper Kefeng Wang
@ 2023-07-13 9:53 ` Kefeng Wang
9 siblings, 0 replies; 17+ messages in thread
From: Kefeng Wang @ 2023-07-13 9:53 UTC (permalink / raw)
To: linux-mm, Andrew Morton, surenb
Cc: Kefeng Wang, x86, loongarch, Peter Zijlstra, Catalin Marinas,
Dave Hansen, WANG Xuerui, Will Deacon, Alexander Gordeev,
linux-s390, Huacai Chen, Russell King, Ingo Molnar,
Gerald Schaefer, Christian Borntraeger, Albert Ou, Vasily Gorbik,
Heiko Carstens, Nicholas Piggin, Borislav Petkov, Andy Lutomirski,
Paul Walmsley, Thomas Gleixner, linux-arm-kernel, linux-kernel,
linux-riscv, Palmer Dabbelt, Sven Schnelle, linuxppc-dev
Attempt VMA lock-based page fault handling first, and fall back
to the existing mmap_lock-based handling if that fails.
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
arch/loongarch/Kconfig | 1 +
arch/loongarch/mm/fault.c | 26 ++++++++++++++++++++++++++
2 files changed, 27 insertions(+)
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 397203e18800..afb0ccabab97 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -53,6 +53,7 @@ config LOONGARCH
select ARCH_SUPPORTS_LTO_CLANG
select ARCH_SUPPORTS_LTO_CLANG_THIN
select ARCH_SUPPORTS_NUMA_BALANCING
+ select ARCH_SUPPORTS_PER_VMA_LOCK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_QUEUED_RWLOCKS
diff --git a/arch/loongarch/mm/fault.c b/arch/loongarch/mm/fault.c
index cde2ea0119fa..7e54bc48813e 100644
--- a/arch/loongarch/mm/fault.c
+++ b/arch/loongarch/mm/fault.c
@@ -136,6 +136,17 @@ static inline bool access_error(unsigned int flags, struct pt_regs *regs,
return false;
}
+#ifdef CONFIG_PER_VMA_LOCK
+int arch_vma_check_access(struct vm_area_struct *vma,
+ struct vm_locked_fault *vmlf)
+{
+ if (unlikely(access_error(vmlf->fault_flags, vmlf->regs, vmlf->address,
+ vma)))
+ return -EINVAL;
+ return 0;
+}
+#endif
+
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
@@ -149,6 +160,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
struct vm_area_struct *vma = NULL;
+ struct vm_locked_fault vmlf;
vm_fault_t fault;
if (kprobe_page_fault(regs, current->thread.trap_nr))
@@ -183,6 +195,19 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
flags |= FAULT_FLAG_WRITE;
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
+ VM_LOCKED_FAULT_INIT(vmlf, mm, address, flags, 0, regs, 0);
+ if (try_vma_locked_page_fault(&vmlf, &fault))
+ goto retry;
+ else if (!(fault | VM_FAULT_RETRY))
+ goto done;
+
+ if (fault_signal_pending(fault, regs)) {
+ if (!user_mode(regs))
+ no_context(regs, address);
+ return;
+ }
+
retry:
vma = lock_mm_and_find_vma(mm, address, regs);
if (unlikely(!vma))
@@ -223,6 +248,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs,
mmap_read_unlock(mm);
+done:
if (unlikely(fault & VM_FAULT_ERROR)) {
if (fault & VM_FAULT_OOM) {
do_out_of_memory(regs, address);
--
2.27.0