* [PATCH] x86: fault_{32|64}.c unify do_page_fault
@ 2008-01-03 1:01 Harvey Harrison
2008-01-03 1:45 ` Alexey Dobriyan
0 siblings, 1 reply; 5+ messages in thread
From: Harvey Harrison @ 2008-01-03 1:01 UTC (permalink / raw)
To: Ingo Molnar; +Cc: H. Peter Anvin, LKML, Thomas Gleixner
Begin to unify do_page_fault(), easy code movement first.
Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
---
Ingo, similar to the kprobes unification patches I did, it gets a bit
uglier before it gets better ;-)
arch/x86/mm/fault_32.c | 38 +++++++++++++++++++++++++++++---------
arch/x86/mm/fault_64.c | 23 ++++++++++++++++++-----
2 files changed, 47 insertions(+), 14 deletions(-)
diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
index b1893eb..051a4ec 100644
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -375,19 +375,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
struct mm_struct *mm;
struct vm_area_struct *vma;
unsigned long address;
- int write, si_code;
- int fault;
+ int write, fault;
+#ifdef CONFIG_x86_64
+ unsigned long flags;
+#endif
+ int si_code;
/*
* We can fault from pretty much anywhere, with unknown IRQ state.
*/
trace_hardirqs_fixup();
- /* get the address */
- address = read_cr2();
-
tsk = current;
+ mm = tsk->mm;
+#ifdef CONFIG_x86_64
+ prefetchw(&mm->mmap_sem);
+#endif
+
+ /* get the address */
+ address = read_cr2();
si_code = SEGV_MAPERR;
/*
@@ -403,9 +410,24 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
* (error_code & 4) == 0, and that the fault was not a
* protection error (error_code & 9) == 0.
*/
+#ifdef CONFIG_X86_32
if (unlikely(address >= TASK_SIZE)) {
- if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
+ if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+ vmalloc_fault(address) >= 0)
return;
+#else
+ if (unlikely(address >= TASK_SIZE64)) {
+ /*
+ * Don't check for the module range here: its PML4
+ * is always initialized because it's shared with the main
+ * kernel text. Only vmalloc may need PML4 syncups.
+ */
+ if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+ ((address >= VMALLOC_START && address < VMALLOC_END))) {
+ if (vmalloc_fault(address) >= 0)
+ return;
+ }
+#endif
if (notify_page_fault(regs))
return;
/*
@@ -423,8 +445,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
local_irq_enable();
- mm = tsk->mm;
-
/*
* If we're in an interrupt, have no user context or are running in an
* atomic region then we must not take the fault.
@@ -495,7 +515,7 @@ good_area:
goto bad_area;
}
- survive:
+survive:
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
index 357a3e0..97b92b6 100644
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -426,7 +426,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
struct vm_area_struct *vma;
unsigned long address;
int write, fault;
+#ifdef CONFIG_x86_64
unsigned long flags;
+#endif
int si_code;
/*
@@ -436,14 +438,15 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
tsk = current;
mm = tsk->mm;
+
+#ifdef CONFIG_x86_64
prefetchw(&mm->mmap_sem);
+#endif
/* get the address */
address = read_cr2();
-
si_code = SEGV_MAPERR;
-
/*
* We fault-in kernel-space virtual memory on-demand. The
* 'reference' page table is init_mm.pgd.
@@ -457,6 +460,12 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
* (error_code & 4) == 0, and that the fault was not a
* protection error (error_code & 9) == 0.
*/
+#ifdef CONFIG_X86_32
+ if (unlikely(address >= TASK_SIZE)) {
+ if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
+ vmalloc_fault(address) >= 0)
+ return;
+#else
if (unlikely(address >= TASK_SIZE64)) {
/*
* Don't check for the module range here: its PML4
@@ -468,6 +477,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
if (vmalloc_fault(address) >= 0)
return;
}
+#endif
if (notify_page_fault(regs))
return;
/*
@@ -500,7 +510,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
if (user_mode_vm(regs))
error_code |= PF_USER;
- again:
+again:
/* When running in the kernel we expect faults to occur only to
* addresses in user space. All other faults represent errors in the
* kernel and should generate an OOPS. Unfortunately, in the case of an
@@ -531,8 +541,11 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
if (error_code & PF_USER) {
- /* Allow userspace just enough access below the stack pointer
- * to let the 'enter' instruction work.
+ /*
+ * Accessing the stack below %sp is always a bug.
+ * The large cushion allows instructions like enter
+ * and pusha to work. ("enter $65535,$31" pushes
+ * 32 pointers and then decrements %sp by 65535.)
*/
if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
goto bad_area;
--
1.5.4.rc2.1097.gb6e0d
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] x86: fault_{32|64}.c unify do_page_fault
2008-01-03 1:01 [PATCH] x86: fault_{32|64}.c unify do_page_fault Harvey Harrison
@ 2008-01-03 1:45 ` Alexey Dobriyan
2008-01-03 2:09 ` Harvey Harrison
0 siblings, 1 reply; 5+ messages in thread
From: Alexey Dobriyan @ 2008-01-03 1:45 UTC (permalink / raw)
To: Harvey Harrison; +Cc: Ingo Molnar, H. Peter Anvin, LKML, Thomas Gleixner
On Wed, Jan 02, 2008 at 05:01:02PM -0800, Harvey Harrison wrote:
> Begin to unify do_page_fault(), easy code movement first.
>
> Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
> ---
> Ingo, similar to the kprobes unification patches I did, it gets a bit
> uglier before it gets better ;-)
>
> arch/x86/mm/fault_32.c | 38 +++++++++++++++++++++++++++++---------
> arch/x86/mm/fault_64.c | 23 ++++++++++++++++++-----
> 2 files changed, 47 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
> index b1893eb..051a4ec 100644
> --- a/arch/x86/mm/fault_32.c
> +++ b/arch/x86/mm/fault_32.c
> @@ -375,19 +375,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
> struct mm_struct *mm;
> struct vm_area_struct *vma;
> unsigned long address;
> - int write, si_code;
> - int fault;
> + int write, fault;
> +#ifdef CONFIG_x86_64
There is no such thing as CONFIG_x86_64 .
Do you seriously think code is getting better and more readable because
of this liberal #ifdef sprinkling in every possible direction?
> + unsigned long flags;
> +#endif
One.
> + int si_code;
>
> /*
> * We can fault from pretty much anywhere, with unknown IRQ state.
> */
> trace_hardirqs_fixup();
>
> - /* get the address */
> - address = read_cr2();
> -
> tsk = current;
> + mm = tsk->mm;
>
> +#ifdef CONFIG_x86_64
> + prefetchw(&mm->mmap_sem);
> +#endif
Two.
> +
> + /* get the address */
> + address = read_cr2();
> si_code = SEGV_MAPERR;
>
> /*
> @@ -403,9 +410,24 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
> * (error_code & 4) == 0, and that the fault was not a
> * protection error (error_code & 9) == 0.
> */
> +#ifdef CONFIG_X86_32
> if (unlikely(address >= TASK_SIZE)) {
> - if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0)
> + if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
> + vmalloc_fault(address) >= 0)
> return;
> +#else
> + if (unlikely(address >= TASK_SIZE64)) {
> + /*
> + * Don't check for the module range here: its PML4
> + * is always initialized because it's shared with the main
> + * kernel text. Only vmalloc may need PML4 syncups.
> + */
> + if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
> + ((address >= VMALLOC_START && address < VMALLOC_END))) {
> + if (vmalloc_fault(address) >= 0)
> + return;
> + }
> +#endif
Three.
> if (notify_page_fault(regs))
> return;
> /*
> @@ -423,8 +445,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
> if (regs->flags & (X86_EFLAGS_IF|VM_MASK))
> local_irq_enable();
>
> - mm = tsk->mm;
> -
> /*
> * If we're in an interrupt, have no user context or are running in an
> * atomic region then we must not take the fault.
> @@ -495,7 +515,7 @@ good_area:
> goto bad_area;
> }
>
> - survive:
> +survive:
> /*
> * If for any reason at all we couldn't handle the fault,
> * make sure we exit gracefully rather than endlessly redo
> diff --git a/arch/x86/mm/fault_64.c b/arch/x86/mm/fault_64.c
> index 357a3e0..97b92b6 100644
> --- a/arch/x86/mm/fault_64.c
> +++ b/arch/x86/mm/fault_64.c
> @@ -426,7 +426,9 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
> struct vm_area_struct *vma;
> unsigned long address;
> int write, fault;
> +#ifdef CONFIG_x86_64
> unsigned long flags;
> +#endif
Four.
> int si_code;
>
> /*
> @@ -436,14 +438,15 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
>
> tsk = current;
> mm = tsk->mm;
> +
> +#ifdef CONFIG_x86_64
> prefetchw(&mm->mmap_sem);
> +#endif
Five.
>
> /* get the address */
> address = read_cr2();
> -
> si_code = SEGV_MAPERR;
>
> -
> /*
> * We fault-in kernel-space virtual memory on-demand. The
> * 'reference' page table is init_mm.pgd.
> @@ -457,6 +460,12 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
> * (error_code & 4) == 0, and that the fault was not a
> * protection error (error_code & 9) == 0.
> */
> +#ifdef CONFIG_X86_32
> + if (unlikely(address >= TASK_SIZE)) {
> + if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
> + vmalloc_fault(address) >= 0)
> + return;
> +#else
> if (unlikely(address >= TASK_SIZE64)) {
> /*
> * Don't check for the module range here: its PML4
> @@ -468,6 +477,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
> if (vmalloc_fault(address) >= 0)
> return;
> }
> +#endif
Six.
> if (notify_page_fault(regs))
> return;
> /*
> @@ -500,7 +510,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
> if (user_mode_vm(regs))
> error_code |= PF_USER;
>
> - again:
> +again:
> /* When running in the kernel we expect faults to occur only to
> * addresses in user space. All other faults represent errors in the
> * kernel and should generate an OOPS. Unfortunately, in the case of an
> @@ -531,8 +541,11 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
> if (!(vma->vm_flags & VM_GROWSDOWN))
> goto bad_area;
> if (error_code & PF_USER) {
> - /* Allow userspace just enough access below the stack pointer
> - * to let the 'enter' instruction work.
> + /*
> + * Accessing the stack below %sp is always a bug.
> + * The large cushion allows instructions like enter
> + * and pusha to work. ("enter $65535,$31" pushes
> + * 32 pointers and then decrements %sp by 65535.)
> */
> if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
> goto bad_area;
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] x86: fault_{32|64}.c unify do_page_fault
2008-01-03 1:45 ` Alexey Dobriyan
@ 2008-01-03 2:09 ` Harvey Harrison
2008-01-03 2:30 ` H. Peter Anvin
2008-01-03 3:51 ` H. Peter Anvin
0 siblings, 2 replies; 5+ messages in thread
From: Harvey Harrison @ 2008-01-03 2:09 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: Ingo Molnar, H. Peter Anvin, LKML, Thomas Gleixner
On Thu, 2008-01-03 at 04:45 +0300, Alexey Dobriyan wrote:
> On Wed, Jan 02, 2008 at 05:01:02PM -0800, Harvey Harrison wrote:
> > Begin to unify do_page_fault(), easy code movement first.
> >
> > Signed-off-by: Harvey Harrison <harvey.harrison@gmail.com>
> > ---
> > Ingo, similar to the kprobes unification patches I did, it gets a bit
> > uglier before it gets better ;-)
> >
> > arch/x86/mm/fault_32.c | 38 +++++++++++++++++++++++++++++---------
> > arch/x86/mm/fault_64.c | 23 ++++++++++++++++++-----
> > 2 files changed, 47 insertions(+), 14 deletions(-)
> >
> > diff --git a/arch/x86/mm/fault_32.c b/arch/x86/mm/fault_32.c
> > index b1893eb..051a4ec 100644
> > --- a/arch/x86/mm/fault_32.c
> > +++ b/arch/x86/mm/fault_32.c
> > @@ -375,19 +375,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
> > struct mm_struct *mm;
> > struct vm_area_struct *vma;
> > unsigned long address;
> > - int write, si_code;
> > - int fault;
> > + int write, fault;
> > +#ifdef CONFIG_x86_64
>
> There is no such thing as CONFIG_x86_64 .
My apologies, testing/compiling on X86_32 here.
>
> Do you seriously think code is getting better and more readable because
> of this liberal #ifdef sprinkling in every possible direction?
>
Well, this of course is not the end of the road, but it makes it
obvious where the differences between 32/64 bit lie and allows
further cleanups to unify these areas over time. This is meant as
a no functionality change path at first.....and it does point out that
for the most part the files are _very_ similar to each other.
So my plan for now was to move forward with no functional changes and
essentially ifdef or reorder code until we get to identical fault_32/64.c
which then gets moved to a single fault.c
Then the cleanups happen in one place in one file and it should be easy
to audit the series at the end. But for further patches I'll wait until
the series is further along and tested before submitting. This was how
the kprobes unification went and I think it works fairly well this way.
But, if the end result is too ugly, it won't bother me at all if it
doesn't go in.
Harvey
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] x86: fault_{32|64}.c unify do_page_fault
2008-01-03 2:09 ` Harvey Harrison
@ 2008-01-03 2:30 ` H. Peter Anvin
2008-01-03 3:51 ` H. Peter Anvin
1 sibling, 0 replies; 5+ messages in thread
From: H. Peter Anvin @ 2008-01-03 2:30 UTC (permalink / raw)
To: Harvey Harrison; +Cc: Alexey Dobriyan, Ingo Molnar, LKML, Thomas Gleixner
Harvey Harrison wrote:
>> There is no such thing as CONFIG_x86_64 .
>
> My apologies, testing/compiling on X86_32 here.
Please also compile for x86-64, even if you can't easily test it
(although you can always boot under qemu, even if it's slow.)
Unification patches especially.
-hpa
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] x86: fault_{32|64}.c unify do_page_fault
2008-01-03 2:09 ` Harvey Harrison
2008-01-03 2:30 ` H. Peter Anvin
@ 2008-01-03 3:51 ` H. Peter Anvin
1 sibling, 0 replies; 5+ messages in thread
From: H. Peter Anvin @ 2008-01-03 3:51 UTC (permalink / raw)
To: Harvey Harrison; +Cc: Alexey Dobriyan, Ingo Molnar, LKML, Thomas Gleixner
Harvey Harrison wrote:
>
> My apologies, testing/compiling on X86_32 here.
>
>> Do you seriously think code is getting better and more readable because
>> of this liberal #ifdef sprinkling in every possible direction?
>>
>
> Well, this of course is not the end of the road, but it makes it
> obvious where the differences between 32/64 bit lie and allows
> further cleanups to unify these areas over time. This is meant as
> a no functionality change path at first.....and it does point out that
> for the most part the files are _very_ similar to each other.
>
> So my plan for now was to move forward with no functional changes and
> essentially ifdef or reorder code until we get to identical fault_32/64.c
> which then gets moved to a single fault.c
>
> Then the cleanups happen in one place in one file and it should be easy
> to audit the series at the end. But for further patches I'll wait until
> the series is further along and tested before submitting. This was how
> the kprobes unification went and I think it works fairly well this way.
>
One more thing... for code motion/unification patches it's a good thing
to verify that the i386 and x86-64 binaries are both unchanged.
-hpa
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2008-01-03 3:51 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-01-03 1:01 [PATCH] x86: fault_{32|64}.c unify do_page_fault Harvey Harrison
2008-01-03 1:45 ` Alexey Dobriyan
2008-01-03 2:09 ` Harvey Harrison
2008-01-03 2:30 ` H. Peter Anvin
2008-01-03 3:51 ` H. Peter Anvin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox