* 2.4.9aa3
@ 2001-08-19 6:07 Andrea Arcangeli
2001-09-03 15:24 ` expand_stack fix [was Re: 2.4.9aa3] Andrea Arcangeli
0 siblings, 1 reply; 9+ messages in thread
From: Andrea Arcangeli @ 2001-08-19 6:07 UTC (permalink / raw)
To: linux-kernel
Only in 2.4.9aa2: 00_silent-stack-overflow-5
Only in 2.4.9aa3: 00_silent-stack-overflow-6
Updated to run expand_stack always with the mm write semaphore acquired
to fix the race conditions. Upgrading the semaphore during
map_user_kiobuf was quite painful so I just disallowed to do direct I/O
on a growsdown VMA (you can still do that as far as it doesn't need to
be live extended on the fly).
Only in 2.4.9aa3: 00_vm_raend-race-1
Sanitize the vm_raend field before trusting it, such field is racy.
Only in 2.4.9aa2: 10_expand-stack-smp-1
Dropped (it wasn't needed).
Only in 2.4.9aa2: 70_mmap-rb-4
Only in 2.4.9aa3: 70_mmap-rb-5
Backed out a few unnecessary minor changes.
Andrea
^ permalink raw reply [flat|nested] 9+ messages in thread
* expand_stack fix [was Re: 2.4.9aa3]
2001-08-19 6:07 2.4.9aa3 Andrea Arcangeli
@ 2001-09-03 15:24 ` Andrea Arcangeli
2001-09-07 18:47 ` Linus Torvalds
0 siblings, 1 reply; 9+ messages in thread
From: Andrea Arcangeli @ 2001-09-03 15:24 UTC (permalink / raw)
To: Linus Torvalds; +Cc: linux-kernel
On Sun, Aug 19, 2001 at 08:07:42AM +0200, Andrea Arcangeli wrote:
> Only in 2.4.9aa2: 00_silent-stack-overflow-5
> Only in 2.4.9aa3: 00_silent-stack-overflow-6
>
> Updated to run expand_stack always with the mm write semaphore acquired
> to fix the race conditions. Upgrading the semaphore during
> map_user_kiobuf was quite painful so I just disallowed to do direct I/O
> on a growsdown VMA (you can still do that as far as it doesn't need to
> be live extended on the fly).
Linus please include the attached patch to the next kernel, expand_stack
is totally broken at the moment, we cannot mess with the mm vma layout
if we don't hold the mmap_sem in write mode. I fixed only alpha and x86,
all other ports will have to be fixed the same way (in the meantime they
work but they're racy even on UP).
I considered implementing a read->write semaphore upgrade primitive but
it cannot be reliable (it needs a fail path) and so it would be only an
optimization when we're the only reader and we should implement anyways
the ugly slow path, so for now this version of the fix doesn't depend on
any new rwsem primitive (and expand_stack isn't an extremely fast path
so it looks an acceptable solution).
The patch also introduces a forced gap (sysctl configurable, 1 page by
default) between a growsdown vma and the previous vma to try not to
silently overflow the heap with a stack overflow; of course if a huge
stack allocation happens (larger than a page) the heuristic will fail,
but the sysctl can be used to increase the probability of a match. This is
a feature that helps userspace programming. The reason this feature is
included in the patch is that the two patches were very controversial,
if you dislike the "gap" feature I can do the boring work of extracting
the strict expand_stack fix but I hope you won't ask me to do so ;).
diff -urN 2.4.9/arch/alpha/mm/fault.c expand_stack/arch/alpha/mm/fault.c
--- 2.4.9/arch/alpha/mm/fault.c Sun Apr 1 01:17:07 2001
+++ expand_stack/arch/alpha/mm/fault.c Sun Aug 19 06:46:52 2001
@@ -113,16 +113,30 @@
goto vmalloc_fault;
#endif
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (expand_stack(vma, address))
- goto bad_area;
+ for (;;) {
+ int not_expanded;
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, address);
+ if (!vma)
+ goto bad_area;
+ if (vma->vm_start <= address)
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+ up_read(&mm->mmap_sem);
+ down_write(&mm->mmap_sem);
+ vma = find_vma(mm, address);
+ if (!vma || vma->vm_start <= address || !(vma->vm_flags & VM_GROWSDOWN)) {
+ up_write(&mm->mmap_sem);
+ continue;
+ }
+ not_expanded = expand_stack(vma, address, NULL);
+ up_write(&mm->mmap_sem);
+ if (not_expanded)
+ goto bad_area_nosem;
+ }
+
/*
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
@@ -161,6 +175,7 @@
*/
bad_area:
up_read(&mm->mmap_sem);
+bad_area_nosem:
if (user_mode(regs)) {
force_sig(SIGSEGV, current);
diff -urN 2.4.9/arch/arm/mm/fault-common.c expand_stack/arch/arm/mm/fault-common.c
--- 2.4.9/arch/arm/mm/fault-common.c Thu Aug 16 22:03:23 2001
+++ expand_stack/arch/arm/mm/fault-common.c Sun Aug 19 06:36:04 2001
@@ -229,7 +229,7 @@
goto survive;
check_stack:
- if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
+ if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr, NULL))
goto good_area;
out:
return fault;
diff -urN 2.4.9/arch/cris/mm/fault.c expand_stack/arch/cris/mm/fault.c
--- 2.4.9/arch/cris/mm/fault.c Sat Aug 11 08:03:54 2001
+++ expand_stack/arch/cris/mm/fault.c Sun Aug 19 06:36:04 2001
@@ -284,7 +284,7 @@
if (address + PAGE_SIZE < rdusp())
goto bad_area;
}
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
/*
diff -urN 2.4.9/arch/i386/mm/fault.c expand_stack/arch/i386/mm/fault.c
--- 2.4.9/arch/i386/mm/fault.c Sat May 26 04:03:35 2001
+++ expand_stack/arch/i386/mm/fault.c Sun Aug 19 06:49:37 2001
@@ -30,7 +30,7 @@
*/
int __verify_write(const void * addr, unsigned long size)
{
- struct vm_area_struct * vma;
+ struct vm_area_struct * vma, * prev_vma;
unsigned long start = (unsigned long) addr;
if (!size)
@@ -70,7 +70,8 @@
check_stack:
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, start) == 0)
+ find_vma_prev(current->mm, start, &prev_vma);
+ if (expand_stack(vma, start, prev_vma) == 0)
goto good_area;
bad_area:
@@ -107,7 +108,7 @@
{
struct task_struct *tsk;
struct mm_struct *mm;
- struct vm_area_struct * vma;
+ struct vm_area_struct * vma, * prev_vma;
unsigned long address;
unsigned long page;
unsigned long fixup;
@@ -149,27 +150,41 @@
if (in_interrupt() || !mm)
goto no_context;
- down_read(&mm->mmap_sem);
+ for (;;) {
+ int not_expanded;
- vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (error_code & 4) {
- /*
- * accessing the stack below %esp is always a bug.
- * The "+ 32" is there due to some instructions (like
- * pusha) doing post-decrement on the stack and that
- * doesn't show up until later..
- */
- if (address + 32 < regs->esp)
+ down_read(&mm->mmap_sem);
+
+ vma = find_vma(mm, address);
+ if (!vma)
+ goto bad_area;
+ if (vma->vm_start <= address)
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
+ if (error_code & 4) {
+ /*
+ * accessing the stack below %esp is always a bug.
+ * The "+ 32" is there due to some instructions (like
+ * pusha) doing post-decrement on the stack and that
+ * doesn't show up until later..
+ */
+ if (address + 32 < regs->esp)
+ goto bad_area;
+ }
+
+ up_read(&mm->mmap_sem);
+ down_write(&mm->mmap_sem);
+ vma = find_vma_prev(mm, address, &prev_vma);
+ if (!vma || vma->vm_start <= address || !(vma->vm_flags & VM_GROWSDOWN)) {
+ up_write(&mm->mmap_sem);
+ continue;
+ }
+ not_expanded = expand_stack(vma, address, prev_vma);
+ up_write(&mm->mmap_sem);
+ if (not_expanded)
+ goto bad_area_nosem;
}
- if (expand_stack(vma, address))
- goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
* we can handle it..
@@ -231,6 +246,7 @@
*/
bad_area:
up_read(&mm->mmap_sem);
+bad_area_nosem:
/* User mode accesses just cause a SIGSEGV */
if (error_code & 4) {
diff -urN 2.4.9/arch/ia64/mm/fault.c expand_stack/arch/ia64/mm/fault.c
--- 2.4.9/arch/ia64/mm/fault.c Tue May 1 19:35:18 2001
+++ expand_stack/arch/ia64/mm/fault.c Sun Aug 19 06:36:04 2001
@@ -122,7 +122,7 @@
if (rgn_index(address) != rgn_index(vma->vm_start)
|| rgn_offset(address) >= RGN_MAP_LIMIT)
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
} else {
vma = prev_vma;
diff -urN 2.4.9/arch/m68k/mm/fault.c expand_stack/arch/m68k/mm/fault.c
--- 2.4.9/arch/m68k/mm/fault.c Sun Apr 1 01:17:08 2001
+++ expand_stack/arch/m68k/mm/fault.c Sun Aug 19 06:36:04 2001
@@ -120,7 +120,7 @@
if (address + 256 < rdusp())
goto map_err;
}
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto map_err;
/*
diff -urN 2.4.9/arch/mips/mm/fault.c expand_stack/arch/mips/mm/fault.c
--- 2.4.9/arch/mips/mm/fault.c Sat Jul 21 00:04:05 2001
+++ expand_stack/arch/mips/mm/fault.c Sun Aug 19 06:36:04 2001
@@ -80,7 +80,7 @@
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
diff -urN 2.4.9/arch/mips64/mm/fault.c expand_stack/arch/mips64/mm/fault.c
--- 2.4.9/arch/mips64/mm/fault.c Sat Jul 21 00:04:07 2001
+++ expand_stack/arch/mips64/mm/fault.c Sun Aug 19 06:36:04 2001
@@ -134,7 +134,7 @@
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
diff -urN 2.4.9/arch/ppc/mm/fault.c expand_stack/arch/ppc/mm/fault.c
--- 2.4.9/arch/ppc/mm/fault.c Wed Jul 4 04:03:45 2001
+++ expand_stack/arch/ppc/mm/fault.c Sun Aug 19 06:36:04 2001
@@ -64,7 +64,7 @@
void do_page_fault(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
- struct vm_area_struct * vma;
+ struct vm_area_struct * vma, * prev_vma;
struct mm_struct *mm = current->mm;
siginfo_t info;
int code = SEGV_MAPERR;
@@ -104,14 +104,14 @@
return;
}
down_read(&mm->mmap_sem);
- vma = find_vma(mm, address);
+ vma = find_vma_prev(mm, address, &prev_vma);
if (!vma)
goto bad_area;
if (vma->vm_start <= address)
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, prev_vma))
goto bad_area;
good_area:
diff -urN 2.4.9/arch/s390/mm/fault.c expand_stack/arch/s390/mm/fault.c
--- 2.4.9/arch/s390/mm/fault.c Sat Aug 11 08:03:59 2001
+++ expand_stack/arch/s390/mm/fault.c Sun Aug 19 06:36:04 2001
@@ -122,7 +122,7 @@
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
diff -urN 2.4.9/arch/s390x/mm/fault.c expand_stack/arch/s390x/mm/fault.c
--- 2.4.9/arch/s390x/mm/fault.c Sat Aug 11 08:04:00 2001
+++ expand_stack/arch/s390x/mm/fault.c Sun Aug 19 06:36:04 2001
@@ -152,7 +152,7 @@
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
diff -urN 2.4.9/arch/sh/mm/fault.c expand_stack/arch/sh/mm/fault.c
--- 2.4.9/arch/sh/mm/fault.c Wed Jul 4 04:03:45 2001
+++ expand_stack/arch/sh/mm/fault.c Sun Aug 19 06:36:04 2001
@@ -78,7 +78,7 @@
check_stack:
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, start) == 0)
+ if (expand_stack(vma, start, NULL) == 0)
goto good_area;
bad_area:
@@ -118,7 +118,7 @@
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
diff -urN 2.4.9/arch/sparc/mm/fault.c expand_stack/arch/sparc/mm/fault.c
--- 2.4.9/arch/sparc/mm/fault.c Sat Aug 11 08:04:01 2001
+++ expand_stack/arch/sparc/mm/fault.c Sun Aug 19 06:36:04 2001
@@ -238,7 +238,7 @@
goto good_area;
if(!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if(expand_stack(vma, address))
+ if(expand_stack(vma, address, NULL))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
@@ -485,7 +485,7 @@
goto good_area;
if(!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if(expand_stack(vma, address))
+ if(expand_stack(vma, address, NULL))
goto bad_area;
good_area:
info.si_code = SEGV_ACCERR;
diff -urN 2.4.9/arch/sparc64/mm/fault.c expand_stack/arch/sparc64/mm/fault.c
--- 2.4.9/arch/sparc64/mm/fault.c Thu Aug 16 22:03:26 2001
+++ expand_stack/arch/sparc64/mm/fault.c Sun Aug 19 06:36:04 2001
@@ -293,7 +293,7 @@
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
diff -urN 2.4.9/include/linux/mm.h expand_stack/include/linux/mm.h
--- 2.4.9/include/linux/mm.h Fri Aug 17 05:02:27 2001
+++ expand_stack/include/linux/mm.h Sun Aug 19 06:53:07 2001
@@ -553,13 +553,19 @@
#define GFP_DMA __GFP_DMA
+extern int heap_stack_gap;
+
/* vma is the first one with address < vma->vm_end,
* and even address < vma->vm_start. Have to extend vma. */
-static inline int expand_stack(struct vm_area_struct * vma, unsigned long address)
+/* NOTE: expand_stack requires the mm semaphore in write mode */
+static inline int expand_stack(struct vm_area_struct * vma, unsigned long address,
+ struct vm_area_struct * prev_vma)
{
unsigned long grow;
address &= PAGE_MASK;
+ if (prev_vma && prev_vma->vm_end + (heap_stack_gap << PAGE_SHIFT) > address)
+ return -ENOMEM;
grow = (vma->vm_start - address) >> PAGE_SHIFT;
if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur ||
((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur)
diff -urN 2.4.9/include/linux/sysctl.h expand_stack/include/linux/sysctl.h
--- 2.4.9/include/linux/sysctl.h Fri Aug 17 05:02:42 2001
+++ expand_stack/include/linux/sysctl.h Sun Aug 19 06:36:04 2001
@@ -134,7 +134,8 @@
VM_PAGECACHE=7, /* struct: Set cache memory thresholds */
VM_PAGERDAEMON=8, /* struct: Control kswapd behaviour */
VM_PGT_CACHE=9, /* struct: Set page table cache parameters */
- VM_PAGE_CLUSTER=10 /* int: set number of pages to swap together */
+ VM_PAGE_CLUSTER=10, /* int: set number of pages to swap together */
+ VM_HEAP_STACK_GAP=11, /* int: page gap between heap and stack */
};
diff -urN 2.4.9/kernel/ptrace.c expand_stack/kernel/ptrace.c
--- 2.4.9/kernel/ptrace.c Sat Jul 21 00:04:34 2001
+++ expand_stack/kernel/ptrace.c Sun Aug 19 06:50:41 2001
@@ -185,13 +185,13 @@
if (!mm)
return 0;
- down_read(&mm->mmap_sem);
+ down_write(&mm->mmap_sem);
vma = find_extend_vma(mm, addr);
copied = 0;
if (vma)
copied = access_mm(mm, vma, addr, buf, len, write);
- up_read(&mm->mmap_sem);
+ up_write(&mm->mmap_sem);
mmput(mm);
return copied;
}
diff -urN 2.4.9/kernel/sysctl.c expand_stack/kernel/sysctl.c
--- 2.4.9/kernel/sysctl.c Sat Aug 11 08:04:32 2001
+++ expand_stack/kernel/sysctl.c Sun Aug 19 06:36:04 2001
@@ -270,6 +270,8 @@
&pgt_cache_water, 2*sizeof(int), 0644, NULL, &proc_dointvec},
{VM_PAGE_CLUSTER, "page-cluster",
&page_cluster, sizeof(int), 0644, NULL, &proc_dointvec},
+ {VM_HEAP_STACK_GAP, "heap-stack-gap",
+ &heap_stack_gap, sizeof(int), 0644, NULL, &proc_dointvec},
{0}
};
diff -urN 2.4.9/mm/memory.c expand_stack/mm/memory.c
--- 2.4.9/mm/memory.c Thu Aug 16 22:03:41 2001
+++ expand_stack/mm/memory.c Sun Aug 19 06:50:58 2001
@@ -442,7 +442,7 @@
unsigned long ptr, end;
int err;
struct mm_struct * mm;
- struct vm_area_struct * vma = 0;
+ struct vm_area_struct * vma;
struct page * map;
int i;
int datain = (rw == READ);
@@ -468,20 +468,25 @@
iobuf->length = len;
i = 0;
+ vma = NULL;
/*
* First of all, try to fault in all of the necessary pages
*/
while (ptr < end) {
if (!vma || ptr >= vma->vm_end) {
- vma = find_vma(current->mm, ptr);
+ vma = find_vma(mm, ptr);
if (!vma)
goto out_unlock;
if (vma->vm_start > ptr) {
+#if 0 /* upgrading the lock is an horrible mess here so just disable the feature for now */
if (!(vma->vm_flags & VM_GROWSDOWN))
goto out_unlock;
- if (expand_stack(vma, ptr))
+ if (expand_stack(vma, ptr, prev_vma))
goto out_unlock;
+#else
+ goto out_unlock;
+#endif
}
if (((datain) && (!(vma->vm_flags & VM_WRITE))) ||
(!(vma->vm_flags & VM_READ))) {
diff -urN 2.4.9/mm/mmap.c expand_stack/mm/mmap.c
--- 2.4.9/mm/mmap.c Sat May 26 04:03:50 2001
+++ expand_stack/mm/mmap.c Sun Aug 19 06:36:04 2001
@@ -38,6 +38,7 @@
};
int sysctl_overcommit_memory;
+int heap_stack_gap = 1;
/* Check that a process has enough memory to allocate a
* new virtual mapping.
@@ -411,9 +412,15 @@
for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
+ unsigned long __heap_stack_gap;
if (TASK_SIZE - len < addr)
return -ENOMEM;
- if (!vma || addr + len <= vma->vm_start)
+ if (!vma)
+ return addr;
+ __heap_stack_gap = 0;
+ if (vma->vm_flags & VM_GROWSDOWN)
+ __heap_stack_gap = heap_stack_gap << PAGE_SHIFT;
+ if (addr + len + __heap_stack_gap <= vma->vm_start)
return addr;
addr = vma->vm_end;
}
@@ -532,11 +539,11 @@
struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr)
{
- struct vm_area_struct * vma;
+ struct vm_area_struct * vma, * prev_vma;
unsigned long start;
addr &= PAGE_MASK;
- vma = find_vma(mm,addr);
+ vma = find_vma_prev(mm,addr, &prev_vma);
if (!vma)
return NULL;
if (vma->vm_start <= addr)
@@ -544,7 +551,7 @@
if (!(vma->vm_flags & VM_GROWSDOWN))
return NULL;
start = vma->vm_start;
- if (expand_stack(vma, addr))
+ if (expand_stack(vma, addr, prev_vma))
return NULL;
if (vma->vm_flags & VM_LOCKED) {
make_pages_present(addr, start);
Andrea
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: expand_stack fix [was Re: 2.4.9aa3]
2001-09-03 15:24 ` expand_stack fix [was Re: 2.4.9aa3] Andrea Arcangeli
@ 2001-09-07 18:47 ` Linus Torvalds
2001-09-08 16:04 ` Andrea Arcangeli
0 siblings, 1 reply; 9+ messages in thread
From: Linus Torvalds @ 2001-09-07 18:47 UTC (permalink / raw)
To: Andrea Arcangeli; +Cc: linux-kernel
On Mon, 3 Sep 2001, Andrea Arcangeli wrote:
>
> Linus please include the attached patch to the next kernel, expand_stack
> is totally broken at the moment, we cannot mess with the mm vma layout
> if we don't hold the mmap_sem in write mode.
I disagree with the diagnosis..
expand_stack() has _never_ messed with the vma layout, and never should.
As such, from a vma list integrity standpoint it is fine.
Do we mess with the contents? Yes. But I'd much rather see a much more
minimal approach to the problem, on the order of:
- make sure we only accept GROWSDOWN for anonymous areas (which don't
care about the offset)
- make the vm_start update atomic (possibly by just getting the pagetable
spinlock).
> I considered implementing a read->write semaphore upgrade primitive but
> it cannot be reliable
There is no such thing. Never has been. It's a fundamentally impossible
operation. We may, at some point, decide to have a "read_for_write()" and
then "upgrade()" operations on the semaphore, but those inherently imply
some level of single-threading (ie only one read-for-writer accepted at
one time, with many pure readers), which makes it useless for this
particular case anyway.
However, having a finer-granularity spinlock _inside_ the semaphore (see
above suggestion) is a perfectly valid approach.
Linus
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: expand_stack fix [was Re: 2.4.9aa3]
2001-09-07 18:47 ` Linus Torvalds
@ 2001-09-08 16:04 ` Andrea Arcangeli
2001-09-08 16:28 ` Linus Torvalds
2001-09-09 4:23 ` Jeff Dike
0 siblings, 2 replies; 9+ messages in thread
From: Andrea Arcangeli @ 2001-09-08 16:04 UTC (permalink / raw)
To: Linus Torvalds; +Cc: linux-kernel, jdike
On Fri, Sep 07, 2001 at 11:47:09AM -0700, Linus Torvalds wrote:
>
> On Mon, 3 Sep 2001, Andrea Arcangeli wrote:
> >
> > Linus please include the attached patch to the next kernel, expand_stack
> > is totally broken at the moment, we cannot mess with the mm vma layout
> > if we don't hold the mmap_sem in write mode.
>
> I disagree with the diagnosis..
>
> expand_stack() has _never_ messed with the vma layout, and never should.
> As such, from a vma list integrity standpoint it is fine.
>
> Do we mess with the contents? Yes. But I'd much rather see a much more
> minimal approach to the problem, on the order of:
> - make sure we only accept GROWSDOWN for anonymous areas (which don't
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> care about the offset)
> - make the vm_start update atomic (possibly by just getting the pagetable
> spinlock).
We just take the pagetable spinlock there, the race is all about the
pgoff.
In short you agree that the current locking is broken but you propose to
limit the usability of GROWSDOWN and GROWSUP solely to the anonymous
vmas instead of fixing the pgoff race with proper locking as I did.
My fix for the race doesn't drop the usability of GROWSDOWN that could
otherwise break userspace programs. I guess at least uml uses growsdown
vma file backed. Jeff?
Assuming it's acceptable to break GROWSDOWN on file backed vmas as you
suggested, the new locking rules with your approach would be:
1) pgoff is garbage as usual for any anon vma so don't even try to
mess with it in expand_stack
2) with only the read semaphore acquired the vm_start/vm_end of the
vmas can expand under us but the vma tree not
3) with only the read semaphore acquired to get a consistent
vm_start/vm_end of all vmas in the tree (like we need to in
expand_stack) also the page_table_lock of the mm must be
acquired
4) with the write lock vm_start/vm_end are consistent always as
well as tree so no change here
> > I considered implementing a read->write semaphore upgrade primitive but
> > it cannot be reliable
>
> There is no such thing. Never has been. It's a fundamentally impossible
> operation. We may, at some point, decide to have a "read_for_write()" and
Yes, of course with "it cannot be reliable" I meant it cannot work out
always, it would be an optimization for the common case:
spin_lock(&rwsem->lock)
if (only one reader)
gain write privilegies
err = success
else
err = failure
spin_unlock(&rwsem->lock)
return err
it would still require the fail path in case of the failure (multiple
readers potentially all trying to upgrade the lock) so I ignored the
optimization (expand_stack isn't a very fast path anyways).
> However, having a finer-granularity spinlock _inside_ the semaphore (see
> above suggestion) is a perfectly valid approach.
if you are 100% sure it's acceptable to break the kernel API for the
GROWSDOWN file backed vmas (which I don't think it's the case) I can
switch to your suggested fix (otherwise I prefer to keep upgrading the
semaphore in expand_stack rather to have to spinlocking at every nopage
private/shared page fault).
Andrea
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: expand_stack fix [was Re: 2.4.9aa3]
2001-09-08 16:04 ` Andrea Arcangeli
@ 2001-09-08 16:28 ` Linus Torvalds
2001-09-09 4:23 ` Jeff Dike
1 sibling, 0 replies; 9+ messages in thread
From: Linus Torvalds @ 2001-09-08 16:28 UTC (permalink / raw)
To: Andrea Arcangeli; +Cc: linux-kernel, jdike
On Sat, 8 Sep 2001, Andrea Arcangeli wrote:
>
> In short you agree that the current locking is broken but you propose to
> limit the usability of GROWSDOWN and GROWSUP solely to the anonymous
> vmas instead of fixing the pgoff race with proper locking as I did.
Yes. Note that GROWSUP has never been implemented, and doesn't work. And
the only normal usage for GROWSDOWN is the stack(s), which through being
anonymous never cares about pgoff anyway.
Note that in theory, GROWSUP would be supported on platforms that have a
stack growing upwards (HP-PA, I think), but as far as I know those
architectures just use a fixed mapping instead.
> My fix for the race doesn't drop the usability of GROWSDOWN that could
> otherwise break userspace programs. I guess at least uml uses growsdown
> vma file backed. Jeff?
It does?
GROWSDOWN doesn't actually tend to be all that useful - even for the stack
we could easily just use a fixed size segment these days. I don't think
other OS's do anything like it, and the only real reason for having
GROWSDOWN is actually a historical mistake - Linux didn't really use to
honour or care about things like stack size limits, which meant that there
was no good default "size" - so GROWSDOWN was a clever way to try to avoid
the problem.
"Clever", of course, is all a matter of context - it made a lot more sense
back when the VM layer didn't have to worry about concurrency etc at all.
In short, I would not be entirely against just getting rid of GROWSDOWN/UP
altogether, with a fixed (well, dynamic based on rlimit) mapping for the
stack. That would also make it a lot more trivial to do things like the
guard page etc - without impacting any regular code.
> it would still require the fail path in case of the failure (multiple
> readers potentially all trying to upgrade the lock) so I ignored the
> optimization (expand_stack isn't a very fast path anyways).
Note that we can't even just fall back on the "drop read-lock and
re-acquire a real write-lock", because the page fault might be happening
while the faulter already holds a read lock (core dumping does things like
this, other places might too). That means that your error case doesn't
really have any way to fix things up, so you'd have to actually fail the
page-in - which in turn implies that you'd get pretty much random failures
depending on subtle past history.. Not good.
> if you are 100% sure it's acceptable to break the kernel API for the
> GROWSDOWN file backed vmas (which I don't think it's the case) I can
> switch to your suggested fix
I'd be _very_ surprised if any real application uses growsdown with
backing store. Anybody?
Linus
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: expand_stack fix [was Re: 2.4.9aa3]
2001-09-09 4:23 ` Jeff Dike
@ 2001-09-09 3:50 ` Andrea Arcangeli
2001-09-09 5:42 ` Jeff Dike
2001-09-11 11:24 ` Andrea Arcangeli
0 siblings, 2 replies; 9+ messages in thread
From: Andrea Arcangeli @ 2001-09-09 3:50 UTC (permalink / raw)
To: Jeff Dike; +Cc: Linus Torvalds, linux-kernel
On Sat, Sep 08, 2001 at 11:23:38PM -0500, Jeff Dike wrote:
> andrea@suse.de said:
> > My fix for the race doesn't drop the usability of GROWSDOWN that could
> > otherwise break userspace programs. I guess at least uml uses
> > growsdown vma file backed. Jeff?
>
> No. In neither the host kernel or UML is there a vma that's file backed and
> growsdown.
>
> UML process stacks are marked growsdown in UML and are file backed on the host,
> but that's not the same thing.
ok, so I guess you're doing the growsdown by hand in the uml sigsegv
handler.
So it's probably fine to allow GROWSDOWN only on anon vmas per Linus's
suggestion. I can attempt to change the race fix that way.
However about last Linus's suggestion it's not obvious to me that
dropping GROWSDOWN/UP completely and forcing a fixed virtual size of the
stack [modulo rlimit of course] is a good idea, because:
1) on 32bit platforms having big vma for the stack means reducing the
space for the dynamic mappings
2) I love not to have a virtual stack limit for software making use of
aggressive recursion.
The gap logic is very simple too.
Andrea
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: expand_stack fix [was Re: 2.4.9aa3]
2001-09-08 16:04 ` Andrea Arcangeli
2001-09-08 16:28 ` Linus Torvalds
@ 2001-09-09 4:23 ` Jeff Dike
2001-09-09 3:50 ` Andrea Arcangeli
1 sibling, 1 reply; 9+ messages in thread
From: Jeff Dike @ 2001-09-09 4:23 UTC (permalink / raw)
To: Andrea Arcangeli, Linus Torvalds; +Cc: linux-kernel
andrea@suse.de said:
> My fix for the race doesn't drop the usability of GROWSDOWN that could
> otherwise break userspace programs. I guess at least uml uses
> growsdown vma file backed. Jeff?
No. In neither the host kernel or UML is there a vma that's file backed and
growsdown.
UML process stacks are marked growsdown in UML and are file backed on the host,
but that's not the same thing.
Jeff
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: expand_stack fix [was Re: 2.4.9aa3]
2001-09-09 3:50 ` Andrea Arcangeli
@ 2001-09-09 5:42 ` Jeff Dike
2001-09-11 11:24 ` Andrea Arcangeli
1 sibling, 0 replies; 9+ messages in thread
From: Jeff Dike @ 2001-09-09 5:42 UTC (permalink / raw)
To: Andrea Arcangeli; +Cc: Linus Torvalds, linux-kernel
andrea@suse.de said:
> ok, so I guess you're doing the growsdown by hand in the uml sigsegv
> handler.
Right, exactly the same way that every other port does it.
Jeff
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: expand_stack fix [was Re: 2.4.9aa3]
2001-09-09 3:50 ` Andrea Arcangeli
2001-09-09 5:42 ` Jeff Dike
@ 2001-09-11 11:24 ` Andrea Arcangeli
1 sibling, 0 replies; 9+ messages in thread
From: Andrea Arcangeli @ 2001-09-11 11:24 UTC (permalink / raw)
To: Jeff Dike; +Cc: Linus Torvalds, linux-kernel
On Sun, Sep 09, 2001 at 05:50:38AM +0200, Andrea Arcangeli wrote:
> On Sat, Sep 08, 2001 at 11:23:38PM -0500, Jeff Dike wrote:
> > andrea@suse.de said:
> > > My fix for the race doesn't drop the usability of GROWSDOWN that could
> > > otherwise break userspace programs. I guess at least uml uses
> > > growsdown vma file backed. Jeff?
> >
> > No. In neither the host kernel or UML is there a vma that's file backed and
> > growsdown.
> >
> > UML process stacks are marked growsdown in UML and are file backed on the host,
> > but that's not the same thing.
>
> ok, so I guess you're doing the growsdown by hand in the uml sigsegv
> handler.
>
> So it's probably fine to allow GROWSDOWN only on anon vmas per Linus's
> suggestion. I can attempt to change the race fix that way.
Here it is against pre[78], in short it forbids MAP_GROWSDOWN (/GROWSUP
even if GROWSUP is basically a noop but checking for it too was no cost
and it makes sense) for file backed mmaps, and it so avoids the vma
pgoff race in expand_stack, plus it fixes a few more bits in
expand_stack. See the comment above expand_stack for the locking
details. Plus as usual it adds the sysctl configurable gap of pages
between a growsdown vma and its previous vma to help userspace
reliability. It also reads vma->vm_start after expand_stack in
find_extend_vma.
this is running on my desktop for one day happily and no app triggered
the new -EINVAL in mmap yet.
diff -urN 2.4.10pre8/arch/alpha/mm/fault.c expand_stack/arch/alpha/mm/fault.c
--- 2.4.10pre8/arch/alpha/mm/fault.c Sun Apr 1 01:17:07 2001
+++ expand_stack/arch/alpha/mm/fault.c Tue Sep 11 05:03:56 2001
@@ -121,7 +121,7 @@
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
diff -urN 2.4.10pre8/arch/arm/mm/fault-common.c expand_stack/arch/arm/mm/fault-common.c
--- 2.4.10pre8/arch/arm/mm/fault-common.c Thu Aug 16 22:03:23 2001
+++ expand_stack/arch/arm/mm/fault-common.c Tue Sep 11 05:03:56 2001
@@ -229,7 +229,7 @@
goto survive;
check_stack:
- if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
+ if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr, NULL))
goto good_area;
out:
return fault;
diff -urN 2.4.10pre8/arch/cris/mm/fault.c expand_stack/arch/cris/mm/fault.c
--- 2.4.10pre8/arch/cris/mm/fault.c Sat Aug 11 08:03:54 2001
+++ expand_stack/arch/cris/mm/fault.c Tue Sep 11 05:03:56 2001
@@ -284,7 +284,7 @@
if (address + PAGE_SIZE < rdusp())
goto bad_area;
}
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
/*
diff -urN 2.4.10pre8/arch/i386/mm/fault.c expand_stack/arch/i386/mm/fault.c
--- 2.4.10pre8/arch/i386/mm/fault.c Tue Sep 11 04:09:20 2001
+++ expand_stack/arch/i386/mm/fault.c Tue Sep 11 05:03:56 2001
@@ -30,7 +30,7 @@
*/
int __verify_write(const void * addr, unsigned long size)
{
- struct vm_area_struct * vma;
+ struct vm_area_struct * vma, * prev_vma;
unsigned long start = (unsigned long) addr;
if (!size)
@@ -70,7 +70,8 @@
check_stack:
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, start) == 0)
+ find_vma_prev(current->mm, start, &prev_vma);
+ if (expand_stack(vma, start, prev_vma) == 0)
goto good_area;
bad_area:
@@ -107,7 +108,7 @@
{
struct task_struct *tsk;
struct mm_struct *mm;
- struct vm_area_struct * vma;
+ struct vm_area_struct * vma, * prev_vma;
unsigned long address;
unsigned long page;
unsigned long fixup;
@@ -168,7 +169,8 @@
if (address + 32 < regs->esp)
goto bad_area;
}
- if (expand_stack(vma, address))
+ find_vma_prev(mm, address, &prev_vma);
+ if (expand_stack(vma, address, prev_vma))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
diff -urN 2.4.10pre8/arch/ia64/mm/fault.c expand_stack/arch/ia64/mm/fault.c
--- 2.4.10pre8/arch/ia64/mm/fault.c Tue May 1 19:35:18 2001
+++ expand_stack/arch/ia64/mm/fault.c Tue Sep 11 05:03:56 2001
@@ -122,7 +122,7 @@
if (rgn_index(address) != rgn_index(vma->vm_start)
|| rgn_offset(address) >= RGN_MAP_LIMIT)
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
} else {
vma = prev_vma;
diff -urN 2.4.10pre8/arch/m68k/mm/fault.c expand_stack/arch/m68k/mm/fault.c
--- 2.4.10pre8/arch/m68k/mm/fault.c Sun Apr 1 01:17:08 2001
+++ expand_stack/arch/m68k/mm/fault.c Tue Sep 11 05:03:56 2001
@@ -120,7 +120,7 @@
if (address + 256 < rdusp())
goto map_err;
}
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto map_err;
/*
diff -urN 2.4.10pre8/arch/mips/mm/fault.c expand_stack/arch/mips/mm/fault.c
--- 2.4.10pre8/arch/mips/mm/fault.c Sat Jul 21 00:04:05 2001
+++ expand_stack/arch/mips/mm/fault.c Tue Sep 11 05:03:56 2001
@@ -80,7 +80,7 @@
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
diff -urN 2.4.10pre8/arch/mips64/mm/fault.c expand_stack/arch/mips64/mm/fault.c
--- 2.4.10pre8/arch/mips64/mm/fault.c Tue Sep 11 04:09:24 2001
+++ expand_stack/arch/mips64/mm/fault.c Tue Sep 11 05:03:56 2001
@@ -132,7 +132,7 @@
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
diff -urN 2.4.10pre8/arch/ppc/mm/fault.c expand_stack/arch/ppc/mm/fault.c
--- 2.4.10pre8/arch/ppc/mm/fault.c Wed Jul 4 04:03:45 2001
+++ expand_stack/arch/ppc/mm/fault.c Tue Sep 11 05:03:56 2001
@@ -64,7 +64,7 @@
void do_page_fault(struct pt_regs *regs, unsigned long address,
unsigned long error_code)
{
- struct vm_area_struct * vma;
+ struct vm_area_struct * vma, * prev_vma;
struct mm_struct *mm = current->mm;
siginfo_t info;
int code = SEGV_MAPERR;
@@ -111,7 +111,8 @@
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, address))
+ vma = find_vma_prev(mm, address, &prev_vma);
+ if (expand_stack(vma, address, prev_vma))
goto bad_area;
good_area:
diff -urN 2.4.10pre8/arch/s390/mm/fault.c expand_stack/arch/s390/mm/fault.c
--- 2.4.10pre8/arch/s390/mm/fault.c Sat Aug 11 08:03:59 2001
+++ expand_stack/arch/s390/mm/fault.c Tue Sep 11 05:03:56 2001
@@ -122,7 +122,7 @@
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
diff -urN 2.4.10pre8/arch/s390x/mm/fault.c expand_stack/arch/s390x/mm/fault.c
--- 2.4.10pre8/arch/s390x/mm/fault.c Sat Aug 11 08:04:00 2001
+++ expand_stack/arch/s390x/mm/fault.c Tue Sep 11 05:03:56 2001
@@ -152,7 +152,7 @@
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
diff -urN 2.4.10pre8/arch/sh/mm/fault.c expand_stack/arch/sh/mm/fault.c
--- 2.4.10pre8/arch/sh/mm/fault.c Tue Sep 11 04:09:28 2001
+++ expand_stack/arch/sh/mm/fault.c Tue Sep 11 05:03:56 2001
@@ -74,7 +74,7 @@
check_stack:
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, start) == 0)
+ if (expand_stack(vma, start, NULL) == 0)
goto good_area;
bad_area:
@@ -114,7 +114,7 @@
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
diff -urN 2.4.10pre8/arch/sparc/mm/fault.c expand_stack/arch/sparc/mm/fault.c
--- 2.4.10pre8/arch/sparc/mm/fault.c Sat Aug 11 08:04:01 2001
+++ expand_stack/arch/sparc/mm/fault.c Tue Sep 11 05:03:56 2001
@@ -238,7 +238,7 @@
goto good_area;
if(!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if(expand_stack(vma, address))
+ if(expand_stack(vma, address, NULL))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
@@ -485,7 +485,7 @@
goto good_area;
if(!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if(expand_stack(vma, address))
+ if(expand_stack(vma, address, NULL))
goto bad_area;
good_area:
info.si_code = SEGV_ACCERR;
diff -urN 2.4.10pre8/arch/sparc64/mm/fault.c expand_stack/arch/sparc64/mm/fault.c
--- 2.4.10pre8/arch/sparc64/mm/fault.c Tue Sep 11 04:09:29 2001
+++ expand_stack/arch/sparc64/mm/fault.c Tue Sep 11 05:03:56 2001
@@ -340,7 +340,7 @@
goto good_area;
if (!(vma->vm_flags & VM_GROWSDOWN))
goto bad_area;
- if (expand_stack(vma, address))
+ if (expand_stack(vma, address, NULL))
goto bad_area;
/*
* Ok, we have a good vm_area for this memory access, so
diff -urN 2.4.10pre8/include/linux/mm.h expand_stack/include/linux/mm.h
--- 2.4.10pre8/include/linux/mm.h Tue Sep 11 04:10:02 2001
+++ expand_stack/include/linux/mm.h Tue Sep 11 05:03:56 2001
@@ -556,25 +556,42 @@
#define GFP_DMA __GFP_DMA
-/* vma is the first one with address < vma->vm_end,
- * and even address < vma->vm_start. Have to extend vma. */
-static inline int expand_stack(struct vm_area_struct * vma, unsigned long address)
+extern int heap_stack_gap;
+
+/*
+ * vma is the first one with address < vma->vm_end,
+ * and even address < vma->vm_start. Have to extend vma.
+ *
+ * Locking: vm_start can decrease under you if you only hold
+ * the read semaphore, you either need the write semaphore
+ * or both the read semaphore and the page_table_lock acquired
+ * if you want vm_start consistent. vm_end and the vma layout
+ * are just consistent with only the read semaphore acquired
+ * instead.
+ */
+static inline int expand_stack(struct vm_area_struct * vma, unsigned long address,
+ struct vm_area_struct * prev_vma)
{
unsigned long grow;
+ int err = -ENOMEM;
address &= PAGE_MASK;
+ if (prev_vma && prev_vma->vm_end + (heap_stack_gap << PAGE_SHIFT) > address)
+ goto out;
+ spin_lock(&vma->vm_mm->page_table_lock);
grow = (vma->vm_start - address) >> PAGE_SHIFT;
if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur ||
((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur)
- return -ENOMEM;
- spin_lock(&vma->vm_mm->page_table_lock);
+ goto out_unlock;
vma->vm_start = address;
- vma->vm_pgoff -= grow;
vma->vm_mm->total_vm += grow;
if (vma->vm_flags & VM_LOCKED)
vma->vm_mm->locked_vm += grow;
+ err = 0;
+ out_unlock:
spin_unlock(&vma->vm_mm->page_table_lock);
- return 0;
+ out:
+ return err;
}
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
diff -urN 2.4.10pre8/include/linux/sysctl.h expand_stack/include/linux/sysctl.h
--- 2.4.10pre8/include/linux/sysctl.h Tue Sep 11 04:10:02 2001
+++ expand_stack/include/linux/sysctl.h Tue Sep 11 05:03:56 2001
@@ -135,7 +135,8 @@
VM_PAGECACHE=7, /* struct: Set cache memory thresholds */
VM_PAGERDAEMON=8, /* struct: Control kswapd behaviour */
VM_PGT_CACHE=9, /* struct: Set page table cache parameters */
- VM_PAGE_CLUSTER=10 /* int: set number of pages to swap together */
+ VM_PAGE_CLUSTER=10, /* int: set number of pages to swap together */
+ VM_HEAP_STACK_GAP=11, /* int: page gap between heap and stack */
};
diff -urN 2.4.10pre8/kernel/sysctl.c expand_stack/kernel/sysctl.c
--- 2.4.10pre8/kernel/sysctl.c Tue Sep 11 04:10:03 2001
+++ expand_stack/kernel/sysctl.c Tue Sep 11 05:03:56 2001
@@ -268,6 +268,8 @@
&pgt_cache_water, 2*sizeof(int), 0644, NULL, &proc_dointvec},
{VM_PAGE_CLUSTER, "page-cluster",
&page_cluster, sizeof(int), 0644, NULL, &proc_dointvec},
+ {VM_HEAP_STACK_GAP, "heap-stack-gap",
+ &heap_stack_gap, sizeof(int), 0644, NULL, &proc_dointvec},
{0}
};
diff -urN 2.4.10pre8/mm/memory.c expand_stack/mm/memory.c
--- 2.4.10pre8/mm/memory.c Tue Sep 11 04:10:03 2001
+++ expand_stack/mm/memory.c Tue Sep 11 05:04:11 2001
@@ -442,7 +442,7 @@
unsigned long ptr, end;
int err;
struct mm_struct * mm;
- struct vm_area_struct * vma = 0;
+ struct vm_area_struct * vma, * prev_vma;
struct page * map;
int i;
int datain = (rw == READ);
@@ -468,19 +468,21 @@
iobuf->length = len;
i = 0;
+ vma = NULL;
/*
* First of all, try to fault in all of the necessary pages
*/
while (ptr < end) {
if (!vma || ptr >= vma->vm_end) {
- vma = find_vma(current->mm, ptr);
+ vma = find_vma(mm, ptr);
if (!vma)
goto out_unlock;
if (vma->vm_start > ptr) {
if (!(vma->vm_flags & VM_GROWSDOWN))
goto out_unlock;
- if (expand_stack(vma, ptr))
+ find_vma_prev(mm, ptr, &prev_vma);
+ if (expand_stack(vma, ptr, prev_vma))
goto out_unlock;
}
if (((datain) && (!(vma->vm_flags & VM_WRITE))) ||
diff -urN 2.4.10pre8/mm/mmap.c expand_stack/mm/mmap.c
--- 2.4.10pre8/mm/mmap.c Sat May 26 04:03:50 2001
+++ expand_stack/mm/mmap.c Tue Sep 11 05:03:56 2001
@@ -38,6 +38,7 @@
};
int sysctl_overcommit_memory;
+int heap_stack_gap = 1;
/* Check that a process has enough memory to allocate a
* new virtual mapping.
@@ -292,7 +293,6 @@
}
/* Clear old maps */
- error = -ENOMEM;
if (do_munmap(mm, addr, len))
return -ENOMEM;
@@ -337,6 +337,9 @@
vma->vm_raend = 0;
if (file) {
+ error = -EINVAL;
+ if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
+ goto free_vma;
if (vm_flags & VM_DENYWRITE) {
error = deny_write_access(file);
if (error)
@@ -411,9 +414,15 @@
for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
/* At this point: (!vma || addr < vma->vm_end). */
+ unsigned long __heap_stack_gap;
if (TASK_SIZE - len < addr)
return -ENOMEM;
- if (!vma || addr + len <= vma->vm_start)
+ if (!vma)
+ return addr;
+ __heap_stack_gap = 0;
+ if (vma->vm_flags & VM_GROWSDOWN)
+ __heap_stack_gap = heap_stack_gap << PAGE_SHIFT;
+ if (addr + len + __heap_stack_gap <= vma->vm_start)
return addr;
addr = vma->vm_end;
}
@@ -532,7 +541,7 @@
struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr)
{
- struct vm_area_struct * vma;
+ struct vm_area_struct * vma, * prev_vma;
unsigned long start;
addr &= PAGE_MASK;
@@ -543,9 +552,10 @@
return vma;
if (!(vma->vm_flags & VM_GROWSDOWN))
return NULL;
- start = vma->vm_start;
- if (expand_stack(vma, addr))
+ find_vma_prev(mm, addr, &prev_vma);
+ if (expand_stack(vma, addr, prev_vma))
return NULL;
+ start = vma->vm_start;
if (vma->vm_flags & VM_LOCKED) {
make_pages_present(addr, start);
}
Andrea
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2001-09-11 11:23 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2001-08-19 6:07 2.4.9aa3 Andrea Arcangeli
2001-09-03 15:24 ` expand_stack fix [was Re: 2.4.9aa3] Andrea Arcangeli
2001-09-07 18:47 ` Linus Torvalds
2001-09-08 16:04 ` Andrea Arcangeli
2001-09-08 16:28 ` Linus Torvalds
2001-09-09 4:23 ` Jeff Dike
2001-09-09 3:50 ` Andrea Arcangeli
2001-09-09 5:42 ` Jeff Dike
2001-09-11 11:24 ` Andrea Arcangeli
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox