* [PATCH] error path in setup_arg_pages() misses vm_unacct_memory()
@ 2005-09-12 16:49 Kirill Korotaev
2005-09-12 20:23 ` Andrew Morton
0 siblings, 1 reply; 13+ messages in thread
From: Kirill Korotaev @ 2005-09-12 16:49 UTC (permalink / raw)
To: Linus Torvalds, Andrew Morton, linux-kernel, xemul
[-- Attachment #1: Type: text/plain, Size: 365 bytes --]
[PATCH] error path in setup_arg_pages() misses vm_unacct_memory()
This patch fixes error path in setup_arg_pages() functions, since it
misses vm_unacct_memory() after successful call of
security_vm_enough_memory(). Also it cleans up error path.
Signed-Off-By: Pavel Emelianov <xemul@sw.ru>
Signed-Off-By: Kirill Korotaev <dev@sw.ru>
Kirill
P.S. against 2.6.13
[-- Attachment #2: diff-ms-unacct-error-20050912 --]
[-- Type: text/plain, Size: 6712 bytes --]
--- linux-2.6.13.1/arch/ia64/ia32/binfmt_elf32.c.accterr 2005-09-12 13:59:50.000000000 +0400
+++ linux-2.6.13.1/arch/ia64/ia32/binfmt_elf32.c 2005-09-12 15:10:16.000000000 +0400
@@ -199,7 +199,7 @@
int
ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
{
- unsigned long stack_base;
+ unsigned long stack_base, vm_end, vm_start;
struct vm_area_struct *mpnt;
struct mm_struct *mm = current->mm;
int i, ret;
@@ -212,23 +212,24 @@
bprm->loader += stack_base;
bprm->exec += stack_base;
+ ret = -ENOMEM;
mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (!mpnt)
- return -ENOMEM;
+ goto out;
- if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))
- >> PAGE_SHIFT)) {
- kmem_cache_free(vm_area_cachep, mpnt);
- return -ENOMEM;
- }
+ vm_end = IA32_STACK_TOP;
+ vm_start = PAGE_MASK & (unsigned long)bprm->p;
+
+ if (security_vm_enough_memory((vm_end - vm_start) >> PAGE_SHIFT))
+ goto out_free;
memset(mpnt, 0, sizeof(*mpnt));
down_write(¤t->mm->mmap_sem);
{
mpnt->vm_mm = current->mm;
- mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
- mpnt->vm_end = IA32_STACK_TOP;
+ mpnt->vm_start = vm_start;
+ mpnt->vm_end = vm_end;
if (executable_stack == EXSTACK_ENABLE_X)
mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC;
else if (executable_stack == EXSTACK_DISABLE_X)
@@ -237,11 +238,8 @@
mpnt->vm_flags = VM_STACK_FLAGS;
mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)?
PAGE_COPY_EXEC: PAGE_COPY;
- if ((ret = insert_vm_struct(current->mm, mpnt))) {
- up_write(¤t->mm->mmap_sem);
- kmem_cache_free(vm_area_cachep, mpnt);
- return ret;
- }
+ if ((ret = insert_vm_struct(current->mm, mpnt)))
+ goto out_up;
current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt);
}
@@ -260,6 +258,14 @@
current->thread.ppl = ia32_init_pp_list();
return 0;
+
+out_up:
+ up_write(¤t->mm->mmap_sem);
+ vm_unacct_memory((vm_end - vm_start) >> PAGE_SHIFT);
+out_free:
+ kmem_cache_free(vm_area_cachep, mpnt);
+out:
+ return ret;
}
static void
--- linux-2.6.13.1/arch/x86_64/ia32/ia32_binfmt.c.accterr 2005-09-12 14:00:30.000000000 +0400
+++ linux-2.6.13.1/arch/x86_64/ia32/ia32_binfmt.c 2005-09-12 15:18:45.000000000 +0400
@@ -337,7 +337,7 @@
int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack)
{
- unsigned long stack_base;
+ unsigned long stack_base, vm_end, vm_start;
struct vm_area_struct *mpnt;
struct mm_struct *mm = current->mm;
int i, ret;
@@ -350,22 +350,24 @@
bprm->loader += stack_base;
bprm->exec += stack_base;
+ ret = -ENOMEM;
mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
- if (!mpnt)
- return -ENOMEM;
+ if (!mpnt)
+ goto out;
+
+ vm_end = IA32_STACK_TOP;
+ vm_start = PAGE_MASK & (unsigned long)bprm->p;
- if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
- kmem_cache_free(vm_area_cachep, mpnt);
- return -ENOMEM;
- }
+ if (security_vm_enough_memory((vm_end - vm_start)>>PAGE_SHIFT))
+ goto out_free;
memset(mpnt, 0, sizeof(*mpnt));
down_write(&mm->mmap_sem);
{
mpnt->vm_mm = mm;
- mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
- mpnt->vm_end = IA32_STACK_TOP;
+ mpnt->vm_start = vm_start;
+ mpnt->vm_end = vm_end;
if (executable_stack == EXSTACK_ENABLE_X)
mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC;
else if (executable_stack == EXSTACK_DISABLE_X)
@@ -374,11 +376,8 @@
mpnt->vm_flags = VM_STACK_FLAGS;
mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC) ?
PAGE_COPY_EXEC : PAGE_COPY;
- if ((ret = insert_vm_struct(mm, mpnt))) {
- up_write(&mm->mmap_sem);
- kmem_cache_free(vm_area_cachep, mpnt);
- return ret;
- }
+ if ((ret = insert_vm_struct(mm, mpnt)))
+ goto out_up;
mm->stack_vm = mm->total_vm = vma_pages(mpnt);
}
@@ -393,6 +392,14 @@
up_write(&mm->mmap_sem);
return 0;
+
+out_up:
+ up_write(&mm->mmap_sem);
+ vm_unacct_memory((vm_end - vm_start) >> PAGE_SHIFT);
+out_free:
+ kmem_cache_free(vm_area_cachep, mpnt);
+out:
+ return ret;
}
static unsigned long
--- linux-2.6.13.1/arch/x86_64/ia32/syscall32.c.accterr 2005-09-12 14:00:29.000000000 +0400
+++ linux-2.6.13.1/arch/x86_64/ia32/syscall32.c 2005-09-12 15:23:07.000000000 +0400
@@ -10,6 +10,7 @@
#include <linux/init.h>
#include <linux/stringify.h>
#include <linux/security.h>
+#include <linux/mman.h>
#include <asm/proto.h>
#include <asm/tlbflush.h>
#include <asm/ia32_unistd.h>
@@ -49,13 +50,12 @@
struct mm_struct *mm = current->mm;
int ret;
+ ret = -ENOMEM;
vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (!vma)
- return -ENOMEM;
- if (security_vm_enough_memory(npages)) {
- kmem_cache_free(vm_area_cachep, vma);
- return -ENOMEM;
- }
+ goto out;
+ if (security_vm_enough_memory(npages))
+ goto out_free;
memset(vma, 0, sizeof(struct vm_area_struct));
/* Could randomize here */
@@ -69,14 +69,19 @@
vma->vm_mm = mm;
down_write(&mm->mmap_sem);
- if ((ret = insert_vm_struct(mm, vma))) {
- up_write(&mm->mmap_sem);
- kmem_cache_free(vm_area_cachep, vma);
- return ret;
- }
+ if ((ret = insert_vm_struct(mm, vma)))
+ goto out_up;
mm->total_vm += npages;
up_write(&mm->mmap_sem);
return 0;
+
+out_up:
+ up_write(&mm->mmap_sem);
+ vm_unacct_memory(npages);
+out_free:
+ kmem_cache_free(vm_area_cachep, vma);
+out:
+ return ret;
}
static int __init init_syscall32(void)
--- linux-2.6.13.1/fs/exec.c.accterr 2005-09-12 14:01:43.000000000 +0400
+++ linux-2.6.13.1/fs/exec.c 2005-09-12 15:20:38.000000000 +0400
@@ -417,14 +417,13 @@
bprm->loader += stack_base;
bprm->exec += stack_base;
+ ret = -ENOMEM;
mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (!mpnt)
- return -ENOMEM;
+ goto out;
- if (security_vm_enough_memory(arg_size >> PAGE_SHIFT)) {
- kmem_cache_free(vm_area_cachep, mpnt);
- return -ENOMEM;
- }
+ if (security_vm_enough_memory(arg_size >> PAGE_SHIFT))
+ goto out_free;
memset(mpnt, 0, sizeof(*mpnt));
@@ -449,11 +448,8 @@
mpnt->vm_flags = VM_STACK_FLAGS;
mpnt->vm_flags |= mm->def_flags;
mpnt->vm_page_prot = protection_map[mpnt->vm_flags & 0x7];
- if ((ret = insert_vm_struct(mm, mpnt))) {
- up_write(&mm->mmap_sem);
- kmem_cache_free(vm_area_cachep, mpnt);
- return ret;
- }
+ if ((ret = insert_vm_struct(mm, mpnt)))
+ goto out_up;
mm->stack_vm = mm->total_vm = vma_pages(mpnt);
}
@@ -468,6 +464,14 @@
up_write(&mm->mmap_sem);
return 0;
+
+out_up:
+ up_write(&mm->mmap_sem);
+ vm_unacct_memory(arg_size >> PAGE_SHIFT);
+out_free:
+ kmem_cache_free(vm_area_cachep, mpnt);
+out:
+ return ret;
}
EXPORT_SYMBOL(setup_arg_pages);
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] error path in setup_arg_pages() misses vm_unacct_memory()
2005-09-12 16:49 [PATCH] error path in setup_arg_pages() misses vm_unacct_memory() Kirill Korotaev
@ 2005-09-12 20:23 ` Andrew Morton
2005-09-13 8:21 ` Kirill Korotaev
2005-09-13 11:20 ` Hugh Dickins
0 siblings, 2 replies; 13+ messages in thread
From: Andrew Morton @ 2005-09-12 20:23 UTC (permalink / raw)
To: Kirill Korotaev; +Cc: torvalds, linux-kernel, xemul, Hugh Dickins
Kirill Korotaev <dev@sw.ru> wrote:
>
> This patch fixes error path in setup_arg_pages() functions, since it
> misses vm_unacct_memory() after successful call of
> security_vm_enough_memory(). Also it cleans up error path.
Ugh. The identifier `security_vm_enough_memory()' sounds like some
predicate which has no side-effects. Except it performs accounting. Hence
bugs like this.
It's a shame that you mixed a largeish cleanup along with a bugfix - please
don't do that in future.
Patch looks OK to me. Hugh, could you please double-check sometime?
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] error path in setup_arg_pages() misses vm_unacct_memory()
2005-09-12 20:23 ` Andrew Morton
@ 2005-09-13 8:21 ` Kirill Korotaev
2005-09-13 8:40 ` Andrew Morton
2005-09-13 11:20 ` Hugh Dickins
1 sibling, 1 reply; 13+ messages in thread
From: Kirill Korotaev @ 2005-09-13 8:21 UTC (permalink / raw)
To: Andrew Morton; +Cc: torvalds, linux-kernel, xemul, Hugh Dickins
>> This patch fixes error path in setup_arg_pages() functions, since it
>> misses vm_unacct_memory() after successful call of
>> security_vm_enough_memory(). Also it cleans up error path.
>
> Ugh. The identifier `security_vm_enough_memory()' sounds like some
> predicate which has no side-effects. Except it performs accounting. Hence
> bugs like this.
yup, this is really done in a leading-to-bugs way... :(
maybe it is worth moving vm_acct_memory() out of
security_vm_enough_memory()? what do you think?
> It's a shame that you mixed a largeish cleanup along with a bugfix - please
> don't do that in future.
not a problem :) thanks for your time and looking at the patches I sent.
Kirill
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] error path in setup_arg_pages() misses vm_unacct_memory()
2005-09-13 8:21 ` Kirill Korotaev
@ 2005-09-13 8:40 ` Andrew Morton
2005-09-13 11:30 ` Hugh Dickins
2005-09-13 11:58 ` Alan Cox
0 siblings, 2 replies; 13+ messages in thread
From: Andrew Morton @ 2005-09-13 8:40 UTC (permalink / raw)
To: Kirill Korotaev; +Cc: torvalds, linux-kernel, xemul, hugh
Kirill Korotaev <dev@sw.ru> wrote:
>
> maybe it is worth moving vm_acct_memory() out of
> security_vm_enough_memory()?
I think that would be saner, yes. That means that the callers would call
vm_acct_memory() after security_enough_memory(), if that succeeded.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] error path in setup_arg_pages() misses vm_unacct_memory()
2005-09-12 20:23 ` Andrew Morton
2005-09-13 8:21 ` Kirill Korotaev
@ 2005-09-13 11:20 ` Hugh Dickins
2005-09-13 12:13 ` Kirill Korotaev
2005-09-14 5:13 ` Hugh Dickins
1 sibling, 2 replies; 13+ messages in thread
From: Hugh Dickins @ 2005-09-13 11:20 UTC (permalink / raw)
To: Andrew Morton; +Cc: Kirill Korotaev, torvalds, linux-kernel, xemul
On Mon, 12 Sep 2005, Andrew Morton wrote:
> Kirill Korotaev <dev@sw.ru> wrote:
> >
> > This patch fixes error path in setup_arg_pages() functions, since it
> > misses vm_unacct_memory() after successful call of
> > security_vm_enough_memory(). Also it cleans up error path.
>
> Ugh. The identifier `security_vm_enough_memory()' sounds like some
> predicate which has no side-effects. Except it performs accounting. Hence
> bugs like this.
>
> It's a shame that you mixed a largeish cleanup along with a bugfix - please
> don't do that in future.
>
> Patch looks OK to me. Hugh, could you please double-check sometime?
It's a good find, and the patch looks correct to me, so far as it goes.
But I think it's the wrong patch, and incomplete: it can be done more
appropriately, more simply and more completely in insert_vm_struct itself.
I'll post a replacement patch (or admit I'm wrong) in a little while.
Hugh
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] error path in setup_arg_pages() misses vm_unacct_memory()
2005-09-13 8:40 ` Andrew Morton
@ 2005-09-13 11:30 ` Hugh Dickins
2005-09-13 18:37 ` Andrew Morton
2005-09-13 11:58 ` Alan Cox
1 sibling, 1 reply; 13+ messages in thread
From: Hugh Dickins @ 2005-09-13 11:30 UTC (permalink / raw)
To: Andrew Morton; +Cc: Kirill Korotaev, torvalds, linux-kernel, xemul
On Tue, 13 Sep 2005, Andrew Morton wrote:
> Kirill Korotaev <dev@sw.ru> wrote:
> >
> > maybe it is worth moving vm_acct_memory() out of
> > security_vm_enough_memory()?
>
> I think that would be saner, yes. That means that the callers would call
> vm_acct_memory() after security_enough_memory(), if that succeeded.
I don't like that at all. The implementation of its tests is necessarily
imprecise, but nonetheless, we do prefer primitives which atomically test
and reserve. We're not moving from request_region to check_region, are we?
But change the naming by all means, it was never good,
and grew worse when "security_" got stuck on the front.
Hugh
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] error path in setup_arg_pages() misses vm_unacct_memory()
2005-09-13 8:40 ` Andrew Morton
2005-09-13 11:30 ` Hugh Dickins
@ 2005-09-13 11:58 ` Alan Cox
1 sibling, 0 replies; 13+ messages in thread
From: Alan Cox @ 2005-09-13 11:58 UTC (permalink / raw)
To: Andrew Morton; +Cc: Kirill Korotaev, torvalds, linux-kernel, xemul, hugh
On Maw, 2005-09-13 at 01:40 -0700, Andrew Morton wrote:
> Kirill Korotaev <dev@sw.ru> wrote:
> >
> > maybe it is worth moving vm_acct_memory() out of
> > security_vm_enough_memory()?
>
> I think that would be saner, yes. That means that the callers would call
> vm_acct_memory() after security_enough_memory(), if that succeeded.
It would make much more sense to simply sed security_vm_enough_memory()
into security_vm_claim_memory() or a better name. You need to perform
the process as one thing otherwise two people checking for enough memory
may both succeed and then both reserve memory causing overcommits that
should not be permitted.
If you jut fix the name you get the right semantics still but without
the confusion.
Alan
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] error path in setup_arg_pages() misses vm_unacct_memory()
2005-09-13 11:20 ` Hugh Dickins
@ 2005-09-13 12:13 ` Kirill Korotaev
2005-09-14 5:13 ` Hugh Dickins
1 sibling, 0 replies; 13+ messages in thread
From: Kirill Korotaev @ 2005-09-13 12:13 UTC (permalink / raw)
To: Hugh Dickins; +Cc: Andrew Morton, torvalds, linux-kernel, xemul
[-- Attachment #1: Type: text/plain, Size: 1881 bytes --]
>>Kirill Korotaev <dev@sw.ru> wrote:
>>
>>> This patch fixes error path in setup_arg_pages() functions, since it
>>> misses vm_unacct_memory() after successful call of
>>> security_vm_enough_memory(). Also it cleans up error path.
>>
>>Ugh. The identifier `security_vm_enough_memory()' sounds like some
>>predicate which has no side-effects. Except it performs accounting. Hence
>>bugs like this.
>>
>>It's a shame that you mixed a largeish cleanup along with a bugfix - please
>>don't do that in future.
>>
>>Patch looks OK to me. Hugh, could you please double-check sometime?
>
>
> It's a good find, and the patch looks correct to me, so far as it goes.
> But I think it's the wrong patch, and incomplete: it can be done more
> appropriately, more simply and more completely in insert_vm_struct itself.
> I'll post a replacement patch (or admit I'm wrong) in a little while.
this looks like the weirdest solution to me...
also it looks like not all the callers of insert_vm_struct do call
security_vm_enough_memory(), e.g. ./arch/sparc/mm/sun4c.c
------------------------------------------------------------------
Also, Pavel missed ppc64 version of setup_arg_pages due to different
name. I attached 2 new additional patches:
1. diff-ms-ppsleak-20050913:
[PATCH] error path in ppc64::arch_setup_additional_pages() misses
vm_unacct_memory() and kmem_cache_free()
This patch fixes error path in arch_setup_additional_pages() function,
since it misses vm_unacct_memory() and kmmem_cache_free() after
successful call of security_vm_enough_memory().
Signed-Off-By: Pavel Emelianov <xemul@sw.ru>
Signed-Off-By: Kirill Korotaev <dev@sw.ru>
2. diff-ms-ppscleanup-20050913:
[PATCH] This patch cleanups error path in
ppc64::arch_setup_additional_pages() function.
Signed-Off-By: Pavel Emelianov <xemul@sw.ru>
Signed-Off-By: Kirill Korotaev <dev@sw.ru>
Kirill
[-- Attachment #2: diff-ms-ppsleak-20050913 --]
[-- Type: text/plain, Size: 539 bytes --]
--- linux-2.6.13.1/arch/ppc64/kernel/vdso.c.ppcfix 2005-09-13 15:44:17.000000000 +0400
+++ linux-2.6.13.1/arch/ppc64/kernel/vdso.c 2005-09-13 15:47:26.000000000 +0400
@@ -237,8 +237,11 @@ int arch_setup_additional_pages(struct l
*/
vdso_base = get_unmapped_area(NULL, vdso_base,
vdso_pages << PAGE_SHIFT, 0, 0);
- if (vdso_base & ~PAGE_MASK)
+ if (vdso_base & ~PAGE_MASK) {
+ vm_unacct_memory(vdso_pages);
+ kmem_cache_free(vm_area_cachep, vma);
return (int)vdso_base;
+ }
current->thread.vdso_base = vdso_base;
[-- Attachment #3: diff-ms-ppscleanup-20050913 --]
[-- Type: text/plain, Size: 1317 bytes --]
--- linux-2.6.13.1/arch/ppc64/kernel/vdso.c.ppccln 2005-09-13 15:47:26.000000000 +0400
+++ linux-2.6.13.1/arch/ppc64/kernel/vdso.c 2005-09-13 15:55:08.000000000 +0400
@@ -221,13 +221,13 @@ int arch_setup_additional_pages(struct l
if (vdso_pages == 0)
return 0;
+ vdso_base = -ENOMEM;
vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (vma == NULL)
- return -ENOMEM;
- if (security_vm_enough_memory(vdso_pages)) {
- kmem_cache_free(vm_area_cachep, vma);
- return -ENOMEM;
- }
+ goto out;
+ if (security_vm_enough_memory(vdso_pages))
+ goto out_free;
+
memset(vma, 0, sizeof(*vma));
/*
@@ -237,11 +237,8 @@ int arch_setup_additional_pages(struct l
*/
vdso_base = get_unmapped_area(NULL, vdso_base,
vdso_pages << PAGE_SHIFT, 0, 0);
- if (vdso_base & ~PAGE_MASK) {
- vm_unacct_memory(vdso_pages);
- kmem_cache_free(vm_area_cachep, vma);
- return (int)vdso_base;
- }
+ if (vdso_base & ~PAGE_MASK)
+ goto out_unacct;
current->thread.vdso_base = vdso_base;
@@ -274,6 +271,13 @@ int arch_setup_additional_pages(struct l
up_write(&mm->mmap_sem);
return 0;
+
+out_unacct:
+ vm_unacct_memory(vdso_pages);
+out_free:
+ kmem_cache_free(vm_area_cachep, vma);
+out:
+ return (int)vdso_base;
}
static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] error path in setup_arg_pages() misses vm_unacct_memory()
2005-09-13 11:30 ` Hugh Dickins
@ 2005-09-13 18:37 ` Andrew Morton
2005-09-13 19:10 ` Hugh Dickins
0 siblings, 1 reply; 13+ messages in thread
From: Andrew Morton @ 2005-09-13 18:37 UTC (permalink / raw)
To: Hugh Dickins; +Cc: dev, torvalds, linux-kernel, xemul
Hugh Dickins <hugh@veritas.com> wrote:
>
> On Tue, 13 Sep 2005, Andrew Morton wrote:
> > Kirill Korotaev <dev@sw.ru> wrote:
> > >
> > > maybe it is worth moving vm_acct_memory() out of
> > > security_vm_enough_memory()?
> >
> > I think that would be saner, yes. That means that the callers would call
> > vm_acct_memory() after security_enough_memory(), if that succeeded.
>
> I don't like that at all. The implementation of its tests is necessarily
> imprecise, but nonetheless, we do prefer primitives which atomically test
> and reserve. We're not moving from request_region to check_region, are we?
I don't think that it's any racier to move the allocation to after the
check than to have it before the check. If we're worried, take mmap_sem -
most place already do that, but not all.
> But change the naming by all means, it was never good,
> and grew worse when "security_" got stuck on the front.
Yes, renaming it to something like alloc_vm_space() would suit.
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] error path in setup_arg_pages() misses vm_unacct_memory()
2005-09-13 18:37 ` Andrew Morton
@ 2005-09-13 19:10 ` Hugh Dickins
0 siblings, 0 replies; 13+ messages in thread
From: Hugh Dickins @ 2005-09-13 19:10 UTC (permalink / raw)
To: Andrew Morton; +Cc: dev, torvalds, linux-kernel, xemul
On Tue, 13 Sep 2005, Andrew Morton wrote:
>
> I don't think that it's any racier to move the allocation to after the
> check than to have it before the check. If we're worried, take mmap_sem -
> most place already do that, but not all.
mmap_sem? That locks a single mm, but here we're talking about
making reservations from what /proc/meminfo calls CommitLimit,
for the whole machine. I really don't see any need to change
the ordering of what's done at present.
> > But change the naming by all means, it was never good,
> > and grew worse when "security_" got stuck on the front.
>
> Yes, renaming it to something like alloc_vm_space() would suit.
Nor am I in any hurry to change the name, though I agree with
you and Alan that a name change would be good, in due course.
I'm more interested in fixing the bugs Kirill and co discovered,
and those I'm additionally finding on the way to fixing them in
insert_vm_struct. Notice how running a 32-bit binary on x86_64
leaks 4kB into Committed_AS each time?
But I'm puzzled as to why the same leak into Committed_AS doesn't
occur on ppc64, each time an ELF binary is run, if vDSO is enabled.
Or is it indeed leaking, but nobody has noticed? I don't have any
ppc64, could someone please check and see? Thanks.
insert_vm_struct is certainly the way to go (it's not obvious to
callers whether VM_ACCOUNT is set or not), and there won't be any
security_vm_enough_memory calls outside mm/ (and kernel/fork.c)
once I'm done: just a matter of where to stop (should it also
vm_stat_account? can we trust callers to maintain total_vm?
what about locked_vm? rlimits?).
Hugh
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] error path in setup_arg_pages() misses vm_unacct_memory()
2005-09-13 11:20 ` Hugh Dickins
2005-09-13 12:13 ` Kirill Korotaev
@ 2005-09-14 5:13 ` Hugh Dickins
2005-09-14 8:41 ` Kirill Korotaev
1 sibling, 1 reply; 13+ messages in thread
From: Hugh Dickins @ 2005-09-14 5:13 UTC (permalink / raw)
To: Andrew Morton; +Cc: Kirill Korotaev, torvalds, linux-kernel, xemul
On Tue, 13 Sep 2005, Hugh Dickins wrote:
> On Mon, 12 Sep 2005, Andrew Morton wrote:
> >
> > Patch looks OK to me. Hugh, could you please double-check sometime?
>
> It's a good find, and the patch looks correct to me, so far as it goes.
> But I think it's the wrong patch, and incomplete: it can be done more
> appropriately, more simply and more completely in insert_vm_struct itself.
> I'll post a replacement patch (or admit I'm wrong) in a little while.
Many thanks, Andrew, for confirming that ppc64 does indeed leak into
Committed_AS, as it looked to me. Here's my version of Pavel/Kirill's
patches: sorry if it seems "weird" to Kirill, perhaps we need to change
the name of insert_vm_struct too; but it seems safer and easier to get
right this way. And I prefer deleting code to adding it...
Pavel Emelianov and Kirill Korotaev observe that fs and arch users of
security_vm_enough_memory tend to forget to vm_unacct_memory when a
failure occurs further down (typically in setup_arg_pages variants).
These are all users of insert_vm_struct, and that reservation will only
be unaccounted on exit if the vma is marked VM_ACCOUNT: which in some
cases it is (hidden inside VM_STACK_FLAGS) and in some cases it isn't.
So x86_64 32-bit and ppc64 vDSO ELFs have been leaking memory into
Committed_AS each time they're run. But don't add VM_ACCOUNT to them,
it's inappropriate to reserve against the very unlikely case that gdb
be used to COW a vDSO page - we ought to do something about that in
do_wp_page, but there are yet other inconsistencies to be resolved.
The safe and economical way to fix this is to let insert_vm_struct do
the security_vm_enough_memory check when it finds VM_ACCOUNT is set.
And the MIPS irix_brk has been calling security_vm_enough_memory before
calling do_brk which repeats it, doubly accounting and so also leaking.
Remove that, and all the fs and arch calls to security_vm_enough_memory:
give it a less misleading name later on.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
---
arch/ia64/ia32/binfmt_elf32.c | 6 ------
arch/mips/kernel/sysirix.c | 9 ++-------
arch/ppc64/kernel/vdso.c | 15 +++++++++------
arch/x86_64/ia32/ia32_binfmt.c | 5 -----
arch/x86_64/ia32/syscall32.c | 6 +-----
fs/exec.c | 5 -----
mm/mmap.c | 3 +++
7 files changed, 15 insertions(+), 34 deletions(-)
--- 2.6.14-rc1/arch/ia64/ia32/binfmt_elf32.c 2005-03-02 07:39:16.000000000 +0000
+++ linux/arch/ia64/ia32/binfmt_elf32.c 2005-09-13 17:58:28.000000000 +0100
@@ -216,12 +216,6 @@ ia32_setup_arg_pages (struct linux_binpr
if (!mpnt)
return -ENOMEM;
- if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))
- >> PAGE_SHIFT)) {
- kmem_cache_free(vm_area_cachep, mpnt);
- return -ENOMEM;
- }
-
memset(mpnt, 0, sizeof(*mpnt));
down_write(¤t->mm->mmap_sem);
--- 2.6.14-rc1/arch/mips/kernel/sysirix.c 2005-09-13 15:22:15.000000000 +0100
+++ linux/arch/mips/kernel/sysirix.c 2005-09-13 18:51:43.000000000 +0100
@@ -581,18 +581,13 @@ asmlinkage int irix_brk(unsigned long br
}
/*
- * Check if we have enough memory..
+ * Ok, looks good - let it rip.
*/
- if (security_vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) {
+ if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk) {
ret = -ENOMEM;
goto out;
}
-
- /*
- * Ok, looks good - let it rip.
- */
mm->brk = brk;
- do_brk(oldbrk, newbrk-oldbrk);
ret = 0;
out:
--- 2.6.14-rc1/arch/ppc64/kernel/vdso.c 2005-06-17 20:48:29.000000000 +0100
+++ linux/arch/ppc64/kernel/vdso.c 2005-09-13 20:50:02.000000000 +0100
@@ -224,10 +224,7 @@ int arch_setup_additional_pages(struct l
vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (vma == NULL)
return -ENOMEM;
- if (security_vm_enough_memory(vdso_pages)) {
- kmem_cache_free(vm_area_cachep, vma);
- return -ENOMEM;
- }
+
memset(vma, 0, sizeof(*vma));
/*
@@ -237,8 +234,10 @@ int arch_setup_additional_pages(struct l
*/
vdso_base = get_unmapped_area(NULL, vdso_base,
vdso_pages << PAGE_SHIFT, 0, 0);
- if (vdso_base & ~PAGE_MASK)
+ if (vdso_base & ~PAGE_MASK) {
+ kmem_cache_free(vm_area_cachep, vma);
return (int)vdso_base;
+ }
current->thread.vdso_base = vdso_base;
@@ -266,7 +265,11 @@ int arch_setup_additional_pages(struct l
vma->vm_ops = &vdso_vmops;
down_write(&mm->mmap_sem);
- insert_vm_struct(mm, vma);
+ if (insert_vm_struct(mm, vma)) {
+ up_write(&mm->mmap_sem);
+ kmem_cache_free(vm_area_cachep, vma);
+ return -ENOMEM;
+ }
mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
up_write(&mm->mmap_sem);
--- 2.6.14-rc1/arch/x86_64/ia32/ia32_binfmt.c 2005-08-29 00:41:01.000000000 +0100
+++ linux/arch/x86_64/ia32/ia32_binfmt.c 2005-09-13 18:05:20.000000000 +0100
@@ -353,11 +353,6 @@ int setup_arg_pages(struct linux_binprm
mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (!mpnt)
return -ENOMEM;
-
- if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
- kmem_cache_free(vm_area_cachep, mpnt);
- return -ENOMEM;
- }
memset(mpnt, 0, sizeof(*mpnt));
--- 2.6.14-rc1/arch/x86_64/ia32/syscall32.c 2005-08-29 00:41:01.000000000 +0100
+++ linux/arch/x86_64/ia32/syscall32.c 2005-09-13 18:53:32.000000000 +0100
@@ -52,17 +52,13 @@ int syscall32_setup_pages(struct linux_b
vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
if (!vma)
return -ENOMEM;
- if (security_vm_enough_memory(npages)) {
- kmem_cache_free(vm_area_cachep, vma);
- return -ENOMEM;
- }
memset(vma, 0, sizeof(struct vm_area_struct));
/* Could randomize here */
vma->vm_start = VSYSCALL32_BASE;
vma->vm_end = VSYSCALL32_END;
/* MAYWRITE to allow gdb to COW and set breakpoints */
- vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYEXEC|VM_MAYWRITE;
+ vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
vma->vm_flags |= mm->def_flags;
vma->vm_page_prot = protection_map[vma->vm_flags & 7];
vma->vm_ops = &syscall32_vm_ops;
--- 2.6.14-rc1/fs/exec.c 2005-09-13 15:22:37.000000000 +0100
+++ linux/fs/exec.c 2005-09-13 17:50:46.000000000 +0100
@@ -421,11 +421,6 @@ int setup_arg_pages(struct linux_binprm
if (!mpnt)
return -ENOMEM;
- if (security_vm_enough_memory(arg_size >> PAGE_SHIFT)) {
- kmem_cache_free(vm_area_cachep, mpnt);
- return -ENOMEM;
- }
-
memset(mpnt, 0, sizeof(*mpnt));
down_write(&mm->mmap_sem);
--- 2.6.14-rc1/mm/mmap.c 2005-09-13 15:22:47.000000000 +0100
+++ linux/mm/mmap.c 2005-09-13 18:59:59.000000000 +0100
@@ -1993,6 +1993,9 @@ int insert_vm_struct(struct mm_struct *
__vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
if (__vma && __vma->vm_start < vma->vm_end)
return -ENOMEM;
+ if ((vma->vm_flags & VM_ACCOUNT) &&
+ security_vm_enough_memory(vma_pages(vma)))
+ return -ENOMEM;
vma_link(mm, vma, prev, rb_link, rb_parent);
return 0;
}
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] error path in setup_arg_pages() misses vm_unacct_memory()
2005-09-14 5:13 ` Hugh Dickins
@ 2005-09-14 8:41 ` Kirill Korotaev
2005-09-14 9:14 ` Hugh Dickins
0 siblings, 1 reply; 13+ messages in thread
From: Kirill Korotaev @ 2005-09-14 8:41 UTC (permalink / raw)
To: Hugh Dickins; +Cc: Andrew Morton, torvalds, linux-kernel, xemul
Hugh,
is vma accounting in arch/x86_64/ia32/syscall32.c and
arch/ppc64/kernel/vdso.c removed due to fixed size of vma (vsyscall/vdso
mappings)?
in other respects it looks ok.
Signed-Off-By: Kirill Korotaev <dev@sw.ru>
Kirill
> On Tue, 13 Sep 2005, Hugh Dickins wrote:
>
>>On Mon, 12 Sep 2005, Andrew Morton wrote:
>>
>>>Patch looks OK to me. Hugh, could you please double-check sometime?
>>
>>It's a good find, and the patch looks correct to me, so far as it goes.
>>But I think it's the wrong patch, and incomplete: it can be done more
>>appropriately, more simply and more completely in insert_vm_struct itself.
>>I'll post a replacement patch (or admit I'm wrong) in a little while.
>
>
> Many thanks, Andrew, for confirming that ppc64 does indeed leak into
> Committed_AS, as it looked to me. Here's my version of Pavel/Kirill's
> patches: sorry if it seems "weird" to Kirill, perhaps we need to change
> the name of insert_vm_struct too; but it seems safer and easier to get
> right this way. And I prefer deleting code to adding it...
>
>
> Pavel Emelianov and Kirill Korotaev observe that fs and arch users of
> security_vm_enough_memory tend to forget to vm_unacct_memory when a
> failure occurs further down (typically in setup_arg_pages variants).
>
> These are all users of insert_vm_struct, and that reservation will only
> be unaccounted on exit if the vma is marked VM_ACCOUNT: which in some
> cases it is (hidden inside VM_STACK_FLAGS) and in some cases it isn't.
>
> So x86_64 32-bit and ppc64 vDSO ELFs have been leaking memory into
> Committed_AS each time they're run. But don't add VM_ACCOUNT to them,
> it's inappropriate to reserve against the very unlikely case that gdb
> be used to COW a vDSO page - we ought to do something about that in
> do_wp_page, but there are yet other inconsistencies to be resolved.
>
> The safe and economical way to fix this is to let insert_vm_struct do
> the security_vm_enough_memory check when it finds VM_ACCOUNT is set.
>
> And the MIPS irix_brk has been calling security_vm_enough_memory before
> calling do_brk which repeats it, doubly accounting and so also leaking.
> Remove that, and all the fs and arch calls to security_vm_enough_memory:
> give it a less misleading name later on.
>
> Signed-off-by: Hugh Dickins <hugh@veritas.com>
> ---
>
> arch/ia64/ia32/binfmt_elf32.c | 6 ------
> arch/mips/kernel/sysirix.c | 9 ++-------
> arch/ppc64/kernel/vdso.c | 15 +++++++++------
> arch/x86_64/ia32/ia32_binfmt.c | 5 -----
> arch/x86_64/ia32/syscall32.c | 6 +-----
> fs/exec.c | 5 -----
> mm/mmap.c | 3 +++
> 7 files changed, 15 insertions(+), 34 deletions(-)
>
> --- 2.6.14-rc1/arch/ia64/ia32/binfmt_elf32.c 2005-03-02 07:39:16.000000000 +0000
> +++ linux/arch/ia64/ia32/binfmt_elf32.c 2005-09-13 17:58:28.000000000 +0100
> @@ -216,12 +216,6 @@ ia32_setup_arg_pages (struct linux_binpr
> if (!mpnt)
> return -ENOMEM;
>
> - if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))
> - >> PAGE_SHIFT)) {
> - kmem_cache_free(vm_area_cachep, mpnt);
> - return -ENOMEM;
> - }
> -
> memset(mpnt, 0, sizeof(*mpnt));
>
> down_write(¤t->mm->mmap_sem);
> --- 2.6.14-rc1/arch/mips/kernel/sysirix.c 2005-09-13 15:22:15.000000000 +0100
> +++ linux/arch/mips/kernel/sysirix.c 2005-09-13 18:51:43.000000000 +0100
> @@ -581,18 +581,13 @@ asmlinkage int irix_brk(unsigned long br
> }
>
> /*
> - * Check if we have enough memory..
> + * Ok, looks good - let it rip.
> */
> - if (security_vm_enough_memory((newbrk-oldbrk) >> PAGE_SHIFT)) {
> + if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk) {
> ret = -ENOMEM;
> goto out;
> }
> -
> - /*
> - * Ok, looks good - let it rip.
> - */
> mm->brk = brk;
> - do_brk(oldbrk, newbrk-oldbrk);
> ret = 0;
>
> out:
> --- 2.6.14-rc1/arch/ppc64/kernel/vdso.c 2005-06-17 20:48:29.000000000 +0100
> +++ linux/arch/ppc64/kernel/vdso.c 2005-09-13 20:50:02.000000000 +0100
> @@ -224,10 +224,7 @@ int arch_setup_additional_pages(struct l
> vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
> if (vma == NULL)
> return -ENOMEM;
> - if (security_vm_enough_memory(vdso_pages)) {
> - kmem_cache_free(vm_area_cachep, vma);
> - return -ENOMEM;
> - }
> +
> memset(vma, 0, sizeof(*vma));
>
> /*
> @@ -237,8 +234,10 @@ int arch_setup_additional_pages(struct l
> */
> vdso_base = get_unmapped_area(NULL, vdso_base,
> vdso_pages << PAGE_SHIFT, 0, 0);
> - if (vdso_base & ~PAGE_MASK)
> + if (vdso_base & ~PAGE_MASK) {
> + kmem_cache_free(vm_area_cachep, vma);
> return (int)vdso_base;
> + }
>
> current->thread.vdso_base = vdso_base;
>
> @@ -266,7 +265,11 @@ int arch_setup_additional_pages(struct l
> vma->vm_ops = &vdso_vmops;
>
> down_write(&mm->mmap_sem);
> - insert_vm_struct(mm, vma);
> + if (insert_vm_struct(mm, vma)) {
> + up_write(&mm->mmap_sem);
> + kmem_cache_free(vm_area_cachep, vma);
> + return -ENOMEM;
> + }
> mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
> up_write(&mm->mmap_sem);
>
> --- 2.6.14-rc1/arch/x86_64/ia32/ia32_binfmt.c 2005-08-29 00:41:01.000000000 +0100
> +++ linux/arch/x86_64/ia32/ia32_binfmt.c 2005-09-13 18:05:20.000000000 +0100
> @@ -353,11 +353,6 @@ int setup_arg_pages(struct linux_binprm
> mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
> if (!mpnt)
> return -ENOMEM;
> -
> - if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))>>PAGE_SHIFT)) {
> - kmem_cache_free(vm_area_cachep, mpnt);
> - return -ENOMEM;
> - }
>
> memset(mpnt, 0, sizeof(*mpnt));
>
> --- 2.6.14-rc1/arch/x86_64/ia32/syscall32.c 2005-08-29 00:41:01.000000000 +0100
> +++ linux/arch/x86_64/ia32/syscall32.c 2005-09-13 18:53:32.000000000 +0100
> @@ -52,17 +52,13 @@ int syscall32_setup_pages(struct linux_b
> vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
> if (!vma)
> return -ENOMEM;
> - if (security_vm_enough_memory(npages)) {
> - kmem_cache_free(vm_area_cachep, vma);
> - return -ENOMEM;
> - }
>
> memset(vma, 0, sizeof(struct vm_area_struct));
> /* Could randomize here */
> vma->vm_start = VSYSCALL32_BASE;
> vma->vm_end = VSYSCALL32_END;
> /* MAYWRITE to allow gdb to COW and set breakpoints */
> - vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYEXEC|VM_MAYWRITE;
> + vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
> vma->vm_flags |= mm->def_flags;
> vma->vm_page_prot = protection_map[vma->vm_flags & 7];
> vma->vm_ops = &syscall32_vm_ops;
> --- 2.6.14-rc1/fs/exec.c 2005-09-13 15:22:37.000000000 +0100
> +++ linux/fs/exec.c 2005-09-13 17:50:46.000000000 +0100
> @@ -421,11 +421,6 @@ int setup_arg_pages(struct linux_binprm
> if (!mpnt)
> return -ENOMEM;
>
> - if (security_vm_enough_memory(arg_size >> PAGE_SHIFT)) {
> - kmem_cache_free(vm_area_cachep, mpnt);
> - return -ENOMEM;
> - }
> -
> memset(mpnt, 0, sizeof(*mpnt));
>
> down_write(&mm->mmap_sem);
> --- 2.6.14-rc1/mm/mmap.c 2005-09-13 15:22:47.000000000 +0100
> +++ linux/mm/mmap.c 2005-09-13 18:59:59.000000000 +0100
> @@ -1993,6 +1993,9 @@ int insert_vm_struct(struct mm_struct *
> __vma = find_vma_prepare(mm,vma->vm_start,&prev,&rb_link,&rb_parent);
> if (__vma && __vma->vm_start < vma->vm_end)
> return -ENOMEM;
> + if ((vma->vm_flags & VM_ACCOUNT) &&
> + security_vm_enough_memory(vma_pages(vma)))
> + return -ENOMEM;
> vma_link(mm, vma, prev, rb_link, rb_parent);
> return 0;
> }
>
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] error path in setup_arg_pages() misses vm_unacct_memory()
2005-09-14 8:41 ` Kirill Korotaev
@ 2005-09-14 9:14 ` Hugh Dickins
0 siblings, 0 replies; 13+ messages in thread
From: Hugh Dickins @ 2005-09-14 9:14 UTC (permalink / raw)
To: Kirill Korotaev; +Cc: Andrew Morton, torvalds, linux-kernel, xemul
On Wed, 14 Sep 2005, Kirill Korotaev wrote:
>
> is vma accounting in arch/x86_64/ia32/syscall32.c and arch/ppc64/kernel/vdso.c
> removed due to fixed size of vma (vsyscall/vdso mappings)?
> in other respects it looks ok.
It's removed because without VM_ACCOUNT it was steadily leaking:
the main path was wrong, never mind the error path. We could add
VM_ACCOUNT to the flags to fix that, but in 99.999% of cases we
should not be accounting these - all mms are sharing the same page.
Ben designed it to allow for gdb setting breakpoints in the vDSO
page (COW then giving a private page), but that's an very unusual
case, which isn't handled quite right anyway: we need to take a
separate look at that sometime - we account for what's VM_WRITE,
not for what ptrace might write to by the backdoor.
> > So x86_64 32-bit and ppc64 vDSO ELFs have been leaking memory into
> > Committed_AS each time they're run. But don't add VM_ACCOUNT to them,
> > it's inappropriate to reserve against the very unlikely case that gdb
> > be used to COW a vDSO page - we ought to do something about that in
> > do_wp_page, but there are yet other inconsistencies to be resolved.
Hugh
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2005-09-14 9:14 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-09-12 16:49 [PATCH] error path in setup_arg_pages() misses vm_unacct_memory() Kirill Korotaev
2005-09-12 20:23 ` Andrew Morton
2005-09-13 8:21 ` Kirill Korotaev
2005-09-13 8:40 ` Andrew Morton
2005-09-13 11:30 ` Hugh Dickins
2005-09-13 18:37 ` Andrew Morton
2005-09-13 19:10 ` Hugh Dickins
2005-09-13 11:58 ` Alan Cox
2005-09-13 11:20 ` Hugh Dickins
2005-09-13 12:13 ` Kirill Korotaev
2005-09-14 5:13 ` Hugh Dickins
2005-09-14 8:41 ` Kirill Korotaev
2005-09-14 9:14 ` Hugh Dickins
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox