From: Andrew Morton <akpm@linux-foundation.org>
To: David Howells <dhowells@redhat.com>
Cc: torvalds@linux-foundation.org, bernds_cb1@t-online.de,
rgetz@blackfin.uclinux.org, vapier.adi@gmail.com,
lethal@linux-sh.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH 04/10] NOMMU: Make VMAs per MM as for MMU-mode linux [ver #3]
Date: Mon, 12 Jan 2009 23:55:50 -0800 [thread overview]
Message-ID: <20090112235550.2d6be428.akpm@linux-foundation.org> (raw)
In-Reply-To: <20090108125407.28454.29221.stgit@warthog.procyon.org.uk>
On Thu, 08 Jan 2009 12:54:07 +0000 David Howells <dhowells@redhat.com> wrote:
> Make VMAs per mm_struct as for MMU-mode linux.
>
>
> ...
>
> --- a/fs/proc/meminfo.c
> +++ b/fs/proc/meminfo.c
> @@ -74,6 +74,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
> "LowTotal: %8lu kB\n"
> "LowFree: %8lu kB\n"
> #endif
> +#ifndef CONFIG_MMU
> + "MmapCopy: %8lu kB\n"
> +#endif
> "SwapTotal: %8lu kB\n"
> "SwapFree: %8lu kB\n"
> "Dirty: %8lu kB\n"
> @@ -116,6 +119,9 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
> K(i.totalram-i.totalhigh),
> K(i.freeram-i.freehigh),
> #endif
> +#ifndef CONFIG_MMU
> + K((unsigned long) atomic_read(&mmap_pages_allocated)),
> +#endif
This is slightly wrong, as atomic_read() returns "int". It will show
weird results on 64-bit nommu machines which allocate more than 2G pages :)
> K(i.totalswap),
> K(i.freeswap),
> K(global_page_state(NR_FILE_DIRTY)),
>
> ...
>
> +static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
> +{
> + unsigned long ino = 0;
> + struct file *file;
> + dev_t dev = 0;
> + int flags, len;
> +
> + flags = vma->vm_flags;
> + file = vma->vm_file;
> +
> + if (file) {
> + struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
> + dev = inode->i_sb->s_dev;
> + ino = inode->i_ino;
> + }
> +
> + seq_printf(m,
> + "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n",
> + vma->vm_start,
> + vma->vm_end,
> + flags & VM_READ ? 'r' : '-',
> + flags & VM_WRITE ? 'w' : '-',
> + flags & VM_EXEC ? 'x' : '-',
> + flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p',
> + vma->vm_pgoff << PAGE_SHIFT,
This will overflow at file offsets >4G?
> + MAJOR(dev), MINOR(dev), ino, &len);
> +
> + if (file) {
> + len = 25 + sizeof(void *) * 6 - len;
> + if (len < 1)
> + len = 1;
> + seq_printf(m, "%*c", len, ' ');
> + seq_path(m, &file->f_path, "");
> + }
> +
> + seq_putc(m, '\n');
> + return 0;
> +}
> +
>
> ...
>
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2472,3 +2472,13 @@ void mm_drop_all_locks(struct mm_struct *mm)
>
> mutex_unlock(&mm_all_locks_mutex);
> }
> +
> +/*
> + * initialise the VMA slab
> + */
> +void __init mmap_init(void)
> +{
> + vm_area_cachep = kmem_cache_create("vm_area_struct",
> + sizeof(struct vm_area_struct), 0,
> + SLAB_PANIC, NULL);
We have the KMEM_CACHE() macro for this.
It was initially added because we'd been through a period of changing
the kmem_cache_create() arguments every ten minutes and we got tired of
patching zillions of callsites.
(I see this was copy-n-pasted from fork.c. Why was it moved?)
>
> ...
>
> + vm_region_jar = kmem_cache_create("vm_region_jar",
> + sizeof(struct vm_region), 0,
> + SLAB_PANIC, NULL);
> + vm_area_cachep = kmem_cache_create("vm_area_struct",
> + sizeof(struct vm_area_struct), 0,
> + SLAB_PANIC, NULL);
dittoes
> -#endif /* DEBUG */
>
> /*
> - * add a VMA into a process's mm_struct in the appropriate place in the list
> - * - should be called with mm->mmap_sem held writelocked
> + * validate the region tree
> + * - the caller must hold the region lock
> */
> -static void add_vma_to_mm(struct mm_struct *mm, struct vm_list_struct *vml)
> +#ifdef CONFIG_DEBUG_NOMMU_REGIONS
> +static noinline void validate_nommu_regions(void)
> {
> - struct vm_list_struct **ppv;
> + struct vm_region *region, *last;
> + struct rb_node *p, *lastp;
>
> - for (ppv = ¤t->mm->context.vmlist; *ppv; ppv = &(*ppv)->next)
> - if ((*ppv)->vma->vm_start > vml->vma->vm_start)
> - break;
> + lastp = rb_first(&nommu_region_tree);
> + if (!lastp)
> + return;
> +
> + last = rb_entry(lastp, struct vm_region, vm_rb);
> + if (unlikely(last->vm_end <= last->vm_start))
> + BUG();
open-coded BUG_ON()
> + while ((p = rb_next(lastp))) {
> + region = rb_entry(p, struct vm_region, vm_rb);
`region' could be local to this block, if one feels so inclined.
> + last = rb_entry(lastp, struct vm_region, vm_rb);
> +
> + if (unlikely(region->vm_end <= region->vm_start))
> + BUG();
> + if (unlikely(region->vm_start < last->vm_end))
> + BUG();
BUG_ON()
> - vml->next = *ppv;
> - *ppv = vml;
> + lastp = p;
> + }
> }
> +#else
> +#define validate_nommu_regions() do {} while(0)
There's no need to implement this in cpp...
>
> ...
>
> -static inline struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
> - unsigned long addr)
> +static void free_page_series(unsigned long from, unsigned long to)
> {
> - struct vm_list_struct *vml;
> -
> - /* search the vm_start ordered list */
> - for (vml = mm->context.vmlist; vml; vml = vml->next) {
> - if (vml->vma->vm_start == addr)
> - return vml->vma;
> - if (vml->vma->vm_start > addr)
> - break;
> + for (; from < to; from += PAGE_SIZE) {
> + struct page *page = virt_to_page(from);
> +
> + kdebug("- free %lx", from);
> + atomic_dec(&mmap_pages_allocated);
This isn't counting "pages allocated". It's counting page *refcounts*.
Now, perhaps these pages will always have a refcount of 1 (why?), in
which case things happen to work out. But if so, the next line isn't
needed:
> + if (page_count(page) != 1)
should be "== 1", surely?
> + kdebug("free page %p [%d]", page, page_count(page));
> + put_page(page);
> }
> -
> - return NULL;
> }
>
> /*
> - * find a VMA in the global tree
> + * release a reference to a region
> + * - the caller must hold the region semaphore, which this releases
"for writing"
>
> ...
>
> +int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
> +{
> + struct vm_area_struct *vma;
> + struct rb_node *rb;
> + unsigned long end = start + len;
> + int ret;
>
> - update_hiwater_vm(mm);
> - mm->total_vm -= len >> PAGE_SHIFT;
> + kenter(",%lx,%zx", start, len);
>
> -#ifdef DEBUG
> - show_process_blocks();
> -#endif
> + if (len == 0)
> + return -EINVAL;
> +
> + /* find the first potentially overlapping VMA */
> + vma = find_vma(mm, start);
> + if (!vma) {
> + printk(KERN_WARNING
> + "munmap of memory not mmapped by process %d (%s):"
> + " 0x%lx-0x%lx\n",
> + current->pid, current->comm, start, start + len - 1);
Unprivileged userspace can spam logs? Perhaps not a concern on typical
nommu systems. But buggy userspace can spam logs, too, which _is_ a
problem?
> + return -EINVAL;
> + }
>
> + /* we're allowed to split an anonymous VMA but not a file-backed one */
> + if (vma->vm_file) {
> + do {
> + if (start > vma->vm_start) {
> + kleave(" = -EINVAL [miss]");
> + return -EINVAL;
> + }
> + if (end == vma->vm_end)
> + goto erase_whole_vma;
> + rb = rb_next(&vma->vm_rb);
> + vma = rb_entry(rb, struct vm_area_struct, vm_rb);
> + } while (rb);
> + kleave(" = -EINVAL [split file]");
> + return -EINVAL;
> + } else {
> + /* the chunk must be a subset of the VMA found */
> + if (start == vma->vm_start && end == vma->vm_end)
> + goto erase_whole_vma;
> + if (start < vma->vm_start || end > vma->vm_end) {
> + kleave(" = -EINVAL [superset]");
> + return -EINVAL;
> + }
> + if (start & ~PAGE_MASK) {
> + kleave(" = -EINVAL [unaligned start]");
> + return -EINVAL;
> + }
> + if (end != vma->vm_end && end & ~PAGE_MASK) {
> + kleave(" = -EINVAL [unaligned split]");
> + return -EINVAL;
> + }
> + if (start != vma->vm_start && end != vma->vm_end) {
> + ret = split_vma(mm, vma, start, 1);
> + if (ret < 0) {
> + kleave(" = %d [split]", ret);
> + return ret;
> + }
> + }
> + return shrink_vma(mm, vma, start, end);
> + }
> +
> +erase_whole_vma:
> + delete_vma_from_mm(vma);
> + delete_vma(mm, vma);
> + kleave(" = 0");
> return 0;
> }
>
> ...
>
next prev parent reply other threads:[~2009-01-13 7:57 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-01-08 12:53 [PATCH 00/10] Alter NOMMU mmap handling [ver #3] David Howells
2009-01-08 12:53 ` [PATCH 01/10] NOMMU: Fix cleanup handling in ramfs_nommu_get_umapped_area() " David Howells
2009-01-08 12:53 ` [PATCH 02/10] NOMMU: Rename ARM's struct vm_region " David Howells
2009-01-08 12:54 ` [PATCH 03/10] NOMMU: Delete askedalloc and realalloc variables " David Howells
2009-01-08 12:54 ` [PATCH 04/10] NOMMU: Make VMAs per MM as for MMU-mode linux " David Howells
2009-01-13 7:55 ` Andrew Morton [this message]
2009-01-13 10:51 ` David Howells
2009-01-08 12:54 ` [PATCH 05/10] NOMMU: Make mmap allocation page trimming behaviour configurable. " David Howells
2009-01-08 12:54 ` [PATCH 06/10] NOMMU: Improve procfs output using per-MM VMAs " David Howells
2009-01-08 12:54 ` [PATCH 07/10] FDPIC: Don't attempt to expand the userspace stack to fill the space allocated " David Howells
2009-01-08 12:54 ` [PATCH 08/10] FLAT: " David Howells
2009-01-08 12:54 ` [PATCH 09/10] NOMMU: Teach kobjsize() about VMA regions. " David Howells
2009-01-08 12:54 ` [PATCH 10/10] NOMMU: Support XIP on initramfs " David Howells
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090112235550.2d6be428.akpm@linux-foundation.org \
--to=akpm@linux-foundation.org \
--cc=bernds_cb1@t-online.de \
--cc=dhowells@redhat.com \
--cc=lethal@linux-sh.org \
--cc=linux-kernel@vger.kernel.org \
--cc=rgetz@blackfin.uclinux.org \
--cc=torvalds@linux-foundation.org \
--cc=vapier.adi@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.