public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: "David S. Miller" <davem@davemloft.net>
To: Hugh Dickins <hugh@veritas.com>
Cc: akpm@osdl.org, nickpiggin@yahoo.com.au, tony.luck@intel.com,
	benh@kernel.crashing.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH 1/5] freepgt: free_pgtables use vma list
Date: Tue, 22 Mar 2005 13:28:18 -0800	[thread overview]
Message-ID: <20050322132818.4bf6c8a6.davem@davemloft.net> (raw)
In-Reply-To: <Pine.LNX.4.61.0503221931150.9348@goblin.wat.veritas.com>

[-- Attachment #1: Type: text/plain, Size: 791 bytes --]


Hugh, I got tired of rebooting just to get address walking
traces :-)  So I wrote a little simulator.

Basically, it's free_pgtables() with the page table pointer
stuff ripped out.

You run it like this:

./simulator vma_file

Where vma_file is a text file composed of lines of the form:

START END

in hex.  The ordered list of VMA's is built from this text file.
I enclose a sample file I took from  that trace I built earlier
today.

If you want to get fancy, it's probably very easy to make it so
that parse_file() can read in live /proc/${pid}/maps files too :)

It is easy to stick in support for platforms other than SPARC64,
the machinery is all there, just pop the definitions out from
the platforms include/asm-*/*.h and relevant include/asm-generic/*.h
header files.

Enjoy. :-)

[-- Attachment #2: simulator.c --]
[-- Type: text/x-csrc, Size: 5620 bytes --]

#include <stddef.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <malloc.h>

#define ARCH_SPARC64	1
#define ARCH_IA64	2
#define ARCH_PPC64	3

#define ARCH	ARCH_SPARC64

#if (ARCH == ARCH_SPARC64)
#define PAGE_SHIFT	13
#define PMD_SHIFT	(PAGE_SHIFT + (PAGE_SHIFT-3))
#define PMD_SIZE	(1UL << PMD_SHIFT)
#define PMD_MASK	(~(PMD_SIZE-1))
#define PMD_BITS	11
#define PGDIR_SHIFT	(PAGE_SHIFT + (PAGE_SHIFT-3) + PMD_BITS)
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_MASK	(~(PGDIR_SIZE-1))
#define PUD_SHIFT	PGDIR_SHIFT
#define PTRS_PER_PUD	1
#define PUD_SIZE  	(1UL << PUD_SHIFT)
#define PUD_MASK  	(~(PUD_SIZE-1))
#define pgd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#define pud_addr_end(addr, end)			(end)
#define pmd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
#else
#error please add definitions for your architecture
#endif

/*
 * Note: this doesn't free the actual pages themselves. That
 * has been handled earlier when unmapping all the memory regions.
 */
static inline void free_pte_range(unsigned long addr)
{
}

static inline void free_pmd_range(unsigned long addr, unsigned long end,
				  unsigned long floor, unsigned long ceiling)
{
	unsigned long next;
	unsigned long start;

	start = addr;
	do {
		next = pmd_addr_end(addr, end);
#if 1
		printf("free_pgtables: free_pte_range() addr(%lx) next(%lx) end(%lx)\n",
		       addr, next, end);
#endif
		free_pte_range(addr);
	} while (addr = next, addr != end);

	start &= PUD_MASK;
	ceiling &= PUD_MASK;
#if 1
	printf("free_pmd_range: start(%lx) ceiling(%lx) ", start, ceiling);
#endif
	if ((start < floor) ||
	    (end - 1 > ceiling - 1)) {
#if 1
		printf("SKIP pud_clear()\n");
#endif
		return;
	}
#if 1
	printf("DO pud_clear()\n");
#endif
}

static inline void free_pud_range(unsigned long addr, unsigned long end,
				  unsigned long floor, unsigned long ceiling)
{
	unsigned long next;
	unsigned long start;

	start = addr;
	do {
		next = pud_addr_end(addr, end);
#if 1
		printf("free_pud_range: free_pmd_range(%lx,%lx,%lx,%lx)\n",
		       addr, next, floor, ceiling);
#endif
		free_pmd_range(addr, next, floor, ceiling);
	} while (addr = next, addr != end);

	start &= PGDIR_MASK;
	ceiling &= PGDIR_MASK;
#if 1
	printf("free_pud_range: start(%lx) ceiling(%lx) ",
	       start, ceiling);
#endif
	if ((start < floor) ||
	    (end - 1 > ceiling - 1)) {
#if 1
		printf("SKIP pgd_clear()\n");
#endif
		return;
	}
#if 1
	printf("DO pgd_clear()\n");
#endif
}

/*
 * This function frees user-level page tables of a process.
 * Unlike other pagetable walks, some memory layouts might give end 0.
 * Must be called with pagetable lock held.
 */
static inline void free_pgd_range(unsigned long addr, unsigned long end,
				  unsigned long floor, unsigned long ceiling)
{
	unsigned long next;
	unsigned long start;

	addr &= PMD_MASK;
	if (addr < floor) {
		addr += PMD_SIZE;
		if (!addr)
			return;
	}
	ceiling &= PMD_MASK;
	if (end - 1 > ceiling - 1)
		end -= PMD_SIZE;
	if (addr > end - 1)
		return;

	start = addr;
	do {
		next = pgd_addr_end(addr, end);
#if 1
		printf("free_pgd_range: free_pud_range(%lx,%lx,%lx,%lx)\n",
		       addr, next, floor, ceiling);
#endif
		free_pud_range(addr, next, floor, ceiling);
	} while (addr = next, addr != end);

	if (1 /*!tlb_is_full_mm(tlb)*/) {
#if 1
		printf("free_pgd_range: Doing flush_tlb_pgtables(%lx,%lx)\n",
		       start, end);
#endif
		/*flush_tlb_pgtables(tlb->mm, start, end);*/
	}
}

struct vm_area_struct {
	struct vm_area_struct *vm_next;
	unsigned long vm_start, vm_end;
};

void free_pgtables(struct vm_area_struct *vma,
		   unsigned long floor, unsigned long ceiling)
{
#if 1
	struct vm_area_struct *tmp = vma;

	printf("VMA DUMP: ");
	while (tmp) {
		printf("[%lx:%lx] ", tmp->vm_start, tmp->vm_end);
		tmp = tmp->vm_next;
	}
	printf("\n");
#endif
	while (vma) {
		struct vm_area_struct *next = vma->vm_next;
		unsigned long addr = vma->vm_start;

		/* Optimization: gather nearby vmas into a single call down */
		while (next && next->vm_start <= vma->vm_end + 2*PMD_SIZE) {
			vma = next;
			next = vma->vm_next;
		}
#if 1
		printf("free_pgtables: free_pgd_range(%lx,%lx,%lx,%lx)\n",
		       addr, vma->vm_end, floor,
		       next ? next->vm_start : ceiling);
#endif
		free_pgd_range(addr, vma->vm_end,
			       floor, next? next->vm_start: ceiling);
		vma = next;
	}
}

/* Note that we have to build the list in ascending order.  */
static struct vm_area_struct *parse_file(FILE *fp)
{
	struct vm_area_struct **pprev = NULL;
	struct vm_area_struct *mpnt, *head;
	unsigned long start, end;

	for (;;) {
		int ret = fscanf(fp, "%lx %lx", &start, &end);

		if (ret < 2 || ret == EOF)
			break;

		mpnt = malloc(sizeof(*mpnt));
		if (mpnt == NULL)
			goto fail;

#if 1
		printf("parse_file: Building VMA %lx --> %lx\n",
		       start, end);
#endif
		mpnt->vm_next = NULL;
		mpnt->vm_start = start;
		mpnt->vm_end = end;
		if (pprev)
			*pprev = mpnt;
		else
			head = mpnt;
		pprev = &mpnt->vm_next;
	}

	return head;

fail:
	while (head) {
		struct vm_area_struct *next = head->vm_next;

		free(head);
		head = next;
	}
	return NULL;
}

int main(int argc, char **argv, char **envp)
{
	FILE *fp;
	struct vm_area_struct *vma;

	if (argc <= 1)
		return 1;

	fp = fopen(argv[1], "r");
	if (fp == NULL) {
		perror("fopen()");
		return 1;
	}
	if ((vma = parse_file(fp)) == NULL)
		return 1;

	free_pgtables(vma, 0, 0);

	return 0;
}

[-- Attachment #3: vma_list.txt --]
[-- Type: text/plain, Size: 638 bytes --]

0x00010000 0x000a4000
0x000b2000 0x000b8000
0x000b8000 0x000de000
0x70000000 0x7001a000
0x70028000 0x7002a000
0x7002c000 0x7006a000
0x7006a000 0x7006c000
0x7006c000 0x70084000
0x70084000 0x70088000
0x70088000 0x70094000
0x70094000 0x70098000
0x70098000 0x701da000
0x701da000 0x701e8000
0x701e8000 0x701f2000
0x701f2000 0x701f4000
0x701f4000 0x701fc000
0x701fc000 0x70204000
0x70204000 0x7020c000
0x7020c000 0x7021e000
0x7021e000 0x7022c000
0x7022c000 0x70230000
0x70230000 0x70232000
0x70234000 0x7023e000
0x7023e000 0x70244000
0x70244000 0x7024e000
0x70250000 0x7025a000
0x7025a000 0x70260000
0x70260000 0x7026c000
0xefbfe000 0xefc28000

  parent reply	other threads:[~2005-03-22 21:33 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-03-21 20:52 [PATCH 1/5] freepgt: free_pgtables use vma list Hugh Dickins
2005-03-21 20:55 ` [PATCH 2/5] freepgt: remove MM_VM_SIZE(mm) Hugh Dickins
2005-03-21 20:56 ` [PATCH 3/5] freepgt: hugetlb_free_pgd_range Hugh Dickins
2005-03-21 20:57 ` [PATCH 4/5] freepgt: remove arch pgd_addr_end Hugh Dickins
2005-03-21 20:58 ` [PATCH 5/5] freepgt: mpnt to vma cleanup Hugh Dickins
2005-03-21 22:26 ` [PATCH 1/5] freepgt: free_pgtables use vma list David S. Miller
2005-03-22  5:47   ` Hugh Dickins
2005-03-22 17:41     ` David S. Miller
2005-03-22 11:40 ` Andrew Morton
2005-03-22 12:17   ` Nick Piggin
2005-03-22 16:37   ` Hugh Dickins
2005-03-22 18:34     ` David S. Miller
2005-03-22 19:01     ` David S. Miller
2005-03-22 19:21       ` David S. Miller
2005-03-22 19:23         ` David S. Miller
2005-03-22 19:36           ` Hugh Dickins
2005-03-22 20:21             ` David S. Miller
2005-03-22 23:45               ` Benjamin Herrenschmidt
2005-03-22 20:33             ` David S. Miller
2005-03-22 21:51               ` Hugh Dickins
2005-03-22 22:41                 ` David S. Miller
2005-03-23  0:51                   ` Hugh Dickins
2005-03-23  2:09                     ` David S. Miller
2005-03-22 23:32                 ` Nick Piggin
2005-03-22 23:44                   ` David S. Miller
2005-03-23  0:19                     ` Nick Piggin
2005-03-23  0:20                       ` David S. Miller
2005-03-23  0:00                   ` David S. Miller
2005-03-23  0:03                   ` David S. Miller
2005-03-22 21:28             ` David S. Miller [this message]
2005-03-22 23:30     ` Benjamin Herrenschmidt
2005-03-23 13:28       ` Hugh Dickins
2005-03-23 23:07         ` Benjamin Herrenschmidt
  -- strict thread matches above, loose matches on Subject: below --
2005-03-21 22:31 Luck, Tony
2005-03-21 23:02 ` David S. Miller
2005-03-22  4:14   ` Nick Piggin
2005-03-22  5:29     ` David S. Miller
2005-03-22  6:08       ` Hugh Dickins
2005-03-22  6:33         ` Nick Piggin
2005-03-22 17:52         ` David S. Miller
2005-03-22 17:55     ` David S. Miller
2005-03-22  5:42   ` Hugh Dickins
2005-03-22 18:06 Luck, Tony
2005-03-22 18:48 ` Hugh Dickins
2005-03-22 22:40 Luck, Tony
2005-03-22 23:30 ` David S. Miller
2005-03-23  0:40 ` Hugh Dickins
2005-03-22 23:53 Luck, Tony
2005-03-22 23:56 ` David S. Miller
2005-03-23  0:56 ` Hugh Dickins
2005-03-23  1:10   ` Andrew Morton
2005-03-23  2:00     ` David S. Miller
2005-03-23  2:10       ` Nick Piggin
2005-03-23  2:15         ` David S. Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20050322132818.4bf6c8a6.davem@davemloft.net \
    --to=davem@davemloft.net \
    --cc=akpm@osdl.org \
    --cc=benh@kernel.crashing.org \
    --cc=hugh@veritas.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nickpiggin@yahoo.com.au \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox