All of lore.kernel.org
 help / color / mirror / Atom feed
From: Nick Craig-Wood <ncw1@axis.demon.co.uk>
To: linux-kernel@vger.kernel.org
Cc: Rohit Seth <rohit.seth@intel.com>
Subject: 2.6.0 Huge pages not working as expected
Date: Fri, 26 Dec 2003 10:54:33 +0000	[thread overview]
Message-ID: <20031226105433.GA25970@axis.demon.co.uk> (raw)

I've been trying out the huge page support using 2.6.0.  I compiled
with :-

CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y

And all looks good in /proc/meminfo

HugePages_Total:     8
HugePages_Free:      8
Hugepagesize:     4096 kB

I mounted a hugetlbfs on /mnt/hugetlb.

I wrote a little test program to show the benefits of huge pages by
reducing TLB thrashing - it fills up 16 MB with sequential numbers
then adds them with different strides - very much the sort of thing
FFTs do.  However, huge pages show a performance decrease, not an increase,
for large strides!  For smaller ones there is a small speedup.

I've been testing on

processor       : 0
vendor_id       : GenuineIntel
cpu family      : 6
model           : 8
model name      : Pentium III (Coppermine)
stepping        : 6
cpu MHz         : 551.405
cache size      : 256 KB
fdiv_bug        : no
hlt_bug         : no
f00f_bug        : no
coma_bug        : no
fpu             : yes
fpu_exception   : yes
cpuid level     : 2
wp              : yes
flags           : fpu vme de pse tsc msr pae mce cx8 sep mtrr pge mca cmov pat pse36 mmx fxsr sse
bogomips        : 1089.53

What's happening? Is there something broken in my program, the kernel,
or my understanding?  I know this isn't a particularly good
demonstration of reducing TLB thrashing as it should only read in
cacheline multiples, but I wasn't expecting it to slow down!

I've also been trying huge pages with mprime (which does lots of FFTs)
which does show some improvement (just 2% or so because it is already
very TLB aware).

Here are the results :-

------------------------------------------------------------
Memory from malloc()
Testing memory at 0x4015e008
span =        1, time =     71.212 ms, total = -2097152
span =        2, time =     71.744 ms, total = -2097152
span =        4, time =     88.352 ms, total = -2097152
span =        8, time =    176.207 ms, total = -2097152
span =       16, time =    176.166 ms, total = -2097152
span =       32, time =    176.385 ms, total = -2097152
span =       64, time =    179.042 ms, total = -2097152
span =      128, time =    184.059 ms, total = -2097152
span =      256, time =    195.014 ms, total = -2097152
span =      512, time =    217.084 ms, total = -2097152
span =     1024, time =    260.899 ms, total = -2097152
span =     2048, time =    259.714 ms, total = -2097152
span =     4096, time =    261.059 ms, total = -2097152

Memory from hugetlbfs
Testing memory at 0x41400000
span =        1, time =     70.815 ms, total = -2097152
span =        2, time =     71.261 ms, total = -2097152
span =        4, time =     88.178 ms, total = -2097152
span =        8, time =    175.512 ms, total = -2097152
span =       16, time =    174.996 ms, total = -2097152
span =       32, time =    175.689 ms, total = -2097152
span =       64, time =    177.301 ms, total = -2097152
span =      128, time =    181.705 ms, total = -2097152
span =      256, time =    191.232 ms, total = -2097152
span =      512, time =    209.886 ms, total = -2097152
span =     1024, time =    247.646 ms, total = -2097152
span =     2048, time =    279.525 ms, total = -2097152
span =     4096, time =    344.605 ms, total = -2097152

Memory from /dev/zero
Testing memory at 0x42400000
span =        1, time =     70.916 ms, total = -2097152
span =        2, time =     71.405 ms, total = -2097152
span =        4, time =     89.584 ms, total = -2097152
span =        8, time =    176.190 ms, total = -2097152
span =       16, time =    175.730 ms, total = -2097152
span =       32, time =    176.377 ms, total = -2097152
span =       64, time =    178.675 ms, total = -2097152
span =      128, time =    183.429 ms, total = -2097152
span =      256, time =    194.153 ms, total = -2097152
span =      512, time =    215.089 ms, total = -2097152
span =     1024, time =    256.428 ms, total = -2097152
span =     2048, time =    268.468 ms, total = -2097152
span =     4096, time =    268.702 ms, total = -2097152
------------------------------------------------------------

And here is the program...

#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/mman.h>

#define MEMORY_FILE_NAME "/mnt/hugetlb/z"
#define MEMORY_SIZE (4*1024*1024)

/****************************************************************************
Wall-clock time as floating point seconds since the epoch, built from
gettimeofday() so it carries microsecond resolution - finer than what
time() can report
****************************************************************************/

static double timef(void)
{
    struct timeval now = {0, 0};
    double whole_seconds;
    double microseconds;

    gettimeofday(&now, 0);

    /* combine the integral seconds with the microsecond remainder */
    whole_seconds = (double)now.tv_sec;
    microseconds = (double)now.tv_usec;
    return whole_seconds + microseconds/1E6;
}

/****************************************************************************
Test the memory with different spans - should show TLB thrashing nicely

p must point to at least MEMORY_SIZE writable ints.  The buffer is filled
with 0..MEMORY_SIZE-1 and then summed once for each span 1, 2, 4 ... 4096.
Every pass reads each element exactly once - only the order of the reads
changes - so all spans print the same total and only the time differs.
****************************************************************************/

static void test(int *p)
{
    int i;
    int span;

    /* %p is only defined for void pointers, so convert explicitly */
    printf("Testing memory at %p\n", (void *)p);

    /* fill it */
    for (i = 0; i < MEMORY_SIZE; i++)
	p[i] = i;

    /* test it with different spans */
    for (span = 1; span <= 4096; span *= 2)
    {
	double start = timef();
	double elapsed;
	int j;
	/* The sum of 0..MEMORY_SIZE-1 is far larger than INT_MAX, and
	   overflowing a signed int is undefined behaviour.  Accumulate in
	   an unsigned int, where wrap-around is well defined. */
	unsigned int total = 0;

	for (j = 0; j < span; j++)
	{
	    for (i = j; i < MEMORY_SIZE; i += span)
		total += (unsigned int)p[i];
	}
	elapsed = timef() - start;

	/* Convert back to int so the report keeps its original signed
	   format (the conversion is implementation-defined, not undefined;
	   on two's complement compilers it just reinterprets the bits). */
	printf("span = %8d, time = %10.3f ms, total = %d\n", span, 1000*elapsed, (int)total);
    }
    printf("\n");
}

/****************************************************************************
Thrash the hugetlb

Obtains three buffers of MEMORY_SIZE ints - one from calloc(), one mapped
from the file MEMORY_FILE_NAME and one mapped from /dev/zero - and runs
test() on each in turn.  Returns EXIT_SUCCESS when all three buffers were
obtained, otherwise prints a message to stderr and exits with EXIT_FAILURE.
****************************************************************************/

int main(void)
{
    int *malloc_memory;
    int *hugepage_memory;
    int *devzero_memory;
    int fd;

    /* get some malloc memory */
    malloc_memory = calloc(MEMORY_SIZE, sizeof(int));
    if (malloc_memory == NULL)
    {
	fprintf(stderr, "Couldn't allocate memory\n");
	exit(EXIT_FAILURE);
    }

    /* get some hugepage memory */
    fd = open(MEMORY_FILE_NAME, O_CREAT|O_RDWR, 0600);
    if (fd < 0)
    {
	fprintf(stderr, "Failed to open huge page memory file '%s': %s\n", MEMORY_FILE_NAME, strerror(errno));
	exit(EXIT_FAILURE);
    }
    hugepage_memory = mmap(NULL, MEMORY_SIZE * sizeof(int), PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    if (hugepage_memory == MAP_FAILED)
    {
	/* report first: unlink() below may overwrite errno */
	fprintf(stderr, "Huge page mmap() failed: %s\n", strerror(errno));
	unlink(MEMORY_FILE_NAME);
	exit(EXIT_FAILURE);
    }
    /* the mapping stays valid after the descriptor is closed, so do not
       leak it when fd is reused below */
    close(fd);

    /* get some /dev/zero memory (the device always exists: no O_CREAT) */
    fd = open("/dev/zero", O_RDWR);
    if (fd < 0)
    {
	fprintf(stderr, "Failed to open /dev/zero memory file: %s\n", strerror(errno));
	unlink(MEMORY_FILE_NAME);
	exit(EXIT_FAILURE);
    }
    devzero_memory = mmap(NULL, MEMORY_SIZE * sizeof(int), PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    if (devzero_memory == MAP_FAILED)
    {
	fprintf(stderr, "/dev/zero mmap() failed: %s\n", strerror(errno));
	unlink(MEMORY_FILE_NAME);
	exit(EXIT_FAILURE);
    }
    close(fd);

    printf("Memory from malloc()\n");
    test(malloc_memory);

    printf("Memory from hugetlbfs\n");
    test(hugepage_memory);

    printf("Memory from /dev/zero\n");
    test(devzero_memory);

    /* release everything that was acquired above */
    munmap(devzero_memory, MEMORY_SIZE * sizeof(int));
    munmap(hugepage_memory, MEMORY_SIZE * sizeof(int));
    free(malloc_memory);

    if (unlink(MEMORY_FILE_NAME) != 0)
	fprintf(stderr, "Warning: failed to remove '%s': %s\n", MEMORY_FILE_NAME, strerror(errno));

    return EXIT_SUCCESS;
}


-- 
Nick Craig-Wood
ncw1@axis.demon.co.uk

             reply	other threads:[~2003-12-26 10:54 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2003-12-26 10:54 Nick Craig-Wood [this message]
2003-12-26 11:56 ` 2.6.0 Huge pages not working as expected William Lee Irwin III
2003-12-26 20:10   ` Nick Craig-Wood
2003-12-26 20:15     ` William Lee Irwin III
2003-12-26 20:33     ` Linus Torvalds
2003-12-27  3:36       ` Andrea Arcangeli
2003-12-27  4:01         ` Linus Torvalds
2003-12-27  9:28           ` David S. Miller
2003-12-27 15:58           ` Andrea Arcangeli
2003-12-27  9:01       ` Nick Craig-Wood
2004-01-06 14:24     ` Kurt Garloff

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20031226105433.GA25970@axis.demon.co.uk \
    --to=ncw1@axis.demon.co.uk \
    --cc=linux-kernel@vger.kernel.org \
    --cc=rohit.seth@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.