Re: [PATCH v8 8/8] x86/tlb: do flush_tlb_kernel_range by 'invlpg'

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Alex Shi <alex.shi-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
To: Andi Kleen <ak-VuQAYsv1563Yd54FQh9/CA@public.gmane.org>,
	linux-tegra-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	linux-omap-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: tglx-hfZtesqFncYOwBW4kG4KsQ@public.gmane.org,
	mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
	hpa-YMNOUZJC4hwAvxtiuMwx3w@public.gmane.org,
	arnd-r2nGTMty4D4@public.gmane.org,
	rostedt-nx8X9YLhiw1AfugRpC6u6w@public.gmane.org,
	fweisbec-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org,
	jeremy-TSDbQ3PG+2Y@public.gmane.org,
	seto.hidetoshi-+CUm20s59erQFUHtdCDX3A@public.gmane.org,
	borislav.petkov-5C7GfCeVMHo@public.gmane.org,
	tony.luck-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org,
	luto-3s7WtUTddSA@public.gmane.org,
	riel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
	avi-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
	len.brown-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org,
	tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
	akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org,
	cl-gkYfJU5Cukgdnm+yROfE0A@public.gmane.org,
	jbeulich-IBi9RG/b67k@public.gmane.org,
	eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org,
	akinobu.mita-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org,
	cpw-sJ/iWh9BUns@public.gmane.org,
	penberg-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org,
	steiner-sJ/iWh9BUns@public.gmane.org,
	viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn@public.gmane.org,
	kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org,
	aarcange-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org,
	rientjes-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Subject: Re: [PATCH v8 8/8] x86/tlb: do flush_tlb_kernel_range by 'invlpg'
Date: Thu, 21 Jun 2012 13:25:27 +0800	[thread overview]
Message-ID: <4FE2B047.503@intel.com> (raw)
In-Reply-To: <4FD93DC7.8020501-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>

On 06/14/2012 09:26 AM, Alex Shi wrote:

> On 06/14/2012 09:10 AM, Alex Shi wrote:
> 
>> On 06/13/2012 10:56 PM, Andi Kleen wrote:
>>
>>> On Tue, Jun 12, 2012 at 05:06:45PM +0800, Alex Shi wrote:
>>>> This patch do flush_tlb_kernel_range by 'invlpg'. The performance pay
>>>> and gain was analysed in my patch (x86/flush_tlb: try flush_tlb_single
>>>> one by one in flush_tlb_range). Now we move this logical into kernel
>>>> part. The pay is multiple 'invlpg' execution cost, that is same. but
>>>>  the gain(cost reducing of TLB entries refilling) is absolutely
>>>> increased.
>>>
>>> The subtle point is whether INVLPG flushes global pages or not.
>>> After some digging I found a sentence in the SDM that says it does.
>>> So it may be safe.
>>
>>
>> Many thanks for your time!
>>
>>>
>>> What does it improve?
>>
>>




I just wrote a rough kernel module that allocates some page arrays in the kernel and then maps them to a virtual address with 'vmap'.

Then my macro benchmark injects an 'unmap_kernel_range' request through a sysfs interface while doing random memory accesses at user level during that time.

On my NHM EP 2P * 4 Cores * HT.

Without this patch, the memory access with 4 threads is ~12ns/time.
With this patch, the memory access with 4 threads is ~9ns/time.

As the number of threads increases, the benefit becomes smaller and nearly disappears once the thread count reaches 256.

But there is no regression.


The rough user macro-benchmark and kernel module are here:

--- kernel module--

#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/gfp.h>
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/sysfs.h>
#include <linux/hrtimer.h>
#include <linux/device.h>
#include <linux/cpu.h>

MODULE_LICENSE("Dual BSD/GPL");

/* 
 * $cat Makefile 
 * obj-m := modvmalloc.o
 *
 * compile command:
 *  #cd linux; make /home/alexs/exec/modules/modvmalloc.ko 
 */
#define NR_PAGES	(4)
#define NR_BLOCKS	(1024)

struct block {
	struct page ** page_array; 
	void *vaddr;
	int page_count;
};
struct block *block;

static int blocks = NR_BLOCKS;
module_param(blocks, uint, 0400);
MODULE_PARM_DESC(blocks, "map unmap blocks number ");

/*
 * Allocate a zeroed array of @nr_pages page pointers.
 *
 * Arrays larger than PAGE_SIZE come from vzalloc(), smaller ones from
 * kzalloc(). Returns NULL when the allocation fails. Release the result
 * with relay_free_page_array().
 */
static struct page **relay_alloc_page_array(unsigned int nr_pages)
{
	/* Size the array from the argument; it used to be ignored in favour of NR_PAGES. */
	const size_t pa_size = nr_pages * sizeof(struct page *);

	if (pa_size > PAGE_SIZE)
		return vzalloc(pa_size);
	return kzalloc(pa_size, GFP_KERNEL);
}

/*
 * Release a page-pointer array, choosing vfree() or kfree() according to
 * whether the address lies in the vmalloc area.
 */
static void relay_free_page_array(struct page **array)
{
	if (!is_vmalloc_addr(array)) {
		kfree(array);
		return;
	}
	vfree(array);
}

/*
 * Unmap the NR_PAGES-page kernel virtual range of every block.
 * This is the operation the sysfs write handler times.
 */
static void vmap_unmap(void)
{
	int i;

	/* Index each block; block->vaddr would hit block[0] on every pass. */
	for (i = 0; i < blocks; i++)
		unmap_kernel_range((unsigned long)block[i].vaddr,
				   NR_PAGES * PAGE_SIZE);
}

// ---------------
/* Last number written to the vmap_num sysfs attribute. */
long vmap_num = 0;

/*
 * Handle a write to vmap_num.
 *
 * Parses one decimal number from @buf, stores it in vmap_num, then times
 * one vmap_unmap() run and logs the average cost per block.
 *
 * @buf:   text written by the user
 * @count: length of the write, returned unchanged on success
 * @smt:   unused
 *
 * Returns @count, or -EINVAL when @buf does not start with a number.
 */
static ssize_t __vmap_num_store(const char *buf,
		size_t count, int smt)
{
	long factor = 0;
	unsigned long start, stop, per_block;

	if (sscanf(buf, "%ld", &factor) != 1)
		return -EINVAL;

	vmap_num = factor;
	start = ktime_to_ns(ktime_get());

	vmap_unmap();

	stop = ktime_to_ns(ktime_get());
	/* blocks is a load-time parameter and may be 0: never divide by it then. */
	per_block = blocks > 0 ? (stop - start) / (unsigned long)blocks : 0;
	printk(KERN_ERR "vunmap %ld times cost %lu ns/time\n",
			(long)blocks, per_block);
	return count;
}

/* sysfs read handler: format vmap_num plus a newline into @buf and return the length written. */
static ssize_t vmap_num_show(struct device *dev,
		struct device_attribute *attr,
		char *buf)
{
	const int len = sprintf(buf, "%ld\n", vmap_num);

	return len;
}
/* sysfs write handler: forward the buffer to __vmap_num_store() with smt = 0. */
static ssize_t vmap_num_store(struct device *dev,
		struct device_attribute *attr,
		const char *buf, size_t count)
{
	ssize_t ret;

	ret = __vmap_num_store(buf, count, 0);
	return ret;
}

/*
 * Provides dev_attr_vmap_num (mode 0644): reads go through vmap_num_show(),
 * writes through vmap_num_store().
 */
DEVICE_ATTR(vmap_num, 0644,
		vmap_num_show,
		vmap_num_store);

/* Create the vmap_num attribute file under @dev and hand back device_create_file()'s result. */
int create_sysfs_vmap_num(struct device *dev)
{
	int err = device_create_file(dev, &dev_attr_vmap_num);

	return err;
}

static int mapunmap_init(void){
	long i,j,k;

	create_sysfs_vmap_num(cpu_subsys.dev_root);
	block = kmalloc(sizeof(struct block)*blocks, GFP_KERNEL);

	for (k=0; k< blocks; k++) {
		block[k].page_count = 0;
		block[k].page_array = relay_alloc_page_array(NR_PAGES);
		if (!block[k].page_array)
			return -1;

		for (i = 0; i < NR_PAGES; i++) {
			block[k].page_array[i] = alloc_page(GFP_KERNEL);
			if (unlikely(!block[k].page_array[i])) {
				printk(KERN_ERR "\talloc page error \n");
				goto depopulate;
			}
		}

		if (i!=NR_PAGES)	goto depopulate;

		block[k].page_count = i;
		block[k].vaddr = vmap(block[k].page_array, NR_PAGES, VM_MAP, PAGE_KERNEL);
		if (!(block[k].vaddr)) {
			printk(KERN_ERR "\t\t vmap error !\n");
			goto depopulate;
		}
	}
	printk(KERN_INFO "vmalloc module init OK \n");
	return 0;

depopulate:
	for (i=0; i< k; i++)
		if (block[i].page_count !=0) {
			for (j = 0; j < block[i].page_count; j++)
				__free_page((block[j].page_array[j]));
			relay_free_page_array(block[j].page_array);
		}
	printk(KERN_INFO "vmalloc module init fail\n");
	return -1;
}


static void mapunmap_exit(void){
	long i, j;

	printk(KERN_INFO "bye! this is test module\n");
	device_remove_file(cpu_subsys.dev_root, &dev_attr_vmap_num);

	for (i=0; i< blocks; i++)
		if (block[i].page_count !=0) {
			for (j = 0; j < block[i].page_count; j++)
				__free_page((block[j].page_array[j]));
			relay_free_page_array(block[j].page_array);
		}
}


module_init(mapunmap_init);
module_exit(mapunmap_exit);

--- benchmark ---

/*
   maccess.c
   This is a macrobenchmark for TLB flush range testing.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

   Copyright (C) Intel 2012
   Copyright Alex Shi alex.shi-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org 

   gcc -o maccess maccess.c -lrt -lpthread -O2

    #perf stat -e r881,r882,r884 -e r801,r802,r810,r820,r840,r880,r807 -e rc01 -e r4901,r4902,r4910,r4920,r4940,r4980 -e r5f01  -e rbd01,rdb20  -e r4f02 -e r8004,r8201,r8501,r8502,r8504,r8510,r8520,r8540,r8580  -e rae01,rc820,rc102,rc900 -e r8600  -e rcb10  ./maccess 
*/

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/mman.h>
#include <time.h>
#include <sys/types.h>
#include <pthread.h>

#define FILE_SIZE	(1024*1024*1024)

#define PAGE_SIZE 	(4096)
#define HPAGE_SIZE 	(4096*512)

#ifndef MAP_HUGETLB
#define MAP_HUGETLB	0x40000
#endif


/*
 * Read clock @clockid and return its value in nanoseconds.
 *
 * If clock_gettime() fails, the error is reported with perror() and 0 is
 * returned; the timespec is zero-initialised so no indeterminate value is
 * ever read.
 */
long getnsec(clockid_t clockid) {
        struct timespec ts = {0};

        if (clock_gettime(clockid, &ts) == -1) {
                perror("clock_gettime failed");
                return 0;
        }
        return (long) ts.tv_sec * 1000000000 + (long) ts.tv_nsec;
}

//data for threads
/* Parameters that main() hands to every accessmm() thread. */
struct data{
	/* accesses performed per pass of the thread's inner loop (the -p option) */
	int pagenum;
	/* base address of the mmap()ed region the threads access */
	void *startaddr;
	/* 0: threads read from the region; anything else: threads write to it */
	int rw;
	/* not referenced anywhere in the visible code */
	int loop;
};
/* Shared start/stop flag: threads sleep while it is 0 and run while it is 1. */
volatile int * threadstart;
//thread for memory accessing
/*
 * Thread body: wait until *threadstart becomes non-zero, then repeat passes
 * of d->pagenum reads (d->rw == 0) or writes (otherwise) at random offsets
 * in the region at d->startaddr for as long as *threadstart == 1.
 *
 * @data: pointer to a struct data shared by all threads.
 *
 * Returns a malloc()ed long holding the number of completed passes; the
 * caller frees it. Exits the process if that allocation fails.
 */
void *accessmm(void *data){
	struct data *d = data;
	char *base = d->startaddr;	/* char * so the offset arithmetic is standard C */
	long *actimes;
	int i, k;
	int randn[PAGE_SIZE];

	for (i = 0; i < PAGE_SIZE; i++)
		randn[i] = rand();

	actimes = malloc(sizeof *actimes);
	if (!actimes) {
		perror("malloc");
		exit(1);
	}
	*actimes = 0;

	while (*threadstart == 0)
		usleep(1);

	/* randn has PAGE_SIZE entries; wrap the index so a larger pagenum cannot overrun it. */
	if (d->rw == 0) {
		for (*actimes = 0; *threadstart == 1; (*actimes)++)
			for (k = 0; k < d->pagenum; k++)
				(void)*(volatile char *)(base + randn[k % PAGE_SIZE] % FILE_SIZE);
	} else {
		for (*actimes = 0; *threadstart == 1; (*actimes)++)
			for (k = 0; k < d->pagenum; k++)
				*(char *)(base + randn[k % PAGE_SIZE] % FILE_SIZE) = 1;
	}
	return actimes;
}

int main(int argc, char *argv[])
{
        static  char            optstr[] = "p:w:ht:s:";
	int s = 1;	/* */
	int p = 512;	/* default accessed page number, after maccess */
	int er = 0, rw = 0, h = 0, t = 2; /* d: debug; h: use huge page; t thread number */
	int pagesize = PAGE_SIZE; /*default for regular page */
	volatile char x;
	long protindex = 0;

	int i, j, k, c;
	void *m1, *startaddr;
	unsigned long *startaddr2[1024*512];
	volatile void *tempaddr;
	clockid_t clockid = CLOCK_MONOTONIC;
	unsigned long start, stop, mptime, actime;
	int randn[PAGE_SIZE];

	pthread_t	pid[1024];
	void * res;
	struct data data;

	char command[1024];

	for (i=0;i<PAGE_SIZE; i++)
		randn[i] = rand();

        while ((c = getopt(argc, argv, optstr)) != EOF)
                switch (c) {
                case 's':
                        s = atoi(optarg);
                        break;
                case 'p':
                        p = atoi(optarg);
                        break;
                case 'h':
                        h = 1;
                        break;
                case 'w':
                        rw = atoi(optarg);
                        break;
                case 't':
                        t = atoi(optarg);
                        break;
                case '?':
                        er = 1;
                        break;
                }
        if (er) {
                printf("usage: %s %s\n", argv[0], optstr);
                exit(1);
	}

	printf("pid is %d, thread number %d active %d seconds, access page num %d\n", getpid(), t, s, p);
	if (h == 0){
		startaddr = mmap(0, FILE_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
		pagesize = PAGE_SIZE;
	} else {
		startaddr = mmap(0, FILE_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED | MAP_HUGETLB, -1, 0);
		pagesize = HPAGE_SIZE;
	}

	start = getnsec(clockid);
	//access whole memory, will generate many page faults 
	for (tempaddr = startaddr; tempaddr < startaddr + FILE_SIZE; tempaddr += pagesize)
		memset((char *)tempaddr, 0, 1);
        stop = getnsec(clockid);

	threadstart = malloc(sizeof(int));
	*threadstart = 0;
	data.pagenum = p; data.startaddr = startaddr; data.rw = rw;
	for (i=0; i< t; i++)
		if(pthread_create(&pid[i], NULL, accessmm, &data))
			perror("pthread create");
	//wait for randn[] filling.
	sleep(1);

	mptime = actime = 0;
	sprintf(command, "sudo sh -c 'echo %d > /sys/devices/system/cpu/vmap_num'", s);
	printf("%s\n", command);

	start = getnsec(clockid);
	//kick threads, let them running.
	*threadstart = 1;

	system(command);
	*threadstart = 0;

	stop = getnsec(clockid);
	mptime += stop - start;

	//get threads' result.
	for (i=0; i< t; i++) {
		if (pthread_join(pid[i], &res))
			perror("pthread_join");
		actime += *(long*)res;
	}
end:
	printf("maccess %ld ms, memory access %ld times/thread/ms, cost %ldns/time\n",
		 mptime/1000000, actime*p*1000000/t/mptime, mptime*t/(actime*p));
	exit(0);
}

> 
>>
>>> -Andi
>>
>>
> 
>

WARNING: multiple messages have this Message-ID (diff)

From: Alex Shi <alex.shi@intel.com>
To: Andi Kleen <ak@linux.intel.com>,
	linux-tegra@vger.kernel.org, linux-omap@vger.kernel.org
Cc: tglx@linutronix.de, mingo@redhat.com, hpa@zytor.com,
	arnd@arndb.de, rostedt@goodmis.org, fweisbec@gmail.com,
	jeremy@goop.org, seto.hidetoshi@jp.fujitsu.com,
	borislav.petkov@amd.com, tony.luck@intel.com, luto@mit.edu,
	riel@redhat.com, avi@redhat.com, len.brown@intel.com,
	tj@kernel.org, akpm@linux-foundation.org, cl@gentwo.org,
	jbeulich@suse.com, eric.dumazet@gmail.com,
	akinobu.mita@gmail.com, cpw@sgi.com, penberg@kernel.org,
	steiner@sgi.com, viro@zeniv.linux.org.uk,
	kamezawa.hiroyu@jp.fujitsu.com, aarcange@redhat.com,
	rientjes@google.com, linux-kernel@vger.kernel.org
Subject: Re: [PATCH v8 8/8] x86/tlb: do flush_tlb_kernel_range by 'invlpg'
Date: Thu, 21 Jun 2012 13:25:27 +0800	[thread overview]
Message-ID: <4FE2B047.503@intel.com> (raw)
In-Reply-To: <4FD93DC7.8020501@intel.com>

On 06/14/2012 09:26 AM, Alex Shi wrote:

> On 06/14/2012 09:10 AM, Alex Shi wrote:
> 
>> On 06/13/2012 10:56 PM, Andi Kleen wrote:
>>
>>> On Tue, Jun 12, 2012 at 05:06:45PM +0800, Alex Shi wrote:
>>>> This patch do flush_tlb_kernel_range by 'invlpg'. The performance pay
>>>> and gain was analysed in my patch (x86/flush_tlb: try flush_tlb_single
>>>> one by one in flush_tlb_range). Now we move this logical into kernel
>>>> part. The pay is multiple 'invlpg' execution cost, that is same. but
>>>>  the gain(cost reducing of TLB entries refilling) is absolutely
>>>> increased.
>>>
>>> The subtle point is whether INVLPG flushes global pages or not.
>>> After some digging I found a sentence in the SDM that says it does.
>>> So it may be safe.
>>
>>
>> Many thanks for your time!
>>
>>>
>>> What does it improve?
>>
>>




I just wrote a rough kernel module that allocates some page arrays in the kernel and then maps them to a virtual address with 'vmap'.

Then my macro benchmark injects an 'unmap_kernel_range' request through a sysfs interface while doing random memory accesses at user level during that time.

On my NHM EP 2P * 4 Cores * HT.

Without this patch, the memory access with 4 threads is ~12ns/time.
With this patch, the memory access with 4 threads is ~9ns/time.

As the number of threads increases, the benefit becomes smaller and nearly disappears once the thread count reaches 256.

But there is no regression.


The rough user macro-benchmark and kernel module are here:

--- kernel module--

#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/gfp.h>
#include <linux/fs.h>
#include <linux/mman.h>
#include <linux/uaccess.h>
#include <linux/sysfs.h>
#include <linux/hrtimer.h>
#include <linux/device.h>
#include <linux/cpu.h>

MODULE_LICENSE("Dual BSD/GPL");

/* 
 * $cat Makefile 
 * obj-m := modvmalloc.o
 *
 * compile command:
 *  #cd linux; make /home/alexs/exec/modules/modvmalloc.ko 
 */
#define NR_PAGES	(4)
#define NR_BLOCKS	(1024)

struct block {
	struct page ** page_array; 
	void *vaddr;
	int page_count;
};
struct block *block;

static int blocks = NR_BLOCKS;
module_param(blocks, uint, 0400);
MODULE_PARM_DESC(blocks, "map unmap blocks number ");

/*
 * Allocate a zeroed array of @nr_pages page pointers.
 *
 * Arrays larger than PAGE_SIZE come from vzalloc(), smaller ones from
 * kzalloc(). Returns NULL when the allocation fails. Release the result
 * with relay_free_page_array().
 */
static struct page **relay_alloc_page_array(unsigned int nr_pages)
{
	/* Size the array from the argument; it used to be ignored in favour of NR_PAGES. */
	const size_t pa_size = nr_pages * sizeof(struct page *);

	if (pa_size > PAGE_SIZE)
		return vzalloc(pa_size);
	return kzalloc(pa_size, GFP_KERNEL);
}

/*
 * Release a page-pointer array, choosing vfree() or kfree() according to
 * whether the address lies in the vmalloc area.
 */
static void relay_free_page_array(struct page **array)
{
	if (!is_vmalloc_addr(array)) {
		kfree(array);
		return;
	}
	vfree(array);
}

/*
 * Unmap the NR_PAGES-page kernel virtual range of every block.
 * This is the operation the sysfs write handler times.
 */
static void vmap_unmap(void)
{
	int i;

	/* Index each block; block->vaddr would hit block[0] on every pass. */
	for (i = 0; i < blocks; i++)
		unmap_kernel_range((unsigned long)block[i].vaddr,
				   NR_PAGES * PAGE_SIZE);
}

// ---------------
/* Last number written to the vmap_num sysfs attribute. */
long vmap_num = 0;

/*
 * Handle a write to vmap_num.
 *
 * Parses one decimal number from @buf, stores it in vmap_num, then times
 * one vmap_unmap() run and logs the average cost per block.
 *
 * @buf:   text written by the user
 * @count: length of the write, returned unchanged on success
 * @smt:   unused
 *
 * Returns @count, or -EINVAL when @buf does not start with a number.
 */
static ssize_t __vmap_num_store(const char *buf,
		size_t count, int smt)
{
	long factor = 0;
	unsigned long start, stop, per_block;

	if (sscanf(buf, "%ld", &factor) != 1)
		return -EINVAL;

	vmap_num = factor;
	start = ktime_to_ns(ktime_get());

	vmap_unmap();

	stop = ktime_to_ns(ktime_get());
	/* blocks is a load-time parameter and may be 0: never divide by it then. */
	per_block = blocks > 0 ? (stop - start) / (unsigned long)blocks : 0;
	printk(KERN_ERR "vunmap %ld times cost %lu ns/time\n",
			(long)blocks, per_block);
	return count;
}

/* sysfs read handler: format vmap_num plus a newline into @buf and return the length written. */
static ssize_t vmap_num_show(struct device *dev,
		struct device_attribute *attr,
		char *buf)
{
	const int len = sprintf(buf, "%ld\n", vmap_num);

	return len;
}
/* sysfs write handler: forward the buffer to __vmap_num_store() with smt = 0. */
static ssize_t vmap_num_store(struct device *dev,
		struct device_attribute *attr,
		const char *buf, size_t count)
{
	ssize_t ret;

	ret = __vmap_num_store(buf, count, 0);
	return ret;
}

/*
 * Provides dev_attr_vmap_num (mode 0644): reads go through vmap_num_show(),
 * writes through vmap_num_store().
 */
DEVICE_ATTR(vmap_num, 0644,
		vmap_num_show,
		vmap_num_store);

/* Create the vmap_num attribute file under @dev and hand back device_create_file()'s result. */
int create_sysfs_vmap_num(struct device *dev)
{
	int err = device_create_file(dev, &dev_attr_vmap_num);

	return err;
}

static int mapunmap_init(void){
	long i,j,k;

	create_sysfs_vmap_num(cpu_subsys.dev_root);
	block = kmalloc(sizeof(struct block)*blocks, GFP_KERNEL);

	for (k=0; k< blocks; k++) {
		block[k].page_count = 0;
		block[k].page_array = relay_alloc_page_array(NR_PAGES);
		if (!block[k].page_array)
			return -1;

		for (i = 0; i < NR_PAGES; i++) {
			block[k].page_array[i] = alloc_page(GFP_KERNEL);
			if (unlikely(!block[k].page_array[i])) {
				printk(KERN_ERR "\talloc page error \n");
				goto depopulate;
			}
		}

		if (i!=NR_PAGES)	goto depopulate;

		block[k].page_count = i;
		block[k].vaddr = vmap(block[k].page_array, NR_PAGES, VM_MAP, PAGE_KERNEL);
		if (!(block[k].vaddr)) {
			printk(KERN_ERR "\t\t vmap error !\n");
			goto depopulate;
		}
	}
	printk(KERN_INFO "vmalloc module init OK \n");
	return 0;

depopulate:
	for (i=0; i< k; i++)
		if (block[i].page_count !=0) {
			for (j = 0; j < block[i].page_count; j++)
				__free_page((block[j].page_array[j]));
			relay_free_page_array(block[j].page_array);
		}
	printk(KERN_INFO "vmalloc module init fail\n");
	return -1;
}


static void mapunmap_exit(void){
	long i, j;

	printk(KERN_INFO "bye! this is test module\n");
	device_remove_file(cpu_subsys.dev_root, &dev_attr_vmap_num);

	for (i=0; i< blocks; i++)
		if (block[i].page_count !=0) {
			for (j = 0; j < block[i].page_count; j++)
				__free_page((block[j].page_array[j]));
			relay_free_page_array(block[j].page_array);
		}
}


module_init(mapunmap_init);
module_exit(mapunmap_exit);

--- benchmark ---

/*
   maccess.c
   This is a macrobenchmark for TLB flush range testing.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

   Copyright (C) Intel 2012
   Copyright Alex Shi alex.shi@intel.com 

   gcc -o maccess maccess.c -lrt -lpthread -O2

    #perf stat -e r881,r882,r884 -e r801,r802,r810,r820,r840,r880,r807 -e rc01 -e r4901,r4902,r4910,r4920,r4940,r4980 -e r5f01  -e rbd01,rdb20  -e r4f02 -e r8004,r8201,r8501,r8502,r8504,r8510,r8520,r8540,r8580  -e rae01,rc820,rc102,rc900 -e r8600  -e rcb10  ./maccess 
*/

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <sys/mman.h>
#include <time.h>
#include <sys/types.h>
#include <pthread.h>

#define FILE_SIZE	(1024*1024*1024)

#define PAGE_SIZE 	(4096)
#define HPAGE_SIZE 	(4096*512)

#ifndef MAP_HUGETLB
#define MAP_HUGETLB	0x40000
#endif


/*
 * Read clock @clockid and return its value in nanoseconds.
 *
 * If clock_gettime() fails, the error is reported with perror() and 0 is
 * returned; the timespec is zero-initialised so no indeterminate value is
 * ever read.
 */
long getnsec(clockid_t clockid) {
        struct timespec ts = {0};

        if (clock_gettime(clockid, &ts) == -1) {
                perror("clock_gettime failed");
                return 0;
        }
        return (long) ts.tv_sec * 1000000000 + (long) ts.tv_nsec;
}

//data for threads
/* Parameters that main() hands to every accessmm() thread. */
struct data{
	/* accesses performed per pass of the thread's inner loop (the -p option) */
	int pagenum;
	/* base address of the mmap()ed region the threads access */
	void *startaddr;
	/* 0: threads read from the region; anything else: threads write to it */
	int rw;
	/* not referenced anywhere in the visible code */
	int loop;
};
/* Shared start/stop flag: threads sleep while it is 0 and run while it is 1. */
volatile int * threadstart;
//thread for memory accessing
/*
 * Thread body: wait until *threadstart becomes non-zero, then repeat passes
 * of d->pagenum reads (d->rw == 0) or writes (otherwise) at random offsets
 * in the region at d->startaddr for as long as *threadstart == 1.
 *
 * @data: pointer to a struct data shared by all threads.
 *
 * Returns a malloc()ed long holding the number of completed passes; the
 * caller frees it. Exits the process if that allocation fails.
 */
void *accessmm(void *data){
	struct data *d = data;
	char *base = d->startaddr;	/* char * so the offset arithmetic is standard C */
	long *actimes;
	int i, k;
	int randn[PAGE_SIZE];

	for (i = 0; i < PAGE_SIZE; i++)
		randn[i] = rand();

	actimes = malloc(sizeof *actimes);
	if (!actimes) {
		perror("malloc");
		exit(1);
	}
	*actimes = 0;

	while (*threadstart == 0)
		usleep(1);

	/* randn has PAGE_SIZE entries; wrap the index so a larger pagenum cannot overrun it. */
	if (d->rw == 0) {
		for (*actimes = 0; *threadstart == 1; (*actimes)++)
			for (k = 0; k < d->pagenum; k++)
				(void)*(volatile char *)(base + randn[k % PAGE_SIZE] % FILE_SIZE);
	} else {
		for (*actimes = 0; *threadstart == 1; (*actimes)++)
			for (k = 0; k < d->pagenum; k++)
				*(char *)(base + randn[k % PAGE_SIZE] % FILE_SIZE) = 1;
	}
	return actimes;
}

int main(int argc, char *argv[])
{
        static  char            optstr[] = "p:w:ht:s:";
	int s = 1;	/* */
	int p = 512;	/* default accessed page number, after maccess */
	int er = 0, rw = 0, h = 0, t = 2; /* d: debug; h: use huge page; t thread number */
	int pagesize = PAGE_SIZE; /*default for regular page */
	volatile char x;
	long protindex = 0;

	int i, j, k, c;
	void *m1, *startaddr;
	unsigned long *startaddr2[1024*512];
	volatile void *tempaddr;
	clockid_t clockid = CLOCK_MONOTONIC;
	unsigned long start, stop, mptime, actime;
	int randn[PAGE_SIZE];

	pthread_t	pid[1024];
	void * res;
	struct data data;

	char command[1024];

	for (i=0;i<PAGE_SIZE; i++)
		randn[i] = rand();

        while ((c = getopt(argc, argv, optstr)) != EOF)
                switch (c) {
                case 's':
                        s = atoi(optarg);
                        break;
                case 'p':
                        p = atoi(optarg);
                        break;
                case 'h':
                        h = 1;
                        break;
                case 'w':
                        rw = atoi(optarg);
                        break;
                case 't':
                        t = atoi(optarg);
                        break;
                case '?':
                        er = 1;
                        break;
                }
        if (er) {
                printf("usage: %s %s\n", argv[0], optstr);
                exit(1);
	}

	printf("pid is %d, thread number %d active %d seconds, access page num %d\n", getpid(), t, s, p);
	if (h == 0){
		startaddr = mmap(0, FILE_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
		pagesize = PAGE_SIZE;
	} else {
		startaddr = mmap(0, FILE_SIZE, PROT_READ|PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED | MAP_HUGETLB, -1, 0);
		pagesize = HPAGE_SIZE;
	}

	start = getnsec(clockid);
	//access whole memory, will generate many page faults 
	for (tempaddr = startaddr; tempaddr < startaddr + FILE_SIZE; tempaddr += pagesize)
		memset((char *)tempaddr, 0, 1);
        stop = getnsec(clockid);

	threadstart = malloc(sizeof(int));
	*threadstart = 0;
	data.pagenum = p; data.startaddr = startaddr; data.rw = rw;
	for (i=0; i< t; i++)
		if(pthread_create(&pid[i], NULL, accessmm, &data))
			perror("pthread create");
	//wait for randn[] filling.
	sleep(1);

	mptime = actime = 0;
	sprintf(command, "sudo sh -c 'echo %d > /sys/devices/system/cpu/vmap_num'", s);
	printf("%s\n", command);

	start = getnsec(clockid);
	//kick threads, let them running.
	*threadstart = 1;

	system(command);
	*threadstart = 0;

	stop = getnsec(clockid);
	mptime += stop - start;

	//get threads' result.
	for (i=0; i< t; i++) {
		if (pthread_join(pid[i], &res))
			perror("pthread_join");
		actime += *(long*)res;
	}
end:
	printf("maccess %ld ms, memory access %ld times/thread/ms, cost %ldns/time\n",
		 mptime/1000000, actime*p*1000000/t/mptime, mptime*t/(actime*p));
	exit(0);
}

> 
>>
>>> -Andi
>>
>>
> 
>

next prev parent reply	other threads:[~2012-06-21  5:25 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-06-12  9:06 [PATCH v8 0/8] x86 TLB flush range optimizing Alex Shi
2012-06-12  9:06 ` [PATCH v8 1/8] x86/tlb_info: get last level TLB entry number of CPU Alex Shi
2012-06-12  9:06 ` [PATCH v8 2/8] x86/flush_tlb: try flush_tlb_single one by one in flush_tlb_range Alex Shi
2012-06-12  9:06 ` [PATCH v8 3/8] x86/tlb: fall back to flush all when meet a THP large page Alex Shi
2012-06-12  9:06 ` [PATCH v8 4/8] x86/tlb: add tlb_flushall_shift for specific CPU Alex Shi
2012-06-12  9:06 ` [PATCH v8 5/8] x86/tlb: add tlb_flushall_shift knob into debugfs Alex Shi
2012-06-12  9:06 ` [PATCH v8 6/8] x86/tlb: enable tlb flush range support for generic mmu and x86 Alex Shi
2012-06-12  9:06 ` [PATCH v8 7/8] x86/tlb: replace INVALIDATE_TLB_VECTOR by CALL_FUNCTION_VECTOR Alex Shi
2012-06-12  9:06 ` [PATCH v8 8/8] x86/tlb: do flush_tlb_kernel_range by 'invlpg' Alex Shi
2012-06-13 14:56   ` Andi Kleen
2012-06-14  1:10     ` Alex Shi
2012-06-14  1:26       ` Alex Shi
2012-06-14  2:10         ` Alex Shi
     [not found]         ` <4FD93DC7.8020501-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
2012-06-21  5:25           ` Alex Shi [this message]
2012-06-21  5:25             ` Alex Shi
2012-06-13  7:42 ` [PATCH v8 0/8] x86 TLB flush range optimizing Alex Shi
2012-06-21  5:27   ` Alex Shi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4FE2B047.503@intel.com \
    --to=alex.shi-ral2jqcrhueavxtiumwx3w@public.gmane.org \
    --cc=aarcange-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=ak-VuQAYsv1563Yd54FQh9/CA@public.gmane.org \
    --cc=akinobu.mita-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org \
    --cc=akpm-de/tnXTf+JLsfHDXvbKv3WD2FQJk+8+b@public.gmane.org \
    --cc=arnd-r2nGTMty4D4@public.gmane.org \
    --cc=avi-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=borislav.petkov-5C7GfCeVMHo@public.gmane.org \
    --cc=cl-gkYfJU5Cukgdnm+yROfE0A@public.gmane.org \
    --cc=cpw-sJ/iWh9BUns@public.gmane.org \
    --cc=eric.dumazet-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org \
    --cc=fweisbec-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org \
    --cc=hpa-YMNOUZJC4hwAvxtiuMwx3w@public.gmane.org \
    --cc=jbeulich-IBi9RG/b67k@public.gmane.org \
    --cc=jeremy-TSDbQ3PG+2Y@public.gmane.org \
    --cc=kamezawa.hiroyu-+CUm20s59erQFUHtdCDX3A@public.gmane.org \
    --cc=len.brown-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    --cc=linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-omap-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=linux-tegra-u79uwXL29TY76Z2rM5mHXA@public.gmane.org \
    --cc=luto-3s7WtUTddSA@public.gmane.org \
    --cc=mingo-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=penberg-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
    --cc=riel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org \
    --cc=rientjes-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org \
    --cc=rostedt-nx8X9YLhiw1AfugRpC6u6w@public.gmane.org \
    --cc=seto.hidetoshi-+CUm20s59erQFUHtdCDX3A@public.gmane.org \
    --cc=steiner-sJ/iWh9BUns@public.gmane.org \
    --cc=tglx-hfZtesqFncYOwBW4kG4KsQ@public.gmane.org \
    --cc=tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org \
    --cc=tony.luck-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org \
    --cc=viro-RmSDqhL/yNMiFSDQTTA3OLVCufUGDwFn@public.gmane.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.