From: Rafael Aquini <aquini@redhat.com>
To: Shaohua Li <shli@kernel.org>
Cc: linux-mm@kvack.org, hughd@google.com, riel@redhat.com,
minchan@kernel.org, kmpark@infradead.org
Subject: Re: [patch 1/4 v3]swap: change block allocation algorithm for SSD
Date: Tue, 12 Mar 2013 12:19:02 -0300 [thread overview]
Message-ID: <20130312151901.GB1739@optiplex.redhat.com> (raw)
In-Reply-To: <20130312151244.GA1739@optiplex.redhat.com>
[-- Attachment #1: Type: text/plain, Size: 415 bytes --]
On Tue, Mar 12, 2013 at 12:12:46PM -0300, Rafael Aquini wrote:
> Nice work! I've been testing your series with the attached test-prog, as it was
Sorry, I forgot to attach the test-prog to my last message. Here it is.
It only takes a couple of minutes of running it to get to that softlockup storm
I told you about.
If your box has, let's say, 4GB of RAM, trigger it as follows:
./threaded_memtest -qpv -m4096m
Rafael
[-- Attachment #2: threaded_memtest.c --]
[-- Type: text/plain, Size: 16299 bytes --]
/* $Id: threaded_memtest.c,v 1.7 2008/02/12 01:17:07 gnichols Exp $
*
* A scalable, threaded memory exerciser/tester.
*
* Author: Will Woods <wwoods@redhat.com>
* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Notes:
* This program uses sched_setaffinity(), which is Linux-specific. This could
* probably be ported to other systems with a fairly simple #ifdef / #define
* of setaffinity(), below. You might also have to find a replacement for
* sysconf(), which (while a POSIX function) is not available on some other
* systems (e.g. OSX).
*/
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/sysinfo.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <signal.h>
#define __USE_GNU 1
#include <pthread.h>
#include <sched.h>
/*
 * setaffinity(mask): apply the cpu_set_t lvalue `mask` to the calling task.
 * Build with -DOLD_SCHED_SETAFFINITY to use the two-argument form of
 * sched_setaffinity(); otherwise the three-argument form (with the mask
 * size) is used.  The argument is parenthesized so that any lvalue
 * expression can be passed safely.
 */
#ifdef OLD_SCHED_SETAFFINITY
#define setaffinity(mask) sched_setaffinity(0,&(mask))
#else
#define setaffinity(mask) sched_setaffinity(0,sizeof(mask),&(mask))
#endif
#define VERSION "$Revision: 1.7 $" /* CVS version info */
#define DEFAULT_THREADS 2          /* lower bound for the default thread count */
/* Default test duration in seconds; parenthesized so the macro expands
 * safely inside larger expressions such as `x / DEFAULT_RUNTIME`. */
#define DEFAULT_RUNTIME (60*15)
#define DEFAULT_MEMPCT 0.95        /* fraction of free memory tested by default */
#define BARLEN 40                  /* width of the progress bar, in characters */
/* configurable values used by the threads */
int verbose = 0;   /* -v: print per-thread diagnostics */
int quiet = 0;     /* -q: suppress progress bars */
int parallel = 0;  /* -p: do not wait for each thread's mapping before creating the next */
unsigned num_threads, default_threads = DEFAULT_THREADS;
unsigned runtime, default_runtime = DEFAULT_RUNTIME;
unsigned long memsize, default_memsize;
/* system info */
unsigned num_cpus;
unsigned long total_ram;
/* statistic gathering */
/* start.tv_sec == 0 doubles as the "test has not begun yet" predicate
 * that the worker threads wait on. */
struct timeval start={0,0}, finish={0,0}, duration={0,0};
unsigned long *loop_counters = NULL; /* one iteration counter per thread */
/* pointers for threads and their memory regions */
pthread_t *threads;
char **mmap_regions = NULL;          /* one mapped region per thread, indexed by thread id */
/* Thread mutexes and conditions */
unsigned created_threads = 0;        /* next thread id to hand out; guarded by ct_mutex */
pthread_mutex_t ct_mutex = PTHREAD_MUTEX_INITIALIZER;
unsigned live_threads = 0;           /* threads that have finished initializing */
pthread_mutex_t lt_mutex = PTHREAD_MUTEX_INITIALIZER;
unsigned mmap_done = 0;              /* handshake flag: new thread -> main (mmap_mutex/mmap_cond) */
pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t init_cond = PTHREAD_COND_INITIALIZER;   /* signalled when the last thread is live */
pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t mmap_cond = PTHREAD_COND_INITIALIZER;
pthread_mutex_t test_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t test_start = PTHREAD_COND_INITIALIZER;  /* broadcast by main to start the test */
pthread_mutex_t finish_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t finish_cond = PTHREAD_COND_INITIALIZER; /* broadcast when the last thread stops testing */
pthread_mutex_t running_threads_mutex = PTHREAD_MUTEX_INITIALIZER;
pthread_cond_t running_threads_cond = PTHREAD_COND_INITIALIZER;
/*
 * Stop request: set to non-zero by main() when the run time has elapsed and
 * by the SIGINT handler.  Because it is written from a signal handler it
 * must be a volatile sig_atomic_t object.
 */
volatile sig_atomic_t done = 0;
long unsigned running_threads = 0;   /* threads currently inside the test loop */
/* short name of the program */
char *basename = NULL;
#ifdef CPU_ALLOC
/*
 * RHEL6+: set the affinity for the current task to the given CPU.
 * This variant uses a dynamically sized CPU set (sized for num_cpus)
 * because the conventional fixed-size cpu_set_t was limited to 1024
 * processors.
 *
 * cpu: index of the CPU to pin to.
 * Returns 0 on success, -1 on failure (an error is printed via perror()).
 */
int on_cpu(unsigned cpu){
    cpu_set_t *mask;
    size_t masksize;
    int rc = 0;

    mask = CPU_ALLOC(num_cpus);
    if (mask == NULL) {
        perror("CPU_ALLOC");
        return -1;
    }
    masksize = CPU_ALLOC_SIZE(num_cpus);
    CPU_ZERO_S(masksize, mask);
    CPU_SET_S(cpu, masksize, mask);
    if (sched_setaffinity(0, masksize, mask) < 0) {
        perror("sched_setaffinity");
        rc = -1;
    }
    /* The set is only needed for the call above; release it on every path
     * so that repeated calls (one per thread) do not leak. */
    CPU_FREE(mask);
    return rc;
}
#else
/*
 * Use old setup - RHEL5: set the affinity for the current task to the
 * given CPU using a fixed-size cpu_set_t.
 *
 * cpu: index of the CPU to pin to.
 * Returns 0 on success, -1 on failure (an error is printed via perror()).
 */
int on_cpu(unsigned cpu){
    cpu_set_t mask;

    CPU_ZERO(&mask);
    CPU_SET(cpu,&mask);
    if (setaffinity(mask) < 0){
        perror("sched_setaffinity");
        return -1;
    }
    return 0;
}
#endif
/*
 * Parse a memsize string like '34m' or '128k' into a byte count.
 *
 * The leading decimal number is read first; the LAST character of the
 * string selects the unit:
 *   K/k, M/m, G/g  - multiply by 1024, 1024^2, 1024^3
 *   %              - percentage of total_ram
 *   anything else  - the number is taken as a plain byte count
 * Characters between the number and the final character are ignored.
 *
 * Returns the size in bytes, or 0 when the string is NULL, empty, has no
 * leading number, is negative, or the result does not fit in an
 * unsigned long.  Callers treat 0 as "bad memory size".
 */
long unsigned parse_memsize(const char *str) {
    const long unsigned max_size = ~0UL;
    long unsigned size;
    size_t len;
    int scale = 0;  /* how many times to multiply by 1024 */
    char *end;

    if (str == NULL)
        return 0;
    len = strlen(str);
    if (len == 0)   /* str[len-1] below would read out of bounds */
        return 0;

    errno = 0;
    size = strtoul(str, &end, 10);
    if (end == str || errno == ERANGE)
        return 0;
    /* strtoul() accepts a minus sign and silently negates the value */
    if (memchr(str, '-', (size_t)(end - str)) != NULL)
        return 0;

    switch (str[len-1]) {
    case 'G':
    case 'g':
        scale = 3;
        break;
    case 'M':
    case 'm':
        scale = 2;
        break;
    case 'K':
    case 'k':
        scale = 1;
        break;
    case '%':
        return (long unsigned)((size/100.0)*total_ram);
    default:
        break;
    }
    while (scale-- > 0) {
        if (size > max_size/1024)
            return 0;   /* would overflow */
        size *= 1024;
    }
    return size;
}
char memsize_str[22]; /* a 64-bit int is 20 digits long */
/*
 * Print a nice human-readable string for a large number of bytes.
 *
 * The value is divided by 1024 (moving through the units K, M, G) for as
 * long as it is greater than 10240, then formatted as "<number><unit>".
 * Values that are never scaled get a single space as their unit.
 *
 * Returns a pointer to the shared static buffer memsize_str, so the result
 * is overwritten by the next call and the function is not reentrant.
 */
char *human_memsize(long unsigned size) {
    static const char units[] = "KMG";
    char unit = ' ';
    size_t i;

    for (i = 0; units[i] != '\0' && size > 10240; i++) {
        unit = units[i];
        size /= 1024;
    }
    /* %lu matches the unsigned argument; sizeof keeps the bound in sync
     * with the buffer declaration. */
    snprintf(memsize_str,sizeof memsize_str,"%lu%c",size,unit);
    return memsize_str;
}
/*
 * A cute little progress bar.
 *
 * Redraws (using '\r') a line of the form
 *     <label> [####......] cur/total
 * on stdout and flushes it.
 *
 * label: text shown in front of the bar (right-aligned in 18 columns)
 * cur:   progress so far
 * total: value of cur that corresponds to a full bar
 *
 * A total of 0 is drawn as a full bar and cur values beyond total are
 * clamped for drawing, so the bar buffer can never be overrun.  The
 * numbers printed after the bar are always the unclamped arguments.
 */
void progressbar(char *label, unsigned cur, unsigned total) {
    static const char spinner[] = "-\\|/";
    char bar[BARLEN+1];
    unsigned pos;

    if (total == 0) {
        pos = BARLEN;   /* nothing to do: avoid dividing by zero */
    } else {
        unsigned shown = (cur > total) ? total : cur;
        /* widen before multiplying so BARLEN*cur cannot wrap */
        pos = (unsigned)(((unsigned long long)BARLEN*shown)/total);
    }
    memset(bar,'.',BARLEN);
    memset(bar,'#',pos);
    bar[BARLEN]='\0';
    /* Show a spinner at the head of the bar, but only when the bar
     * advances slowly enough (at least two steps per cell). */
    if ((pos < BARLEN) && (total >= BARLEN*2))
        bar[pos]=spinner[cur%4];
    printf("\r%18s [%s] %u/%u",label,bar,cur,total);
    fflush(stdout);
}
/* This is the function that the threads run */
void *mem_twiddler(void *arg) {
unsigned long thread_id, pages, pagesize, i, p;
volatile long garbage;
long *lp;
int t,offset;
char *my_region;
unsigned long mapsize = *(unsigned long *)arg;
/* Make sure each thread gets a unique ID */
pthread_mutex_lock(&ct_mutex);
thread_id=created_threads++;
pthread_mutex_unlock(&ct_mutex);
if (parallel) {
/* let main() go as soon as the thread is created */
pthread_mutex_lock(&mmap_mutex);
mmap_done=1;
pthread_cond_signal(&mmap_cond);
pthread_mutex_unlock(&mmap_mutex);
}
on_cpu(thread_id % num_cpus);
pagesize=getpagesize();
pages=mapsize/pagesize;
/* Map a chunk of memory */
if (verbose) printf("thread %ld: mapping %s RAM\n",
thread_id,human_memsize(mapsize));
my_region=mmap(NULL,mapsize,PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_PRIVATE,-1,0);
if (my_region == MAP_FAILED) { perror("mmap"); exit(1); }
mmap_regions[thread_id] = my_region;
/* Dirty each page of the mem region to fault them into existence */
for (i=0;i<pages;i++) {
lp=(long *)&(my_region[i*pagesize]);
lp[0]=0xDEADBEEF; /* magic number */
lp[1]=thread_id;
lp[2]=i;
}
/* Okay, we have grabbed our memory */
if (verbose) printf("thread %ld: mapping complete\n",thread_id);
/* let main() go now that the thread is finished initializing. */
if (!parallel) {
mmap_done=1;
pthread_cond_signal(&mmap_cond);
pthread_mutex_unlock(&mmap_mutex);
}
/*
* Incrementing live_threads inside test_mutex avoids a timing
* sensitive race -- otherwise some threads could miss the
* pthread_cond_broadcast of test_start.
*/
pthread_mutex_lock(&test_mutex);
live_threads++;
if (live_threads == num_threads) {
/* if this is the last thread to init, let main() know we're done */
/* NOTE: only the last thread sends this signal */
pthread_cond_signal(&init_cond);
}
/* Wait for the signal to begin testing */
while (start.tv_sec == 0) {
pthread_cond_wait(&test_start,&test_mutex);
}
pthread_mutex_unlock(&test_mutex);
pthread_mutex_lock(&running_threads_mutex);
running_threads++;
if (verbose)
printf("thread %lu (%lu): test start\n",thread_id,running_threads);
pthread_mutex_unlock(&running_threads_mutex);
loop_counters[thread_id]=0;
while (!done) {
/* Choose a random thread and a random page */
t = rand() % num_threads;
p = rand() % pages;
lp = (long *)&(mmap_regions[t][p*pagesize]);
/* Check the info we wrote there earlier */
if (lp[0] != 0xDEADBEEF || lp[1] != t || lp[2] != p) {
fprintf(stderr,"MEMORY CORRUPTION DETECTED\n");
fprintf(stderr,"thread %lu (CPU %lu) reading map %u, page %lu\n",
thread_id,thread_id % num_cpus,t,p);
fprintf(stderr,"read: %#lx %lu %lu should be: %#x %i %lu\n",
lp[0],lp[1],lp[2],0xDEADBEEF,t,p);
}
/* choose a random word (other than the first 3 */
offset = (rand() % ((pagesize/sizeof(long))-3))+3;
if (rand() % 2) {
lp[offset] = rand();
} else {
garbage = lp[offset];
}
loop_counters[thread_id]++;
}
/* make sure everyone's finished before we unmap */
pthread_mutex_lock(&finish_mutex);
running_threads--;
if (verbose)
printf("thread %lu (%lu): test start\n",thread_id,running_threads);
if (running_threads == 0)
pthread_cond_broadcast(&finish_cond); /* This is the cleanup thread */
else {
while (running_threads > 0) {
pthread_cond_wait(&finish_cond,&finish_mutex);
}
}
pthread_mutex_unlock(&finish_mutex);
/* Clean up and exit. */
if (verbose) printf("thread %lu unmapping and exiting\n",thread_id);
if (munmap(my_region,mapsize) != 0) {
perror("munmap"); exit(2);
}
return NULL;
}
/*
 * Signal handler (installed for SIGINT by main): raises the global
 * `done` flag.  The signal number itself is not used.
 */
void int_handler(int signum)
{
    (void)signum;
    done = 1;
}
/* print usage info (with name of binary) */
void usage(void) {
printf("usage: %s [-h] [-v] [-q] [-p] [-t sec] [-n threads] [-m size]\n",
basename);
printf(" -h: show this help\n");
printf(" -v: verbose\n");
printf(" -q: quiet (do not show progress meters)\n");
printf(" -p: parallel thread startup\n");
printf(" -t: test time, in seconds. default: %u\n",default_runtime);
printf(" -n: number of threads. default: %u (2*num_cpus)\n",default_threads);
printf(" -m: memory usage. default: %s (%.0f%% of free RAM)\n",
human_memsize(default_memsize),DEFAULT_MEMPCT*100.0);
printf("memory size may use k/m/g suffixes, or may be a percentage of total RAM.\n");
}
int main(int argc, char **argv) {
struct sysinfo info;
struct sigaction mysig;
int i,rv=0;
float duration_f, loops_per_sec;
unsigned long free_mem, mapsize;
basename=strrchr(argv[0],'/');
if (basename) basename++; else basename=argv[0];
/* Calculate default values */
/* Get processor count. */
num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
/* Ensure we have at least two threads per CPU */
if (num_cpus*2 > default_threads)
default_threads = num_cpus*2;
/* Get memory info */
if (sysinfo(&info) != 0) { perror("sysinfo"); return -1; }
free_mem=(info.freeram+info.bufferram)*info.mem_unit;
total_ram=info.totalram*info.mem_unit;
/* default to using most of free_mem */
default_memsize = free_mem * DEFAULT_MEMPCT;
/* Set configurable values to reasonable defaults */
runtime = default_runtime;
num_threads = default_threads;
memsize = default_memsize;
/* parse options */
while ((i = getopt(argc,argv,"hvqpt:n:m:")) != -1) {
switch (i) {
case 'h':
usage();
return 0;
case 'v':
verbose=1;
break;
case 'q':
quiet=1;
break;
case 'p':
parallel=1;
break;
case 't':
runtime=atoi(optarg);
if (!runtime) {
printf("%s: error: bad runtime \"%s\"\n",basename,optarg);
return 1;
}
break;
case 'n':
num_threads=atoi(optarg);
if (!num_threads) {
printf("%s: error: bad thread count \"%s\"\n",basename,optarg);
return 1;
}
break;
case 'm':
memsize=parse_memsize(optarg);
if (!memsize) {
printf("%s: error: bad memory size \"%s\"\n",basename,optarg);
return 1;
}
break;
}
}
/* calculate mapsize now that memsize/num_threads is set */
mapsize = memsize/num_threads;
/* sanity checks */
if (num_threads < num_cpus)
printf("Warning: num_threads < num_cpus. This isn't usually a good idea.\n");
if (memsize > free_mem)
printf("Warning: memsize > free_mem. You will probably hit swap.\n");
/* A little information */
if (verbose) {
printf("Detected %u processors.\n",num_cpus);
printf("RAM: %.1f%% free (%s/",
100.0*(double)free_mem/(double)total_ram,
human_memsize(free_mem));
printf("%s)\n",human_memsize(total_ram));
}
printf("Testing %s RAM for %u seconds using %u threads:\n",
human_memsize(memsize),runtime,num_threads);
/* Allocate room for thread info */
threads=(pthread_t *)malloc(num_threads*sizeof(pthread_t));
mmap_regions=(char **)malloc(num_threads*sizeof(char *));
loop_counters=(unsigned long *)malloc(num_threads*sizeof(unsigned long *));
for (i = 0; i < num_threads; i++) {
mmap_regions[i] = NULL;
loop_counters[i] = 0;
}
/* Create all our threads! */
while (created_threads < num_threads) {
pthread_mutex_lock(&mmap_mutex);
mmap_done=0;
if (pthread_create(&threads[created_threads],NULL,
mem_twiddler,(void*)&mapsize) != 0) {
perror("pthread_create"); exit(1);
}
/* Wait for it to finish initializing */
while (!mmap_done) { pthread_cond_wait(&mmap_cond,&mmap_mutex); }
pthread_mutex_unlock(&mmap_mutex);
if (!verbose && !quiet)
progressbar("Starting threads",created_threads,num_threads);
}
if (parallel) {
/* Wait for the signal that everyone is finished initializing */
pthread_mutex_lock(&init_mutex);
while (live_threads < num_threads) {
pthread_cond_wait(&init_cond,&init_mutex);
}
pthread_mutex_unlock(&init_mutex);
}
/* Let the testing begin! */
if (!verbose && !quiet) printf("\n");
gettimeofday(&start,NULL);
pthread_cond_broadcast(&test_start);
/* catch ^C signal */
mysig.sa_handler=int_handler;
sigemptyset(&mysig.sa_mask);
mysig.sa_flags=0;
sigaction(SIGINT,&mysig,NULL);
/* Wait until all threads are actually running otherwise some threads
never get started before done is set on large UV systems */
while (running_threads < num_threads) {
if (!quiet) progressbar("Running Threads", running_threads, num_threads);
sleep((num_threads - running_threads) / 60 + 1);
}
/* Wait for the allotted time */
i=0;
while (!done && (i<runtime)) {
if (sleep(1) == 0) i++;
if (!quiet) progressbar("Testing RAM",i,runtime);
}
if (i != runtime)
rv=1;
/* Signal completion and join all threads */
done=1;
while (live_threads) {
pthread_join(threads[live_threads-1],NULL);
live_threads--;
if (!quiet) progressbar("Joined Threads", (num_threads - live_threads), num_threads);
}
gettimeofday(&finish,NULL);
if (!quiet) printf("\n");
/* Test is officially complete. Calculate run speed. */
timersub(&finish,&start,&duration);
duration_f=(float)duration.tv_sec + (float)duration.tv_usec / 1000000.0;
loops_per_sec=0;
if (verbose) printf("Runtime was %.2fs\n",duration_f);
for (i=0;i<num_threads;i++) {
if (verbose) printf("thread %i: %lu loops\n",i,loop_counters[i]);
loops_per_sec += (float)loop_counters[i]/duration_f;
}
printf("Total loops per second: %.2f\n",loops_per_sec);
/* All done. Return success. */
printf("Testing complete.\n");
return rv;
}
next prev parent reply other threads:[~2013-03-12 15:19 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-02-21 2:17 [patch 1/4 v3]swap: change block allocation algorithm for SSD Shaohua Li
2013-02-21 8:13 ` Kyungmin Park
2013-02-21 9:35 ` Shaohua Li
2013-03-12 15:12 ` Rafael Aquini
2013-03-12 15:19 ` Rafael Aquini [this message]
2013-03-18 5:09 ` Shaohua Li
2013-03-18 5:16 ` Simon Jeons
2013-03-18 6:40 ` Shaohua Li
2013-03-18 6:49 ` Simon Jeons
2013-03-18 21:02 ` Rafael Aquini
2013-03-19 1:31 ` Shaohua Li
2013-03-19 20:50 ` Hugh Dickins
2013-03-20 20:58 ` Andrew Morton
2013-03-21 2:02 ` Shaohua Li
2013-03-20 20:36 ` Andrew Morton
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130312151901.GB1739@optiplex.redhat.com \
--to=aquini@redhat.com \
--cc=hughd@google.com \
--cc=kmpark@infradead.org \
--cc=linux-mm@kvack.org \
--cc=minchan@kernel.org \
--cc=riel@redhat.com \
--cc=shli@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).