* [PATCH v4] Improve the performance of --num-threads -d 31
@ 2016-03-09 0:27 Zhou Wenjian
2016-03-09 0:35 ` "Zhou, Wenjian/周文剑"
` (2 more replies)
0 siblings, 3 replies; 33+ messages in thread
From: Zhou Wenjian @ 2016-03-09 0:27 UTC (permalink / raw)
To: kexec
v4:
1. fix a bug caused by the logic
v3:
1. remove some unused variables
2. fix a bug caused by the wrong logic
3. fix a bug caused by optimising
4. improve more performance by using Minoru Usui's code
The multi-thread implementation introduces extra cost when handling
each page. The original implementation also did this extra work for
filtered pages, so there was a big performance degradation in
--num-threads -d 31.
The new implementation won't do the extra work for filtered pages any
more. So the performance of -d 31 is close to that of serial processing.
The new implementation is just like the following:
* The basic idea is producer producing page and consumer writing page.
* Each producer has a page_flag_buf list which is used for storing
each page's description.
* The size of a page_flag_buf is small, so it won't take too much memory.
* All producers share one page_data_buf array, which is used for
storing the pages' compressed data.
* The main thread is the consumer. It will find the next pfn and write
it into file.
* The next pfn is the smallest pfn among all page_flag_bufs.
Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com>
Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com>
---
makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++----------------------
makedumpfile.h | 35 ++++---
2 files changed, 202 insertions(+), 131 deletions(-)
diff --git a/makedumpfile.c b/makedumpfile.c
index fa0b779..2b0864a 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -3483,7 +3483,8 @@ initial_for_parallel()
unsigned long page_data_buf_size;
unsigned long limit_size;
int page_data_num;
- int i;
+ struct page_flag *current;
+ int i, j;
len_buf_out = calculate_len_buf_out(info->page_size);
@@ -3560,10 +3561,16 @@ initial_for_parallel()
limit_size = (get_free_memory_size()
- MAP_REGION * info->num_threads) * 0.6;
+ if (limit_size < 0) {
+ MSG("Free memory is not enough for multi-threads\n");
+ return FALSE;
+ }
page_data_num = limit_size / page_data_buf_size;
+ info->num_buffers = 3 * info->num_threads;
- info->num_buffers = MIN(NUM_BUFFERS, page_data_num);
+ info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS);
+ info->num_buffers = MIN(info->num_buffers, page_data_num);
DEBUG_MSG("Number of struct page_data for produce/consume: %d\n",
info->num_buffers);
@@ -3588,6 +3595,36 @@ initial_for_parallel()
}
/*
+ * initial page_flag for each thread
+ */
+ if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads))
+ == NULL) {
+ MSG("Can't allocate memory for page_flag_buf. %s\n",
+ strerror(errno));
+ return FALSE;
+ }
+ memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads);
+
+ for (i = 0; i < info->num_threads; i++) {
+ if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) {
+ MSG("Can't allocate memory for page_flag. %s\n",
+ strerror(errno));
+ return FALSE;
+ }
+ current = info->page_flag_buf[i];
+
+ for (j = 1; j < NUM_BUFFERS; j++) {
+ if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) {
+ MSG("Can't allocate memory for page_flag. %s\n",
+ strerror(errno));
+ return FALSE;
+ }
+ current = current->next;
+ }
+ current->next = info->page_flag_buf[i];
+ }
+
+ /*
* initial fd_memory for threads
*/
for (i = 0; i < info->num_threads; i++) {
@@ -3612,7 +3649,8 @@ initial_for_parallel()
void
free_for_parallel()
{
- int i;
+ int i, j;
+ struct page_flag *current;
if (info->threads != NULL) {
for (i = 0; i < info->num_threads; i++) {
@@ -3655,6 +3693,19 @@ free_for_parallel()
free(info->page_data_buf);
}
+ if (info->page_flag_buf != NULL) {
+ for (i = 0; i < info->num_threads; i++) {
+ for (j = 0; j < NUM_BUFFERS; j++) {
+ if (info->page_flag_buf[i] != NULL) {
+ current = info->page_flag_buf[i];
+ info->page_flag_buf[i] = current->next;
+ free(current);
+ }
+ }
+ }
+ free(info->page_flag_buf);
+ }
+
if (info->parallel_info == NULL)
return;
@@ -7075,11 +7126,11 @@ void *
kdump_thread_function_cyclic(void *arg) {
void *retval = PTHREAD_FAIL;
struct thread_args *kdump_thread_args = (struct thread_args *)arg;
- struct page_data *page_data_buf = kdump_thread_args->page_data_buf;
+ volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf;
+ volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf;
struct cycle *cycle = kdump_thread_args->cycle;
- int page_data_num = kdump_thread_args->page_data_num;
- mdf_pfn_t pfn;
- int index;
+ mdf_pfn_t pfn = cycle->start_pfn;
+ int index = kdump_thread_args->thread_num;
int buf_ready;
int dumpable;
int fd_memory = 0;
@@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) {
kdump_thread_args->thread_num);
}
- while (1) {
- /* get next pfn */
- pthread_mutex_lock(&info->current_pfn_mutex);
- pfn = info->current_pfn;
- info->current_pfn++;
- pthread_mutex_unlock(&info->current_pfn_mutex);
-
- if (pfn >= kdump_thread_args->end_pfn)
- break;
-
- index = -1;
+ /*
+ * filtered page won't take anything
+ * unfiltered zero page will only take a page_flag_buf
+ * unfiltered non-zero page will take a page_flag_buf and a page_data_buf
+ */
+ while (pfn < cycle->end_pfn) {
buf_ready = FALSE;
+ pthread_mutex_lock(&info->page_data_mutex);
+ while (page_data_buf[index].used != FALSE) {
+ index = (index + 1) % info->num_buffers;
+ }
+ page_data_buf[index].used = TRUE;
+ pthread_mutex_unlock(&info->page_data_mutex);
+
while (buf_ready == FALSE) {
pthread_testcancel();
-
- index = pfn % page_data_num;
-
- if (pfn - info->consumed_pfn > info->num_buffers)
+ if (page_flag_buf->ready == FLAG_READY)
continue;
- if (page_data_buf[index].ready != 0)
- continue;
-
- pthread_mutex_lock(&page_data_buf[index].mutex);
-
- if (page_data_buf[index].ready != 0)
- goto unlock;
-
- buf_ready = TRUE;
+ /* get next dumpable pfn */
+ pthread_mutex_lock(&info->current_pfn_mutex);
+ for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) {
+ dumpable = is_dumpable(
+ info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
+ pfn,
+ cycle);
+ if (dumpable)
+ break;
+ }
+ info->current_pfn = pfn + 1;
- page_data_buf[index].pfn = pfn;
- page_data_buf[index].ready = 1;
+ page_flag_buf->pfn = pfn;
+ page_flag_buf->ready = FLAG_FILLING;
+ pthread_mutex_unlock(&info->current_pfn_mutex);
+ sem_post(&info->page_flag_buf_sem);
- dumpable = is_dumpable(
- info->fd_bitmap ? &bitmap_parallel : info->bitmap2,
- pfn,
- cycle);
- page_data_buf[index].dumpable = dumpable;
- if (!dumpable)
- goto unlock;
+ if (pfn >= cycle->end_pfn) {
+ info->current_pfn = cycle->end_pfn;
+ page_data_buf[index].used = FALSE;
+ break;
+ }
if (!read_pfn_parallel(fd_memory, pfn, buf,
&bitmap_memory_parallel,
@@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) {
if ((info->dump_level & DL_EXCLUDE_ZERO)
&& is_zero_page(buf, info->page_size)) {
- page_data_buf[index].zero = TRUE;
- goto unlock;
+ page_flag_buf->zero = TRUE;
+ goto next;
}
- page_data_buf[index].zero = FALSE;
+ page_flag_buf->zero = FALSE;
/*
* Compress the page data.
@@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) {
page_data_buf[index].flags =
DUMP_DH_COMPRESSED_LZO;
page_data_buf[index].size = size_out;
+
memcpy(page_data_buf[index].buf, buf_out, size_out);
#endif
#ifdef USESNAPPY
@@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) {
page_data_buf[index].size = info->page_size;
memcpy(page_data_buf[index].buf, buf, info->page_size);
}
-unlock:
- pthread_mutex_unlock(&page_data_buf[index].mutex);
+ page_flag_buf->index = index;
+ buf_ready = TRUE;
+next:
+ page_flag_buf->ready = FLAG_READY;
+ page_flag_buf = page_flag_buf->next;
}
}
-
retval = NULL;
fail:
@@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
struct page_desc pd;
struct timeval tv_start;
struct timeval last, new;
- unsigned long long consuming_pfn;
pthread_t **threads = NULL;
struct thread_args *kdump_thread_args = NULL;
void *thread_result;
- int page_data_num;
+ int page_buf_num;
struct page_data *page_data_buf = NULL;
int i;
int index;
+ int end_count, consuming, check_count;
+ mdf_pfn_t current_pfn, temp_pfn;
if (info->flag_elf_dumpfile)
return FALSE;
@@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
goto out;
}
- res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL);
- if (res != 0) {
- ERRMSG("Can't initialize consumed_pfn_mutex. %s\n",
- strerror(res));
- goto out;
- }
-
res = pthread_mutex_init(&info->filter_mutex, NULL);
if (res != 0) {
ERRMSG("Can't initialize filter_mutex. %s\n", strerror(res));
@@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
end_pfn = cycle->end_pfn;
info->current_pfn = start_pfn;
- info->consumed_pfn = start_pfn - 1;
threads = info->threads;
kdump_thread_args = info->kdump_thread_args;
- page_data_num = info->num_buffers;
+ page_buf_num = info->num_buffers;
page_data_buf = info->page_data_buf;
+ pthread_mutex_init(&info->page_data_mutex, NULL);
+ sem_init(&info->page_flag_buf_sem, 0, 0);
- for (i = 0; i < page_data_num; i++) {
- /*
- * producer will use pfn in page_data_buf to decide the
- * consumed pfn
- */
- page_data_buf[i].pfn = start_pfn - 1;
- page_data_buf[i].ready = 0;
- res = pthread_mutex_init(&page_data_buf[i].mutex, NULL);
- if (res != 0) {
- ERRMSG("Can't initialize mutex of page_data_buf. %s\n",
- strerror(res));
- goto out;
- }
- }
+ for (i = 0; i < page_buf_num; i++)
+ page_data_buf[i].used = FALSE;
for (i = 0; i < info->num_threads; i++) {
kdump_thread_args[i].thread_num = i;
kdump_thread_args[i].len_buf_out = len_buf_out;
- kdump_thread_args[i].start_pfn = start_pfn;
- kdump_thread_args[i].end_pfn = end_pfn;
- kdump_thread_args[i].page_data_num = page_data_num;
kdump_thread_args[i].page_data_buf = page_data_buf;
+ kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i];
kdump_thread_args[i].cycle = cycle;
res = pthread_create(threads[i], NULL,
@@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
}
}
- consuming_pfn = start_pfn;
- index = -1;
+ end_count = 0;
+ while (1) {
+ consuming = 0;
+ check_count = 0;
- gettimeofday(&last, NULL);
+ /*
+ * The basic idea is producer producing page and consumer writing page.
+ * Each producer have a page_flag_buf list which is used for storing page's description.
+ * The size of page_flag_buf is little so it won't take too much memory.
+ * And all producers will share a page_data_buf array which is used for storing page's compressed data.
+ * The main thread is the consumer. It will find the next pfn and write it into file.
+ * The next pfn is smallest pfn in all page_flag_buf.
+ */
+ sem_wait(&info->page_flag_buf_sem);
+ gettimeofday(&last, NULL);
+ while (1) {
+ current_pfn = end_pfn;
- while (consuming_pfn < end_pfn) {
- index = consuming_pfn % page_data_num;
+ /*
+ * page_flag_buf is in circular linked list.
+ * The array info->page_flag_buf[] records the current page_flag_buf in each thread's
+ * page_flag_buf list.
+ * consuming is used for recording in which thread the pfn is the smallest.
+ * current_pfn is used for recording the value of pfn when checking the pfn.
+ */
+ for (i = 0; i < info->num_threads; i++) {
+ if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
+ continue;
+ temp_pfn = info->page_flag_buf[i]->pfn;
- gettimeofday(&new, NULL);
- if (new.tv_sec - last.tv_sec > WAIT_TIME) {
- ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn);
- goto out;
- }
+ /*
+ * count how many threads have reached the end.
+ */
+ if (temp_pfn >= end_pfn) {
+ info->page_flag_buf[i]->ready = FLAG_UNUSED;
+ end_count++;
+ continue;
+ }
- /*
- * check pfn first without mutex locked to reduce the time
- * trying to lock the mutex
- */
- if (page_data_buf[index].pfn != consuming_pfn)
- continue;
+ if (current_pfn < temp_pfn)
+ continue;
- if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0)
- continue;
+ check_count++;
+ consuming = i;
+ current_pfn = temp_pfn;
+ }
- /* check whether the found one is ready to be consumed */
- if (page_data_buf[index].pfn != consuming_pfn ||
- page_data_buf[index].ready != 1) {
- goto unlock;
+ /*
+ * If all the threads have reached the end, we will finish writing.
+ */
+ if (end_count >= info->num_threads)
+ goto finish;
+
+ /*
+ * If the page_flag_buf is not ready, the pfn recorded may be changed.
+ * So we should recheck.
+ */
+ if (info->page_flag_buf[consuming]->ready != FLAG_READY) {
+ gettimeofday(&new, NULL);
+ if (new.tv_sec - last.tv_sec > WAIT_TIME) {
+ ERRMSG("Can't get data of pfn.\n");
+ goto out;
+ }
+ continue;
+ }
+
+ if (current_pfn == info->page_flag_buf[consuming]->pfn)
+ break;
}
if ((num_dumped % per) == 0)
print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable);
- /* next pfn is found, refresh last here */
- last = new;
- consuming_pfn++;
- info->consumed_pfn++;
- page_data_buf[index].ready = 0;
-
- if (page_data_buf[index].dumpable == FALSE)
- goto unlock;
-
num_dumped++;
- if (page_data_buf[index].zero == TRUE) {
+
+ if (info->page_flag_buf[consuming]->zero == TRUE) {
if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t)))
goto out;
pfn_zero++;
} else {
+ index = info->page_flag_buf[consuming]->index;
pd.flags = page_data_buf[index].flags;
pd.size = page_data_buf[index].size;
pd.page_flags = 0;
@@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
*/
if (!write_cache(cd_page, page_data_buf[index].buf, pd.size))
goto out;
-
+ page_data_buf[index].used = FALSE;
}
-unlock:
- pthread_mutex_unlock(&page_data_buf[index].mutex);
+ info->page_flag_buf[consuming]->ready = FLAG_UNUSED;
+ info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next;
}
-
+finish:
ret = TRUE;
/*
* print [100 %]
@@ -7463,15 +7532,9 @@ out:
}
}
- if (page_data_buf != NULL) {
- for (i = 0; i < page_data_num; i++) {
- pthread_mutex_destroy(&page_data_buf[i].mutex);
- }
- }
-
+ sem_destroy(&info->page_flag_buf_sem);
pthread_rwlock_destroy(&info->usemmap_rwlock);
pthread_mutex_destroy(&info->filter_mutex);
- pthread_mutex_destroy(&info->consumed_pfn_mutex);
pthread_mutex_destroy(&info->current_pfn_mutex);
return ret;
@@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag
num_dumped++;
if (!read_pfn(pfn, buf))
goto out;
+
filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size);
/*
diff --git a/makedumpfile.h b/makedumpfile.h
index e0b5bbf..4b315c0 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -44,6 +44,7 @@
#include "print_info.h"
#include "sadump_mod.h"
#include <pthread.h>
+#include <semaphore.h>
/*
* Result of command
@@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong;
#define PAGE_DATA_NUM (50)
#define WAIT_TIME (60 * 10)
#define PTHREAD_FAIL ((void *)-2)
-#define NUM_BUFFERS (50)
+#define NUM_BUFFERS (20)
struct mmap_cache {
char *mmap_buf;
@@ -985,28 +986,33 @@ struct mmap_cache {
off_t mmap_end_offset;
};
+enum {
+ FLAG_UNUSED,
+ FLAG_READY,
+ FLAG_FILLING
+};
+struct page_flag {
+ mdf_pfn_t pfn;
+ char zero;
+ char ready;
+ short index;
+ struct page_flag *next;
+};
+
struct page_data
{
- mdf_pfn_t pfn;
- int dumpable;
- int zero;
- unsigned int flags;
long size;
unsigned char *buf;
- pthread_mutex_t mutex;
- /*
- * whether the page_data is ready to be consumed
- */
- int ready;
+ int flags;
+ int used;
};
struct thread_args {
int thread_num;
unsigned long len_buf_out;
- mdf_pfn_t start_pfn, end_pfn;
- int page_data_num;
struct cycle *cycle;
struct page_data *page_data_buf;
+ struct page_flag *page_flag_buf;
};
/*
@@ -1295,11 +1301,12 @@ struct DumpInfo {
pthread_t **threads;
struct thread_args *kdump_thread_args;
struct page_data *page_data_buf;
+ struct page_flag **page_flag_buf;
+ sem_t page_flag_buf_sem;
pthread_rwlock_t usemmap_rwlock;
mdf_pfn_t current_pfn;
pthread_mutex_t current_pfn_mutex;
- mdf_pfn_t consumed_pfn;
- pthread_mutex_t consumed_pfn_mutex;
+ pthread_mutex_t page_data_mutex;
pthread_mutex_t filter_mutex;
};
extern struct DumpInfo *info;
--
1.8.3.1
_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec
^ permalink raw reply related [flat|nested] 33+ messages in thread* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-09 0:27 [PATCH v4] Improve the performance of --num-threads -d 31 Zhou Wenjian @ 2016-03-09 0:35 ` "Zhou, Wenjian/周文剑" 2016-03-11 1:00 ` "Zhou, Wenjian/周文剑" 2016-03-15 6:34 ` Minfei Huang 2 siblings, 0 replies; 33+ messages in thread From: "Zhou, Wenjian/周文剑" @ 2016-03-09 0:35 UTC (permalink / raw) To: kexec; +Cc: Minfei Huang, Minoru Usui Hi Minfei and Minoru, The bug has been fixed. -- Thanks Zhou On 03/09/2016 08:27 AM, Zhou Wenjian wrote: > v4: > 1. fix a bug caused by the logic > v3: > 1. remove some unused variables > 2. fix a bug caused by the wrong logic > 3. fix a bug caused by optimising > 4. improve more performance by using Minoru Usui's code > > multi-threads implementation will introduce extra cost when handling > each page. The origin implementation will also do the extra work for > filtered pages. So there is a big performance degradation in > --num-threads -d 31. > The new implementation won't do the extra work for filtered pages any > more. So the performance of -d 31 is close to that of serial processing. > > The new implementation is just like the following: > * The basic idea is producer producing page and consumer writing page. > * Each producer have a page_flag_buf list which is used for storing > page's description. > * The size of page_flag_buf is little so it won't take too much memory. > * And all producers will share a page_data_buf array which is > used for storing page's compressed data. > * The main thread is the consumer. It will find the next pfn and write > it into file. > * The next pfn is smallest pfn in all page_flag_buf. 
> > Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> > Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> > --- > makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- > makedumpfile.h | 35 ++++--- > 2 files changed, 202 insertions(+), 131 deletions(-) > > diff --git a/makedumpfile.c b/makedumpfile.c > index fa0b779..2b0864a 100644 > --- a/makedumpfile.c > +++ b/makedumpfile.c > @@ -3483,7 +3483,8 @@ initial_for_parallel() > unsigned long page_data_buf_size; > unsigned long limit_size; > int page_data_num; > - int i; > + struct page_flag *current; > + int i, j; > > len_buf_out = calculate_len_buf_out(info->page_size); > > @@ -3560,10 +3561,16 @@ initial_for_parallel() > > limit_size = (get_free_memory_size() > - MAP_REGION * info->num_threads) * 0.6; > + if (limit_size < 0) { > + MSG("Free memory is not enough for multi-threads\n"); > + return FALSE; > + } > > page_data_num = limit_size / page_data_buf_size; > + info->num_buffers = 3 * info->num_threads; > > - info->num_buffers = MIN(NUM_BUFFERS, page_data_num); > + info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); > + info->num_buffers = MIN(info->num_buffers, page_data_num); > > DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", > info->num_buffers); > @@ -3588,6 +3595,36 @@ initial_for_parallel() > } > > /* > + * initial page_flag for each thread > + */ > + if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) > + == NULL) { > + MSG("Can't allocate memory for page_flag_buf. %s\n", > + strerror(errno)); > + return FALSE; > + } > + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); > + > + for (i = 0; i < info->num_threads; i++) { > + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { > + MSG("Can't allocate memory for page_flag. 
%s\n", > + strerror(errno)); > + return FALSE; > + } > + current = info->page_flag_buf[i]; > + > + for (j = 1; j < NUM_BUFFERS; j++) { > + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { > + MSG("Can't allocate memory for page_flag. %s\n", > + strerror(errno)); > + return FALSE; > + } > + current = current->next; > + } > + current->next = info->page_flag_buf[i]; > + } > + > + /* > * initial fd_memory for threads > */ > for (i = 0; i < info->num_threads; i++) { > @@ -3612,7 +3649,8 @@ initial_for_parallel() > void > free_for_parallel() > { > - int i; > + int i, j; > + struct page_flag *current; > > if (info->threads != NULL) { > for (i = 0; i < info->num_threads; i++) { > @@ -3655,6 +3693,19 @@ free_for_parallel() > free(info->page_data_buf); > } > > + if (info->page_flag_buf != NULL) { > + for (i = 0; i < info->num_threads; i++) { > + for (j = 0; j < NUM_BUFFERS; j++) { > + if (info->page_flag_buf[i] != NULL) { > + current = info->page_flag_buf[i]; > + info->page_flag_buf[i] = current->next; > + free(current); > + } > + } > + } > + free(info->page_flag_buf); > + } > + > if (info->parallel_info == NULL) > return; > > @@ -7075,11 +7126,11 @@ void * > kdump_thread_function_cyclic(void *arg) { > void *retval = PTHREAD_FAIL; > struct thread_args *kdump_thread_args = (struct thread_args *)arg; > - struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > + volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; > struct cycle *cycle = kdump_thread_args->cycle; > - int page_data_num = kdump_thread_args->page_data_num; > - mdf_pfn_t pfn; > - int index; > + mdf_pfn_t pfn = cycle->start_pfn; > + int index = kdump_thread_args->thread_num; > int buf_ready; > int dumpable; > int fd_memory = 0; > @@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { > kdump_thread_args->thread_num); > } > > - while (1) { > - /* get next pfn */ > - 
pthread_mutex_lock(&info->current_pfn_mutex); > - pfn = info->current_pfn; > - info->current_pfn++; > - pthread_mutex_unlock(&info->current_pfn_mutex); > - > - if (pfn >= kdump_thread_args->end_pfn) > - break; > - > - index = -1; > + /* > + * filtered page won't take anything > + * unfiltered zero page will only take a page_flag_buf > + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf > + */ > + while (pfn < cycle->end_pfn) { > buf_ready = FALSE; > > + pthread_mutex_lock(&info->page_data_mutex); > + while (page_data_buf[index].used != FALSE) { > + index = (index + 1) % info->num_buffers; > + } > + page_data_buf[index].used = TRUE; > + pthread_mutex_unlock(&info->page_data_mutex); > + > while (buf_ready == FALSE) { > pthread_testcancel(); > - > - index = pfn % page_data_num; > - > - if (pfn - info->consumed_pfn > info->num_buffers) > + if (page_flag_buf->ready == FLAG_READY) > continue; > > - if (page_data_buf[index].ready != 0) > - continue; > - > - pthread_mutex_lock(&page_data_buf[index].mutex); > - > - if (page_data_buf[index].ready != 0) > - goto unlock; > - > - buf_ready = TRUE; > + /* get next dumpable pfn */ > + pthread_mutex_lock(&info->current_pfn_mutex); > + for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { > + dumpable = is_dumpable( > + info->fd_bitmap ? &bitmap_parallel : info->bitmap2, > + pfn, > + cycle); > + if (dumpable) > + break; > + } > + info->current_pfn = pfn + 1; > > - page_data_buf[index].pfn = pfn; > - page_data_buf[index].ready = 1; > + page_flag_buf->pfn = pfn; > + page_flag_buf->ready = FLAG_FILLING; > + pthread_mutex_unlock(&info->current_pfn_mutex); > + sem_post(&info->page_flag_buf_sem); > > - dumpable = is_dumpable( > - info->fd_bitmap ? 
&bitmap_parallel : info->bitmap2, > - pfn, > - cycle); > - page_data_buf[index].dumpable = dumpable; > - if (!dumpable) > - goto unlock; > + if (pfn >= cycle->end_pfn) { > + info->current_pfn = cycle->end_pfn; > + page_data_buf[index].used = FALSE; > + break; > + } > > if (!read_pfn_parallel(fd_memory, pfn, buf, > &bitmap_memory_parallel, > @@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { > > if ((info->dump_level & DL_EXCLUDE_ZERO) > && is_zero_page(buf, info->page_size)) { > - page_data_buf[index].zero = TRUE; > - goto unlock; > + page_flag_buf->zero = TRUE; > + goto next; > } > > - page_data_buf[index].zero = FALSE; > + page_flag_buf->zero = FALSE; > > /* > * Compress the page data. > @@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { > page_data_buf[index].flags = > DUMP_DH_COMPRESSED_LZO; > page_data_buf[index].size = size_out; > + > memcpy(page_data_buf[index].buf, buf_out, size_out); > #endif > #ifdef USESNAPPY > @@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { > page_data_buf[index].size = info->page_size; > memcpy(page_data_buf[index].buf, buf, info->page_size); > } > -unlock: > - pthread_mutex_unlock(&page_data_buf[index].mutex); > + page_flag_buf->index = index; > + buf_ready = TRUE; > +next: > + page_flag_buf->ready = FLAG_READY; > + page_flag_buf = page_flag_buf->next; > > } > } > - > retval = NULL; > > fail: > @@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > struct page_desc pd; > struct timeval tv_start; > struct timeval last, new; > - unsigned long long consuming_pfn; > pthread_t **threads = NULL; > struct thread_args *kdump_thread_args = NULL; > void *thread_result; > - int page_data_num; > + int page_buf_num; > struct page_data *page_data_buf = NULL; > int i; > int index; > + int end_count, consuming, check_count; > + mdf_pfn_t current_pfn, temp_pfn; > > if (info->flag_elf_dumpfile) > return FALSE; > @@ -7284,13 +7340,6 @@ 
write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > goto out; > } > > - res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); > - if (res != 0) { > - ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", > - strerror(res)); > - goto out; > - } > - > res = pthread_mutex_init(&info->filter_mutex, NULL); > if (res != 0) { > ERRMSG("Can't initialize filter_mutex. %s\n", strerror(res)); > @@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > end_pfn = cycle->end_pfn; > > info->current_pfn = start_pfn; > - info->consumed_pfn = start_pfn - 1; > > threads = info->threads; > kdump_thread_args = info->kdump_thread_args; > > - page_data_num = info->num_buffers; > + page_buf_num = info->num_buffers; > page_data_buf = info->page_data_buf; > + pthread_mutex_init(&info->page_data_mutex, NULL); > + sem_init(&info->page_flag_buf_sem, 0, 0); > > - for (i = 0; i < page_data_num; i++) { > - /* > - * producer will use pfn in page_data_buf to decide the > - * consumed pfn > - */ > - page_data_buf[i].pfn = start_pfn - 1; > - page_data_buf[i].ready = 0; > - res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); > - if (res != 0) { > - ERRMSG("Can't initialize mutex of page_data_buf. 
%s\n", > - strerror(res)); > - goto out; > - } > - } > + for (i = 0; i < page_buf_num; i++) > + page_data_buf[i].used = FALSE; > > for (i = 0; i < info->num_threads; i++) { > kdump_thread_args[i].thread_num = i; > kdump_thread_args[i].len_buf_out = len_buf_out; > - kdump_thread_args[i].start_pfn = start_pfn; > - kdump_thread_args[i].end_pfn = end_pfn; > - kdump_thread_args[i].page_data_num = page_data_num; > kdump_thread_args[i].page_data_buf = page_data_buf; > + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; > kdump_thread_args[i].cycle = cycle; > > res = pthread_create(threads[i], NULL, > @@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > } > } > > - consuming_pfn = start_pfn; > - index = -1; > + end_count = 0; > + while (1) { > + consuming = 0; > + check_count = 0; > > - gettimeofday(&last, NULL); > + /* > + * The basic idea is producer producing page and consumer writing page. > + * Each producer have a page_flag_buf list which is used for storing page's description. > + * The size of page_flag_buf is little so it won't take too much memory. > + * And all producers will share a page_data_buf array which is used for storing page's compressed data. > + * The main thread is the consumer. It will find the next pfn and write it into file. > + * The next pfn is smallest pfn in all page_flag_buf. > + */ > + sem_wait(&info->page_flag_buf_sem); > + gettimeofday(&last, NULL); > + while (1) { > + current_pfn = end_pfn; > > - while (consuming_pfn < end_pfn) { > - index = consuming_pfn % page_data_num; > + /* > + * page_flag_buf is in circular linked list. > + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's > + * page_flag_buf list. > + * consuming is used for recording in which thread the pfn is the smallest. > + * current_pfn is used for recording the value of pfn when checking the pfn. 
> + */ > + for (i = 0; i < info->num_threads; i++) { > + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) > + continue; > + temp_pfn = info->page_flag_buf[i]->pfn; > > - gettimeofday(&new, NULL); > - if (new.tv_sec - last.tv_sec > WAIT_TIME) { > - ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); > - goto out; > - } > + /* > + * count how many threads have reached the end. > + */ > + if (temp_pfn >= end_pfn) { > + info->page_flag_buf[i]->ready = FLAG_UNUSED; > + end_count++; > + continue; > + } > > - /* > - * check pfn first without mutex locked to reduce the time > - * trying to lock the mutex > - */ > - if (page_data_buf[index].pfn != consuming_pfn) > - continue; > + if (current_pfn < temp_pfn) > + continue; > > - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) > - continue; > + check_count++; > + consuming = i; > + current_pfn = temp_pfn; > + } > > - /* check whether the found one is ready to be consumed */ > - if (page_data_buf[index].pfn != consuming_pfn || > - page_data_buf[index].ready != 1) { > - goto unlock; > + /* > + * If all the threads have reached the end, we will finish writing. > + */ > + if (end_count >= info->num_threads) > + goto finish; > + > + /* > + * If the page_flag_buf is not ready, the pfn recorded may be changed. > + * So we should recheck. 
> + */ > + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { > + gettimeofday(&new, NULL); > + if (new.tv_sec - last.tv_sec > WAIT_TIME) { > + ERRMSG("Can't get data of pfn.\n"); > + goto out; > + } > + continue; > + } > + > + if (current_pfn == info->page_flag_buf[consuming]->pfn) > + break; > } > > if ((num_dumped % per) == 0) > print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); > > - /* next pfn is found, refresh last here */ > - last = new; > - consuming_pfn++; > - info->consumed_pfn++; > - page_data_buf[index].ready = 0; > - > - if (page_data_buf[index].dumpable == FALSE) > - goto unlock; > - > num_dumped++; > > - if (page_data_buf[index].zero == TRUE) { > + > + if (info->page_flag_buf[consuming]->zero == TRUE) { > if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) > goto out; > pfn_zero++; > } else { > + index = info->page_flag_buf[consuming]->index; > pd.flags = page_data_buf[index].flags; > pd.size = page_data_buf[index].size; > pd.page_flags = 0; > @@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > */ > if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) > goto out; > - > + page_data_buf[index].used = FALSE; > } > -unlock: > - pthread_mutex_unlock(&page_data_buf[index].mutex); > + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; > + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; > } > - > +finish: > ret = TRUE; > /* > * print [100 %] > @@ -7463,15 +7532,9 @@ out: > } > } > > - if (page_data_buf != NULL) { > - for (i = 0; i < page_data_num; i++) { > - pthread_mutex_destroy(&page_data_buf[i].mutex); > - } > - } > - > + sem_destroy(&info->page_flag_buf_sem); > pthread_rwlock_destroy(&info->usemmap_rwlock); > pthread_mutex_destroy(&info->filter_mutex); > - pthread_mutex_destroy(&info->consumed_pfn_mutex); > pthread_mutex_destroy(&info->current_pfn_mutex); > > return ret; > @@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct 
cache_data *cd_pag > num_dumped++; > if (!read_pfn(pfn, buf)) > goto out; > + > filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); > > /* > diff --git a/makedumpfile.h b/makedumpfile.h > index e0b5bbf..4b315c0 100644 > --- a/makedumpfile.h > +++ b/makedumpfile.h > @@ -44,6 +44,7 @@ > #include "print_info.h" > #include "sadump_mod.h" > #include <pthread.h> > +#include <semaphore.h> > > /* > * Result of command > @@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; > #define PAGE_DATA_NUM (50) > #define WAIT_TIME (60 * 10) > #define PTHREAD_FAIL ((void *)-2) > -#define NUM_BUFFERS (50) > +#define NUM_BUFFERS (20) > > struct mmap_cache { > char *mmap_buf; > @@ -985,28 +986,33 @@ struct mmap_cache { > off_t mmap_end_offset; > }; > > +enum { > + FLAG_UNUSED, > + FLAG_READY, > + FLAG_FILLING > +}; > +struct page_flag { > + mdf_pfn_t pfn; > + char zero; > + char ready; > + short index; > + struct page_flag *next; > +}; > + > struct page_data > { > - mdf_pfn_t pfn; > - int dumpable; > - int zero; > - unsigned int flags; > long size; > unsigned char *buf; > - pthread_mutex_t mutex; > - /* > - * whether the page_data is ready to be consumed > - */ > - int ready; > + int flags; > + int used; > }; > > struct thread_args { > int thread_num; > unsigned long len_buf_out; > - mdf_pfn_t start_pfn, end_pfn; > - int page_data_num; > struct cycle *cycle; > struct page_data *page_data_buf; > + struct page_flag *page_flag_buf; > }; > > /* > @@ -1295,11 +1301,12 @@ struct DumpInfo { > pthread_t **threads; > struct thread_args *kdump_thread_args; > struct page_data *page_data_buf; > + struct page_flag **page_flag_buf; > + sem_t page_flag_buf_sem; > pthread_rwlock_t usemmap_rwlock; > mdf_pfn_t current_pfn; > pthread_mutex_t current_pfn_mutex; > - mdf_pfn_t consumed_pfn; > - pthread_mutex_t consumed_pfn_mutex; > + pthread_mutex_t page_data_mutex; > pthread_mutex_t filter_mutex; > }; > extern struct DumpInfo *info; > _______________________________________________ kexec 
mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-09 0:27 [PATCH v4] Improve the performance of --num-threads -d 31 Zhou Wenjian 2016-03-09 0:35 ` "Zhou, Wenjian/周文剑" @ 2016-03-11 1:00 ` "Zhou, Wenjian/周文剑" 2016-03-11 3:03 ` Minoru Usui 2016-03-11 5:33 ` Minfei Huang 2016-03-15 6:34 ` Minfei Huang 2 siblings, 2 replies; 33+ messages in thread From: "Zhou, Wenjian/周文剑" @ 2016-03-11 1:00 UTC (permalink / raw) To: kexec Hello all, Do you have any comments ? -- Thanks Zhou On 03/09/2016 08:27 AM, Zhou Wenjian wrote: > v4: > 1. fix a bug caused by the logic > v3: > 1. remove some unused variables > 2. fix a bug caused by the wrong logic > 3. fix a bug caused by optimising > 4. improve more performance by using Minoru Usui's code > > multi-threads implementation will introduce extra cost when handling > each page. The origin implementation will also do the extra work for > filtered pages. So there is a big performance degradation in > --num-threads -d 31. > The new implementation won't do the extra work for filtered pages any > more. So the performance of -d 31 is close to that of serial processing. > > The new implementation is just like the following: > * The basic idea is producer producing page and consumer writing page. > * Each producer have a page_flag_buf list which is used for storing > page's description. > * The size of page_flag_buf is little so it won't take too much memory. > * And all producers will share a page_data_buf array which is > used for storing page's compressed data. > * The main thread is the consumer. It will find the next pfn and write > it into file. > * The next pfn is smallest pfn in all page_flag_buf. 
> > Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> > Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> > --- > makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- > makedumpfile.h | 35 ++++--- > 2 files changed, 202 insertions(+), 131 deletions(-) > > diff --git a/makedumpfile.c b/makedumpfile.c > index fa0b779..2b0864a 100644 > --- a/makedumpfile.c > +++ b/makedumpfile.c > @@ -3483,7 +3483,8 @@ initial_for_parallel() > unsigned long page_data_buf_size; > unsigned long limit_size; > int page_data_num; > - int i; > + struct page_flag *current; > + int i, j; > > len_buf_out = calculate_len_buf_out(info->page_size); > > @@ -3560,10 +3561,16 @@ initial_for_parallel() > > limit_size = (get_free_memory_size() > - MAP_REGION * info->num_threads) * 0.6; > + if (limit_size < 0) { > + MSG("Free memory is not enough for multi-threads\n"); > + return FALSE; > + } > > page_data_num = limit_size / page_data_buf_size; > + info->num_buffers = 3 * info->num_threads; > > - info->num_buffers = MIN(NUM_BUFFERS, page_data_num); > + info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); > + info->num_buffers = MIN(info->num_buffers, page_data_num); > > DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", > info->num_buffers); > @@ -3588,6 +3595,36 @@ initial_for_parallel() > } > > /* > + * initial page_flag for each thread > + */ > + if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) > + == NULL) { > + MSG("Can't allocate memory for page_flag_buf. %s\n", > + strerror(errno)); > + return FALSE; > + } > + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); > + > + for (i = 0; i < info->num_threads; i++) { > + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { > + MSG("Can't allocate memory for page_flag. 
%s\n", > + strerror(errno)); > + return FALSE; > + } > + current = info->page_flag_buf[i]; > + > + for (j = 1; j < NUM_BUFFERS; j++) { > + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { > + MSG("Can't allocate memory for page_flag. %s\n", > + strerror(errno)); > + return FALSE; > + } > + current = current->next; > + } > + current->next = info->page_flag_buf[i]; > + } > + > + /* > * initial fd_memory for threads > */ > for (i = 0; i < info->num_threads; i++) { > @@ -3612,7 +3649,8 @@ initial_for_parallel() > void > free_for_parallel() > { > - int i; > + int i, j; > + struct page_flag *current; > > if (info->threads != NULL) { > for (i = 0; i < info->num_threads; i++) { > @@ -3655,6 +3693,19 @@ free_for_parallel() > free(info->page_data_buf); > } > > + if (info->page_flag_buf != NULL) { > + for (i = 0; i < info->num_threads; i++) { > + for (j = 0; j < NUM_BUFFERS; j++) { > + if (info->page_flag_buf[i] != NULL) { > + current = info->page_flag_buf[i]; > + info->page_flag_buf[i] = current->next; > + free(current); > + } > + } > + } > + free(info->page_flag_buf); > + } > + > if (info->parallel_info == NULL) > return; > > @@ -7075,11 +7126,11 @@ void * > kdump_thread_function_cyclic(void *arg) { > void *retval = PTHREAD_FAIL; > struct thread_args *kdump_thread_args = (struct thread_args *)arg; > - struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > + volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; > struct cycle *cycle = kdump_thread_args->cycle; > - int page_data_num = kdump_thread_args->page_data_num; > - mdf_pfn_t pfn; > - int index; > + mdf_pfn_t pfn = cycle->start_pfn; > + int index = kdump_thread_args->thread_num; > int buf_ready; > int dumpable; > int fd_memory = 0; > @@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { > kdump_thread_args->thread_num); > } > > - while (1) { > - /* get next pfn */ > - 
pthread_mutex_lock(&info->current_pfn_mutex); > - pfn = info->current_pfn; > - info->current_pfn++; > - pthread_mutex_unlock(&info->current_pfn_mutex); > - > - if (pfn >= kdump_thread_args->end_pfn) > - break; > - > - index = -1; > + /* > + * filtered page won't take anything > + * unfiltered zero page will only take a page_flag_buf > + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf > + */ > + while (pfn < cycle->end_pfn) { > buf_ready = FALSE; > > + pthread_mutex_lock(&info->page_data_mutex); > + while (page_data_buf[index].used != FALSE) { > + index = (index + 1) % info->num_buffers; > + } > + page_data_buf[index].used = TRUE; > + pthread_mutex_unlock(&info->page_data_mutex); > + > while (buf_ready == FALSE) { > pthread_testcancel(); > - > - index = pfn % page_data_num; > - > - if (pfn - info->consumed_pfn > info->num_buffers) > + if (page_flag_buf->ready == FLAG_READY) > continue; > > - if (page_data_buf[index].ready != 0) > - continue; > - > - pthread_mutex_lock(&page_data_buf[index].mutex); > - > - if (page_data_buf[index].ready != 0) > - goto unlock; > - > - buf_ready = TRUE; > + /* get next dumpable pfn */ > + pthread_mutex_lock(&info->current_pfn_mutex); > + for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { > + dumpable = is_dumpable( > + info->fd_bitmap ? &bitmap_parallel : info->bitmap2, > + pfn, > + cycle); > + if (dumpable) > + break; > + } > + info->current_pfn = pfn + 1; > > - page_data_buf[index].pfn = pfn; > - page_data_buf[index].ready = 1; > + page_flag_buf->pfn = pfn; > + page_flag_buf->ready = FLAG_FILLING; > + pthread_mutex_unlock(&info->current_pfn_mutex); > + sem_post(&info->page_flag_buf_sem); > > - dumpable = is_dumpable( > - info->fd_bitmap ? 
&bitmap_parallel : info->bitmap2, > - pfn, > - cycle); > - page_data_buf[index].dumpable = dumpable; > - if (!dumpable) > - goto unlock; > + if (pfn >= cycle->end_pfn) { > + info->current_pfn = cycle->end_pfn; > + page_data_buf[index].used = FALSE; > + break; > + } > > if (!read_pfn_parallel(fd_memory, pfn, buf, > &bitmap_memory_parallel, > @@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { > > if ((info->dump_level & DL_EXCLUDE_ZERO) > && is_zero_page(buf, info->page_size)) { > - page_data_buf[index].zero = TRUE; > - goto unlock; > + page_flag_buf->zero = TRUE; > + goto next; > } > > - page_data_buf[index].zero = FALSE; > + page_flag_buf->zero = FALSE; > > /* > * Compress the page data. > @@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { > page_data_buf[index].flags = > DUMP_DH_COMPRESSED_LZO; > page_data_buf[index].size = size_out; > + > memcpy(page_data_buf[index].buf, buf_out, size_out); > #endif > #ifdef USESNAPPY > @@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { > page_data_buf[index].size = info->page_size; > memcpy(page_data_buf[index].buf, buf, info->page_size); > } > -unlock: > - pthread_mutex_unlock(&page_data_buf[index].mutex); > + page_flag_buf->index = index; > + buf_ready = TRUE; > +next: > + page_flag_buf->ready = FLAG_READY; > + page_flag_buf = page_flag_buf->next; > > } > } > - > retval = NULL; > > fail: > @@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > struct page_desc pd; > struct timeval tv_start; > struct timeval last, new; > - unsigned long long consuming_pfn; > pthread_t **threads = NULL; > struct thread_args *kdump_thread_args = NULL; > void *thread_result; > - int page_data_num; > + int page_buf_num; > struct page_data *page_data_buf = NULL; > int i; > int index; > + int end_count, consuming, check_count; > + mdf_pfn_t current_pfn, temp_pfn; > > if (info->flag_elf_dumpfile) > return FALSE; > @@ -7284,13 +7340,6 @@ 
write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > goto out; > } > > - res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); > - if (res != 0) { > - ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", > - strerror(res)); > - goto out; > - } > - > res = pthread_mutex_init(&info->filter_mutex, NULL); > if (res != 0) { > ERRMSG("Can't initialize filter_mutex. %s\n", strerror(res)); > @@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > end_pfn = cycle->end_pfn; > > info->current_pfn = start_pfn; > - info->consumed_pfn = start_pfn - 1; > > threads = info->threads; > kdump_thread_args = info->kdump_thread_args; > > - page_data_num = info->num_buffers; > + page_buf_num = info->num_buffers; > page_data_buf = info->page_data_buf; > + pthread_mutex_init(&info->page_data_mutex, NULL); > + sem_init(&info->page_flag_buf_sem, 0, 0); > > - for (i = 0; i < page_data_num; i++) { > - /* > - * producer will use pfn in page_data_buf to decide the > - * consumed pfn > - */ > - page_data_buf[i].pfn = start_pfn - 1; > - page_data_buf[i].ready = 0; > - res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); > - if (res != 0) { > - ERRMSG("Can't initialize mutex of page_data_buf. 
%s\n", > - strerror(res)); > - goto out; > - } > - } > + for (i = 0; i < page_buf_num; i++) > + page_data_buf[i].used = FALSE; > > for (i = 0; i < info->num_threads; i++) { > kdump_thread_args[i].thread_num = i; > kdump_thread_args[i].len_buf_out = len_buf_out; > - kdump_thread_args[i].start_pfn = start_pfn; > - kdump_thread_args[i].end_pfn = end_pfn; > - kdump_thread_args[i].page_data_num = page_data_num; > kdump_thread_args[i].page_data_buf = page_data_buf; > + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; > kdump_thread_args[i].cycle = cycle; > > res = pthread_create(threads[i], NULL, > @@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > } > } > > - consuming_pfn = start_pfn; > - index = -1; > + end_count = 0; > + while (1) { > + consuming = 0; > + check_count = 0; > > - gettimeofday(&last, NULL); > + /* > + * The basic idea is producer producing page and consumer writing page. > + * Each producer have a page_flag_buf list which is used for storing page's description. > + * The size of page_flag_buf is little so it won't take too much memory. > + * And all producers will share a page_data_buf array which is used for storing page's compressed data. > + * The main thread is the consumer. It will find the next pfn and write it into file. > + * The next pfn is smallest pfn in all page_flag_buf. > + */ > + sem_wait(&info->page_flag_buf_sem); > + gettimeofday(&last, NULL); > + while (1) { > + current_pfn = end_pfn; > > - while (consuming_pfn < end_pfn) { > - index = consuming_pfn % page_data_num; > + /* > + * page_flag_buf is in circular linked list. > + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's > + * page_flag_buf list. > + * consuming is used for recording in which thread the pfn is the smallest. > + * current_pfn is used for recording the value of pfn when checking the pfn. 
> + */ > + for (i = 0; i < info->num_threads; i++) { > + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) > + continue; > + temp_pfn = info->page_flag_buf[i]->pfn; > > - gettimeofday(&new, NULL); > - if (new.tv_sec - last.tv_sec > WAIT_TIME) { > - ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); > - goto out; > - } > + /* > + * count how many threads have reached the end. > + */ > + if (temp_pfn >= end_pfn) { > + info->page_flag_buf[i]->ready = FLAG_UNUSED; > + end_count++; > + continue; > + } > > - /* > - * check pfn first without mutex locked to reduce the time > - * trying to lock the mutex > - */ > - if (page_data_buf[index].pfn != consuming_pfn) > - continue; > + if (current_pfn < temp_pfn) > + continue; > > - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) > - continue; > + check_count++; > + consuming = i; > + current_pfn = temp_pfn; > + } > > - /* check whether the found one is ready to be consumed */ > - if (page_data_buf[index].pfn != consuming_pfn || > - page_data_buf[index].ready != 1) { > - goto unlock; > + /* > + * If all the threads have reached the end, we will finish writing. > + */ > + if (end_count >= info->num_threads) > + goto finish; > + > + /* > + * If the page_flag_buf is not ready, the pfn recorded may be changed. > + * So we should recheck. 
> + */ > + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { > + gettimeofday(&new, NULL); > + if (new.tv_sec - last.tv_sec > WAIT_TIME) { > + ERRMSG("Can't get data of pfn.\n"); > + goto out; > + } > + continue; > + } > + > + if (current_pfn == info->page_flag_buf[consuming]->pfn) > + break; > } > > if ((num_dumped % per) == 0) > print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); > > - /* next pfn is found, refresh last here */ > - last = new; > - consuming_pfn++; > - info->consumed_pfn++; > - page_data_buf[index].ready = 0; > - > - if (page_data_buf[index].dumpable == FALSE) > - goto unlock; > - > num_dumped++; > > - if (page_data_buf[index].zero == TRUE) { > + > + if (info->page_flag_buf[consuming]->zero == TRUE) { > if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) > goto out; > pfn_zero++; > } else { > + index = info->page_flag_buf[consuming]->index; > pd.flags = page_data_buf[index].flags; > pd.size = page_data_buf[index].size; > pd.page_flags = 0; > @@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > */ > if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) > goto out; > - > + page_data_buf[index].used = FALSE; > } > -unlock: > - pthread_mutex_unlock(&page_data_buf[index].mutex); > + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; > + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; > } > - > +finish: > ret = TRUE; > /* > * print [100 %] > @@ -7463,15 +7532,9 @@ out: > } > } > > - if (page_data_buf != NULL) { > - for (i = 0; i < page_data_num; i++) { > - pthread_mutex_destroy(&page_data_buf[i].mutex); > - } > - } > - > + sem_destroy(&info->page_flag_buf_sem); > pthread_rwlock_destroy(&info->usemmap_rwlock); > pthread_mutex_destroy(&info->filter_mutex); > - pthread_mutex_destroy(&info->consumed_pfn_mutex); > pthread_mutex_destroy(&info->current_pfn_mutex); > > return ret; > @@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct 
cache_data *cd_pag > num_dumped++; > if (!read_pfn(pfn, buf)) > goto out; > + > filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); > > /* > diff --git a/makedumpfile.h b/makedumpfile.h > index e0b5bbf..4b315c0 100644 > --- a/makedumpfile.h > +++ b/makedumpfile.h > @@ -44,6 +44,7 @@ > #include "print_info.h" > #include "sadump_mod.h" > #include <pthread.h> > +#include <semaphore.h> > > /* > * Result of command > @@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; > #define PAGE_DATA_NUM (50) > #define WAIT_TIME (60 * 10) > #define PTHREAD_FAIL ((void *)-2) > -#define NUM_BUFFERS (50) > +#define NUM_BUFFERS (20) > > struct mmap_cache { > char *mmap_buf; > @@ -985,28 +986,33 @@ struct mmap_cache { > off_t mmap_end_offset; > }; > > +enum { > + FLAG_UNUSED, > + FLAG_READY, > + FLAG_FILLING > +}; > +struct page_flag { > + mdf_pfn_t pfn; > + char zero; > + char ready; > + short index; > + struct page_flag *next; > +}; > + > struct page_data > { > - mdf_pfn_t pfn; > - int dumpable; > - int zero; > - unsigned int flags; > long size; > unsigned char *buf; > - pthread_mutex_t mutex; > - /* > - * whether the page_data is ready to be consumed > - */ > - int ready; > + int flags; > + int used; > }; > > struct thread_args { > int thread_num; > unsigned long len_buf_out; > - mdf_pfn_t start_pfn, end_pfn; > - int page_data_num; > struct cycle *cycle; > struct page_data *page_data_buf; > + struct page_flag *page_flag_buf; > }; > > /* > @@ -1295,11 +1301,12 @@ struct DumpInfo { > pthread_t **threads; > struct thread_args *kdump_thread_args; > struct page_data *page_data_buf; > + struct page_flag **page_flag_buf; > + sem_t page_flag_buf_sem; > pthread_rwlock_t usemmap_rwlock; > mdf_pfn_t current_pfn; > pthread_mutex_t current_pfn_mutex; > - mdf_pfn_t consumed_pfn; > - pthread_mutex_t consumed_pfn_mutex; > + pthread_mutex_t page_data_mutex; > pthread_mutex_t filter_mutex; > }; > extern struct DumpInfo *info; > _______________________________________________ kexec 
mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-11 1:00 ` "Zhou, Wenjian/周文剑" @ 2016-03-11 3:03 ` Minoru Usui 2016-03-11 3:10 ` "Zhou, Wenjian/周文剑" 2016-03-11 5:33 ` Minfei Huang 1 sibling, 1 reply; 33+ messages in thread From: Minoru Usui @ 2016-03-11 3:03 UTC (permalink / raw) To: "Zhou, Wenjian/周文剑", kexec@lists.infradead.org Hi, Zhou > -----Original Message----- > From: kexec [mailto:kexec-bounces@lists.infradead.org] On Behalf Of "Zhou, Wenjian/周文?" > Sent: Friday, March 11, 2016 10:01 AM > To: kexec@lists.infradead.org > Subject: Re: [PATCH v4] Improve the performance of --num-threads -d 31 > > Hello all, > > Do you have any comments ? > > -- > Thanks > Zhou > > On 03/09/2016 08:27 AM, Zhou Wenjian wrote: > > v4: > > 1. fix a bug caused by the logic Thank you for your work. I tested your v4 patch in 5GB dump file over 12000 times, outputted dumpfile did not be broken. And I measured performance of it. I think there is no problem. [-d31] num-thread real vs num-threads 0 ---------------------------------------- 0 11.913 100.0% 1 8.385 70.4% 2 4.849 40.7% 4 3.100 26.0% 8 2.286 19.2% [-d0] num-thread real vs num-threads 0 ---------------------------------------- 0 221.968 100.0% 1 157.433 70.9% 2 86.679 39.1% 4 71.892 32.4% 8 37.034 16.7% Thanks Minoru Usui > > v3: > > 1. remove some unused variables > > 2. fix a bug caused by the wrong logic > > 3. fix a bug caused by optimising > > 4. improve more performance by using Minoru Usui's code > > > > multi-threads implementation will introduce extra cost when handling > > each page. The origin implementation will also do the extra work for > > filtered pages. So there is a big performance degradation in > > --num-threads -d 31. > > The new implementation won't do the extra work for filtered pages any > > more. So the performance of -d 31 is close to that of serial processing. 
> > > > The new implementation is just like the following: > > * The basic idea is producer producing page and consumer writing page. > > * Each producer have a page_flag_buf list which is used for storing > > page's description. > > * The size of page_flag_buf is little so it won't take too much memory. > > * And all producers will share a page_data_buf array which is > > used for storing page's compressed data. > > * The main thread is the consumer. It will find the next pfn and write > > it into file. > > * The next pfn is smallest pfn in all page_flag_buf. > > > > Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> > > Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> > > --- > > makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- > > makedumpfile.h | 35 ++++--- > > 2 files changed, 202 insertions(+), 131 deletions(-) > > > > diff --git a/makedumpfile.c b/makedumpfile.c > > index fa0b779..2b0864a 100644 > > --- a/makedumpfile.c > > +++ b/makedumpfile.c > > @@ -3483,7 +3483,8 @@ initial_for_parallel() > > unsigned long page_data_buf_size; > > unsigned long limit_size; > > int page_data_num; > > - int i; > > + struct page_flag *current; > > + int i, j; > > > > len_buf_out = calculate_len_buf_out(info->page_size); > > > > @@ -3560,10 +3561,16 @@ initial_for_parallel() > > > > limit_size = (get_free_memory_size() > > - MAP_REGION * info->num_threads) * 0.6; > > + if (limit_size < 0) { > > + MSG("Free memory is not enough for multi-threads\n"); > > + return FALSE; > > + } > > > > page_data_num = limit_size / page_data_buf_size; > > + info->num_buffers = 3 * info->num_threads; > > > > - info->num_buffers = MIN(NUM_BUFFERS, page_data_num); > > + info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); > > + info->num_buffers = MIN(info->num_buffers, page_data_num); > > > > DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", > > info->num_buffers); > > @@ -3588,6 +3595,36 @@ initial_for_parallel() > > } > > > > /* > > + * 
initial page_flag for each thread > > + */ > > + if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) > > + == NULL) { > > + MSG("Can't allocate memory for page_flag_buf. %s\n", > > + strerror(errno)); > > + return FALSE; > > + } > > + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); > > + > > + for (i = 0; i < info->num_threads; i++) { > > + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { > > + MSG("Can't allocate memory for page_flag. %s\n", > > + strerror(errno)); > > + return FALSE; > > + } > > + current = info->page_flag_buf[i]; > > + > > + for (j = 1; j < NUM_BUFFERS; j++) { > > + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { > > + MSG("Can't allocate memory for page_flag. %s\n", > > + strerror(errno)); > > + return FALSE; > > + } > > + current = current->next; > > + } > > + current->next = info->page_flag_buf[i]; > > + } > > + > > + /* > > * initial fd_memory for threads > > */ > > for (i = 0; i < info->num_threads; i++) { > > @@ -3612,7 +3649,8 @@ initial_for_parallel() > > void > > free_for_parallel() > > { > > - int i; > > + int i, j; > > + struct page_flag *current; > > > > if (info->threads != NULL) { > > for (i = 0; i < info->num_threads; i++) { > > @@ -3655,6 +3693,19 @@ free_for_parallel() > > free(info->page_data_buf); > > } > > > > + if (info->page_flag_buf != NULL) { > > + for (i = 0; i < info->num_threads; i++) { > > + for (j = 0; j < NUM_BUFFERS; j++) { > > + if (info->page_flag_buf[i] != NULL) { > > + current = info->page_flag_buf[i]; > > + info->page_flag_buf[i] = current->next; > > + free(current); > > + } > > + } > > + } > > + free(info->page_flag_buf); > > + } > > + > > if (info->parallel_info == NULL) > > return; > > > > @@ -7075,11 +7126,11 @@ void * > > kdump_thread_function_cyclic(void *arg) { > > void *retval = PTHREAD_FAIL; > > struct thread_args *kdump_thread_args = (struct thread_args *)arg; > > - struct page_data *page_data_buf = 
kdump_thread_args->page_data_buf; > > + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > > + volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; > > struct cycle *cycle = kdump_thread_args->cycle; > > - int page_data_num = kdump_thread_args->page_data_num; > > - mdf_pfn_t pfn; > > - int index; > > + mdf_pfn_t pfn = cycle->start_pfn; > > + int index = kdump_thread_args->thread_num; > > int buf_ready; > > int dumpable; > > int fd_memory = 0; > > @@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { > > kdump_thread_args->thread_num); > > } > > > > - while (1) { > > - /* get next pfn */ > > - pthread_mutex_lock(&info->current_pfn_mutex); > > - pfn = info->current_pfn; > > - info->current_pfn++; > > - pthread_mutex_unlock(&info->current_pfn_mutex); > > - > > - if (pfn >= kdump_thread_args->end_pfn) > > - break; > > - > > - index = -1; > > + /* > > + * filtered page won't take anything > > + * unfiltered zero page will only take a page_flag_buf > > + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf > > + */ > > + while (pfn < cycle->end_pfn) { > > buf_ready = FALSE; > > > > + pthread_mutex_lock(&info->page_data_mutex); > > + while (page_data_buf[index].used != FALSE) { > > + index = (index + 1) % info->num_buffers; > > + } > > + page_data_buf[index].used = TRUE; > > + pthread_mutex_unlock(&info->page_data_mutex); > > + > > while (buf_ready == FALSE) { > > pthread_testcancel(); > > - > > - index = pfn % page_data_num; > > - > > - if (pfn - info->consumed_pfn > info->num_buffers) > > + if (page_flag_buf->ready == FLAG_READY) > > continue; > > > > - if (page_data_buf[index].ready != 0) > > - continue; > > - > > - pthread_mutex_lock(&page_data_buf[index].mutex); > > - > > - if (page_data_buf[index].ready != 0) > > - goto unlock; > > - > > - buf_ready = TRUE; > > + /* get next dumpable pfn */ > > + pthread_mutex_lock(&info->current_pfn_mutex); > > + for (pfn = info->current_pfn; pfn < 
cycle->end_pfn; pfn++) { > > + dumpable = is_dumpable( > > + info->fd_bitmap ? &bitmap_parallel : info->bitmap2, > > + pfn, > > + cycle); > > + if (dumpable) > > + break; > > + } > > + info->current_pfn = pfn + 1; > > > > - page_data_buf[index].pfn = pfn; > > - page_data_buf[index].ready = 1; > > + page_flag_buf->pfn = pfn; > > + page_flag_buf->ready = FLAG_FILLING; > > + pthread_mutex_unlock(&info->current_pfn_mutex); > > + sem_post(&info->page_flag_buf_sem); > > > > - dumpable = is_dumpable( > > - info->fd_bitmap ? &bitmap_parallel : info->bitmap2, > > - pfn, > > - cycle); > > - page_data_buf[index].dumpable = dumpable; > > - if (!dumpable) > > - goto unlock; > > + if (pfn >= cycle->end_pfn) { > > + info->current_pfn = cycle->end_pfn; > > + page_data_buf[index].used = FALSE; > > + break; > > + } > > > > if (!read_pfn_parallel(fd_memory, pfn, buf, > > &bitmap_memory_parallel, > > @@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { > > > > if ((info->dump_level & DL_EXCLUDE_ZERO) > > && is_zero_page(buf, info->page_size)) { > > - page_data_buf[index].zero = TRUE; > > - goto unlock; > > + page_flag_buf->zero = TRUE; > > + goto next; > > } > > > > - page_data_buf[index].zero = FALSE; > > + page_flag_buf->zero = FALSE; > > > > /* > > * Compress the page data. 
> > @@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { > > page_data_buf[index].flags = > > DUMP_DH_COMPRESSED_LZO; > > page_data_buf[index].size = size_out; > > + > > memcpy(page_data_buf[index].buf, buf_out, size_out); > > #endif > > #ifdef USESNAPPY > > @@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { > > page_data_buf[index].size = info->page_size; > > memcpy(page_data_buf[index].buf, buf, info->page_size); > > } > > -unlock: > > - pthread_mutex_unlock(&page_data_buf[index].mutex); > > + page_flag_buf->index = index; > > + buf_ready = TRUE; > > +next: > > + page_flag_buf->ready = FLAG_READY; > > + page_flag_buf = page_flag_buf->next; > > > > } > > } > > - > > retval = NULL; > > > > fail: > > @@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > struct page_desc pd; > > struct timeval tv_start; > > struct timeval last, new; > > - unsigned long long consuming_pfn; > > pthread_t **threads = NULL; > > struct thread_args *kdump_thread_args = NULL; > > void *thread_result; > > - int page_data_num; > > + int page_buf_num; > > struct page_data *page_data_buf = NULL; > > int i; > > int index; > > + int end_count, consuming, check_count; > > + mdf_pfn_t current_pfn, temp_pfn; > > > > if (info->flag_elf_dumpfile) > > return FALSE; > > @@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > goto out; > > } > > > > - res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); > > - if (res != 0) { > > - ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", > > - strerror(res)); > > - goto out; > > - } > > - > > res = pthread_mutex_init(&info->filter_mutex, NULL); > > if (res != 0) { > > ERRMSG("Can't initialize filter_mutex. 
%s\n", strerror(res)); > > @@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > end_pfn = cycle->end_pfn; > > > > info->current_pfn = start_pfn; > > - info->consumed_pfn = start_pfn - 1; > > > > threads = info->threads; > > kdump_thread_args = info->kdump_thread_args; > > > > - page_data_num = info->num_buffers; > > + page_buf_num = info->num_buffers; > > page_data_buf = info->page_data_buf; > > + pthread_mutex_init(&info->page_data_mutex, NULL); > > + sem_init(&info->page_flag_buf_sem, 0, 0); > > > > - for (i = 0; i < page_data_num; i++) { > > - /* > > - * producer will use pfn in page_data_buf to decide the > > - * consumed pfn > > - */ > > - page_data_buf[i].pfn = start_pfn - 1; > > - page_data_buf[i].ready = 0; > > - res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); > > - if (res != 0) { > > - ERRMSG("Can't initialize mutex of page_data_buf. %s\n", > > - strerror(res)); > > - goto out; > > - } > > - } > > + for (i = 0; i < page_buf_num; i++) > > + page_data_buf[i].used = FALSE; > > > > for (i = 0; i < info->num_threads; i++) { > > kdump_thread_args[i].thread_num = i; > > kdump_thread_args[i].len_buf_out = len_buf_out; > > - kdump_thread_args[i].start_pfn = start_pfn; > > - kdump_thread_args[i].end_pfn = end_pfn; > > - kdump_thread_args[i].page_data_num = page_data_num; > > kdump_thread_args[i].page_data_buf = page_data_buf; > > + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; > > kdump_thread_args[i].cycle = cycle; > > > > res = pthread_create(threads[i], NULL, > > @@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > } > > } > > > > - consuming_pfn = start_pfn; > > - index = -1; > > + end_count = 0; > > + while (1) { > > + consuming = 0; > > + check_count = 0; > > > > - gettimeofday(&last, NULL); > > + /* > > + * The basic idea is producer producing page and consumer writing page. 
> > + * Each producer have a page_flag_buf list which is used for storing page's description. > > + * The size of page_flag_buf is little so it won't take too much memory. > > + * And all producers will share a page_data_buf array which is used for storing page's compressed data. > > + * The main thread is the consumer. It will find the next pfn and write it into file. > > + * The next pfn is smallest pfn in all page_flag_buf. > > + */ > > + sem_wait(&info->page_flag_buf_sem); > > + gettimeofday(&last, NULL); > > + while (1) { > > + current_pfn = end_pfn; > > > > - while (consuming_pfn < end_pfn) { > > - index = consuming_pfn % page_data_num; > > + /* > > + * page_flag_buf is in circular linked list. > > + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's > > + * page_flag_buf list. > > + * consuming is used for recording in which thread the pfn is the smallest. > > + * current_pfn is used for recording the value of pfn when checking the pfn. > > + */ > > + for (i = 0; i < info->num_threads; i++) { > > + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) > > + continue; > > + temp_pfn = info->page_flag_buf[i]->pfn; > > > > - gettimeofday(&new, NULL); > > - if (new.tv_sec - last.tv_sec > WAIT_TIME) { > > - ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); > > - goto out; > > - } > > + /* > > + * count how many threads have reached the end. 
> > + */ > > + if (temp_pfn >= end_pfn) { > > + info->page_flag_buf[i]->ready = FLAG_UNUSED; > > + end_count++; > > + continue; > > + } > > > > - /* > > - * check pfn first without mutex locked to reduce the time > > - * trying to lock the mutex > > - */ > > - if (page_data_buf[index].pfn != consuming_pfn) > > - continue; > > + if (current_pfn < temp_pfn) > > + continue; > > > > - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) > > - continue; > > + check_count++; > > + consuming = i; > > + current_pfn = temp_pfn; > > + } > > > > - /* check whether the found one is ready to be consumed */ > > - if (page_data_buf[index].pfn != consuming_pfn || > > - page_data_buf[index].ready != 1) { > > - goto unlock; > > + /* > > + * If all the threads have reached the end, we will finish writing. > > + */ > > + if (end_count >= info->num_threads) > > + goto finish; > > + > > + /* > > + * If the page_flag_buf is not ready, the pfn recorded may be changed. > > + * So we should recheck. > > + */ > > + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { > > + gettimeofday(&new, NULL); > > + if (new.tv_sec - last.tv_sec > WAIT_TIME) { > > + ERRMSG("Can't get data of pfn.\n"); > > + goto out; > > + } > > + continue; > > + } > > + > > + if (current_pfn == info->page_flag_buf[consuming]->pfn) > > + break; > > } > > > > if ((num_dumped % per) == 0) > > print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); > > > > - /* next pfn is found, refresh last here */ > > - last = new; > > - consuming_pfn++; > > - info->consumed_pfn++; > > - page_data_buf[index].ready = 0; > > - > > - if (page_data_buf[index].dumpable == FALSE) > > - goto unlock; > > - > > num_dumped++; > > > > - if (page_data_buf[index].zero == TRUE) { > > + > > + if (info->page_flag_buf[consuming]->zero == TRUE) { > > if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) > > goto out; > > pfn_zero++; > > } else { > > + index = info->page_flag_buf[consuming]->index; > > pd.flags = 
page_data_buf[index].flags; > > pd.size = page_data_buf[index].size; > > pd.page_flags = 0; > > @@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > */ > > if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) > > goto out; > > - > > + page_data_buf[index].used = FALSE; > > } > > -unlock: > > - pthread_mutex_unlock(&page_data_buf[index].mutex); > > + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; > > + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; > > } > > - > > +finish: > > ret = TRUE; > > /* > > * print [100 %] > > @@ -7463,15 +7532,9 @@ out: > > } > > } > > > > - if (page_data_buf != NULL) { > > - for (i = 0; i < page_data_num; i++) { > > - pthread_mutex_destroy(&page_data_buf[i].mutex); > > - } > > - } > > - > > + sem_destroy(&info->page_flag_buf_sem); > > pthread_rwlock_destroy(&info->usemmap_rwlock); > > pthread_mutex_destroy(&info->filter_mutex); > > - pthread_mutex_destroy(&info->consumed_pfn_mutex); > > pthread_mutex_destroy(&info->current_pfn_mutex); > > > > return ret; > > @@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag > > num_dumped++; > > if (!read_pfn(pfn, buf)) > > goto out; > > + > > filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); > > > > /* > > diff --git a/makedumpfile.h b/makedumpfile.h > > index e0b5bbf..4b315c0 100644 > > --- a/makedumpfile.h > > +++ b/makedumpfile.h > > @@ -44,6 +44,7 @@ > > #include "print_info.h" > > #include "sadump_mod.h" > > #include <pthread.h> > > +#include <semaphore.h> > > > > /* > > * Result of command > > @@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; > > #define PAGE_DATA_NUM (50) > > #define WAIT_TIME (60 * 10) > > #define PTHREAD_FAIL ((void *)-2) > > -#define NUM_BUFFERS (50) > > +#define NUM_BUFFERS (20) > > > > struct mmap_cache { > > char *mmap_buf; > > @@ -985,28 +986,33 @@ struct mmap_cache { > > off_t mmap_end_offset; > > }; > > > > +enum { > > + 
FLAG_UNUSED, > > + FLAG_READY, > > + FLAG_FILLING > > +}; > > +struct page_flag { > > + mdf_pfn_t pfn; > > + char zero; > > + char ready; > > + short index; > > + struct page_flag *next; > > +}; > > + > > struct page_data > > { > > - mdf_pfn_t pfn; > > - int dumpable; > > - int zero; > > - unsigned int flags; > > long size; > > unsigned char *buf; > > - pthread_mutex_t mutex; > > - /* > > - * whether the page_data is ready to be consumed > > - */ > > - int ready; > > + int flags; > > + int used; > > }; > > > > struct thread_args { > > int thread_num; > > unsigned long len_buf_out; > > - mdf_pfn_t start_pfn, end_pfn; > > - int page_data_num; > > struct cycle *cycle; > > struct page_data *page_data_buf; > > + struct page_flag *page_flag_buf; > > }; > > > > /* > > @@ -1295,11 +1301,12 @@ struct DumpInfo { > > pthread_t **threads; > > struct thread_args *kdump_thread_args; > > struct page_data *page_data_buf; > > + struct page_flag **page_flag_buf; > > + sem_t page_flag_buf_sem; > > pthread_rwlock_t usemmap_rwlock; > > mdf_pfn_t current_pfn; > > pthread_mutex_t current_pfn_mutex; > > - mdf_pfn_t consumed_pfn; > > - pthread_mutex_t consumed_pfn_mutex; > > + pthread_mutex_t page_data_mutex; > > pthread_mutex_t filter_mutex; > > }; > > extern struct DumpInfo *info; > > > > > > _______________________________________________ > kexec mailing list > kexec@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/kexec _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-11 3:03 ` Minoru Usui @ 2016-03-11 3:10 ` "Zhou, Wenjian/周文剑" 2016-03-11 4:55 ` Atsushi Kumagai 0 siblings, 1 reply; 33+ messages in thread From: "Zhou, Wenjian/周文剑" @ 2016-03-11 3:10 UTC (permalink / raw) To: Minoru Usui, kexec@lists.infradead.org Hi Minoru, Thanks for testing it. -- Thanks Zhou On 03/11/2016 11:03 AM, Minoru Usui wrote: > Hi, Zhou > >> -----Original Message----- >> From: kexec [mailto:kexec-bounces@lists.infradead.org] On Behalf Of "Zhou, Wenjian/周文?" >> Sent: Friday, March 11, 2016 10:01 AM >> To: kexec@lists.infradead.org >> Subject: Re: [PATCH v4] Improve the performance of --num-threads -d 31 >> >> Hello all, >> >> Do you have any comments ? >> >> -- >> Thanks >> Zhou >> >> On 03/09/2016 08:27 AM, Zhou Wenjian wrote: >>> v4: >>> 1. fix a bug caused by the logic > > Thank you for your work. > > I tested your v4 patch in 5GB dump file over 12000 times, > outputted dumpfile did not be broken. > > And I measured performance of it. > I think there is no problem. > > [-d31] > num-thread real vs num-threads 0 > ---------------------------------------- > 0 11.913 100.0% > 1 8.385 70.4% > 2 4.849 40.7% > 4 3.100 26.0% > 8 2.286 19.2% > > [-d0] > num-thread real vs num-threads 0 > ---------------------------------------- > 0 221.968 100.0% > 1 157.433 70.9% > 2 86.679 39.1% > 4 71.892 32.4% > 8 37.034 16.7% > > Thanks > Minoru Usui > >>> v3: >>> 1. remove some unused variables >>> 2. fix a bug caused by the wrong logic >>> 3. fix a bug caused by optimising >>> 4. improve more performance by using Minoru Usui's code >>> >>> multi-threads implementation will introduce extra cost when handling >>> each page. The origin implementation will also do the extra work for >>> filtered pages. So there is a big performance degradation in >>> --num-threads -d 31. >>> The new implementation won't do the extra work for filtered pages any >>> more. 
So the performance of -d 31 is close to that of serial processing. >>> >>> The new implementation is just like the following: >>> * The basic idea is producer producing page and consumer writing page. >>> * Each producer have a page_flag_buf list which is used for storing >>> page's description. >>> * The size of page_flag_buf is little so it won't take too much memory. >>> * And all producers will share a page_data_buf array which is >>> used for storing page's compressed data. >>> * The main thread is the consumer. It will find the next pfn and write >>> it into file. >>> * The next pfn is smallest pfn in all page_flag_buf. >>> >>> Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> >>> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> >>> --- >>> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- >>> makedumpfile.h | 35 ++++--- >>> 2 files changed, 202 insertions(+), 131 deletions(-) >>> >>> diff --git a/makedumpfile.c b/makedumpfile.c >>> index fa0b779..2b0864a 100644 >>> --- a/makedumpfile.c >>> +++ b/makedumpfile.c >>> @@ -3483,7 +3483,8 @@ initial_for_parallel() >>> unsigned long page_data_buf_size; >>> unsigned long limit_size; >>> int page_data_num; >>> - int i; >>> + struct page_flag *current; >>> + int i, j; >>> >>> len_buf_out = calculate_len_buf_out(info->page_size); >>> >>> @@ -3560,10 +3561,16 @@ initial_for_parallel() >>> >>> limit_size = (get_free_memory_size() >>> - MAP_REGION * info->num_threads) * 0.6; >>> + if (limit_size < 0) { >>> + MSG("Free memory is not enough for multi-threads\n"); >>> + return FALSE; >>> + } >>> >>> page_data_num = limit_size / page_data_buf_size; >>> + info->num_buffers = 3 * info->num_threads; >>> >>> - info->num_buffers = MIN(NUM_BUFFERS, page_data_num); >>> + info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); >>> + info->num_buffers = MIN(info->num_buffers, page_data_num); >>> >>> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", >>> info->num_buffers); >>> @@ 
-3588,6 +3595,36 @@ initial_for_parallel() >>> } >>> >>> /* >>> + * initial page_flag for each thread >>> + */ >>> + if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) >>> + == NULL) { >>> + MSG("Can't allocate memory for page_flag_buf. %s\n", >>> + strerror(errno)); >>> + return FALSE; >>> + } >>> + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); >>> + >>> + for (i = 0; i < info->num_threads; i++) { >>> + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { >>> + MSG("Can't allocate memory for page_flag. %s\n", >>> + strerror(errno)); >>> + return FALSE; >>> + } >>> + current = info->page_flag_buf[i]; >>> + >>> + for (j = 1; j < NUM_BUFFERS; j++) { >>> + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { >>> + MSG("Can't allocate memory for page_flag. %s\n", >>> + strerror(errno)); >>> + return FALSE; >>> + } >>> + current = current->next; >>> + } >>> + current->next = info->page_flag_buf[i]; >>> + } >>> + >>> + /* >>> * initial fd_memory for threads >>> */ >>> for (i = 0; i < info->num_threads; i++) { >>> @@ -3612,7 +3649,8 @@ initial_for_parallel() >>> void >>> free_for_parallel() >>> { >>> - int i; >>> + int i, j; >>> + struct page_flag *current; >>> >>> if (info->threads != NULL) { >>> for (i = 0; i < info->num_threads; i++) { >>> @@ -3655,6 +3693,19 @@ free_for_parallel() >>> free(info->page_data_buf); >>> } >>> >>> + if (info->page_flag_buf != NULL) { >>> + for (i = 0; i < info->num_threads; i++) { >>> + for (j = 0; j < NUM_BUFFERS; j++) { >>> + if (info->page_flag_buf[i] != NULL) { >>> + current = info->page_flag_buf[i]; >>> + info->page_flag_buf[i] = current->next; >>> + free(current); >>> + } >>> + } >>> + } >>> + free(info->page_flag_buf); >>> + } >>> + >>> if (info->parallel_info == NULL) >>> return; >>> >>> @@ -7075,11 +7126,11 @@ void * >>> kdump_thread_function_cyclic(void *arg) { >>> void *retval = PTHREAD_FAIL; >>> struct thread_args *kdump_thread_args = (struct 
thread_args *)arg; >>> - struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>> + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>> + volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; >>> struct cycle *cycle = kdump_thread_args->cycle; >>> - int page_data_num = kdump_thread_args->page_data_num; >>> - mdf_pfn_t pfn; >>> - int index; >>> + mdf_pfn_t pfn = cycle->start_pfn; >>> + int index = kdump_thread_args->thread_num; >>> int buf_ready; >>> int dumpable; >>> int fd_memory = 0; >>> @@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { >>> kdump_thread_args->thread_num); >>> } >>> >>> - while (1) { >>> - /* get next pfn */ >>> - pthread_mutex_lock(&info->current_pfn_mutex); >>> - pfn = info->current_pfn; >>> - info->current_pfn++; >>> - pthread_mutex_unlock(&info->current_pfn_mutex); >>> - >>> - if (pfn >= kdump_thread_args->end_pfn) >>> - break; >>> - >>> - index = -1; >>> + /* >>> + * filtered page won't take anything >>> + * unfiltered zero page will only take a page_flag_buf >>> + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf >>> + */ >>> + while (pfn < cycle->end_pfn) { >>> buf_ready = FALSE; >>> >>> + pthread_mutex_lock(&info->page_data_mutex); >>> + while (page_data_buf[index].used != FALSE) { >>> + index = (index + 1) % info->num_buffers; >>> + } >>> + page_data_buf[index].used = TRUE; >>> + pthread_mutex_unlock(&info->page_data_mutex); >>> + >>> while (buf_ready == FALSE) { >>> pthread_testcancel(); >>> - >>> - index = pfn % page_data_num; >>> - >>> - if (pfn - info->consumed_pfn > info->num_buffers) >>> + if (page_flag_buf->ready == FLAG_READY) >>> continue; >>> >>> - if (page_data_buf[index].ready != 0) >>> - continue; >>> - >>> - pthread_mutex_lock(&page_data_buf[index].mutex); >>> - >>> - if (page_data_buf[index].ready != 0) >>> - goto unlock; >>> - >>> - buf_ready = TRUE; >>> + /* get next dumpable pfn */ >>> + 
pthread_mutex_lock(&info->current_pfn_mutex); >>> + for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { >>> + dumpable = is_dumpable( >>> + info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >>> + pfn, >>> + cycle); >>> + if (dumpable) >>> + break; >>> + } >>> + info->current_pfn = pfn + 1; >>> >>> - page_data_buf[index].pfn = pfn; >>> - page_data_buf[index].ready = 1; >>> + page_flag_buf->pfn = pfn; >>> + page_flag_buf->ready = FLAG_FILLING; >>> + pthread_mutex_unlock(&info->current_pfn_mutex); >>> + sem_post(&info->page_flag_buf_sem); >>> >>> - dumpable = is_dumpable( >>> - info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >>> - pfn, >>> - cycle); >>> - page_data_buf[index].dumpable = dumpable; >>> - if (!dumpable) >>> - goto unlock; >>> + if (pfn >= cycle->end_pfn) { >>> + info->current_pfn = cycle->end_pfn; >>> + page_data_buf[index].used = FALSE; >>> + break; >>> + } >>> >>> if (!read_pfn_parallel(fd_memory, pfn, buf, >>> &bitmap_memory_parallel, >>> @@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { >>> >>> if ((info->dump_level & DL_EXCLUDE_ZERO) >>> && is_zero_page(buf, info->page_size)) { >>> - page_data_buf[index].zero = TRUE; >>> - goto unlock; >>> + page_flag_buf->zero = TRUE; >>> + goto next; >>> } >>> >>> - page_data_buf[index].zero = FALSE; >>> + page_flag_buf->zero = FALSE; >>> >>> /* >>> * Compress the page data. 
>>> @@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { >>> page_data_buf[index].flags = >>> DUMP_DH_COMPRESSED_LZO; >>> page_data_buf[index].size = size_out; >>> + >>> memcpy(page_data_buf[index].buf, buf_out, size_out); >>> #endif >>> #ifdef USESNAPPY >>> @@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { >>> page_data_buf[index].size = info->page_size; >>> memcpy(page_data_buf[index].buf, buf, info->page_size); >>> } >>> -unlock: >>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>> + page_flag_buf->index = index; >>> + buf_ready = TRUE; >>> +next: >>> + page_flag_buf->ready = FLAG_READY; >>> + page_flag_buf = page_flag_buf->next; >>> >>> } >>> } >>> - >>> retval = NULL; >>> >>> fail: >>> @@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>> struct page_desc pd; >>> struct timeval tv_start; >>> struct timeval last, new; >>> - unsigned long long consuming_pfn; >>> pthread_t **threads = NULL; >>> struct thread_args *kdump_thread_args = NULL; >>> void *thread_result; >>> - int page_data_num; >>> + int page_buf_num; >>> struct page_data *page_data_buf = NULL; >>> int i; >>> int index; >>> + int end_count, consuming, check_count; >>> + mdf_pfn_t current_pfn, temp_pfn; >>> >>> if (info->flag_elf_dumpfile) >>> return FALSE; >>> @@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>> goto out; >>> } >>> >>> - res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); >>> - if (res != 0) { >>> - ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", >>> - strerror(res)); >>> - goto out; >>> - } >>> - >>> res = pthread_mutex_init(&info->filter_mutex, NULL); >>> if (res != 0) { >>> ERRMSG("Can't initialize filter_mutex. 
%s\n", strerror(res)); >>> @@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>> end_pfn = cycle->end_pfn; >>> >>> info->current_pfn = start_pfn; >>> - info->consumed_pfn = start_pfn - 1; >>> >>> threads = info->threads; >>> kdump_thread_args = info->kdump_thread_args; >>> >>> - page_data_num = info->num_buffers; >>> + page_buf_num = info->num_buffers; >>> page_data_buf = info->page_data_buf; >>> + pthread_mutex_init(&info->page_data_mutex, NULL); >>> + sem_init(&info->page_flag_buf_sem, 0, 0); >>> >>> - for (i = 0; i < page_data_num; i++) { >>> - /* >>> - * producer will use pfn in page_data_buf to decide the >>> - * consumed pfn >>> - */ >>> - page_data_buf[i].pfn = start_pfn - 1; >>> - page_data_buf[i].ready = 0; >>> - res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); >>> - if (res != 0) { >>> - ERRMSG("Can't initialize mutex of page_data_buf. %s\n", >>> - strerror(res)); >>> - goto out; >>> - } >>> - } >>> + for (i = 0; i < page_buf_num; i++) >>> + page_data_buf[i].used = FALSE; >>> >>> for (i = 0; i < info->num_threads; i++) { >>> kdump_thread_args[i].thread_num = i; >>> kdump_thread_args[i].len_buf_out = len_buf_out; >>> - kdump_thread_args[i].start_pfn = start_pfn; >>> - kdump_thread_args[i].end_pfn = end_pfn; >>> - kdump_thread_args[i].page_data_num = page_data_num; >>> kdump_thread_args[i].page_data_buf = page_data_buf; >>> + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; >>> kdump_thread_args[i].cycle = cycle; >>> >>> res = pthread_create(threads[i], NULL, >>> @@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>> } >>> } >>> >>> - consuming_pfn = start_pfn; >>> - index = -1; >>> + end_count = 0; >>> + while (1) { >>> + consuming = 0; >>> + check_count = 0; >>> >>> - gettimeofday(&last, NULL); >>> + /* >>> + * The basic idea is producer producing page and consumer writing page. 
>>> + * Each producer have a page_flag_buf list which is used for storing page's description. >>> + * The size of page_flag_buf is little so it won't take too much memory. >>> + * And all producers will share a page_data_buf array which is used for storing page's compressed data. >>> + * The main thread is the consumer. It will find the next pfn and write it into file. >>> + * The next pfn is smallest pfn in all page_flag_buf. >>> + */ >>> + sem_wait(&info->page_flag_buf_sem); >>> + gettimeofday(&last, NULL); >>> + while (1) { >>> + current_pfn = end_pfn; >>> >>> - while (consuming_pfn < end_pfn) { >>> - index = consuming_pfn % page_data_num; >>> + /* >>> + * page_flag_buf is in circular linked list. >>> + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's >>> + * page_flag_buf list. >>> + * consuming is used for recording in which thread the pfn is the smallest. >>> + * current_pfn is used for recording the value of pfn when checking the pfn. >>> + */ >>> + for (i = 0; i < info->num_threads; i++) { >>> + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) >>> + continue; >>> + temp_pfn = info->page_flag_buf[i]->pfn; >>> >>> - gettimeofday(&new, NULL); >>> - if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>> - ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); >>> - goto out; >>> - } >>> + /* >>> + * count how many threads have reached the end. 
>>> + */ >>> + if (temp_pfn >= end_pfn) { >>> + info->page_flag_buf[i]->ready = FLAG_UNUSED; >>> + end_count++; >>> + continue; >>> + } >>> >>> - /* >>> - * check pfn first without mutex locked to reduce the time >>> - * trying to lock the mutex >>> - */ >>> - if (page_data_buf[index].pfn != consuming_pfn) >>> - continue; >>> + if (current_pfn < temp_pfn) >>> + continue; >>> >>> - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) >>> - continue; >>> + check_count++; >>> + consuming = i; >>> + current_pfn = temp_pfn; >>> + } >>> >>> - /* check whether the found one is ready to be consumed */ >>> - if (page_data_buf[index].pfn != consuming_pfn || >>> - page_data_buf[index].ready != 1) { >>> - goto unlock; >>> + /* >>> + * If all the threads have reached the end, we will finish writing. >>> + */ >>> + if (end_count >= info->num_threads) >>> + goto finish; >>> + >>> + /* >>> + * If the page_flag_buf is not ready, the pfn recorded may be changed. >>> + * So we should recheck. >>> + */ >>> + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { >>> + gettimeofday(&new, NULL); >>> + if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>> + ERRMSG("Can't get data of pfn.\n"); >>> + goto out; >>> + } >>> + continue; >>> + } >>> + >>> + if (current_pfn == info->page_flag_buf[consuming]->pfn) >>> + break; >>> } >>> >>> if ((num_dumped % per) == 0) >>> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); >>> >>> - /* next pfn is found, refresh last here */ >>> - last = new; >>> - consuming_pfn++; >>> - info->consumed_pfn++; >>> - page_data_buf[index].ready = 0; >>> - >>> - if (page_data_buf[index].dumpable == FALSE) >>> - goto unlock; >>> - >>> num_dumped++; >>> >>> - if (page_data_buf[index].zero == TRUE) { >>> + >>> + if (info->page_flag_buf[consuming]->zero == TRUE) { >>> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) >>> goto out; >>> pfn_zero++; >>> } else { >>> + index = info->page_flag_buf[consuming]->index; >>> pd.flags = 
page_data_buf[index].flags; >>> pd.size = page_data_buf[index].size; >>> pd.page_flags = 0; >>> @@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>> */ >>> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) >>> goto out; >>> - >>> + page_data_buf[index].used = FALSE; >>> } >>> -unlock: >>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>> + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; >>> + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; >>> } >>> - >>> +finish: >>> ret = TRUE; >>> /* >>> * print [100 %] >>> @@ -7463,15 +7532,9 @@ out: >>> } >>> } >>> >>> - if (page_data_buf != NULL) { >>> - for (i = 0; i < page_data_num; i++) { >>> - pthread_mutex_destroy(&page_data_buf[i].mutex); >>> - } >>> - } >>> - >>> + sem_destroy(&info->page_flag_buf_sem); >>> pthread_rwlock_destroy(&info->usemmap_rwlock); >>> pthread_mutex_destroy(&info->filter_mutex); >>> - pthread_mutex_destroy(&info->consumed_pfn_mutex); >>> pthread_mutex_destroy(&info->current_pfn_mutex); >>> >>> return ret; >>> @@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag >>> num_dumped++; >>> if (!read_pfn(pfn, buf)) >>> goto out; >>> + >>> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); >>> >>> /* >>> diff --git a/makedumpfile.h b/makedumpfile.h >>> index e0b5bbf..4b315c0 100644 >>> --- a/makedumpfile.h >>> +++ b/makedumpfile.h >>> @@ -44,6 +44,7 @@ >>> #include "print_info.h" >>> #include "sadump_mod.h" >>> #include <pthread.h> >>> +#include <semaphore.h> >>> >>> /* >>> * Result of command >>> @@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; >>> #define PAGE_DATA_NUM (50) >>> #define WAIT_TIME (60 * 10) >>> #define PTHREAD_FAIL ((void *)-2) >>> -#define NUM_BUFFERS (50) >>> +#define NUM_BUFFERS (20) >>> >>> struct mmap_cache { >>> char *mmap_buf; >>> @@ -985,28 +986,33 @@ struct mmap_cache { >>> off_t mmap_end_offset; >>> }; >>> >>> +enum { >>> + 
FLAG_UNUSED, >>> + FLAG_READY, >>> + FLAG_FILLING >>> +}; >>> +struct page_flag { >>> + mdf_pfn_t pfn; >>> + char zero; >>> + char ready; >>> + short index; >>> + struct page_flag *next; >>> +}; >>> + >>> struct page_data >>> { >>> - mdf_pfn_t pfn; >>> - int dumpable; >>> - int zero; >>> - unsigned int flags; >>> long size; >>> unsigned char *buf; >>> - pthread_mutex_t mutex; >>> - /* >>> - * whether the page_data is ready to be consumed >>> - */ >>> - int ready; >>> + int flags; >>> + int used; >>> }; >>> >>> struct thread_args { >>> int thread_num; >>> unsigned long len_buf_out; >>> - mdf_pfn_t start_pfn, end_pfn; >>> - int page_data_num; >>> struct cycle *cycle; >>> struct page_data *page_data_buf; >>> + struct page_flag *page_flag_buf; >>> }; >>> >>> /* >>> @@ -1295,11 +1301,12 @@ struct DumpInfo { >>> pthread_t **threads; >>> struct thread_args *kdump_thread_args; >>> struct page_data *page_data_buf; >>> + struct page_flag **page_flag_buf; >>> + sem_t page_flag_buf_sem; >>> pthread_rwlock_t usemmap_rwlock; >>> mdf_pfn_t current_pfn; >>> pthread_mutex_t current_pfn_mutex; >>> - mdf_pfn_t consumed_pfn; >>> - pthread_mutex_t consumed_pfn_mutex; >>> + pthread_mutex_t page_data_mutex; >>> pthread_mutex_t filter_mutex; >>> }; >>> extern struct DumpInfo *info; >>> >> >> >> >> _______________________________________________ >> kexec mailing list >> kexec@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/kexec _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-11 3:10 ` "Zhou, Wenjian/周文剑" @ 2016-03-11 4:55 ` Atsushi Kumagai 0 siblings, 0 replies; 33+ messages in thread From: Atsushi Kumagai @ 2016-03-11 4:55 UTC (permalink / raw) To: "Zhou, Wenjian/周文剑" Cc: kexec@lists.infradead.org Hello, Thank you guys for all your efforts. I'll merge the v4 patch into v1.6.0. Regards, Atsushi Kumagai >Hi Minoru, > >Thanks for testing it. > >-- >Thanks >Zhou > >On 03/11/2016 11:03 AM, Minoru Usui wrote: >> Hi, Zhou >> >>> -----Original Message----- >>> From: kexec [mailto:kexec-bounces@lists.infradead.org] On Behalf Of "Zhou, Wenjian/周文?" >>> Sent: Friday, March 11, 2016 10:01 AM >>> To: kexec@lists.infradead.org >>> Subject: Re: [PATCH v4] Improve the performance of --num-threads -d 31 >>> >>> Hello all, >>> >>> Do you have any comments ? >>> >>> -- >>> Thanks >>> Zhou >>> >>> On 03/09/2016 08:27 AM, Zhou Wenjian wrote: >>>> v4: >>>> 1. fix a bug caused by the logic >> >> Thank you for your work. >> >> I tested your v4 patch in 5GB dump file over 12000 times, >> outputted dumpfile did not be broken. >> >> And I measured performance of it. >> I think there is no problem. >> >> [-d31] >> num-thread real vs num-threads 0 >> ---------------------------------------- >> 0 11.913 100.0% >> 1 8.385 70.4% >> 2 4.849 40.7% >> 4 3.100 26.0% >> 8 2.286 19.2% >> >> [-d0] >> num-thread real vs num-threads 0 >> ---------------------------------------- >> 0 221.968 100.0% >> 1 157.433 70.9% >> 2 86.679 39.1% >> 4 71.892 32.4% >> 8 37.034 16.7% >> >> Thanks >> Minoru Usui >> >>>> v3: >>>> 1. remove some unused variables >>>> 2. fix a bug caused by the wrong logic >>>> 3. fix a bug caused by optimising >>>> 4. improve more performance by using Minoru Usui's code >>>> >>>> multi-threads implementation will introduce extra cost when handling >>>> each page. The origin implementation will also do the extra work for >>>> filtered pages. 
So there is a big performance degradation in >>>> --num-threads -d 31. >>>> The new implementation won't do the extra work for filtered pages any >>>> more. So the performance of -d 31 is close to that of serial processing. >>>> >>>> The new implementation is just like the following: >>>> * The basic idea is producer producing page and consumer writing page. >>>> * Each producer have a page_flag_buf list which is used for storing >>>> page's description. >>>> * The size of page_flag_buf is little so it won't take too much memory. >>>> * And all producers will share a page_data_buf array which is >>>> used for storing page's compressed data. >>>> * The main thread is the consumer. It will find the next pfn and write >>>> it into file. >>>> * The next pfn is smallest pfn in all page_flag_buf. >>>> >>>> Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> >>>> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> >>>> --- >>>> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- >>>> makedumpfile.h | 35 ++++--- >>>> 2 files changed, 202 insertions(+), 131 deletions(-) >>>> >>>> diff --git a/makedumpfile.c b/makedumpfile.c >>>> index fa0b779..2b0864a 100644 >>>> --- a/makedumpfile.c >>>> +++ b/makedumpfile.c >>>> @@ -3483,7 +3483,8 @@ initial_for_parallel() >>>> unsigned long page_data_buf_size; >>>> unsigned long limit_size; >>>> int page_data_num; >>>> - int i; >>>> + struct page_flag *current; >>>> + int i, j; >>>> >>>> len_buf_out = calculate_len_buf_out(info->page_size); >>>> >>>> @@ -3560,10 +3561,16 @@ initial_for_parallel() >>>> >>>> limit_size = (get_free_memory_size() >>>> - MAP_REGION * info->num_threads) * 0.6; >>>> + if (limit_size < 0) { >>>> + MSG("Free memory is not enough for multi-threads\n"); >>>> + return FALSE; >>>> + } >>>> >>>> page_data_num = limit_size / page_data_buf_size; >>>> + info->num_buffers = 3 * info->num_threads; >>>> >>>> - info->num_buffers = MIN(NUM_BUFFERS, page_data_num); >>>> + info->num_buffers = 
MAX(info->num_buffers, NUM_BUFFERS); >>>> + info->num_buffers = MIN(info->num_buffers, page_data_num); >>>> >>>> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", >>>> info->num_buffers); >>>> @@ -3588,6 +3595,36 @@ initial_for_parallel() >>>> } >>>> >>>> /* >>>> + * initial page_flag for each thread >>>> + */ >>>> + if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) >>>> + == NULL) { >>>> + MSG("Can't allocate memory for page_flag_buf. %s\n", >>>> + strerror(errno)); >>>> + return FALSE; >>>> + } >>>> + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); >>>> + >>>> + for (i = 0; i < info->num_threads; i++) { >>>> + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { >>>> + MSG("Can't allocate memory for page_flag. %s\n", >>>> + strerror(errno)); >>>> + return FALSE; >>>> + } >>>> + current = info->page_flag_buf[i]; >>>> + >>>> + for (j = 1; j < NUM_BUFFERS; j++) { >>>> + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { >>>> + MSG("Can't allocate memory for page_flag. 
%s\n", >>>> + strerror(errno)); >>>> + return FALSE; >>>> + } >>>> + current = current->next; >>>> + } >>>> + current->next = info->page_flag_buf[i]; >>>> + } >>>> + >>>> + /* >>>> * initial fd_memory for threads >>>> */ >>>> for (i = 0; i < info->num_threads; i++) { >>>> @@ -3612,7 +3649,8 @@ initial_for_parallel() >>>> void >>>> free_for_parallel() >>>> { >>>> - int i; >>>> + int i, j; >>>> + struct page_flag *current; >>>> >>>> if (info->threads != NULL) { >>>> for (i = 0; i < info->num_threads; i++) { >>>> @@ -3655,6 +3693,19 @@ free_for_parallel() >>>> free(info->page_data_buf); >>>> } >>>> >>>> + if (info->page_flag_buf != NULL) { >>>> + for (i = 0; i < info->num_threads; i++) { >>>> + for (j = 0; j < NUM_BUFFERS; j++) { >>>> + if (info->page_flag_buf[i] != NULL) { >>>> + current = info->page_flag_buf[i]; >>>> + info->page_flag_buf[i] = current->next; >>>> + free(current); >>>> + } >>>> + } >>>> + } >>>> + free(info->page_flag_buf); >>>> + } >>>> + >>>> if (info->parallel_info == NULL) >>>> return; >>>> >>>> @@ -7075,11 +7126,11 @@ void * >>>> kdump_thread_function_cyclic(void *arg) { >>>> void *retval = PTHREAD_FAIL; >>>> struct thread_args *kdump_thread_args = (struct thread_args *)arg; >>>> - struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>>> + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>>> + volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; >>>> struct cycle *cycle = kdump_thread_args->cycle; >>>> - int page_data_num = kdump_thread_args->page_data_num; >>>> - mdf_pfn_t pfn; >>>> - int index; >>>> + mdf_pfn_t pfn = cycle->start_pfn; >>>> + int index = kdump_thread_args->thread_num; >>>> int buf_ready; >>>> int dumpable; >>>> int fd_memory = 0; >>>> @@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { >>>> kdump_thread_args->thread_num); >>>> } >>>> >>>> - while (1) { >>>> - /* get next pfn */ >>>> - pthread_mutex_lock(&info->current_pfn_mutex); >>>> - pfn = 
info->current_pfn; >>>> - info->current_pfn++; >>>> - pthread_mutex_unlock(&info->current_pfn_mutex); >>>> - >>>> - if (pfn >= kdump_thread_args->end_pfn) >>>> - break; >>>> - >>>> - index = -1; >>>> + /* >>>> + * filtered page won't take anything >>>> + * unfiltered zero page will only take a page_flag_buf >>>> + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf >>>> + */ >>>> + while (pfn < cycle->end_pfn) { >>>> buf_ready = FALSE; >>>> >>>> + pthread_mutex_lock(&info->page_data_mutex); >>>> + while (page_data_buf[index].used != FALSE) { >>>> + index = (index + 1) % info->num_buffers; >>>> + } >>>> + page_data_buf[index].used = TRUE; >>>> + pthread_mutex_unlock(&info->page_data_mutex); >>>> + >>>> while (buf_ready == FALSE) { >>>> pthread_testcancel(); >>>> - >>>> - index = pfn % page_data_num; >>>> - >>>> - if (pfn - info->consumed_pfn > info->num_buffers) >>>> + if (page_flag_buf->ready == FLAG_READY) >>>> continue; >>>> >>>> - if (page_data_buf[index].ready != 0) >>>> - continue; >>>> - >>>> - pthread_mutex_lock(&page_data_buf[index].mutex); >>>> - >>>> - if (page_data_buf[index].ready != 0) >>>> - goto unlock; >>>> - >>>> - buf_ready = TRUE; >>>> + /* get next dumpable pfn */ >>>> + pthread_mutex_lock(&info->current_pfn_mutex); >>>> + for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { >>>> + dumpable = is_dumpable( >>>> + info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >>>> + pfn, >>>> + cycle); >>>> + if (dumpable) >>>> + break; >>>> + } >>>> + info->current_pfn = pfn + 1; >>>> >>>> - page_data_buf[index].pfn = pfn; >>>> - page_data_buf[index].ready = 1; >>>> + page_flag_buf->pfn = pfn; >>>> + page_flag_buf->ready = FLAG_FILLING; >>>> + pthread_mutex_unlock(&info->current_pfn_mutex); >>>> + sem_post(&info->page_flag_buf_sem); >>>> >>>> - dumpable = is_dumpable( >>>> - info->fd_bitmap ? 
&bitmap_parallel : info->bitmap2, >>>> - pfn, >>>> - cycle); >>>> - page_data_buf[index].dumpable = dumpable; >>>> - if (!dumpable) >>>> - goto unlock; >>>> + if (pfn >= cycle->end_pfn) { >>>> + info->current_pfn = cycle->end_pfn; >>>> + page_data_buf[index].used = FALSE; >>>> + break; >>>> + } >>>> >>>> if (!read_pfn_parallel(fd_memory, pfn, buf, >>>> &bitmap_memory_parallel, >>>> @@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { >>>> >>>> if ((info->dump_level & DL_EXCLUDE_ZERO) >>>> && is_zero_page(buf, info->page_size)) { >>>> - page_data_buf[index].zero = TRUE; >>>> - goto unlock; >>>> + page_flag_buf->zero = TRUE; >>>> + goto next; >>>> } >>>> >>>> - page_data_buf[index].zero = FALSE; >>>> + page_flag_buf->zero = FALSE; >>>> >>>> /* >>>> * Compress the page data. >>>> @@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { >>>> page_data_buf[index].flags = >>>> DUMP_DH_COMPRESSED_LZO; >>>> page_data_buf[index].size = size_out; >>>> + >>>> memcpy(page_data_buf[index].buf, buf_out, size_out); >>>> #endif >>>> #ifdef USESNAPPY >>>> @@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { >>>> page_data_buf[index].size = info->page_size; >>>> memcpy(page_data_buf[index].buf, buf, info->page_size); >>>> } >>>> -unlock: >>>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>>> + page_flag_buf->index = index; >>>> + buf_ready = TRUE; >>>> +next: >>>> + page_flag_buf->ready = FLAG_READY; >>>> + page_flag_buf = page_flag_buf->next; >>>> >>>> } >>>> } >>>> - >>>> retval = NULL; >>>> >>>> fail: >>>> @@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> struct page_desc pd; >>>> struct timeval tv_start; >>>> struct timeval last, new; >>>> - unsigned long long consuming_pfn; >>>> pthread_t **threads = NULL; >>>> struct thread_args *kdump_thread_args = NULL; >>>> void *thread_result; >>>> - int page_data_num; >>>> + int page_buf_num; >>>> struct page_data *page_data_buf = NULL; >>>> int i; >>>> 
int index; >>>> + int end_count, consuming, check_count; >>>> + mdf_pfn_t current_pfn, temp_pfn; >>>> >>>> if (info->flag_elf_dumpfile) >>>> return FALSE; >>>> @@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> goto out; >>>> } >>>> >>>> - res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); >>>> - if (res != 0) { >>>> - ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", >>>> - strerror(res)); >>>> - goto out; >>>> - } >>>> - >>>> res = pthread_mutex_init(&info->filter_mutex, NULL); >>>> if (res != 0) { >>>> ERRMSG("Can't initialize filter_mutex. %s\n", strerror(res)); >>>> @@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> end_pfn = cycle->end_pfn; >>>> >>>> info->current_pfn = start_pfn; >>>> - info->consumed_pfn = start_pfn - 1; >>>> >>>> threads = info->threads; >>>> kdump_thread_args = info->kdump_thread_args; >>>> >>>> - page_data_num = info->num_buffers; >>>> + page_buf_num = info->num_buffers; >>>> page_data_buf = info->page_data_buf; >>>> + pthread_mutex_init(&info->page_data_mutex, NULL); >>>> + sem_init(&info->page_flag_buf_sem, 0, 0); >>>> >>>> - for (i = 0; i < page_data_num; i++) { >>>> - /* >>>> - * producer will use pfn in page_data_buf to decide the >>>> - * consumed pfn >>>> - */ >>>> - page_data_buf[i].pfn = start_pfn - 1; >>>> - page_data_buf[i].ready = 0; >>>> - res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); >>>> - if (res != 0) { >>>> - ERRMSG("Can't initialize mutex of page_data_buf. 
%s\n", >>>> - strerror(res)); >>>> - goto out; >>>> - } >>>> - } >>>> + for (i = 0; i < page_buf_num; i++) >>>> + page_data_buf[i].used = FALSE; >>>> >>>> for (i = 0; i < info->num_threads; i++) { >>>> kdump_thread_args[i].thread_num = i; >>>> kdump_thread_args[i].len_buf_out = len_buf_out; >>>> - kdump_thread_args[i].start_pfn = start_pfn; >>>> - kdump_thread_args[i].end_pfn = end_pfn; >>>> - kdump_thread_args[i].page_data_num = page_data_num; >>>> kdump_thread_args[i].page_data_buf = page_data_buf; >>>> + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; >>>> kdump_thread_args[i].cycle = cycle; >>>> >>>> res = pthread_create(threads[i], NULL, >>>> @@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> } >>>> } >>>> >>>> - consuming_pfn = start_pfn; >>>> - index = -1; >>>> + end_count = 0; >>>> + while (1) { >>>> + consuming = 0; >>>> + check_count = 0; >>>> >>>> - gettimeofday(&last, NULL); >>>> + /* >>>> + * The basic idea is producer producing page and consumer writing page. >>>> + * Each producer have a page_flag_buf list which is used for storing page's description. >>>> + * The size of page_flag_buf is little so it won't take too much memory. >>>> + * And all producers will share a page_data_buf array which is used for storing page's compressed data. >>>> + * The main thread is the consumer. It will find the next pfn and write it into file. >>>> + * The next pfn is smallest pfn in all page_flag_buf. >>>> + */ >>>> + sem_wait(&info->page_flag_buf_sem); >>>> + gettimeofday(&last, NULL); >>>> + while (1) { >>>> + current_pfn = end_pfn; >>>> >>>> - while (consuming_pfn < end_pfn) { >>>> - index = consuming_pfn % page_data_num; >>>> + /* >>>> + * page_flag_buf is in circular linked list. >>>> + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's >>>> + * page_flag_buf list. >>>> + * consuming is used for recording in which thread the pfn is the smallest. 
>>>> + * current_pfn is used for recording the value of pfn when checking the pfn. >>>> + */ >>>> + for (i = 0; i < info->num_threads; i++) { >>>> + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) >>>> + continue; >>>> + temp_pfn = info->page_flag_buf[i]->pfn; >>>> >>>> - gettimeofday(&new, NULL); >>>> - if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>>> - ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); >>>> - goto out; >>>> - } >>>> + /* >>>> + * count how many threads have reached the end. >>>> + */ >>>> + if (temp_pfn >= end_pfn) { >>>> + info->page_flag_buf[i]->ready = FLAG_UNUSED; >>>> + end_count++; >>>> + continue; >>>> + } >>>> >>>> - /* >>>> - * check pfn first without mutex locked to reduce the time >>>> - * trying to lock the mutex >>>> - */ >>>> - if (page_data_buf[index].pfn != consuming_pfn) >>>> - continue; >>>> + if (current_pfn < temp_pfn) >>>> + continue; >>>> >>>> - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) >>>> - continue; >>>> + check_count++; >>>> + consuming = i; >>>> + current_pfn = temp_pfn; >>>> + } >>>> >>>> - /* check whether the found one is ready to be consumed */ >>>> - if (page_data_buf[index].pfn != consuming_pfn || >>>> - page_data_buf[index].ready != 1) { >>>> - goto unlock; >>>> + /* >>>> + * If all the threads have reached the end, we will finish writing. >>>> + */ >>>> + if (end_count >= info->num_threads) >>>> + goto finish; >>>> + >>>> + /* >>>> + * If the page_flag_buf is not ready, the pfn recorded may be changed. >>>> + * So we should recheck. 
>>>> + */ >>>> + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { >>>> + gettimeofday(&new, NULL); >>>> + if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>>> + ERRMSG("Can't get data of pfn.\n"); >>>> + goto out; >>>> + } >>>> + continue; >>>> + } >>>> + >>>> + if (current_pfn == info->page_flag_buf[consuming]->pfn) >>>> + break; >>>> } >>>> >>>> if ((num_dumped % per) == 0) >>>> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); >>>> >>>> - /* next pfn is found, refresh last here */ >>>> - last = new; >>>> - consuming_pfn++; >>>> - info->consumed_pfn++; >>>> - page_data_buf[index].ready = 0; >>>> - >>>> - if (page_data_buf[index].dumpable == FALSE) >>>> - goto unlock; >>>> - >>>> num_dumped++; >>>> >>>> - if (page_data_buf[index].zero == TRUE) { >>>> + >>>> + if (info->page_flag_buf[consuming]->zero == TRUE) { >>>> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) >>>> goto out; >>>> pfn_zero++; >>>> } else { >>>> + index = info->page_flag_buf[consuming]->index; >>>> pd.flags = page_data_buf[index].flags; >>>> pd.size = page_data_buf[index].size; >>>> pd.page_flags = 0; >>>> @@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> */ >>>> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) >>>> goto out; >>>> - >>>> + page_data_buf[index].used = FALSE; >>>> } >>>> -unlock: >>>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>>> + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; >>>> + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; >>>> } >>>> - >>>> +finish: >>>> ret = TRUE; >>>> /* >>>> * print [100 %] >>>> @@ -7463,15 +7532,9 @@ out: >>>> } >>>> } >>>> >>>> - if (page_data_buf != NULL) { >>>> - for (i = 0; i < page_data_num; i++) { >>>> - pthread_mutex_destroy(&page_data_buf[i].mutex); >>>> - } >>>> - } >>>> - >>>> + sem_destroy(&info->page_flag_buf_sem); >>>> pthread_rwlock_destroy(&info->usemmap_rwlock); >>>> pthread_mutex_destroy(&info->filter_mutex); 
>>>> - pthread_mutex_destroy(&info->consumed_pfn_mutex); >>>> pthread_mutex_destroy(&info->current_pfn_mutex); >>>> >>>> return ret; >>>> @@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag >>>> num_dumped++; >>>> if (!read_pfn(pfn, buf)) >>>> goto out; >>>> + >>>> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); >>>> >>>> /* >>>> diff --git a/makedumpfile.h b/makedumpfile.h >>>> index e0b5bbf..4b315c0 100644 >>>> --- a/makedumpfile.h >>>> +++ b/makedumpfile.h >>>> @@ -44,6 +44,7 @@ >>>> #include "print_info.h" >>>> #include "sadump_mod.h" >>>> #include <pthread.h> >>>> +#include <semaphore.h> >>>> >>>> /* >>>> * Result of command >>>> @@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; >>>> #define PAGE_DATA_NUM (50) >>>> #define WAIT_TIME (60 * 10) >>>> #define PTHREAD_FAIL ((void *)-2) >>>> -#define NUM_BUFFERS (50) >>>> +#define NUM_BUFFERS (20) >>>> >>>> struct mmap_cache { >>>> char *mmap_buf; >>>> @@ -985,28 +986,33 @@ struct mmap_cache { >>>> off_t mmap_end_offset; >>>> }; >>>> >>>> +enum { >>>> + FLAG_UNUSED, >>>> + FLAG_READY, >>>> + FLAG_FILLING >>>> +}; >>>> +struct page_flag { >>>> + mdf_pfn_t pfn; >>>> + char zero; >>>> + char ready; >>>> + short index; >>>> + struct page_flag *next; >>>> +}; >>>> + >>>> struct page_data >>>> { >>>> - mdf_pfn_t pfn; >>>> - int dumpable; >>>> - int zero; >>>> - unsigned int flags; >>>> long size; >>>> unsigned char *buf; >>>> - pthread_mutex_t mutex; >>>> - /* >>>> - * whether the page_data is ready to be consumed >>>> - */ >>>> - int ready; >>>> + int flags; >>>> + int used; >>>> }; >>>> >>>> struct thread_args { >>>> int thread_num; >>>> unsigned long len_buf_out; >>>> - mdf_pfn_t start_pfn, end_pfn; >>>> - int page_data_num; >>>> struct cycle *cycle; >>>> struct page_data *page_data_buf; >>>> + struct page_flag *page_flag_buf; >>>> }; >>>> >>>> /* >>>> @@ -1295,11 +1301,12 @@ struct DumpInfo { >>>> pthread_t **threads; >>>> struct thread_args 
*kdump_thread_args; >>>> struct page_data *page_data_buf; >>>> + struct page_flag **page_flag_buf; >>>> + sem_t page_flag_buf_sem; >>>> pthread_rwlock_t usemmap_rwlock; >>>> mdf_pfn_t current_pfn; >>>> pthread_mutex_t current_pfn_mutex; >>>> - mdf_pfn_t consumed_pfn; >>>> - pthread_mutex_t consumed_pfn_mutex; >>>> + pthread_mutex_t page_data_mutex; >>>> pthread_mutex_t filter_mutex; >>>> }; >>>> extern struct DumpInfo *info; >>>> >>> >>> >>> >>> _______________________________________________ >>> kexec mailing list >>> kexec@lists.infradead.org >>> http://lists.infradead.org/mailman/listinfo/kexec > > > > >_______________________________________________ >kexec mailing list >kexec@lists.infradead.org >http://lists.infradead.org/mailman/listinfo/kexec _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-11 1:00 ` "Zhou, Wenjian/周文剑" 2016-03-11 3:03 ` Minoru Usui @ 2016-03-11 5:33 ` Minfei Huang 1 sibling, 0 replies; 33+ messages in thread From: Minfei Huang @ 2016-03-11 5:33 UTC (permalink / raw) To: "Zhou, Wenjian/周文剑"; +Cc: kexec On 03/11/16 at 09:00am, "Zhou, Wenjian/周文剑" wrote: > Hello all, > > Do you have any comments ? Hi, Wenjian. I will test this patch, and update the result in several days, since the machine has been loaned to other developer. Thanks Minfei > > -- > Thanks > Zhou > > On 03/09/2016 08:27 AM, Zhou Wenjian wrote: > >v4: > > 1. fix a bug caused by the logic > >v3: > > 1. remove some unused variables > > 2. fix a bug caused by the wrong logic > > 3. fix a bug caused by optimising > > 4. improve more performance by using Minoru Usui's code > > > >multi-threads implementation will introduce extra cost when handling > >each page. The origin implementation will also do the extra work for > >filtered pages. So there is a big performance degradation in > >--num-threads -d 31. > >The new implementation won't do the extra work for filtered pages any > >more. So the performance of -d 31 is close to that of serial processing. > > > >The new implementation is just like the following: > > * The basic idea is producer producing page and consumer writing page. > > * Each producer have a page_flag_buf list which is used for storing > > page's description. > > * The size of page_flag_buf is little so it won't take too much memory. > > * And all producers will share a page_data_buf array which is > > used for storing page's compressed data. > > * The main thread is the consumer. It will find the next pfn and write > > it into file. > > * The next pfn is smallest pfn in all page_flag_buf. 
> > > >Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> > >Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> > >--- > > makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- > > makedumpfile.h | 35 ++++--- > > 2 files changed, 202 insertions(+), 131 deletions(-) > > > >diff --git a/makedumpfile.c b/makedumpfile.c > >index fa0b779..2b0864a 100644 > >--- a/makedumpfile.c > >+++ b/makedumpfile.c > >@@ -3483,7 +3483,8 @@ initial_for_parallel() > > unsigned long page_data_buf_size; > > unsigned long limit_size; > > int page_data_num; > >- int i; > >+ struct page_flag *current; > >+ int i, j; > > > > len_buf_out = calculate_len_buf_out(info->page_size); > > > >@@ -3560,10 +3561,16 @@ initial_for_parallel() > > > > limit_size = (get_free_memory_size() > > - MAP_REGION * info->num_threads) * 0.6; > >+ if (limit_size < 0) { > >+ MSG("Free memory is not enough for multi-threads\n"); > >+ return FALSE; > >+ } > > > > page_data_num = limit_size / page_data_buf_size; > >+ info->num_buffers = 3 * info->num_threads; > > > >- info->num_buffers = MIN(NUM_BUFFERS, page_data_num); > >+ info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); > >+ info->num_buffers = MIN(info->num_buffers, page_data_num); > > > > DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", > > info->num_buffers); > >@@ -3588,6 +3595,36 @@ initial_for_parallel() > > } > > > > /* > >+ * initial page_flag for each thread > >+ */ > >+ if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) > >+ == NULL) { > >+ MSG("Can't allocate memory for page_flag_buf. %s\n", > >+ strerror(errno)); > >+ return FALSE; > >+ } > >+ memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); > >+ > >+ for (i = 0; i < info->num_threads; i++) { > >+ if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { > >+ MSG("Can't allocate memory for page_flag. 
%s\n", > >+ strerror(errno)); > >+ return FALSE; > >+ } > >+ current = info->page_flag_buf[i]; > >+ > >+ for (j = 1; j < NUM_BUFFERS; j++) { > >+ if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { > >+ MSG("Can't allocate memory for page_flag. %s\n", > >+ strerror(errno)); > >+ return FALSE; > >+ } > >+ current = current->next; > >+ } > >+ current->next = info->page_flag_buf[i]; > >+ } > >+ > >+ /* > > * initial fd_memory for threads > > */ > > for (i = 0; i < info->num_threads; i++) { > >@@ -3612,7 +3649,8 @@ initial_for_parallel() > > void > > free_for_parallel() > > { > >- int i; > >+ int i, j; > >+ struct page_flag *current; > > > > if (info->threads != NULL) { > > for (i = 0; i < info->num_threads; i++) { > >@@ -3655,6 +3693,19 @@ free_for_parallel() > > free(info->page_data_buf); > > } > > > >+ if (info->page_flag_buf != NULL) { > >+ for (i = 0; i < info->num_threads; i++) { > >+ for (j = 0; j < NUM_BUFFERS; j++) { > >+ if (info->page_flag_buf[i] != NULL) { > >+ current = info->page_flag_buf[i]; > >+ info->page_flag_buf[i] = current->next; > >+ free(current); > >+ } > >+ } > >+ } > >+ free(info->page_flag_buf); > >+ } > >+ > > if (info->parallel_info == NULL) > > return; > > > >@@ -7075,11 +7126,11 @@ void * > > kdump_thread_function_cyclic(void *arg) { > > void *retval = PTHREAD_FAIL; > > struct thread_args *kdump_thread_args = (struct thread_args *)arg; > >- struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > >+ volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > >+ volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; > > struct cycle *cycle = kdump_thread_args->cycle; > >- int page_data_num = kdump_thread_args->page_data_num; > >- mdf_pfn_t pfn; > >- int index; > >+ mdf_pfn_t pfn = cycle->start_pfn; > >+ int index = kdump_thread_args->thread_num; > > int buf_ready; > > int dumpable; > > int fd_memory = 0; > >@@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) 
{ > > kdump_thread_args->thread_num); > > } > > > >- while (1) { > >- /* get next pfn */ > >- pthread_mutex_lock(&info->current_pfn_mutex); > >- pfn = info->current_pfn; > >- info->current_pfn++; > >- pthread_mutex_unlock(&info->current_pfn_mutex); > >- > >- if (pfn >= kdump_thread_args->end_pfn) > >- break; > >- > >- index = -1; > >+ /* > >+ * filtered page won't take anything > >+ * unfiltered zero page will only take a page_flag_buf > >+ * unfiltered non-zero page will take a page_flag_buf and a page_data_buf > >+ */ > >+ while (pfn < cycle->end_pfn) { > > buf_ready = FALSE; > > > >+ pthread_mutex_lock(&info->page_data_mutex); > >+ while (page_data_buf[index].used != FALSE) { > >+ index = (index + 1) % info->num_buffers; > >+ } > >+ page_data_buf[index].used = TRUE; > >+ pthread_mutex_unlock(&info->page_data_mutex); > >+ > > while (buf_ready == FALSE) { > > pthread_testcancel(); > >- > >- index = pfn % page_data_num; > >- > >- if (pfn - info->consumed_pfn > info->num_buffers) > >+ if (page_flag_buf->ready == FLAG_READY) > > continue; > > > >- if (page_data_buf[index].ready != 0) > >- continue; > >- > >- pthread_mutex_lock(&page_data_buf[index].mutex); > >- > >- if (page_data_buf[index].ready != 0) > >- goto unlock; > >- > >- buf_ready = TRUE; > >+ /* get next dumpable pfn */ > >+ pthread_mutex_lock(&info->current_pfn_mutex); > >+ for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { > >+ dumpable = is_dumpable( > >+ info->fd_bitmap ? &bitmap_parallel : info->bitmap2, > >+ pfn, > >+ cycle); > >+ if (dumpable) > >+ break; > >+ } > >+ info->current_pfn = pfn + 1; > > > >- page_data_buf[index].pfn = pfn; > >- page_data_buf[index].ready = 1; > >+ page_flag_buf->pfn = pfn; > >+ page_flag_buf->ready = FLAG_FILLING; > >+ pthread_mutex_unlock(&info->current_pfn_mutex); > >+ sem_post(&info->page_flag_buf_sem); > > > >- dumpable = is_dumpable( > >- info->fd_bitmap ? 
&bitmap_parallel : info->bitmap2, > >- pfn, > >- cycle); > >- page_data_buf[index].dumpable = dumpable; > >- if (!dumpable) > >- goto unlock; > >+ if (pfn >= cycle->end_pfn) { > >+ info->current_pfn = cycle->end_pfn; > >+ page_data_buf[index].used = FALSE; > >+ break; > >+ } > > > > if (!read_pfn_parallel(fd_memory, pfn, buf, > > &bitmap_memory_parallel, > >@@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { > > > > if ((info->dump_level & DL_EXCLUDE_ZERO) > > && is_zero_page(buf, info->page_size)) { > >- page_data_buf[index].zero = TRUE; > >- goto unlock; > >+ page_flag_buf->zero = TRUE; > >+ goto next; > > } > > > >- page_data_buf[index].zero = FALSE; > >+ page_flag_buf->zero = FALSE; > > > > /* > > * Compress the page data. > >@@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { > > page_data_buf[index].flags = > > DUMP_DH_COMPRESSED_LZO; > > page_data_buf[index].size = size_out; > >+ > > memcpy(page_data_buf[index].buf, buf_out, size_out); > > #endif > > #ifdef USESNAPPY > >@@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { > > page_data_buf[index].size = info->page_size; > > memcpy(page_data_buf[index].buf, buf, info->page_size); > > } > >-unlock: > >- pthread_mutex_unlock(&page_data_buf[index].mutex); > >+ page_flag_buf->index = index; > >+ buf_ready = TRUE; > >+next: > >+ page_flag_buf->ready = FLAG_READY; > >+ page_flag_buf = page_flag_buf->next; > > > > } > > } > >- > > retval = NULL; > > > > fail: > >@@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > struct page_desc pd; > > struct timeval tv_start; > > struct timeval last, new; > >- unsigned long long consuming_pfn; > > pthread_t **threads = NULL; > > struct thread_args *kdump_thread_args = NULL; > > void *thread_result; > >- int page_data_num; > >+ int page_buf_num; > > struct page_data *page_data_buf = NULL; > > int i; > > int index; > >+ int end_count, consuming, check_count; > >+ mdf_pfn_t current_pfn, temp_pfn; > > > > 
if (info->flag_elf_dumpfile) > > return FALSE; > >@@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > goto out; > > } > > > >- res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); > >- if (res != 0) { > >- ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", > >- strerror(res)); > >- goto out; > >- } > >- > > res = pthread_mutex_init(&info->filter_mutex, NULL); > > if (res != 0) { > > ERRMSG("Can't initialize filter_mutex. %s\n", strerror(res)); > >@@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > end_pfn = cycle->end_pfn; > > > > info->current_pfn = start_pfn; > >- info->consumed_pfn = start_pfn - 1; > > > > threads = info->threads; > > kdump_thread_args = info->kdump_thread_args; > > > >- page_data_num = info->num_buffers; > >+ page_buf_num = info->num_buffers; > > page_data_buf = info->page_data_buf; > >+ pthread_mutex_init(&info->page_data_mutex, NULL); > >+ sem_init(&info->page_flag_buf_sem, 0, 0); > > > >- for (i = 0; i < page_data_num; i++) { > >- /* > >- * producer will use pfn in page_data_buf to decide the > >- * consumed pfn > >- */ > >- page_data_buf[i].pfn = start_pfn - 1; > >- page_data_buf[i].ready = 0; > >- res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); > >- if (res != 0) { > >- ERRMSG("Can't initialize mutex of page_data_buf. 
%s\n", > >- strerror(res)); > >- goto out; > >- } > >- } > >+ for (i = 0; i < page_buf_num; i++) > >+ page_data_buf[i].used = FALSE; > > > > for (i = 0; i < info->num_threads; i++) { > > kdump_thread_args[i].thread_num = i; > > kdump_thread_args[i].len_buf_out = len_buf_out; > >- kdump_thread_args[i].start_pfn = start_pfn; > >- kdump_thread_args[i].end_pfn = end_pfn; > >- kdump_thread_args[i].page_data_num = page_data_num; > > kdump_thread_args[i].page_data_buf = page_data_buf; > >+ kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; > > kdump_thread_args[i].cycle = cycle; > > > > res = pthread_create(threads[i], NULL, > >@@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > } > > } > > > >- consuming_pfn = start_pfn; > >- index = -1; > >+ end_count = 0; > >+ while (1) { > >+ consuming = 0; > >+ check_count = 0; > > > >- gettimeofday(&last, NULL); > >+ /* > >+ * The basic idea is producer producing page and consumer writing page. > >+ * Each producer have a page_flag_buf list which is used for storing page's description. > >+ * The size of page_flag_buf is little so it won't take too much memory. > >+ * And all producers will share a page_data_buf array which is used for storing page's compressed data. > >+ * The main thread is the consumer. It will find the next pfn and write it into file. > >+ * The next pfn is smallest pfn in all page_flag_buf. > >+ */ > >+ sem_wait(&info->page_flag_buf_sem); > >+ gettimeofday(&last, NULL); > >+ while (1) { > >+ current_pfn = end_pfn; > > > >- while (consuming_pfn < end_pfn) { > >- index = consuming_pfn % page_data_num; > >+ /* > >+ * page_flag_buf is in circular linked list. > >+ * The array info->page_flag_buf[] records the current page_flag_buf in each thread's > >+ * page_flag_buf list. > >+ * consuming is used for recording in which thread the pfn is the smallest. > >+ * current_pfn is used for recording the value of pfn when checking the pfn. 
> >+ */ > >+ for (i = 0; i < info->num_threads; i++) { > >+ if (info->page_flag_buf[i]->ready == FLAG_UNUSED) > >+ continue; > >+ temp_pfn = info->page_flag_buf[i]->pfn; > > > >- gettimeofday(&new, NULL); > >- if (new.tv_sec - last.tv_sec > WAIT_TIME) { > >- ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); > >- goto out; > >- } > >+ /* > >+ * count how many threads have reached the end. > >+ */ > >+ if (temp_pfn >= end_pfn) { > >+ info->page_flag_buf[i]->ready = FLAG_UNUSED; > >+ end_count++; > >+ continue; > >+ } > > > >- /* > >- * check pfn first without mutex locked to reduce the time > >- * trying to lock the mutex > >- */ > >- if (page_data_buf[index].pfn != consuming_pfn) > >- continue; > >+ if (current_pfn < temp_pfn) > >+ continue; > > > >- if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) > >- continue; > >+ check_count++; > >+ consuming = i; > >+ current_pfn = temp_pfn; > >+ } > > > >- /* check whether the found one is ready to be consumed */ > >- if (page_data_buf[index].pfn != consuming_pfn || > >- page_data_buf[index].ready != 1) { > >- goto unlock; > >+ /* > >+ * If all the threads have reached the end, we will finish writing. > >+ */ > >+ if (end_count >= info->num_threads) > >+ goto finish; > >+ > >+ /* > >+ * If the page_flag_buf is not ready, the pfn recorded may be changed. > >+ * So we should recheck. 
> >+ */ > >+ if (info->page_flag_buf[consuming]->ready != FLAG_READY) { > >+ gettimeofday(&new, NULL); > >+ if (new.tv_sec - last.tv_sec > WAIT_TIME) { > >+ ERRMSG("Can't get data of pfn.\n"); > >+ goto out; > >+ } > >+ continue; > >+ } > >+ > >+ if (current_pfn == info->page_flag_buf[consuming]->pfn) > >+ break; > > } > > > > if ((num_dumped % per) == 0) > > print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); > > > >- /* next pfn is found, refresh last here */ > >- last = new; > >- consuming_pfn++; > >- info->consumed_pfn++; > >- page_data_buf[index].ready = 0; > >- > >- if (page_data_buf[index].dumpable == FALSE) > >- goto unlock; > >- > > num_dumped++; > > > >- if (page_data_buf[index].zero == TRUE) { > >+ > >+ if (info->page_flag_buf[consuming]->zero == TRUE) { > > if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) > > goto out; > > pfn_zero++; > > } else { > >+ index = info->page_flag_buf[consuming]->index; > > pd.flags = page_data_buf[index].flags; > > pd.size = page_data_buf[index].size; > > pd.page_flags = 0; > >@@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > */ > > if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) > > goto out; > >- > >+ page_data_buf[index].used = FALSE; > > } > >-unlock: > >- pthread_mutex_unlock(&page_data_buf[index].mutex); > >+ info->page_flag_buf[consuming]->ready = FLAG_UNUSED; > >+ info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; > > } > >- > >+finish: > > ret = TRUE; > > /* > > * print [100 %] > >@@ -7463,15 +7532,9 @@ out: > > } > > } > > > >- if (page_data_buf != NULL) { > >- for (i = 0; i < page_data_num; i++) { > >- pthread_mutex_destroy(&page_data_buf[i].mutex); > >- } > >- } > >- > >+ sem_destroy(&info->page_flag_buf_sem); > > pthread_rwlock_destroy(&info->usemmap_rwlock); > > pthread_mutex_destroy(&info->filter_mutex); > >- pthread_mutex_destroy(&info->consumed_pfn_mutex); > > pthread_mutex_destroy(&info->current_pfn_mutex); > > 
> > return ret; > >@@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag > > num_dumped++; > > if (!read_pfn(pfn, buf)) > > goto out; > >+ > > filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); > > > > /* > >diff --git a/makedumpfile.h b/makedumpfile.h > >index e0b5bbf..4b315c0 100644 > >--- a/makedumpfile.h > >+++ b/makedumpfile.h > >@@ -44,6 +44,7 @@ > > #include "print_info.h" > > #include "sadump_mod.h" > > #include <pthread.h> > >+#include <semaphore.h> > > > > /* > > * Result of command > >@@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; > > #define PAGE_DATA_NUM (50) > > #define WAIT_TIME (60 * 10) > > #define PTHREAD_FAIL ((void *)-2) > >-#define NUM_BUFFERS (50) > >+#define NUM_BUFFERS (20) > > > > struct mmap_cache { > > char *mmap_buf; > >@@ -985,28 +986,33 @@ struct mmap_cache { > > off_t mmap_end_offset; > > }; > > > >+enum { > >+ FLAG_UNUSED, > >+ FLAG_READY, > >+ FLAG_FILLING > >+}; > >+struct page_flag { > >+ mdf_pfn_t pfn; > >+ char zero; > >+ char ready; > >+ short index; > >+ struct page_flag *next; > >+}; > >+ > > struct page_data > > { > >- mdf_pfn_t pfn; > >- int dumpable; > >- int zero; > >- unsigned int flags; > > long size; > > unsigned char *buf; > >- pthread_mutex_t mutex; > >- /* > >- * whether the page_data is ready to be consumed > >- */ > >- int ready; > >+ int flags; > >+ int used; > > }; > > > > struct thread_args { > > int thread_num; > > unsigned long len_buf_out; > >- mdf_pfn_t start_pfn, end_pfn; > >- int page_data_num; > > struct cycle *cycle; > > struct page_data *page_data_buf; > >+ struct page_flag *page_flag_buf; > > }; > > > > /* > >@@ -1295,11 +1301,12 @@ struct DumpInfo { > > pthread_t **threads; > > struct thread_args *kdump_thread_args; > > struct page_data *page_data_buf; > >+ struct page_flag **page_flag_buf; > >+ sem_t page_flag_buf_sem; > > pthread_rwlock_t usemmap_rwlock; > > mdf_pfn_t current_pfn; > > pthread_mutex_t current_pfn_mutex; > >- 
mdf_pfn_t consumed_pfn; > >- pthread_mutex_t consumed_pfn_mutex; > >+ pthread_mutex_t page_data_mutex; > > pthread_mutex_t filter_mutex; > > }; > > extern struct DumpInfo *info; > > > > > > _______________________________________________ > kexec mailing list > kexec@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/kexec _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-09 0:27 [PATCH v4] Improve the performance of --num-threads -d 31 Zhou Wenjian 2016-03-09 0:35 ` "Zhou, Wenjian/周文剑" 2016-03-11 1:00 ` "Zhou, Wenjian/周文剑" @ 2016-03-15 6:34 ` Minfei Huang 2016-03-15 7:12 ` "Zhou, Wenjian/周文剑" ` (2 more replies) 2 siblings, 3 replies; 33+ messages in thread From: Minfei Huang @ 2016-03-15 6:34 UTC (permalink / raw) To: Zhou Wenjian; +Cc: kexec Hi, Zhou. I have applied this patch base on 1.5.9. There are several testcases I have tested. - makedumpfile --num-threads 64 -d 31 real 0m0.010s user 0m0.002s sys 0m0.009s - makedumpfile --num-threads 31 -d 31 real 2m40.915s user 10m50.900s sys 23m9.664s makedumpfile --num-threads 30 -d 31 real 0m0.006s user 0m0.002s sys 0m0.004s makedumpfile --num-threads 32 -d 31 real 0m0.007s user 0m0.002s sys 0m0.005s - makedumpfile --num-threads 8 -d 31 real 2m32.692s user 7m4.630s sys 2m0.369s - makedumpfile --num-threads 1 -d 31 real 4m42.423s user 7m27.153s sys 0m22.490s - makedumpfile.orig -d 31 real 4m1.297s user 3m39.696s sys 0m15.200s This patch has a huge increment to the filter performance under 31. But it is not stable, since makedumpfile fails to dump vmcore intermittently. You can find the above test result, makedumpfile fails to dump vmcore with option --num-threads 64, also it may occur with option --number-threads 8. Thanks Minfei On 03/09/16 at 08:27am, Zhou Wenjian wrote: > v4: > 1. fix a bug caused by the logic > v3: > 1. remove some unused variables > 2. fix a bug caused by the wrong logic > 3. fix a bug caused by optimising > 4. improve more performance by using Minoru Usui's code > > multi-threads implementation will introduce extra cost when handling > each page. The origin implementation will also do the extra work for > filtered pages. So there is a big performance degradation in > --num-threads -d 31. > The new implementation won't do the extra work for filtered pages any > more. 
So the performance of -d 31 is close to that of serial processing. > > The new implementation is just like the following: > * The basic idea is producer producing page and consumer writing page. > * Each producer have a page_flag_buf list which is used for storing > page's description. > * The size of page_flag_buf is little so it won't take too much memory. > * And all producers will share a page_data_buf array which is > used for storing page's compressed data. > * The main thread is the consumer. It will find the next pfn and write > it into file. > * The next pfn is smallest pfn in all page_flag_buf. > > Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> > Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> > --- > makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- > makedumpfile.h | 35 ++++--- > 2 files changed, 202 insertions(+), 131 deletions(-) > > diff --git a/makedumpfile.c b/makedumpfile.c > index fa0b779..2b0864a 100644 > --- a/makedumpfile.c > +++ b/makedumpfile.c > @@ -3483,7 +3483,8 @@ initial_for_parallel() > unsigned long page_data_buf_size; > unsigned long limit_size; > int page_data_num; > - int i; > + struct page_flag *current; > + int i, j; > > len_buf_out = calculate_len_buf_out(info->page_size); > > @@ -3560,10 +3561,16 @@ initial_for_parallel() > > limit_size = (get_free_memory_size() > - MAP_REGION * info->num_threads) * 0.6; > + if (limit_size < 0) { > + MSG("Free memory is not enough for multi-threads\n"); > + return FALSE; > + } > > page_data_num = limit_size / page_data_buf_size; > + info->num_buffers = 3 * info->num_threads; > > - info->num_buffers = MIN(NUM_BUFFERS, page_data_num); > + info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); > + info->num_buffers = MIN(info->num_buffers, page_data_num); > > DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", > info->num_buffers); > @@ -3588,6 +3595,36 @@ initial_for_parallel() > } > > /* > + * initial page_flag for each thread > + */ > + 
if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) > + == NULL) { > + MSG("Can't allocate memory for page_flag_buf. %s\n", > + strerror(errno)); > + return FALSE; > + } > + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); > + > + for (i = 0; i < info->num_threads; i++) { > + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { > + MSG("Can't allocate memory for page_flag. %s\n", > + strerror(errno)); > + return FALSE; > + } > + current = info->page_flag_buf[i]; > + > + for (j = 1; j < NUM_BUFFERS; j++) { > + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { > + MSG("Can't allocate memory for page_flag. %s\n", > + strerror(errno)); > + return FALSE; > + } > + current = current->next; > + } > + current->next = info->page_flag_buf[i]; > + } > + > + /* > * initial fd_memory for threads > */ > for (i = 0; i < info->num_threads; i++) { > @@ -3612,7 +3649,8 @@ initial_for_parallel() > void > free_for_parallel() > { > - int i; > + int i, j; > + struct page_flag *current; > > if (info->threads != NULL) { > for (i = 0; i < info->num_threads; i++) { > @@ -3655,6 +3693,19 @@ free_for_parallel() > free(info->page_data_buf); > } > > + if (info->page_flag_buf != NULL) { > + for (i = 0; i < info->num_threads; i++) { > + for (j = 0; j < NUM_BUFFERS; j++) { > + if (info->page_flag_buf[i] != NULL) { > + current = info->page_flag_buf[i]; > + info->page_flag_buf[i] = current->next; > + free(current); > + } > + } > + } > + free(info->page_flag_buf); > + } > + > if (info->parallel_info == NULL) > return; > > @@ -7075,11 +7126,11 @@ void * > kdump_thread_function_cyclic(void *arg) { > void *retval = PTHREAD_FAIL; > struct thread_args *kdump_thread_args = (struct thread_args *)arg; > - struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > + volatile struct page_flag *page_flag_buf = 
kdump_thread_args->page_flag_buf; > struct cycle *cycle = kdump_thread_args->cycle; > - int page_data_num = kdump_thread_args->page_data_num; > - mdf_pfn_t pfn; > - int index; > + mdf_pfn_t pfn = cycle->start_pfn; > + int index = kdump_thread_args->thread_num; > int buf_ready; > int dumpable; > int fd_memory = 0; > @@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { > kdump_thread_args->thread_num); > } > > - while (1) { > - /* get next pfn */ > - pthread_mutex_lock(&info->current_pfn_mutex); > - pfn = info->current_pfn; > - info->current_pfn++; > - pthread_mutex_unlock(&info->current_pfn_mutex); > - > - if (pfn >= kdump_thread_args->end_pfn) > - break; > - > - index = -1; > + /* > + * filtered page won't take anything > + * unfiltered zero page will only take a page_flag_buf > + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf > + */ > + while (pfn < cycle->end_pfn) { > buf_ready = FALSE; > > + pthread_mutex_lock(&info->page_data_mutex); > + while (page_data_buf[index].used != FALSE) { > + index = (index + 1) % info->num_buffers; > + } > + page_data_buf[index].used = TRUE; > + pthread_mutex_unlock(&info->page_data_mutex); > + > while (buf_ready == FALSE) { > pthread_testcancel(); > - > - index = pfn % page_data_num; > - > - if (pfn - info->consumed_pfn > info->num_buffers) > + if (page_flag_buf->ready == FLAG_READY) > continue; > > - if (page_data_buf[index].ready != 0) > - continue; > - > - pthread_mutex_lock(&page_data_buf[index].mutex); > - > - if (page_data_buf[index].ready != 0) > - goto unlock; > - > - buf_ready = TRUE; > + /* get next dumpable pfn */ > + pthread_mutex_lock(&info->current_pfn_mutex); > + for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { > + dumpable = is_dumpable( > + info->fd_bitmap ? 
&bitmap_parallel : info->bitmap2, > + pfn, > + cycle); > + if (dumpable) > + break; > + } > + info->current_pfn = pfn + 1; > > - page_data_buf[index].pfn = pfn; > - page_data_buf[index].ready = 1; > + page_flag_buf->pfn = pfn; > + page_flag_buf->ready = FLAG_FILLING; > + pthread_mutex_unlock(&info->current_pfn_mutex); > + sem_post(&info->page_flag_buf_sem); > > - dumpable = is_dumpable( > - info->fd_bitmap ? &bitmap_parallel : info->bitmap2, > - pfn, > - cycle); > - page_data_buf[index].dumpable = dumpable; > - if (!dumpable) > - goto unlock; > + if (pfn >= cycle->end_pfn) { > + info->current_pfn = cycle->end_pfn; > + page_data_buf[index].used = FALSE; > + break; > + } > > if (!read_pfn_parallel(fd_memory, pfn, buf, > &bitmap_memory_parallel, > @@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { > > if ((info->dump_level & DL_EXCLUDE_ZERO) > && is_zero_page(buf, info->page_size)) { > - page_data_buf[index].zero = TRUE; > - goto unlock; > + page_flag_buf->zero = TRUE; > + goto next; > } > > - page_data_buf[index].zero = FALSE; > + page_flag_buf->zero = FALSE; > > /* > * Compress the page data. 
> @@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { > page_data_buf[index].flags = > DUMP_DH_COMPRESSED_LZO; > page_data_buf[index].size = size_out; > + > memcpy(page_data_buf[index].buf, buf_out, size_out); > #endif > #ifdef USESNAPPY > @@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { > page_data_buf[index].size = info->page_size; > memcpy(page_data_buf[index].buf, buf, info->page_size); > } > -unlock: > - pthread_mutex_unlock(&page_data_buf[index].mutex); > + page_flag_buf->index = index; > + buf_ready = TRUE; > +next: > + page_flag_buf->ready = FLAG_READY; > + page_flag_buf = page_flag_buf->next; > > } > } > - > retval = NULL; > > fail: > @@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > struct page_desc pd; > struct timeval tv_start; > struct timeval last, new; > - unsigned long long consuming_pfn; > pthread_t **threads = NULL; > struct thread_args *kdump_thread_args = NULL; > void *thread_result; > - int page_data_num; > + int page_buf_num; > struct page_data *page_data_buf = NULL; > int i; > int index; > + int end_count, consuming, check_count; > + mdf_pfn_t current_pfn, temp_pfn; > > if (info->flag_elf_dumpfile) > return FALSE; > @@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > goto out; > } > > - res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); > - if (res != 0) { > - ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", > - strerror(res)); > - goto out; > - } > - > res = pthread_mutex_init(&info->filter_mutex, NULL); > if (res != 0) { > ERRMSG("Can't initialize filter_mutex. 
%s\n", strerror(res)); > @@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > end_pfn = cycle->end_pfn; > > info->current_pfn = start_pfn; > - info->consumed_pfn = start_pfn - 1; > > threads = info->threads; > kdump_thread_args = info->kdump_thread_args; > > - page_data_num = info->num_buffers; > + page_buf_num = info->num_buffers; > page_data_buf = info->page_data_buf; > + pthread_mutex_init(&info->page_data_mutex, NULL); > + sem_init(&info->page_flag_buf_sem, 0, 0); > > - for (i = 0; i < page_data_num; i++) { > - /* > - * producer will use pfn in page_data_buf to decide the > - * consumed pfn > - */ > - page_data_buf[i].pfn = start_pfn - 1; > - page_data_buf[i].ready = 0; > - res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); > - if (res != 0) { > - ERRMSG("Can't initialize mutex of page_data_buf. %s\n", > - strerror(res)); > - goto out; > - } > - } > + for (i = 0; i < page_buf_num; i++) > + page_data_buf[i].used = FALSE; > > for (i = 0; i < info->num_threads; i++) { > kdump_thread_args[i].thread_num = i; > kdump_thread_args[i].len_buf_out = len_buf_out; > - kdump_thread_args[i].start_pfn = start_pfn; > - kdump_thread_args[i].end_pfn = end_pfn; > - kdump_thread_args[i].page_data_num = page_data_num; > kdump_thread_args[i].page_data_buf = page_data_buf; > + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; > kdump_thread_args[i].cycle = cycle; > > res = pthread_create(threads[i], NULL, > @@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > } > } > > - consuming_pfn = start_pfn; > - index = -1; > + end_count = 0; > + while (1) { > + consuming = 0; > + check_count = 0; > > - gettimeofday(&last, NULL); > + /* > + * The basic idea is producer producing page and consumer writing page. > + * Each producer have a page_flag_buf list which is used for storing page's description. > + * The size of page_flag_buf is little so it won't take too much memory. 
> + * And all producers will share a page_data_buf array which is used for storing page's compressed data. > + * The main thread is the consumer. It will find the next pfn and write it into file. > + * The next pfn is smallest pfn in all page_flag_buf. > + */ > + sem_wait(&info->page_flag_buf_sem); > + gettimeofday(&last, NULL); > + while (1) { > + current_pfn = end_pfn; > > - while (consuming_pfn < end_pfn) { > - index = consuming_pfn % page_data_num; > + /* > + * page_flag_buf is in circular linked list. > + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's > + * page_flag_buf list. > + * consuming is used for recording in which thread the pfn is the smallest. > + * current_pfn is used for recording the value of pfn when checking the pfn. > + */ > + for (i = 0; i < info->num_threads; i++) { > + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) > + continue; > + temp_pfn = info->page_flag_buf[i]->pfn; > > - gettimeofday(&new, NULL); > - if (new.tv_sec - last.tv_sec > WAIT_TIME) { > - ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); > - goto out; > - } > + /* > + * count how many threads have reached the end. > + */ > + if (temp_pfn >= end_pfn) { > + info->page_flag_buf[i]->ready = FLAG_UNUSED; > + end_count++; > + continue; > + } > > - /* > - * check pfn first without mutex locked to reduce the time > - * trying to lock the mutex > - */ > - if (page_data_buf[index].pfn != consuming_pfn) > - continue; > + if (current_pfn < temp_pfn) > + continue; > > - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) > - continue; > + check_count++; > + consuming = i; > + current_pfn = temp_pfn; > + } > > - /* check whether the found one is ready to be consumed */ > - if (page_data_buf[index].pfn != consuming_pfn || > - page_data_buf[index].ready != 1) { > - goto unlock; > + /* > + * If all the threads have reached the end, we will finish writing. 
> + */ > + if (end_count >= info->num_threads) > + goto finish; > + > + /* > + * If the page_flag_buf is not ready, the pfn recorded may be changed. > + * So we should recheck. > + */ > + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { > + gettimeofday(&new, NULL); > + if (new.tv_sec - last.tv_sec > WAIT_TIME) { > + ERRMSG("Can't get data of pfn.\n"); > + goto out; > + } > + continue; > + } > + > + if (current_pfn == info->page_flag_buf[consuming]->pfn) > + break; > } > > if ((num_dumped % per) == 0) > print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); > > - /* next pfn is found, refresh last here */ > - last = new; > - consuming_pfn++; > - info->consumed_pfn++; > - page_data_buf[index].ready = 0; > - > - if (page_data_buf[index].dumpable == FALSE) > - goto unlock; > - > num_dumped++; > > - if (page_data_buf[index].zero == TRUE) { > + > + if (info->page_flag_buf[consuming]->zero == TRUE) { > if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) > goto out; > pfn_zero++; > } else { > + index = info->page_flag_buf[consuming]->index; > pd.flags = page_data_buf[index].flags; > pd.size = page_data_buf[index].size; > pd.page_flags = 0; > @@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > */ > if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) > goto out; > - > + page_data_buf[index].used = FALSE; > } > -unlock: > - pthread_mutex_unlock(&page_data_buf[index].mutex); > + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; > + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; > } > - > +finish: > ret = TRUE; > /* > * print [100 %] > @@ -7463,15 +7532,9 @@ out: > } > } > > - if (page_data_buf != NULL) { > - for (i = 0; i < page_data_num; i++) { > - pthread_mutex_destroy(&page_data_buf[i].mutex); > - } > - } > - > + sem_destroy(&info->page_flag_buf_sem); > pthread_rwlock_destroy(&info->usemmap_rwlock); > pthread_mutex_destroy(&info->filter_mutex); > - 
pthread_mutex_destroy(&info->consumed_pfn_mutex); > pthread_mutex_destroy(&info->current_pfn_mutex); > > return ret; > @@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag > num_dumped++; > if (!read_pfn(pfn, buf)) > goto out; > + > filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); > > /* > diff --git a/makedumpfile.h b/makedumpfile.h > index e0b5bbf..4b315c0 100644 > --- a/makedumpfile.h > +++ b/makedumpfile.h > @@ -44,6 +44,7 @@ > #include "print_info.h" > #include "sadump_mod.h" > #include <pthread.h> > +#include <semaphore.h> > > /* > * Result of command > @@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; > #define PAGE_DATA_NUM (50) > #define WAIT_TIME (60 * 10) > #define PTHREAD_FAIL ((void *)-2) > -#define NUM_BUFFERS (50) > +#define NUM_BUFFERS (20) > > struct mmap_cache { > char *mmap_buf; > @@ -985,28 +986,33 @@ struct mmap_cache { > off_t mmap_end_offset; > }; > > +enum { > + FLAG_UNUSED, > + FLAG_READY, > + FLAG_FILLING > +}; > +struct page_flag { > + mdf_pfn_t pfn; > + char zero; > + char ready; > + short index; > + struct page_flag *next; > +}; > + > struct page_data > { > - mdf_pfn_t pfn; > - int dumpable; > - int zero; > - unsigned int flags; > long size; > unsigned char *buf; > - pthread_mutex_t mutex; > - /* > - * whether the page_data is ready to be consumed > - */ > - int ready; > + int flags; > + int used; > }; > > struct thread_args { > int thread_num; > unsigned long len_buf_out; > - mdf_pfn_t start_pfn, end_pfn; > - int page_data_num; > struct cycle *cycle; > struct page_data *page_data_buf; > + struct page_flag *page_flag_buf; > }; > > /* > @@ -1295,11 +1301,12 @@ struct DumpInfo { > pthread_t **threads; > struct thread_args *kdump_thread_args; > struct page_data *page_data_buf; > + struct page_flag **page_flag_buf; > + sem_t page_flag_buf_sem; > pthread_rwlock_t usemmap_rwlock; > mdf_pfn_t current_pfn; > pthread_mutex_t current_pfn_mutex; > - mdf_pfn_t consumed_pfn; > 
- pthread_mutex_t consumed_pfn_mutex; > + pthread_mutex_t page_data_mutex; > pthread_mutex_t filter_mutex; > }; > extern struct DumpInfo *info; > -- > 1.8.3.1 > > > > > _______________________________________________ > kexec mailing list > kexec@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/kexec _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-15 6:34 ` Minfei Huang @ 2016-03-15 7:12 ` "Zhou, Wenjian/周文剑" 2016-03-15 7:38 ` Minfei Huang 2016-03-15 9:33 ` Minfei Huang 2016-03-15 8:35 ` "Zhou, Wenjian/周文剑" 2016-03-18 2:46 ` "Zhou, Wenjian/周文剑" 2 siblings, 2 replies; 33+ messages in thread From: "Zhou, Wenjian/周文剑" @ 2016-03-15 7:12 UTC (permalink / raw) To: Minfei Huang; +Cc: kexec Hello Minfei, I guess the result is affected by the caches. How about executing the following command before running makedumpfile each time? # echo 3 > /proc/sys/vm/drop_caches -- Thanks Zhou On 03/15/2016 02:34 PM, Minfei Huang wrote: > Hi, Zhou. > > I have applied this patch base on 1.5.9. There are several testcases I > have tested. > > - makedumpfile --num-threads 64 -d 31 > real 0m0.010s > user 0m0.002s > sys 0m0.009s > > - makedumpfile --num-threads 31 -d 31 > real 2m40.915s > user 10m50.900s > sys 23m9.664s > > makedumpfile --num-threads 30 -d 31 > real 0m0.006s > user 0m0.002s > sys 0m0.004s > > makedumpfile --num-threads 32 -d 31 > real 0m0.007s > user 0m0.002s > sys 0m0.005s > > - makedumpfile --num-threads 8 -d 31 > real 2m32.692s > user 7m4.630s > sys 2m0.369s > > - makedumpfile --num-threads 1 -d 31 > real 4m42.423s > user 7m27.153s > sys 0m22.490s > > - makedumpfile.orig -d 31 > real 4m1.297s > user 3m39.696s > sys 0m15.200s > > This patch has a huge increment to the filter performance under 31. But > it is not stable, since makedumpfile fails to dump vmcore intermittently. > You can find the above test result, makedumpfile fails to dump vmcore > with option --num-threads 64, also it may occur with option > --number-threads 8. > > Thanks > Minfei > > On 03/09/16 at 08:27am, Zhou Wenjian wrote: >> v4: >> 1. fix a bug caused by the logic >> v3: >> 1. remove some unused variables >> 2. fix a bug caused by the wrong logic >> 3. fix a bug caused by optimising >> 4. 
improve more performance by using Minoru Usui's code >> >> multi-threads implementation will introduce extra cost when handling >> each page. The origin implementation will also do the extra work for >> filtered pages. So there is a big performance degradation in >> --num-threads -d 31. >> The new implementation won't do the extra work for filtered pages any >> more. So the performance of -d 31 is close to that of serial processing. >> >> The new implementation is just like the following: >> * The basic idea is producer producing page and consumer writing page. >> * Each producer have a page_flag_buf list which is used for storing >> page's description. >> * The size of page_flag_buf is little so it won't take too much memory. >> * And all producers will share a page_data_buf array which is >> used for storing page's compressed data. >> * The main thread is the consumer. It will find the next pfn and write >> it into file. >> * The next pfn is smallest pfn in all page_flag_buf. >> >> Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> >> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> >> --- >> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- >> makedumpfile.h | 35 ++++--- >> 2 files changed, 202 insertions(+), 131 deletions(-) >> >> diff --git a/makedumpfile.c b/makedumpfile.c >> index fa0b779..2b0864a 100644 >> --- a/makedumpfile.c >> +++ b/makedumpfile.c >> @@ -3483,7 +3483,8 @@ initial_for_parallel() >> unsigned long page_data_buf_size; >> unsigned long limit_size; >> int page_data_num; >> - int i; >> + struct page_flag *current; >> + int i, j; >> >> len_buf_out = calculate_len_buf_out(info->page_size); >> >> @@ -3560,10 +3561,16 @@ initial_for_parallel() >> >> limit_size = (get_free_memory_size() >> - MAP_REGION * info->num_threads) * 0.6; >> + if (limit_size < 0) { >> + MSG("Free memory is not enough for multi-threads\n"); >> + return FALSE; >> + } >> >> page_data_num = limit_size / page_data_buf_size; >> + 
info->num_buffers = 3 * info->num_threads; >> >> - info->num_buffers = MIN(NUM_BUFFERS, page_data_num); >> + info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); >> + info->num_buffers = MIN(info->num_buffers, page_data_num); >> >> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", >> info->num_buffers); >> @@ -3588,6 +3595,36 @@ initial_for_parallel() >> } >> >> /* >> + * initial page_flag for each thread >> + */ >> + if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) >> + == NULL) { >> + MSG("Can't allocate memory for page_flag_buf. %s\n", >> + strerror(errno)); >> + return FALSE; >> + } >> + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); >> + >> + for (i = 0; i < info->num_threads; i++) { >> + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { >> + MSG("Can't allocate memory for page_flag. %s\n", >> + strerror(errno)); >> + return FALSE; >> + } >> + current = info->page_flag_buf[i]; >> + >> + for (j = 1; j < NUM_BUFFERS; j++) { >> + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { >> + MSG("Can't allocate memory for page_flag. 
%s\n", >> + strerror(errno)); >> + return FALSE; >> + } >> + current = current->next; >> + } >> + current->next = info->page_flag_buf[i]; >> + } >> + >> + /* >> * initial fd_memory for threads >> */ >> for (i = 0; i < info->num_threads; i++) { >> @@ -3612,7 +3649,8 @@ initial_for_parallel() >> void >> free_for_parallel() >> { >> - int i; >> + int i, j; >> + struct page_flag *current; >> >> if (info->threads != NULL) { >> for (i = 0; i < info->num_threads; i++) { >> @@ -3655,6 +3693,19 @@ free_for_parallel() >> free(info->page_data_buf); >> } >> >> + if (info->page_flag_buf != NULL) { >> + for (i = 0; i < info->num_threads; i++) { >> + for (j = 0; j < NUM_BUFFERS; j++) { >> + if (info->page_flag_buf[i] != NULL) { >> + current = info->page_flag_buf[i]; >> + info->page_flag_buf[i] = current->next; >> + free(current); >> + } >> + } >> + } >> + free(info->page_flag_buf); >> + } >> + >> if (info->parallel_info == NULL) >> return; >> >> @@ -7075,11 +7126,11 @@ void * >> kdump_thread_function_cyclic(void *arg) { >> void *retval = PTHREAD_FAIL; >> struct thread_args *kdump_thread_args = (struct thread_args *)arg; >> - struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >> + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >> + volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; >> struct cycle *cycle = kdump_thread_args->cycle; >> - int page_data_num = kdump_thread_args->page_data_num; >> - mdf_pfn_t pfn; >> - int index; >> + mdf_pfn_t pfn = cycle->start_pfn; >> + int index = kdump_thread_args->thread_num; >> int buf_ready; >> int dumpable; >> int fd_memory = 0; >> @@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { >> kdump_thread_args->thread_num); >> } >> >> - while (1) { >> - /* get next pfn */ >> - pthread_mutex_lock(&info->current_pfn_mutex); >> - pfn = info->current_pfn; >> - info->current_pfn++; >> - pthread_mutex_unlock(&info->current_pfn_mutex); >> - >> - if (pfn >= 
kdump_thread_args->end_pfn) >> - break; >> - >> - index = -1; >> + /* >> + * filtered page won't take anything >> + * unfiltered zero page will only take a page_flag_buf >> + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf >> + */ >> + while (pfn < cycle->end_pfn) { >> buf_ready = FALSE; >> >> + pthread_mutex_lock(&info->page_data_mutex); >> + while (page_data_buf[index].used != FALSE) { >> + index = (index + 1) % info->num_buffers; >> + } >> + page_data_buf[index].used = TRUE; >> + pthread_mutex_unlock(&info->page_data_mutex); >> + >> while (buf_ready == FALSE) { >> pthread_testcancel(); >> - >> - index = pfn % page_data_num; >> - >> - if (pfn - info->consumed_pfn > info->num_buffers) >> + if (page_flag_buf->ready == FLAG_READY) >> continue; >> >> - if (page_data_buf[index].ready != 0) >> - continue; >> - >> - pthread_mutex_lock(&page_data_buf[index].mutex); >> - >> - if (page_data_buf[index].ready != 0) >> - goto unlock; >> - >> - buf_ready = TRUE; >> + /* get next dumpable pfn */ >> + pthread_mutex_lock(&info->current_pfn_mutex); >> + for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { >> + dumpable = is_dumpable( >> + info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >> + pfn, >> + cycle); >> + if (dumpable) >> + break; >> + } >> + info->current_pfn = pfn + 1; >> >> - page_data_buf[index].pfn = pfn; >> - page_data_buf[index].ready = 1; >> + page_flag_buf->pfn = pfn; >> + page_flag_buf->ready = FLAG_FILLING; >> + pthread_mutex_unlock(&info->current_pfn_mutex); >> + sem_post(&info->page_flag_buf_sem); >> >> - dumpable = is_dumpable( >> - info->fd_bitmap ? 
&bitmap_parallel : info->bitmap2, >> - pfn, >> - cycle); >> - page_data_buf[index].dumpable = dumpable; >> - if (!dumpable) >> - goto unlock; >> + if (pfn >= cycle->end_pfn) { >> + info->current_pfn = cycle->end_pfn; >> + page_data_buf[index].used = FALSE; >> + break; >> + } >> >> if (!read_pfn_parallel(fd_memory, pfn, buf, >> &bitmap_memory_parallel, >> @@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { >> >> if ((info->dump_level & DL_EXCLUDE_ZERO) >> && is_zero_page(buf, info->page_size)) { >> - page_data_buf[index].zero = TRUE; >> - goto unlock; >> + page_flag_buf->zero = TRUE; >> + goto next; >> } >> >> - page_data_buf[index].zero = FALSE; >> + page_flag_buf->zero = FALSE; >> >> /* >> * Compress the page data. >> @@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { >> page_data_buf[index].flags = >> DUMP_DH_COMPRESSED_LZO; >> page_data_buf[index].size = size_out; >> + >> memcpy(page_data_buf[index].buf, buf_out, size_out); >> #endif >> #ifdef USESNAPPY >> @@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { >> page_data_buf[index].size = info->page_size; >> memcpy(page_data_buf[index].buf, buf, info->page_size); >> } >> -unlock: >> - pthread_mutex_unlock(&page_data_buf[index].mutex); >> + page_flag_buf->index = index; >> + buf_ready = TRUE; >> +next: >> + page_flag_buf->ready = FLAG_READY; >> + page_flag_buf = page_flag_buf->next; >> >> } >> } >> - >> retval = NULL; >> >> fail: >> @@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> struct page_desc pd; >> struct timeval tv_start; >> struct timeval last, new; >> - unsigned long long consuming_pfn; >> pthread_t **threads = NULL; >> struct thread_args *kdump_thread_args = NULL; >> void *thread_result; >> - int page_data_num; >> + int page_buf_num; >> struct page_data *page_data_buf = NULL; >> int i; >> int index; >> + int end_count, consuming, check_count; >> + mdf_pfn_t current_pfn, temp_pfn; >> >> if (info->flag_elf_dumpfile) >> 
return FALSE; >> @@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> goto out; >> } >> >> - res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); >> - if (res != 0) { >> - ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", >> - strerror(res)); >> - goto out; >> - } >> - >> res = pthread_mutex_init(&info->filter_mutex, NULL); >> if (res != 0) { >> ERRMSG("Can't initialize filter_mutex. %s\n", strerror(res)); >> @@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> end_pfn = cycle->end_pfn; >> >> info->current_pfn = start_pfn; >> - info->consumed_pfn = start_pfn - 1; >> >> threads = info->threads; >> kdump_thread_args = info->kdump_thread_args; >> >> - page_data_num = info->num_buffers; >> + page_buf_num = info->num_buffers; >> page_data_buf = info->page_data_buf; >> + pthread_mutex_init(&info->page_data_mutex, NULL); >> + sem_init(&info->page_flag_buf_sem, 0, 0); >> >> - for (i = 0; i < page_data_num; i++) { >> - /* >> - * producer will use pfn in page_data_buf to decide the >> - * consumed pfn >> - */ >> - page_data_buf[i].pfn = start_pfn - 1; >> - page_data_buf[i].ready = 0; >> - res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); >> - if (res != 0) { >> - ERRMSG("Can't initialize mutex of page_data_buf. 
%s\n", >> - strerror(res)); >> - goto out; >> - } >> - } >> + for (i = 0; i < page_buf_num; i++) >> + page_data_buf[i].used = FALSE; >> >> for (i = 0; i < info->num_threads; i++) { >> kdump_thread_args[i].thread_num = i; >> kdump_thread_args[i].len_buf_out = len_buf_out; >> - kdump_thread_args[i].start_pfn = start_pfn; >> - kdump_thread_args[i].end_pfn = end_pfn; >> - kdump_thread_args[i].page_data_num = page_data_num; >> kdump_thread_args[i].page_data_buf = page_data_buf; >> + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; >> kdump_thread_args[i].cycle = cycle; >> >> res = pthread_create(threads[i], NULL, >> @@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> } >> } >> >> - consuming_pfn = start_pfn; >> - index = -1; >> + end_count = 0; >> + while (1) { >> + consuming = 0; >> + check_count = 0; >> >> - gettimeofday(&last, NULL); >> + /* >> + * The basic idea is producer producing page and consumer writing page. >> + * Each producer have a page_flag_buf list which is used for storing page's description. >> + * The size of page_flag_buf is little so it won't take too much memory. >> + * And all producers will share a page_data_buf array which is used for storing page's compressed data. >> + * The main thread is the consumer. It will find the next pfn and write it into file. >> + * The next pfn is smallest pfn in all page_flag_buf. >> + */ >> + sem_wait(&info->page_flag_buf_sem); >> + gettimeofday(&last, NULL); >> + while (1) { >> + current_pfn = end_pfn; >> >> - while (consuming_pfn < end_pfn) { >> - index = consuming_pfn % page_data_num; >> + /* >> + * page_flag_buf is in circular linked list. >> + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's >> + * page_flag_buf list. >> + * consuming is used for recording in which thread the pfn is the smallest. >> + * current_pfn is used for recording the value of pfn when checking the pfn. 
>> + */ >> + for (i = 0; i < info->num_threads; i++) { >> + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) >> + continue; >> + temp_pfn = info->page_flag_buf[i]->pfn; >> >> - gettimeofday(&new, NULL); >> - if (new.tv_sec - last.tv_sec > WAIT_TIME) { >> - ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); >> - goto out; >> - } >> + /* >> + * count how many threads have reached the end. >> + */ >> + if (temp_pfn >= end_pfn) { >> + info->page_flag_buf[i]->ready = FLAG_UNUSED; >> + end_count++; >> + continue; >> + } >> >> - /* >> - * check pfn first without mutex locked to reduce the time >> - * trying to lock the mutex >> - */ >> - if (page_data_buf[index].pfn != consuming_pfn) >> - continue; >> + if (current_pfn < temp_pfn) >> + continue; >> >> - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) >> - continue; >> + check_count++; >> + consuming = i; >> + current_pfn = temp_pfn; >> + } >> >> - /* check whether the found one is ready to be consumed */ >> - if (page_data_buf[index].pfn != consuming_pfn || >> - page_data_buf[index].ready != 1) { >> - goto unlock; >> + /* >> + * If all the threads have reached the end, we will finish writing. >> + */ >> + if (end_count >= info->num_threads) >> + goto finish; >> + >> + /* >> + * If the page_flag_buf is not ready, the pfn recorded may be changed. >> + * So we should recheck. 
>> + */ >> + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { >> + gettimeofday(&new, NULL); >> + if (new.tv_sec - last.tv_sec > WAIT_TIME) { >> + ERRMSG("Can't get data of pfn.\n"); >> + goto out; >> + } >> + continue; >> + } >> + >> + if (current_pfn == info->page_flag_buf[consuming]->pfn) >> + break; >> } >> >> if ((num_dumped % per) == 0) >> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); >> >> - /* next pfn is found, refresh last here */ >> - last = new; >> - consuming_pfn++; >> - info->consumed_pfn++; >> - page_data_buf[index].ready = 0; >> - >> - if (page_data_buf[index].dumpable == FALSE) >> - goto unlock; >> - >> num_dumped++; >> >> - if (page_data_buf[index].zero == TRUE) { >> + >> + if (info->page_flag_buf[consuming]->zero == TRUE) { >> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) >> goto out; >> pfn_zero++; >> } else { >> + index = info->page_flag_buf[consuming]->index; >> pd.flags = page_data_buf[index].flags; >> pd.size = page_data_buf[index].size; >> pd.page_flags = 0; >> @@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> */ >> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) >> goto out; >> - >> + page_data_buf[index].used = FALSE; >> } >> -unlock: >> - pthread_mutex_unlock(&page_data_buf[index].mutex); >> + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; >> + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; >> } >> - >> +finish: >> ret = TRUE; >> /* >> * print [100 %] >> @@ -7463,15 +7532,9 @@ out: >> } >> } >> >> - if (page_data_buf != NULL) { >> - for (i = 0; i < page_data_num; i++) { >> - pthread_mutex_destroy(&page_data_buf[i].mutex); >> - } >> - } >> - >> + sem_destroy(&info->page_flag_buf_sem); >> pthread_rwlock_destroy(&info->usemmap_rwlock); >> pthread_mutex_destroy(&info->filter_mutex); >> - pthread_mutex_destroy(&info->consumed_pfn_mutex); >> pthread_mutex_destroy(&info->current_pfn_mutex); >> >> return ret; >> @@ -7564,6 
+7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag >> num_dumped++; >> if (!read_pfn(pfn, buf)) >> goto out; >> + >> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); >> >> /* >> diff --git a/makedumpfile.h b/makedumpfile.h >> index e0b5bbf..4b315c0 100644 >> --- a/makedumpfile.h >> +++ b/makedumpfile.h >> @@ -44,6 +44,7 @@ >> #include "print_info.h" >> #include "sadump_mod.h" >> #include <pthread.h> >> +#include <semaphore.h> >> >> /* >> * Result of command >> @@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; >> #define PAGE_DATA_NUM (50) >> #define WAIT_TIME (60 * 10) >> #define PTHREAD_FAIL ((void *)-2) >> -#define NUM_BUFFERS (50) >> +#define NUM_BUFFERS (20) >> >> struct mmap_cache { >> char *mmap_buf; >> @@ -985,28 +986,33 @@ struct mmap_cache { >> off_t mmap_end_offset; >> }; >> >> +enum { >> + FLAG_UNUSED, >> + FLAG_READY, >> + FLAG_FILLING >> +}; >> +struct page_flag { >> + mdf_pfn_t pfn; >> + char zero; >> + char ready; >> + short index; >> + struct page_flag *next; >> +}; >> + >> struct page_data >> { >> - mdf_pfn_t pfn; >> - int dumpable; >> - int zero; >> - unsigned int flags; >> long size; >> unsigned char *buf; >> - pthread_mutex_t mutex; >> - /* >> - * whether the page_data is ready to be consumed >> - */ >> - int ready; >> + int flags; >> + int used; >> }; >> >> struct thread_args { >> int thread_num; >> unsigned long len_buf_out; >> - mdf_pfn_t start_pfn, end_pfn; >> - int page_data_num; >> struct cycle *cycle; >> struct page_data *page_data_buf; >> + struct page_flag *page_flag_buf; >> }; >> >> /* >> @@ -1295,11 +1301,12 @@ struct DumpInfo { >> pthread_t **threads; >> struct thread_args *kdump_thread_args; >> struct page_data *page_data_buf; >> + struct page_flag **page_flag_buf; >> + sem_t page_flag_buf_sem; >> pthread_rwlock_t usemmap_rwlock; >> mdf_pfn_t current_pfn; >> pthread_mutex_t current_pfn_mutex; >> - mdf_pfn_t consumed_pfn; >> - pthread_mutex_t consumed_pfn_mutex; >> + 
pthread_mutex_t page_data_mutex; >> pthread_mutex_t filter_mutex; >> }; >> extern struct DumpInfo *info; >> -- >> 1.8.3.1 >> >> >> >> >> _______________________________________________ >> kexec mailing list >> kexec@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/kexec > > _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-15 7:12 ` "Zhou, Wenjian/周文剑" @ 2016-03-15 7:38 ` Minfei Huang 2016-03-15 9:33 ` Minfei Huang 1 sibling, 0 replies; 33+ messages in thread From: Minfei Huang @ 2016-03-15 7:38 UTC (permalink / raw) To: "Zhou, Wenjian/周文剑"; +Cc: kexec On 03/15/16 at 03:12pm, "Zhou, Wenjian/周文剑" wrote: > Hello Minfei, > > I guess the result is affected by the caches. > How about executing the following command before running makedumpfile each time? > # echo 3 > /proc/sys/vm/drop_caches All of cache will be dropped before starting new test. Hmm, it is limited to debug in kdump kernel. I will use "strace" to debug it later. Thanks Minfei > > -- > Thanks > Zhou > > On 03/15/2016 02:34 PM, Minfei Huang wrote: > >Hi, Zhou. > > > >I have applied this patch base on 1.5.9. There are several testcases I > >have tested. > > > >- makedumpfile --num-threads 64 -d 31 > > real 0m0.010s > > user 0m0.002s > > sys 0m0.009s > > > >- makedumpfile --num-threads 31 -d 31 > > real 2m40.915s > > user 10m50.900s > > sys 23m9.664s > > > >makedumpfile --num-threads 30 -d 31 > > real 0m0.006s > > user 0m0.002s > > sys 0m0.004s > > > >makedumpfile --num-threads 32 -d 31 > > real 0m0.007s > > user 0m0.002s > > sys 0m0.005s > > > >- makedumpfile --num-threads 8 -d 31 > > real 2m32.692s > > user 7m4.630s > > sys 2m0.369s > > > >- makedumpfile --num-threads 1 -d 31 > > real 4m42.423s > > user 7m27.153s > > sys 0m22.490s > > > >- makedumpfile.orig -d 31 > > real 4m1.297s > > user 3m39.696s > > sys 0m15.200s > > > >This patch has a huge increment to the filter performance under 31. But > >it is not stable, since makedumpfile fails to dump vmcore intermittently. > >You can find the above test result, makedumpfile fails to dump vmcore > >with option --num-threads 64, also it may occur with option > >--number-threads 8. 
> > > >Thanks > >Minfei _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-15 7:12 ` "Zhou, Wenjian/周文剑" 2016-03-15 7:38 ` Minfei Huang @ 2016-03-15 9:33 ` Minfei Huang 2016-03-16 1:55 ` "Zhou, Wenjian/周文剑" 1 sibling, 1 reply; 33+ messages in thread From: Minfei Huang @ 2016-03-15 9:33 UTC (permalink / raw) To: "Zhou, Wenjian/周文剑"; +Cc: kexec On 03/15/16 at 03:12pm, "Zhou, Wenjian/周文剑" wrote: > Hello Minfei, > > I guess the result is affected by the caches. > How about executing the following command before running makedumpfile each time? > # echo 3 > /proc/sys/vm/drop_caches Hi, Zhou. Seem there is a bug during dumping vmcore with option num-threads. 1307 open("/proc/meminfo", O_RDONLY) = 4 1308 fstat(4, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 1309 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f59322d3000 1310 read(4, "MemTotal: 385452 kB\nMemF"..., 1024) = 1024 1311 close(4) = 0 1312 munmap(0x7f59322d3000, 4096) = 0 1313 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) 1314 mmap(NULL, 18446744048584523776, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) 1315 mmap(NULL, 134217728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x7f5927bb2000 1316 munmap(0x7f5927bb2000, 4513792) = 0 1317 munmap(0x7f592c000000, 62595072) = 0 1318 mprotect(0x7f5928000000, 135168, PROT_READ|PROT_WRITE) = 0 1319 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) Thanks Minfei > > -- > Thanks > Zhou > > On 03/15/2016 02:34 PM, Minfei Huang wrote: > >Hi, Zhou. > > > >I have applied this patch base on 1.5.9. There are several testcases I > >have tested. 
> > > >- makedumpfile --num-threads 64 -d 31 > > real 0m0.010s > > user 0m0.002s > > sys 0m0.009s > > > >- makedumpfile --num-threads 31 -d 31 > > real 2m40.915s > > user 10m50.900s > > sys 23m9.664s > > > >makedumpfile --num-threads 30 -d 31 > > real 0m0.006s > > user 0m0.002s > > sys 0m0.004s > > > >makedumpfile --num-threads 32 -d 31 > > real 0m0.007s > > user 0m0.002s > > sys 0m0.005s > > > >- makedumpfile --num-threads 8 -d 31 > > real 2m32.692s > > user 7m4.630s > > sys 2m0.369s > > > >- makedumpfile --num-threads 1 -d 31 > > real 4m42.423s > > user 7m27.153s > > sys 0m22.490s > > > >- makedumpfile.orig -d 31 > > real 4m1.297s > > user 3m39.696s > > sys 0m15.200s > > > >This patch has a huge increment to the filter performance under 31. But > >it is not stable, since makedumpfile fails to dump vmcore intermittently. > >You can find the above test result, makedumpfile fails to dump vmcore > >with option --num-threads 64, also it may occur with option > >--number-threads 8. > > > >Thanks > >Minfei > > > >On 03/09/16 at 08:27am, Zhou Wenjian wrote: > >>v4: > >> 1. fix a bug caused by the logic > >>v3: > >> 1. remove some unused variables > >> 2. fix a bug caused by the wrong logic > >> 3. fix a bug caused by optimising > >> 4. improve more performance by using Minoru Usui's code > >> > >>multi-threads implementation will introduce extra cost when handling > >>each page. The origin implementation will also do the extra work for > >>filtered pages. So there is a big performance degradation in > >>--num-threads -d 31. > >>The new implementation won't do the extra work for filtered pages any > >>more. So the performance of -d 31 is close to that of serial processing. > >> > >>The new implementation is just like the following: > >> * The basic idea is producer producing page and consumer writing page. > >> * Each producer have a page_flag_buf list which is used for storing > >> page's description. 
> >> * The size of page_flag_buf is little so it won't take too much memory. > >> * And all producers will share a page_data_buf array which is > >> used for storing page's compressed data. > >> * The main thread is the consumer. It will find the next pfn and write > >> it into file. > >> * The next pfn is smallest pfn in all page_flag_buf. > >> > >>Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> > >>Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> > >>--- > >> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- > >> makedumpfile.h | 35 ++++--- > >> 2 files changed, 202 insertions(+), 131 deletions(-) > >> > >>diff --git a/makedumpfile.c b/makedumpfile.c > >>index fa0b779..2b0864a 100644 > >>--- a/makedumpfile.c > >>+++ b/makedumpfile.c > >>@@ -3483,7 +3483,8 @@ initial_for_parallel() > >> unsigned long page_data_buf_size; > >> unsigned long limit_size; > >> int page_data_num; > >>- int i; > >>+ struct page_flag *current; > >>+ int i, j; > >> > >> len_buf_out = calculate_len_buf_out(info->page_size); > >> > >>@@ -3560,10 +3561,16 @@ initial_for_parallel() > >> > >> limit_size = (get_free_memory_size() > >> - MAP_REGION * info->num_threads) * 0.6; > >>+ if (limit_size < 0) { > >>+ MSG("Free memory is not enough for multi-threads\n"); > >>+ return FALSE; > >>+ } > >> > >> page_data_num = limit_size / page_data_buf_size; > >>+ info->num_buffers = 3 * info->num_threads; > >> > >>- info->num_buffers = MIN(NUM_BUFFERS, page_data_num); > >>+ info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); > >>+ info->num_buffers = MIN(info->num_buffers, page_data_num); > >> > >> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", > >> info->num_buffers); > >>@@ -3588,6 +3595,36 @@ initial_for_parallel() > >> } > >> > >> /* > >>+ * initial page_flag for each thread > >>+ */ > >>+ if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) > >>+ == NULL) { > >>+ MSG("Can't allocate memory for page_flag_buf. 
%s\n", > >>+ strerror(errno)); > >>+ return FALSE; > >>+ } > >>+ memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); > >>+ > >>+ for (i = 0; i < info->num_threads; i++) { > >>+ if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { > >>+ MSG("Can't allocate memory for page_flag. %s\n", > >>+ strerror(errno)); > >>+ return FALSE; > >>+ } > >>+ current = info->page_flag_buf[i]; > >>+ > >>+ for (j = 1; j < NUM_BUFFERS; j++) { > >>+ if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { > >>+ MSG("Can't allocate memory for page_flag. %s\n", > >>+ strerror(errno)); > >>+ return FALSE; > >>+ } > >>+ current = current->next; > >>+ } > >>+ current->next = info->page_flag_buf[i]; > >>+ } > >>+ > >>+ /* > >> * initial fd_memory for threads > >> */ > >> for (i = 0; i < info->num_threads; i++) { > >>@@ -3612,7 +3649,8 @@ initial_for_parallel() > >> void > >> free_for_parallel() > >> { > >>- int i; > >>+ int i, j; > >>+ struct page_flag *current; > >> > >> if (info->threads != NULL) { > >> for (i = 0; i < info->num_threads; i++) { > >>@@ -3655,6 +3693,19 @@ free_for_parallel() > >> free(info->page_data_buf); > >> } > >> > >>+ if (info->page_flag_buf != NULL) { > >>+ for (i = 0; i < info->num_threads; i++) { > >>+ for (j = 0; j < NUM_BUFFERS; j++) { > >>+ if (info->page_flag_buf[i] != NULL) { > >>+ current = info->page_flag_buf[i]; > >>+ info->page_flag_buf[i] = current->next; > >>+ free(current); > >>+ } > >>+ } > >>+ } > >>+ free(info->page_flag_buf); > >>+ } > >>+ > >> if (info->parallel_info == NULL) > >> return; > >> > >>@@ -7075,11 +7126,11 @@ void * > >> kdump_thread_function_cyclic(void *arg) { > >> void *retval = PTHREAD_FAIL; > >> struct thread_args *kdump_thread_args = (struct thread_args *)arg; > >>- struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > >>+ volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > >>+ volatile struct page_flag *page_flag_buf = 
kdump_thread_args->page_flag_buf; > >> struct cycle *cycle = kdump_thread_args->cycle; > >>- int page_data_num = kdump_thread_args->page_data_num; > >>- mdf_pfn_t pfn; > >>- int index; > >>+ mdf_pfn_t pfn = cycle->start_pfn; > >>+ int index = kdump_thread_args->thread_num; > >> int buf_ready; > >> int dumpable; > >> int fd_memory = 0; > >>@@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { > >> kdump_thread_args->thread_num); > >> } > >> > >>- while (1) { > >>- /* get next pfn */ > >>- pthread_mutex_lock(&info->current_pfn_mutex); > >>- pfn = info->current_pfn; > >>- info->current_pfn++; > >>- pthread_mutex_unlock(&info->current_pfn_mutex); > >>- > >>- if (pfn >= kdump_thread_args->end_pfn) > >>- break; > >>- > >>- index = -1; > >>+ /* > >>+ * filtered page won't take anything > >>+ * unfiltered zero page will only take a page_flag_buf > >>+ * unfiltered non-zero page will take a page_flag_buf and a page_data_buf > >>+ */ > >>+ while (pfn < cycle->end_pfn) { > >> buf_ready = FALSE; > >> > >>+ pthread_mutex_lock(&info->page_data_mutex); > >>+ while (page_data_buf[index].used != FALSE) { > >>+ index = (index + 1) % info->num_buffers; > >>+ } > >>+ page_data_buf[index].used = TRUE; > >>+ pthread_mutex_unlock(&info->page_data_mutex); > >>+ > >> while (buf_ready == FALSE) { > >> pthread_testcancel(); > >>- > >>- index = pfn % page_data_num; > >>- > >>- if (pfn - info->consumed_pfn > info->num_buffers) > >>+ if (page_flag_buf->ready == FLAG_READY) > >> continue; > >> > >>- if (page_data_buf[index].ready != 0) > >>- continue; > >>- > >>- pthread_mutex_lock(&page_data_buf[index].mutex); > >>- > >>- if (page_data_buf[index].ready != 0) > >>- goto unlock; > >>- > >>- buf_ready = TRUE; > >>+ /* get next dumpable pfn */ > >>+ pthread_mutex_lock(&info->current_pfn_mutex); > >>+ for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { > >>+ dumpable = is_dumpable( > >>+ info->fd_bitmap ? 
&bitmap_parallel : info->bitmap2, > >>+ pfn, > >>+ cycle); > >>+ if (dumpable) > >>+ break; > >>+ } > >>+ info->current_pfn = pfn + 1; > >> > >>- page_data_buf[index].pfn = pfn; > >>- page_data_buf[index].ready = 1; > >>+ page_flag_buf->pfn = pfn; > >>+ page_flag_buf->ready = FLAG_FILLING; > >>+ pthread_mutex_unlock(&info->current_pfn_mutex); > >>+ sem_post(&info->page_flag_buf_sem); > >> > >>- dumpable = is_dumpable( > >>- info->fd_bitmap ? &bitmap_parallel : info->bitmap2, > >>- pfn, > >>- cycle); > >>- page_data_buf[index].dumpable = dumpable; > >>- if (!dumpable) > >>- goto unlock; > >>+ if (pfn >= cycle->end_pfn) { > >>+ info->current_pfn = cycle->end_pfn; > >>+ page_data_buf[index].used = FALSE; > >>+ break; > >>+ } > >> > >> if (!read_pfn_parallel(fd_memory, pfn, buf, > >> &bitmap_memory_parallel, > >>@@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { > >> > >> if ((info->dump_level & DL_EXCLUDE_ZERO) > >> && is_zero_page(buf, info->page_size)) { > >>- page_data_buf[index].zero = TRUE; > >>- goto unlock; > >>+ page_flag_buf->zero = TRUE; > >>+ goto next; > >> } > >> > >>- page_data_buf[index].zero = FALSE; > >>+ page_flag_buf->zero = FALSE; > >> > >> /* > >> * Compress the page data. 
> >>@@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { > >> page_data_buf[index].flags = > >> DUMP_DH_COMPRESSED_LZO; > >> page_data_buf[index].size = size_out; > >>+ > >> memcpy(page_data_buf[index].buf, buf_out, size_out); > >> #endif > >> #ifdef USESNAPPY > >>@@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { > >> page_data_buf[index].size = info->page_size; > >> memcpy(page_data_buf[index].buf, buf, info->page_size); > >> } > >>-unlock: > >>- pthread_mutex_unlock(&page_data_buf[index].mutex); > >>+ page_flag_buf->index = index; > >>+ buf_ready = TRUE; > >>+next: > >>+ page_flag_buf->ready = FLAG_READY; > >>+ page_flag_buf = page_flag_buf->next; > >> > >> } > >> } > >>- > >> retval = NULL; > >> > >> fail: > >>@@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> struct page_desc pd; > >> struct timeval tv_start; > >> struct timeval last, new; > >>- unsigned long long consuming_pfn; > >> pthread_t **threads = NULL; > >> struct thread_args *kdump_thread_args = NULL; > >> void *thread_result; > >>- int page_data_num; > >>+ int page_buf_num; > >> struct page_data *page_data_buf = NULL; > >> int i; > >> int index; > >>+ int end_count, consuming, check_count; > >>+ mdf_pfn_t current_pfn, temp_pfn; > >> > >> if (info->flag_elf_dumpfile) > >> return FALSE; > >>@@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> goto out; > >> } > >> > >>- res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); > >>- if (res != 0) { > >>- ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", > >>- strerror(res)); > >>- goto out; > >>- } > >>- > >> res = pthread_mutex_init(&info->filter_mutex, NULL); > >> if (res != 0) { > >> ERRMSG("Can't initialize filter_mutex. 
%s\n", strerror(res)); > >>@@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> end_pfn = cycle->end_pfn; > >> > >> info->current_pfn = start_pfn; > >>- info->consumed_pfn = start_pfn - 1; > >> > >> threads = info->threads; > >> kdump_thread_args = info->kdump_thread_args; > >> > >>- page_data_num = info->num_buffers; > >>+ page_buf_num = info->num_buffers; > >> page_data_buf = info->page_data_buf; > >>+ pthread_mutex_init(&info->page_data_mutex, NULL); > >>+ sem_init(&info->page_flag_buf_sem, 0, 0); > >> > >>- for (i = 0; i < page_data_num; i++) { > >>- /* > >>- * producer will use pfn in page_data_buf to decide the > >>- * consumed pfn > >>- */ > >>- page_data_buf[i].pfn = start_pfn - 1; > >>- page_data_buf[i].ready = 0; > >>- res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); > >>- if (res != 0) { > >>- ERRMSG("Can't initialize mutex of page_data_buf. %s\n", > >>- strerror(res)); > >>- goto out; > >>- } > >>- } > >>+ for (i = 0; i < page_buf_num; i++) > >>+ page_data_buf[i].used = FALSE; > >> > >> for (i = 0; i < info->num_threads; i++) { > >> kdump_thread_args[i].thread_num = i; > >> kdump_thread_args[i].len_buf_out = len_buf_out; > >>- kdump_thread_args[i].start_pfn = start_pfn; > >>- kdump_thread_args[i].end_pfn = end_pfn; > >>- kdump_thread_args[i].page_data_num = page_data_num; > >> kdump_thread_args[i].page_data_buf = page_data_buf; > >>+ kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; > >> kdump_thread_args[i].cycle = cycle; > >> > >> res = pthread_create(threads[i], NULL, > >>@@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> } > >> } > >> > >>- consuming_pfn = start_pfn; > >>- index = -1; > >>+ end_count = 0; > >>+ while (1) { > >>+ consuming = 0; > >>+ check_count = 0; > >> > >>- gettimeofday(&last, NULL); > >>+ /* > >>+ * The basic idea is producer producing page and consumer writing page. 
> >>+ * Each producer have a page_flag_buf list which is used for storing page's description. > >>+ * The size of page_flag_buf is little so it won't take too much memory. > >>+ * And all producers will share a page_data_buf array which is used for storing page's compressed data. > >>+ * The main thread is the consumer. It will find the next pfn and write it into file. > >>+ * The next pfn is smallest pfn in all page_flag_buf. > >>+ */ > >>+ sem_wait(&info->page_flag_buf_sem); > >>+ gettimeofday(&last, NULL); > >>+ while (1) { > >>+ current_pfn = end_pfn; > >> > >>- while (consuming_pfn < end_pfn) { > >>- index = consuming_pfn % page_data_num; > >>+ /* > >>+ * page_flag_buf is in circular linked list. > >>+ * The array info->page_flag_buf[] records the current page_flag_buf in each thread's > >>+ * page_flag_buf list. > >>+ * consuming is used for recording in which thread the pfn is the smallest. > >>+ * current_pfn is used for recording the value of pfn when checking the pfn. > >>+ */ > >>+ for (i = 0; i < info->num_threads; i++) { > >>+ if (info->page_flag_buf[i]->ready == FLAG_UNUSED) > >>+ continue; > >>+ temp_pfn = info->page_flag_buf[i]->pfn; > >> > >>- gettimeofday(&new, NULL); > >>- if (new.tv_sec - last.tv_sec > WAIT_TIME) { > >>- ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); > >>- goto out; > >>- } > >>+ /* > >>+ * count how many threads have reached the end. 
> >>+ */ > >>+ if (temp_pfn >= end_pfn) { > >>+ info->page_flag_buf[i]->ready = FLAG_UNUSED; > >>+ end_count++; > >>+ continue; > >>+ } > >> > >>- /* > >>- * check pfn first without mutex locked to reduce the time > >>- * trying to lock the mutex > >>- */ > >>- if (page_data_buf[index].pfn != consuming_pfn) > >>- continue; > >>+ if (current_pfn < temp_pfn) > >>+ continue; > >> > >>- if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) > >>- continue; > >>+ check_count++; > >>+ consuming = i; > >>+ current_pfn = temp_pfn; > >>+ } > >> > >>- /* check whether the found one is ready to be consumed */ > >>- if (page_data_buf[index].pfn != consuming_pfn || > >>- page_data_buf[index].ready != 1) { > >>- goto unlock; > >>+ /* > >>+ * If all the threads have reached the end, we will finish writing. > >>+ */ > >>+ if (end_count >= info->num_threads) > >>+ goto finish; > >>+ > >>+ /* > >>+ * If the page_flag_buf is not ready, the pfn recorded may be changed. > >>+ * So we should recheck. > >>+ */ > >>+ if (info->page_flag_buf[consuming]->ready != FLAG_READY) { > >>+ gettimeofday(&new, NULL); > >>+ if (new.tv_sec - last.tv_sec > WAIT_TIME) { > >>+ ERRMSG("Can't get data of pfn.\n"); > >>+ goto out; > >>+ } > >>+ continue; > >>+ } > >>+ > >>+ if (current_pfn == info->page_flag_buf[consuming]->pfn) > >>+ break; > >> } > >> > >> if ((num_dumped % per) == 0) > >> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); > >> > >>- /* next pfn is found, refresh last here */ > >>- last = new; > >>- consuming_pfn++; > >>- info->consumed_pfn++; > >>- page_data_buf[index].ready = 0; > >>- > >>- if (page_data_buf[index].dumpable == FALSE) > >>- goto unlock; > >>- > >> num_dumped++; > >> > >>- if (page_data_buf[index].zero == TRUE) { > >>+ > >>+ if (info->page_flag_buf[consuming]->zero == TRUE) { > >> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) > >> goto out; > >> pfn_zero++; > >> } else { > >>+ index = info->page_flag_buf[consuming]->index; > >> pd.flags = 
page_data_buf[index].flags; > >> pd.size = page_data_buf[index].size; > >> pd.page_flags = 0; > >>@@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> */ > >> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) > >> goto out; > >>- > >>+ page_data_buf[index].used = FALSE; > >> } > >>-unlock: > >>- pthread_mutex_unlock(&page_data_buf[index].mutex); > >>+ info->page_flag_buf[consuming]->ready = FLAG_UNUSED; > >>+ info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; > >> } > >>- > >>+finish: > >> ret = TRUE; > >> /* > >> * print [100 %] > >>@@ -7463,15 +7532,9 @@ out: > >> } > >> } > >> > >>- if (page_data_buf != NULL) { > >>- for (i = 0; i < page_data_num; i++) { > >>- pthread_mutex_destroy(&page_data_buf[i].mutex); > >>- } > >>- } > >>- > >>+ sem_destroy(&info->page_flag_buf_sem); > >> pthread_rwlock_destroy(&info->usemmap_rwlock); > >> pthread_mutex_destroy(&info->filter_mutex); > >>- pthread_mutex_destroy(&info->consumed_pfn_mutex); > >> pthread_mutex_destroy(&info->current_pfn_mutex); > >> > >> return ret; > >>@@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag > >> num_dumped++; > >> if (!read_pfn(pfn, buf)) > >> goto out; > >>+ > >> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); > >> > >> /* > >>diff --git a/makedumpfile.h b/makedumpfile.h > >>index e0b5bbf..4b315c0 100644 > >>--- a/makedumpfile.h > >>+++ b/makedumpfile.h > >>@@ -44,6 +44,7 @@ > >> #include "print_info.h" > >> #include "sadump_mod.h" > >> #include <pthread.h> > >>+#include <semaphore.h> > >> > >> /* > >> * Result of command > >>@@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; > >> #define PAGE_DATA_NUM (50) > >> #define WAIT_TIME (60 * 10) > >> #define PTHREAD_FAIL ((void *)-2) > >>-#define NUM_BUFFERS (50) > >>+#define NUM_BUFFERS (20) > >> > >> struct mmap_cache { > >> char *mmap_buf; > >>@@ -985,28 +986,33 @@ struct mmap_cache { > >> off_t 
mmap_end_offset; > >> }; > >> > >>+enum { > >>+ FLAG_UNUSED, > >>+ FLAG_READY, > >>+ FLAG_FILLING > >>+}; > >>+struct page_flag { > >>+ mdf_pfn_t pfn; > >>+ char zero; > >>+ char ready; > >>+ short index; > >>+ struct page_flag *next; > >>+}; > >>+ > >> struct page_data > >> { > >>- mdf_pfn_t pfn; > >>- int dumpable; > >>- int zero; > >>- unsigned int flags; > >> long size; > >> unsigned char *buf; > >>- pthread_mutex_t mutex; > >>- /* > >>- * whether the page_data is ready to be consumed > >>- */ > >>- int ready; > >>+ int flags; > >>+ int used; > >> }; > >> > >> struct thread_args { > >> int thread_num; > >> unsigned long len_buf_out; > >>- mdf_pfn_t start_pfn, end_pfn; > >>- int page_data_num; > >> struct cycle *cycle; > >> struct page_data *page_data_buf; > >>+ struct page_flag *page_flag_buf; > >> }; > >> > >> /* > >>@@ -1295,11 +1301,12 @@ struct DumpInfo { > >> pthread_t **threads; > >> struct thread_args *kdump_thread_args; > >> struct page_data *page_data_buf; > >>+ struct page_flag **page_flag_buf; > >>+ sem_t page_flag_buf_sem; > >> pthread_rwlock_t usemmap_rwlock; > >> mdf_pfn_t current_pfn; > >> pthread_mutex_t current_pfn_mutex; > >>- mdf_pfn_t consumed_pfn; > >>- pthread_mutex_t consumed_pfn_mutex; > >>+ pthread_mutex_t page_data_mutex; > >> pthread_mutex_t filter_mutex; > >> }; > >> extern struct DumpInfo *info; > >>-- > >>1.8.3.1 > >> > >> > >> > >> > >>_______________________________________________ > >>kexec mailing list > >>kexec@lists.infradead.org > >>http://lists.infradead.org/mailman/listinfo/kexec > > > > > > > > _______________________________________________ > kexec mailing list > kexec@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/kexec _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-15 9:33 ` Minfei Huang @ 2016-03-16 1:55 ` "Zhou, Wenjian/周文剑" 2016-03-16 8:04 ` Minfei Huang 0 siblings, 1 reply; 33+ messages in thread From: "Zhou, Wenjian/周文剑" @ 2016-03-16 1:55 UTC (permalink / raw) To: Minfei Huang; +Cc: kexec Hi Minfei, I have some questions. If the value of num-threads is 8, 1. How much is the free memory before running makedumpfile failed? 2. How much is the free memory before running makedumpfile success? And the following result is very strange if all cache has been dropped. makedumpfile --num-threads 30 -d 31 real 0m0.006s user 0m0.002s sys 0m0.004s -- Thanks Zhou On 03/15/2016 05:33 PM, Minfei Huang wrote: > On 03/15/16 at 03:12pm, "Zhou, Wenjian/周文剑" wrote: >> Hello Minfei, >> >> I guess the result is affected by the caches. >> How about executing the following command before running makedumpfile each time? >> # echo 3 > /proc/sys/vm/drop_caches > > Hi, Zhou. > > Seem there is a bug during dumping vmcore with option num-threads. 
> > 1307 open("/proc/meminfo", O_RDONLY) = 4 > 1308 fstat(4, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 > 1309 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f59322d3000 > 1310 read(4, "MemTotal: 385452 kB\nMemF"..., 1024) = 1024 > 1311 close(4) = 0 > 1312 munmap(0x7f59322d3000, 4096) = 0 > 1313 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) > 1314 mmap(NULL, 18446744048584523776, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) > 1315 mmap(NULL, 134217728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x7f5927bb2000 > 1316 munmap(0x7f5927bb2000, 4513792) = 0 > 1317 munmap(0x7f592c000000, 62595072) = 0 > 1318 mprotect(0x7f5928000000, 135168, PROT_READ|PROT_WRITE) = 0 > 1319 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) > > Thanks > Minfei > >> >> -- >> Thanks >> Zhou >> >> On 03/15/2016 02:34 PM, Minfei Huang wrote: >>> Hi, Zhou. >>> >>> I have applied this patch base on 1.5.9. There are several testcases I >>> have tested. >>> >>> - makedumpfile --num-threads 64 -d 31 >>> real 0m0.010s >>> user 0m0.002s >>> sys 0m0.009s >>> >>> - makedumpfile --num-threads 31 -d 31 >>> real 2m40.915s >>> user 10m50.900s >>> sys 23m9.664s >>> >>> makedumpfile --num-threads 30 -d 31 >>> real 0m0.006s >>> user 0m0.002s >>> sys 0m0.004s >>> >>> makedumpfile --num-threads 32 -d 31 >>> real 0m0.007s >>> user 0m0.002s >>> sys 0m0.005s >>> >>> - makedumpfile --num-threads 8 -d 31 >>> real 2m32.692s >>> user 7m4.630s >>> sys 2m0.369s >>> >>> - makedumpfile --num-threads 1 -d 31 >>> real 4m42.423s >>> user 7m27.153s >>> sys 0m22.490s >>> >>> - makedumpfile.orig -d 31 >>> real 4m1.297s >>> user 3m39.696s >>> sys 0m15.200s >>> >>> This patch has a huge increment to the filter performance under 31. 
But >>> it is not stable, since makedumpfile fails to dump vmcore intermittently. >>> You can find the above test result, makedumpfile fails to dump vmcore >>> with option --num-threads 64, also it may occur with option >>> --number-threads 8. >>> >>> Thanks >>> Minfei >>> >>> On 03/09/16 at 08:27am, Zhou Wenjian wrote: >>>> v4: >>>> 1. fix a bug caused by the logic >>>> v3: >>>> 1. remove some unused variables >>>> 2. fix a bug caused by the wrong logic >>>> 3. fix a bug caused by optimising >>>> 4. improve more performance by using Minoru Usui's code >>>> >>>> multi-threads implementation will introduce extra cost when handling >>>> each page. The origin implementation will also do the extra work for >>>> filtered pages. So there is a big performance degradation in >>>> --num-threads -d 31. >>>> The new implementation won't do the extra work for filtered pages any >>>> more. So the performance of -d 31 is close to that of serial processing. >>>> >>>> The new implementation is just like the following: >>>> * The basic idea is producer producing page and consumer writing page. >>>> * Each producer have a page_flag_buf list which is used for storing >>>> page's description. >>>> * The size of page_flag_buf is little so it won't take too much memory. >>>> * And all producers will share a page_data_buf array which is >>>> used for storing page's compressed data. >>>> * The main thread is the consumer. It will find the next pfn and write >>>> it into file. >>>> * The next pfn is smallest pfn in all page_flag_buf. 
>>>> >>>> Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> >>>> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> >>>> --- >>>> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- >>>> makedumpfile.h | 35 ++++--- >>>> 2 files changed, 202 insertions(+), 131 deletions(-) >>>> >>>> diff --git a/makedumpfile.c b/makedumpfile.c >>>> index fa0b779..2b0864a 100644 >>>> --- a/makedumpfile.c >>>> +++ b/makedumpfile.c >>>> @@ -3483,7 +3483,8 @@ initial_for_parallel() >>>> unsigned long page_data_buf_size; >>>> unsigned long limit_size; >>>> int page_data_num; >>>> - int i; >>>> + struct page_flag *current; >>>> + int i, j; >>>> >>>> len_buf_out = calculate_len_buf_out(info->page_size); >>>> >>>> @@ -3560,10 +3561,16 @@ initial_for_parallel() >>>> >>>> limit_size = (get_free_memory_size() >>>> - MAP_REGION * info->num_threads) * 0.6; >>>> + if (limit_size < 0) { >>>> + MSG("Free memory is not enough for multi-threads\n"); >>>> + return FALSE; >>>> + } >>>> >>>> page_data_num = limit_size / page_data_buf_size; >>>> + info->num_buffers = 3 * info->num_threads; >>>> >>>> - info->num_buffers = MIN(NUM_BUFFERS, page_data_num); >>>> + info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); >>>> + info->num_buffers = MIN(info->num_buffers, page_data_num); >>>> >>>> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", >>>> info->num_buffers); >>>> @@ -3588,6 +3595,36 @@ initial_for_parallel() >>>> } >>>> >>>> /* >>>> + * initial page_flag for each thread >>>> + */ >>>> + if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) >>>> + == NULL) { >>>> + MSG("Can't allocate memory for page_flag_buf. %s\n", >>>> + strerror(errno)); >>>> + return FALSE; >>>> + } >>>> + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); >>>> + >>>> + for (i = 0; i < info->num_threads; i++) { >>>> + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { >>>> + MSG("Can't allocate memory for page_flag. 
%s\n", >>>> + strerror(errno)); >>>> + return FALSE; >>>> + } >>>> + current = info->page_flag_buf[i]; >>>> + >>>> + for (j = 1; j < NUM_BUFFERS; j++) { >>>> + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { >>>> + MSG("Can't allocate memory for page_flag. %s\n", >>>> + strerror(errno)); >>>> + return FALSE; >>>> + } >>>> + current = current->next; >>>> + } >>>> + current->next = info->page_flag_buf[i]; >>>> + } >>>> + >>>> + /* >>>> * initial fd_memory for threads >>>> */ >>>> for (i = 0; i < info->num_threads; i++) { >>>> @@ -3612,7 +3649,8 @@ initial_for_parallel() >>>> void >>>> free_for_parallel() >>>> { >>>> - int i; >>>> + int i, j; >>>> + struct page_flag *current; >>>> >>>> if (info->threads != NULL) { >>>> for (i = 0; i < info->num_threads; i++) { >>>> @@ -3655,6 +3693,19 @@ free_for_parallel() >>>> free(info->page_data_buf); >>>> } >>>> >>>> + if (info->page_flag_buf != NULL) { >>>> + for (i = 0; i < info->num_threads; i++) { >>>> + for (j = 0; j < NUM_BUFFERS; j++) { >>>> + if (info->page_flag_buf[i] != NULL) { >>>> + current = info->page_flag_buf[i]; >>>> + info->page_flag_buf[i] = current->next; >>>> + free(current); >>>> + } >>>> + } >>>> + } >>>> + free(info->page_flag_buf); >>>> + } >>>> + >>>> if (info->parallel_info == NULL) >>>> return; >>>> >>>> @@ -7075,11 +7126,11 @@ void * >>>> kdump_thread_function_cyclic(void *arg) { >>>> void *retval = PTHREAD_FAIL; >>>> struct thread_args *kdump_thread_args = (struct thread_args *)arg; >>>> - struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>>> + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>>> + volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; >>>> struct cycle *cycle = kdump_thread_args->cycle; >>>> - int page_data_num = kdump_thread_args->page_data_num; >>>> - mdf_pfn_t pfn; >>>> - int index; >>>> + mdf_pfn_t pfn = cycle->start_pfn; >>>> + int index = kdump_thread_args->thread_num; >>>> int buf_ready; 
>>>> int dumpable; >>>> int fd_memory = 0; >>>> @@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { >>>> kdump_thread_args->thread_num); >>>> } >>>> >>>> - while (1) { >>>> - /* get next pfn */ >>>> - pthread_mutex_lock(&info->current_pfn_mutex); >>>> - pfn = info->current_pfn; >>>> - info->current_pfn++; >>>> - pthread_mutex_unlock(&info->current_pfn_mutex); >>>> - >>>> - if (pfn >= kdump_thread_args->end_pfn) >>>> - break; >>>> - >>>> - index = -1; >>>> + /* >>>> + * filtered page won't take anything >>>> + * unfiltered zero page will only take a page_flag_buf >>>> + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf >>>> + */ >>>> + while (pfn < cycle->end_pfn) { >>>> buf_ready = FALSE; >>>> >>>> + pthread_mutex_lock(&info->page_data_mutex); >>>> + while (page_data_buf[index].used != FALSE) { >>>> + index = (index + 1) % info->num_buffers; >>>> + } >>>> + page_data_buf[index].used = TRUE; >>>> + pthread_mutex_unlock(&info->page_data_mutex); >>>> + >>>> while (buf_ready == FALSE) { >>>> pthread_testcancel(); >>>> - >>>> - index = pfn % page_data_num; >>>> - >>>> - if (pfn - info->consumed_pfn > info->num_buffers) >>>> + if (page_flag_buf->ready == FLAG_READY) >>>> continue; >>>> >>>> - if (page_data_buf[index].ready != 0) >>>> - continue; >>>> - >>>> - pthread_mutex_lock(&page_data_buf[index].mutex); >>>> - >>>> - if (page_data_buf[index].ready != 0) >>>> - goto unlock; >>>> - >>>> - buf_ready = TRUE; >>>> + /* get next dumpable pfn */ >>>> + pthread_mutex_lock(&info->current_pfn_mutex); >>>> + for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { >>>> + dumpable = is_dumpable( >>>> + info->fd_bitmap ? 
&bitmap_parallel : info->bitmap2, >>>> + pfn, >>>> + cycle); >>>> + if (dumpable) >>>> + break; >>>> + } >>>> + info->current_pfn = pfn + 1; >>>> >>>> - page_data_buf[index].pfn = pfn; >>>> - page_data_buf[index].ready = 1; >>>> + page_flag_buf->pfn = pfn; >>>> + page_flag_buf->ready = FLAG_FILLING; >>>> + pthread_mutex_unlock(&info->current_pfn_mutex); >>>> + sem_post(&info->page_flag_buf_sem); >>>> >>>> - dumpable = is_dumpable( >>>> - info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >>>> - pfn, >>>> - cycle); >>>> - page_data_buf[index].dumpable = dumpable; >>>> - if (!dumpable) >>>> - goto unlock; >>>> + if (pfn >= cycle->end_pfn) { >>>> + info->current_pfn = cycle->end_pfn; >>>> + page_data_buf[index].used = FALSE; >>>> + break; >>>> + } >>>> >>>> if (!read_pfn_parallel(fd_memory, pfn, buf, >>>> &bitmap_memory_parallel, >>>> @@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { >>>> >>>> if ((info->dump_level & DL_EXCLUDE_ZERO) >>>> && is_zero_page(buf, info->page_size)) { >>>> - page_data_buf[index].zero = TRUE; >>>> - goto unlock; >>>> + page_flag_buf->zero = TRUE; >>>> + goto next; >>>> } >>>> >>>> - page_data_buf[index].zero = FALSE; >>>> + page_flag_buf->zero = FALSE; >>>> >>>> /* >>>> * Compress the page data. 
>>>> @@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { >>>> page_data_buf[index].flags = >>>> DUMP_DH_COMPRESSED_LZO; >>>> page_data_buf[index].size = size_out; >>>> + >>>> memcpy(page_data_buf[index].buf, buf_out, size_out); >>>> #endif >>>> #ifdef USESNAPPY >>>> @@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { >>>> page_data_buf[index].size = info->page_size; >>>> memcpy(page_data_buf[index].buf, buf, info->page_size); >>>> } >>>> -unlock: >>>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>>> + page_flag_buf->index = index; >>>> + buf_ready = TRUE; >>>> +next: >>>> + page_flag_buf->ready = FLAG_READY; >>>> + page_flag_buf = page_flag_buf->next; >>>> >>>> } >>>> } >>>> - >>>> retval = NULL; >>>> >>>> fail: >>>> @@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> struct page_desc pd; >>>> struct timeval tv_start; >>>> struct timeval last, new; >>>> - unsigned long long consuming_pfn; >>>> pthread_t **threads = NULL; >>>> struct thread_args *kdump_thread_args = NULL; >>>> void *thread_result; >>>> - int page_data_num; >>>> + int page_buf_num; >>>> struct page_data *page_data_buf = NULL; >>>> int i; >>>> int index; >>>> + int end_count, consuming, check_count; >>>> + mdf_pfn_t current_pfn, temp_pfn; >>>> >>>> if (info->flag_elf_dumpfile) >>>> return FALSE; >>>> @@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> goto out; >>>> } >>>> >>>> - res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); >>>> - if (res != 0) { >>>> - ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", >>>> - strerror(res)); >>>> - goto out; >>>> - } >>>> - >>>> res = pthread_mutex_init(&info->filter_mutex, NULL); >>>> if (res != 0) { >>>> ERRMSG("Can't initialize filter_mutex. 
%s\n", strerror(res)); >>>> @@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> end_pfn = cycle->end_pfn; >>>> >>>> info->current_pfn = start_pfn; >>>> - info->consumed_pfn = start_pfn - 1; >>>> >>>> threads = info->threads; >>>> kdump_thread_args = info->kdump_thread_args; >>>> >>>> - page_data_num = info->num_buffers; >>>> + page_buf_num = info->num_buffers; >>>> page_data_buf = info->page_data_buf; >>>> + pthread_mutex_init(&info->page_data_mutex, NULL); >>>> + sem_init(&info->page_flag_buf_sem, 0, 0); >>>> >>>> - for (i = 0; i < page_data_num; i++) { >>>> - /* >>>> - * producer will use pfn in page_data_buf to decide the >>>> - * consumed pfn >>>> - */ >>>> - page_data_buf[i].pfn = start_pfn - 1; >>>> - page_data_buf[i].ready = 0; >>>> - res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); >>>> - if (res != 0) { >>>> - ERRMSG("Can't initialize mutex of page_data_buf. %s\n", >>>> - strerror(res)); >>>> - goto out; >>>> - } >>>> - } >>>> + for (i = 0; i < page_buf_num; i++) >>>> + page_data_buf[i].used = FALSE; >>>> >>>> for (i = 0; i < info->num_threads; i++) { >>>> kdump_thread_args[i].thread_num = i; >>>> kdump_thread_args[i].len_buf_out = len_buf_out; >>>> - kdump_thread_args[i].start_pfn = start_pfn; >>>> - kdump_thread_args[i].end_pfn = end_pfn; >>>> - kdump_thread_args[i].page_data_num = page_data_num; >>>> kdump_thread_args[i].page_data_buf = page_data_buf; >>>> + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; >>>> kdump_thread_args[i].cycle = cycle; >>>> >>>> res = pthread_create(threads[i], NULL, >>>> @@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> } >>>> } >>>> >>>> - consuming_pfn = start_pfn; >>>> - index = -1; >>>> + end_count = 0; >>>> + while (1) { >>>> + consuming = 0; >>>> + check_count = 0; >>>> >>>> - gettimeofday(&last, NULL); >>>> + /* >>>> + * The basic idea is producer producing page and consumer writing page. 
>>>> + * Each producer have a page_flag_buf list which is used for storing page's description. >>>> + * The size of page_flag_buf is little so it won't take too much memory. >>>> + * And all producers will share a page_data_buf array which is used for storing page's compressed data. >>>> + * The main thread is the consumer. It will find the next pfn and write it into file. >>>> + * The next pfn is smallest pfn in all page_flag_buf. >>>> + */ >>>> + sem_wait(&info->page_flag_buf_sem); >>>> + gettimeofday(&last, NULL); >>>> + while (1) { >>>> + current_pfn = end_pfn; >>>> >>>> - while (consuming_pfn < end_pfn) { >>>> - index = consuming_pfn % page_data_num; >>>> + /* >>>> + * page_flag_buf is in circular linked list. >>>> + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's >>>> + * page_flag_buf list. >>>> + * consuming is used for recording in which thread the pfn is the smallest. >>>> + * current_pfn is used for recording the value of pfn when checking the pfn. >>>> + */ >>>> + for (i = 0; i < info->num_threads; i++) { >>>> + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) >>>> + continue; >>>> + temp_pfn = info->page_flag_buf[i]->pfn; >>>> >>>> - gettimeofday(&new, NULL); >>>> - if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>>> - ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); >>>> - goto out; >>>> - } >>>> + /* >>>> + * count how many threads have reached the end. 
>>>> + */ >>>> + if (temp_pfn >= end_pfn) { >>>> + info->page_flag_buf[i]->ready = FLAG_UNUSED; >>>> + end_count++; >>>> + continue; >>>> + } >>>> >>>> - /* >>>> - * check pfn first without mutex locked to reduce the time >>>> - * trying to lock the mutex >>>> - */ >>>> - if (page_data_buf[index].pfn != consuming_pfn) >>>> - continue; >>>> + if (current_pfn < temp_pfn) >>>> + continue; >>>> >>>> - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) >>>> - continue; >>>> + check_count++; >>>> + consuming = i; >>>> + current_pfn = temp_pfn; >>>> + } >>>> >>>> - /* check whether the found one is ready to be consumed */ >>>> - if (page_data_buf[index].pfn != consuming_pfn || >>>> - page_data_buf[index].ready != 1) { >>>> - goto unlock; >>>> + /* >>>> + * If all the threads have reached the end, we will finish writing. >>>> + */ >>>> + if (end_count >= info->num_threads) >>>> + goto finish; >>>> + >>>> + /* >>>> + * If the page_flag_buf is not ready, the pfn recorded may be changed. >>>> + * So we should recheck. 
>>>> + */ >>>> + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { >>>> + gettimeofday(&new, NULL); >>>> + if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>>> + ERRMSG("Can't get data of pfn.\n"); >>>> + goto out; >>>> + } >>>> + continue; >>>> + } >>>> + >>>> + if (current_pfn == info->page_flag_buf[consuming]->pfn) >>>> + break; >>>> } >>>> >>>> if ((num_dumped % per) == 0) >>>> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); >>>> >>>> - /* next pfn is found, refresh last here */ >>>> - last = new; >>>> - consuming_pfn++; >>>> - info->consumed_pfn++; >>>> - page_data_buf[index].ready = 0; >>>> - >>>> - if (page_data_buf[index].dumpable == FALSE) >>>> - goto unlock; >>>> - >>>> num_dumped++; >>>> >>>> - if (page_data_buf[index].zero == TRUE) { >>>> + >>>> + if (info->page_flag_buf[consuming]->zero == TRUE) { >>>> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) >>>> goto out; >>>> pfn_zero++; >>>> } else { >>>> + index = info->page_flag_buf[consuming]->index; >>>> pd.flags = page_data_buf[index].flags; >>>> pd.size = page_data_buf[index].size; >>>> pd.page_flags = 0; >>>> @@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> */ >>>> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) >>>> goto out; >>>> - >>>> + page_data_buf[index].used = FALSE; >>>> } >>>> -unlock: >>>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>>> + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; >>>> + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; >>>> } >>>> - >>>> +finish: >>>> ret = TRUE; >>>> /* >>>> * print [100 %] >>>> @@ -7463,15 +7532,9 @@ out: >>>> } >>>> } >>>> >>>> - if (page_data_buf != NULL) { >>>> - for (i = 0; i < page_data_num; i++) { >>>> - pthread_mutex_destroy(&page_data_buf[i].mutex); >>>> - } >>>> - } >>>> - >>>> + sem_destroy(&info->page_flag_buf_sem); >>>> pthread_rwlock_destroy(&info->usemmap_rwlock); >>>> pthread_mutex_destroy(&info->filter_mutex); 
>>>> - pthread_mutex_destroy(&info->consumed_pfn_mutex); >>>> pthread_mutex_destroy(&info->current_pfn_mutex); >>>> >>>> return ret; >>>> @@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag >>>> num_dumped++; >>>> if (!read_pfn(pfn, buf)) >>>> goto out; >>>> + >>>> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); >>>> >>>> /* >>>> diff --git a/makedumpfile.h b/makedumpfile.h >>>> index e0b5bbf..4b315c0 100644 >>>> --- a/makedumpfile.h >>>> +++ b/makedumpfile.h >>>> @@ -44,6 +44,7 @@ >>>> #include "print_info.h" >>>> #include "sadump_mod.h" >>>> #include <pthread.h> >>>> +#include <semaphore.h> >>>> >>>> /* >>>> * Result of command >>>> @@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; >>>> #define PAGE_DATA_NUM (50) >>>> #define WAIT_TIME (60 * 10) >>>> #define PTHREAD_FAIL ((void *)-2) >>>> -#define NUM_BUFFERS (50) >>>> +#define NUM_BUFFERS (20) >>>> >>>> struct mmap_cache { >>>> char *mmap_buf; >>>> @@ -985,28 +986,33 @@ struct mmap_cache { >>>> off_t mmap_end_offset; >>>> }; >>>> >>>> +enum { >>>> + FLAG_UNUSED, >>>> + FLAG_READY, >>>> + FLAG_FILLING >>>> +}; >>>> +struct page_flag { >>>> + mdf_pfn_t pfn; >>>> + char zero; >>>> + char ready; >>>> + short index; >>>> + struct page_flag *next; >>>> +}; >>>> + >>>> struct page_data >>>> { >>>> - mdf_pfn_t pfn; >>>> - int dumpable; >>>> - int zero; >>>> - unsigned int flags; >>>> long size; >>>> unsigned char *buf; >>>> - pthread_mutex_t mutex; >>>> - /* >>>> - * whether the page_data is ready to be consumed >>>> - */ >>>> - int ready; >>>> + int flags; >>>> + int used; >>>> }; >>>> >>>> struct thread_args { >>>> int thread_num; >>>> unsigned long len_buf_out; >>>> - mdf_pfn_t start_pfn, end_pfn; >>>> - int page_data_num; >>>> struct cycle *cycle; >>>> struct page_data *page_data_buf; >>>> + struct page_flag *page_flag_buf; >>>> }; >>>> >>>> /* >>>> @@ -1295,11 +1301,12 @@ struct DumpInfo { >>>> pthread_t **threads; >>>> struct thread_args 
*kdump_thread_args; >>>> struct page_data *page_data_buf; >>>> + struct page_flag **page_flag_buf; >>>> + sem_t page_flag_buf_sem; >>>> pthread_rwlock_t usemmap_rwlock; >>>> mdf_pfn_t current_pfn; >>>> pthread_mutex_t current_pfn_mutex; >>>> - mdf_pfn_t consumed_pfn; >>>> - pthread_mutex_t consumed_pfn_mutex; >>>> + pthread_mutex_t page_data_mutex; >>>> pthread_mutex_t filter_mutex; >>>> }; >>>> extern struct DumpInfo *info; >>>> -- >>>> 1.8.3.1 >>>> >>>> >>>> >>>> >>>> _______________________________________________ >>>> kexec mailing list >>>> kexec@lists.infradead.org >>>> http://lists.infradead.org/mailman/listinfo/kexec >>> >>> >> >> >> >> _______________________________________________ >> kexec mailing list >> kexec@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/kexec _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-16 1:55 ` "Zhou, Wenjian/周文剑" @ 2016-03-16 8:04 ` Minfei Huang 2016-03-16 8:24 ` Minfei Huang 2016-03-16 8:26 ` "Zhou, Wenjian/周文剑" 0 siblings, 2 replies; 33+ messages in thread From: Minfei Huang @ 2016-03-16 8:04 UTC (permalink / raw) To: "Zhou, Wenjian/周文剑"; +Cc: kexec On 03/16/16 at 09:55am, "Zhou, Wenjian/周文剑" wrote: > Hi Minfei, > > I have some questions. > > If the value of num-threads is 8, > 1. How much is the free memory before running makedumpfile failed? Hmm, this machine is reserved by other, I have no access to take a look about reserved memory. All of the configuration are set by default. Maybe it's about 420M. > > 2. How much is the free memory before running makedumpfile success? I don't memtion this during testing it. > > > And the following result is very strange if all cache has been dropped. > makedumpfile --num-threads 30 -d 31 > real 0m0.006s > user 0m0.002s > sys 0m0.004s For this case, makedumpfile fails to dump vmcore with option --num-threads 30. I suspect the following output from strace. > >1313 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) > >1314 mmap(NULL, 18446744048584523776, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) > >1315 mmap(NULL, 134217728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x7f5927bb2000 Thanks Minfei > > -- > Thanks > Zhou > > On 03/15/2016 05:33 PM, Minfei Huang wrote: > >On 03/15/16 at 03:12pm, "Zhou, Wenjian/周文剑" wrote: > >>Hello Minfei, > >> > >>I guess the result is affected by the caches. > >>How about executing the following command before running makedumpfile each time? > >># echo 3 > /proc/sys/vm/drop_caches > > > >Hi, Zhou. > > > >Seem there is a bug during dumping vmcore with option num-threads. 
> > > >1307 open("/proc/meminfo", O_RDONLY) = 4 > >1308 fstat(4, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 > >1309 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f59322d3000 > >1310 read(4, "MemTotal: 385452 kB\nMemF"..., 1024) = 1024 > >1311 close(4) = 0 > >1312 munmap(0x7f59322d3000, 4096) = 0 > >1313 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) > >1314 mmap(NULL, 18446744048584523776, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) > >1315 mmap(NULL, 134217728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x7f5927bb2000 > >1316 munmap(0x7f5927bb2000, 4513792) = 0 > >1317 munmap(0x7f592c000000, 62595072) = 0 > >1318 mprotect(0x7f5928000000, 135168, PROT_READ|PROT_WRITE) = 0 > >1319 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) > > > >Thanks > >Minfei > > > >> > >>-- > >>Thanks > >>Zhou > >> > >>On 03/15/2016 02:34 PM, Minfei Huang wrote: > >>>Hi, Zhou. > >>> > >>>I have applied this patch base on 1.5.9. There are several testcases I > >>>have tested. 
> >>> > >>>- makedumpfile --num-threads 64 -d 31 > >>> real 0m0.010s > >>> user 0m0.002s > >>> sys 0m0.009s > >>> > >>>- makedumpfile --num-threads 31 -d 31 > >>> real 2m40.915s > >>> user 10m50.900s > >>> sys 23m9.664s > >>> > >>>makedumpfile --num-threads 30 -d 31 > >>> real 0m0.006s > >>> user 0m0.002s > >>> sys 0m0.004s > >>> > >>>makedumpfile --num-threads 32 -d 31 > >>> real 0m0.007s > >>> user 0m0.002s > >>> sys 0m0.005s > >>> > >>>- makedumpfile --num-threads 8 -d 31 > >>> real 2m32.692s > >>> user 7m4.630s > >>> sys 2m0.369s > >>> > >>>- makedumpfile --num-threads 1 -d 31 > >>> real 4m42.423s > >>> user 7m27.153s > >>> sys 0m22.490s > >>> > >>>- makedumpfile.orig -d 31 > >>> real 4m1.297s > >>> user 3m39.696s > >>> sys 0m15.200s > >>> > >>>This patch has a huge increment to the filter performance under 31. But > >>>it is not stable, since makedumpfile fails to dump vmcore intermittently. > >>>You can find the above test result, makedumpfile fails to dump vmcore > >>>with option --num-threads 64, also it may occur with option > >>>--number-threads 8. > >>> > >>>Thanks > >>>Minfei > >>> > >>>On 03/09/16 at 08:27am, Zhou Wenjian wrote: > >>>>v4: > >>>> 1. fix a bug caused by the logic > >>>>v3: > >>>> 1. remove some unused variables > >>>> 2. fix a bug caused by the wrong logic > >>>> 3. fix a bug caused by optimising > >>>> 4. improve more performance by using Minoru Usui's code > >>>> > >>>>multi-threads implementation will introduce extra cost when handling > >>>>each page. The origin implementation will also do the extra work for > >>>>filtered pages. So there is a big performance degradation in > >>>>--num-threads -d 31. > >>>>The new implementation won't do the extra work for filtered pages any > >>>>more. So the performance of -d 31 is close to that of serial processing. > >>>> > >>>>The new implementation is just like the following: > >>>> * The basic idea is producer producing page and consumer writing page. 
> >>>> * Each producer have a page_flag_buf list which is used for storing > >>>> page's description. > >>>> * The size of page_flag_buf is little so it won't take too much memory. > >>>> * And all producers will share a page_data_buf array which is > >>>> used for storing page's compressed data. > >>>> * The main thread is the consumer. It will find the next pfn and write > >>>> it into file. > >>>> * The next pfn is smallest pfn in all page_flag_buf. > >>>> > >>>>Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> > >>>>Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> > >>>>--- > >>>> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- > >>>> makedumpfile.h | 35 ++++--- > >>>> 2 files changed, 202 insertions(+), 131 deletions(-) > >>>> > >>>>diff --git a/makedumpfile.c b/makedumpfile.c > >>>>index fa0b779..2b0864a 100644 > >>>>--- a/makedumpfile.c > >>>>+++ b/makedumpfile.c > >>>>@@ -3483,7 +3483,8 @@ initial_for_parallel() > >>>> unsigned long page_data_buf_size; > >>>> unsigned long limit_size; > >>>> int page_data_num; > >>>>- int i; > >>>>+ struct page_flag *current; > >>>>+ int i, j; > >>>> > >>>> len_buf_out = calculate_len_buf_out(info->page_size); > >>>> > >>>>@@ -3560,10 +3561,16 @@ initial_for_parallel() > >>>> > >>>> limit_size = (get_free_memory_size() > >>>> - MAP_REGION * info->num_threads) * 0.6; > >>>>+ if (limit_size < 0) { > >>>>+ MSG("Free memory is not enough for multi-threads\n"); > >>>>+ return FALSE; > >>>>+ } > >>>> > >>>> page_data_num = limit_size / page_data_buf_size; > >>>>+ info->num_buffers = 3 * info->num_threads; > >>>> > >>>>- info->num_buffers = MIN(NUM_BUFFERS, page_data_num); > >>>>+ info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); > >>>>+ info->num_buffers = MIN(info->num_buffers, page_data_num); > >>>> > >>>> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", > >>>> info->num_buffers); > >>>>@@ -3588,6 +3595,36 @@ initial_for_parallel() > >>>> } > >>>> > >>>> /* > 
>>>>+ * initial page_flag for each thread > >>>>+ */ > >>>>+ if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) > >>>>+ == NULL) { > >>>>+ MSG("Can't allocate memory for page_flag_buf. %s\n", > >>>>+ strerror(errno)); > >>>>+ return FALSE; > >>>>+ } > >>>>+ memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); > >>>>+ > >>>>+ for (i = 0; i < info->num_threads; i++) { > >>>>+ if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { > >>>>+ MSG("Can't allocate memory for page_flag. %s\n", > >>>>+ strerror(errno)); > >>>>+ return FALSE; > >>>>+ } > >>>>+ current = info->page_flag_buf[i]; > >>>>+ > >>>>+ for (j = 1; j < NUM_BUFFERS; j++) { > >>>>+ if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { > >>>>+ MSG("Can't allocate memory for page_flag. %s\n", > >>>>+ strerror(errno)); > >>>>+ return FALSE; > >>>>+ } > >>>>+ current = current->next; > >>>>+ } > >>>>+ current->next = info->page_flag_buf[i]; > >>>>+ } > >>>>+ > >>>>+ /* > >>>> * initial fd_memory for threads > >>>> */ > >>>> for (i = 0; i < info->num_threads; i++) { > >>>>@@ -3612,7 +3649,8 @@ initial_for_parallel() > >>>> void > >>>> free_for_parallel() > >>>> { > >>>>- int i; > >>>>+ int i, j; > >>>>+ struct page_flag *current; > >>>> > >>>> if (info->threads != NULL) { > >>>> for (i = 0; i < info->num_threads; i++) { > >>>>@@ -3655,6 +3693,19 @@ free_for_parallel() > >>>> free(info->page_data_buf); > >>>> } > >>>> > >>>>+ if (info->page_flag_buf != NULL) { > >>>>+ for (i = 0; i < info->num_threads; i++) { > >>>>+ for (j = 0; j < NUM_BUFFERS; j++) { > >>>>+ if (info->page_flag_buf[i] != NULL) { > >>>>+ current = info->page_flag_buf[i]; > >>>>+ info->page_flag_buf[i] = current->next; > >>>>+ free(current); > >>>>+ } > >>>>+ } > >>>>+ } > >>>>+ free(info->page_flag_buf); > >>>>+ } > >>>>+ > >>>> if (info->parallel_info == NULL) > >>>> return; > >>>> > >>>>@@ -7075,11 +7126,11 @@ void * > >>>> kdump_thread_function_cyclic(void *arg) { > 
>>>> void *retval = PTHREAD_FAIL; > >>>> struct thread_args *kdump_thread_args = (struct thread_args *)arg; > >>>>- struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > >>>>+ volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > >>>>+ volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; > >>>> struct cycle *cycle = kdump_thread_args->cycle; > >>>>- int page_data_num = kdump_thread_args->page_data_num; > >>>>- mdf_pfn_t pfn; > >>>>- int index; > >>>>+ mdf_pfn_t pfn = cycle->start_pfn; > >>>>+ int index = kdump_thread_args->thread_num; > >>>> int buf_ready; > >>>> int dumpable; > >>>> int fd_memory = 0; > >>>>@@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { > >>>> kdump_thread_args->thread_num); > >>>> } > >>>> > >>>>- while (1) { > >>>>- /* get next pfn */ > >>>>- pthread_mutex_lock(&info->current_pfn_mutex); > >>>>- pfn = info->current_pfn; > >>>>- info->current_pfn++; > >>>>- pthread_mutex_unlock(&info->current_pfn_mutex); > >>>>- > >>>>- if (pfn >= kdump_thread_args->end_pfn) > >>>>- break; > >>>>- > >>>>- index = -1; > >>>>+ /* > >>>>+ * filtered page won't take anything > >>>>+ * unfiltered zero page will only take a page_flag_buf > >>>>+ * unfiltered non-zero page will take a page_flag_buf and a page_data_buf > >>>>+ */ > >>>>+ while (pfn < cycle->end_pfn) { > >>>> buf_ready = FALSE; > >>>> > >>>>+ pthread_mutex_lock(&info->page_data_mutex); > >>>>+ while (page_data_buf[index].used != FALSE) { > >>>>+ index = (index + 1) % info->num_buffers; > >>>>+ } > >>>>+ page_data_buf[index].used = TRUE; > >>>>+ pthread_mutex_unlock(&info->page_data_mutex); > >>>>+ > >>>> while (buf_ready == FALSE) { > >>>> pthread_testcancel(); > >>>>- > >>>>- index = pfn % page_data_num; > >>>>- > >>>>- if (pfn - info->consumed_pfn > info->num_buffers) > >>>>+ if (page_flag_buf->ready == FLAG_READY) > >>>> continue; > >>>> > >>>>- if (page_data_buf[index].ready != 0) > >>>>- continue; > >>>>- > >>>>- 
pthread_mutex_lock(&page_data_buf[index].mutex); > >>>>- > >>>>- if (page_data_buf[index].ready != 0) > >>>>- goto unlock; > >>>>- > >>>>- buf_ready = TRUE; > >>>>+ /* get next dumpable pfn */ > >>>>+ pthread_mutex_lock(&info->current_pfn_mutex); > >>>>+ for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { > >>>>+ dumpable = is_dumpable( > >>>>+ info->fd_bitmap ? &bitmap_parallel : info->bitmap2, > >>>>+ pfn, > >>>>+ cycle); > >>>>+ if (dumpable) > >>>>+ break; > >>>>+ } > >>>>+ info->current_pfn = pfn + 1; > >>>> > >>>>- page_data_buf[index].pfn = pfn; > >>>>- page_data_buf[index].ready = 1; > >>>>+ page_flag_buf->pfn = pfn; > >>>>+ page_flag_buf->ready = FLAG_FILLING; > >>>>+ pthread_mutex_unlock(&info->current_pfn_mutex); > >>>>+ sem_post(&info->page_flag_buf_sem); > >>>> > >>>>- dumpable = is_dumpable( > >>>>- info->fd_bitmap ? &bitmap_parallel : info->bitmap2, > >>>>- pfn, > >>>>- cycle); > >>>>- page_data_buf[index].dumpable = dumpable; > >>>>- if (!dumpable) > >>>>- goto unlock; > >>>>+ if (pfn >= cycle->end_pfn) { > >>>>+ info->current_pfn = cycle->end_pfn; > >>>>+ page_data_buf[index].used = FALSE; > >>>>+ break; > >>>>+ } > >>>> > >>>> if (!read_pfn_parallel(fd_memory, pfn, buf, > >>>> &bitmap_memory_parallel, > >>>>@@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { > >>>> > >>>> if ((info->dump_level & DL_EXCLUDE_ZERO) > >>>> && is_zero_page(buf, info->page_size)) { > >>>>- page_data_buf[index].zero = TRUE; > >>>>- goto unlock; > >>>>+ page_flag_buf->zero = TRUE; > >>>>+ goto next; > >>>> } > >>>> > >>>>- page_data_buf[index].zero = FALSE; > >>>>+ page_flag_buf->zero = FALSE; > >>>> > >>>> /* > >>>> * Compress the page data. 
> >>>>@@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { > >>>> page_data_buf[index].flags = > >>>> DUMP_DH_COMPRESSED_LZO; > >>>> page_data_buf[index].size = size_out; > >>>>+ > >>>> memcpy(page_data_buf[index].buf, buf_out, size_out); > >>>> #endif > >>>> #ifdef USESNAPPY > >>>>@@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { > >>>> page_data_buf[index].size = info->page_size; > >>>> memcpy(page_data_buf[index].buf, buf, info->page_size); > >>>> } > >>>>-unlock: > >>>>- pthread_mutex_unlock(&page_data_buf[index].mutex); > >>>>+ page_flag_buf->index = index; > >>>>+ buf_ready = TRUE; > >>>>+next: > >>>>+ page_flag_buf->ready = FLAG_READY; > >>>>+ page_flag_buf = page_flag_buf->next; > >>>> > >>>> } > >>>> } > >>>>- > >>>> retval = NULL; > >>>> > >>>> fail: > >>>>@@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >>>> struct page_desc pd; > >>>> struct timeval tv_start; > >>>> struct timeval last, new; > >>>>- unsigned long long consuming_pfn; > >>>> pthread_t **threads = NULL; > >>>> struct thread_args *kdump_thread_args = NULL; > >>>> void *thread_result; > >>>>- int page_data_num; > >>>>+ int page_buf_num; > >>>> struct page_data *page_data_buf = NULL; > >>>> int i; > >>>> int index; > >>>>+ int end_count, consuming, check_count; > >>>>+ mdf_pfn_t current_pfn, temp_pfn; > >>>> > >>>> if (info->flag_elf_dumpfile) > >>>> return FALSE; > >>>>@@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >>>> goto out; > >>>> } > >>>> > >>>>- res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); > >>>>- if (res != 0) { > >>>>- ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", > >>>>- strerror(res)); > >>>>- goto out; > >>>>- } > >>>>- > >>>> res = pthread_mutex_init(&info->filter_mutex, NULL); > >>>> if (res != 0) { > >>>> ERRMSG("Can't initialize filter_mutex. 
%s\n", strerror(res)); > >>>>@@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >>>> end_pfn = cycle->end_pfn; > >>>> > >>>> info->current_pfn = start_pfn; > >>>>- info->consumed_pfn = start_pfn - 1; > >>>> > >>>> threads = info->threads; > >>>> kdump_thread_args = info->kdump_thread_args; > >>>> > >>>>- page_data_num = info->num_buffers; > >>>>+ page_buf_num = info->num_buffers; > >>>> page_data_buf = info->page_data_buf; > >>>>+ pthread_mutex_init(&info->page_data_mutex, NULL); > >>>>+ sem_init(&info->page_flag_buf_sem, 0, 0); > >>>> > >>>>- for (i = 0; i < page_data_num; i++) { > >>>>- /* > >>>>- * producer will use pfn in page_data_buf to decide the > >>>>- * consumed pfn > >>>>- */ > >>>>- page_data_buf[i].pfn = start_pfn - 1; > >>>>- page_data_buf[i].ready = 0; > >>>>- res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); > >>>>- if (res != 0) { > >>>>- ERRMSG("Can't initialize mutex of page_data_buf. %s\n", > >>>>- strerror(res)); > >>>>- goto out; > >>>>- } > >>>>- } > >>>>+ for (i = 0; i < page_buf_num; i++) > >>>>+ page_data_buf[i].used = FALSE; > >>>> > >>>> for (i = 0; i < info->num_threads; i++) { > >>>> kdump_thread_args[i].thread_num = i; > >>>> kdump_thread_args[i].len_buf_out = len_buf_out; > >>>>- kdump_thread_args[i].start_pfn = start_pfn; > >>>>- kdump_thread_args[i].end_pfn = end_pfn; > >>>>- kdump_thread_args[i].page_data_num = page_data_num; > >>>> kdump_thread_args[i].page_data_buf = page_data_buf; > >>>>+ kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; > >>>> kdump_thread_args[i].cycle = cycle; > >>>> > >>>> res = pthread_create(threads[i], NULL, > >>>>@@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >>>> } > >>>> } > >>>> > >>>>- consuming_pfn = start_pfn; > >>>>- index = -1; > >>>>+ end_count = 0; > >>>>+ while (1) { > >>>>+ consuming = 0; > >>>>+ check_count = 0; > >>>> > >>>>- gettimeofday(&last, NULL); > >>>>+ /* > >>>>+ * The basic idea is 
producer producing page and consumer writing page. > >>>>+ * Each producer have a page_flag_buf list which is used for storing page's description. > >>>>+ * The size of page_flag_buf is little so it won't take too much memory. > >>>>+ * And all producers will share a page_data_buf array which is used for storing page's compressed data. > >>>>+ * The main thread is the consumer. It will find the next pfn and write it into file. > >>>>+ * The next pfn is smallest pfn in all page_flag_buf. > >>>>+ */ > >>>>+ sem_wait(&info->page_flag_buf_sem); > >>>>+ gettimeofday(&last, NULL); > >>>>+ while (1) { > >>>>+ current_pfn = end_pfn; > >>>> > >>>>- while (consuming_pfn < end_pfn) { > >>>>- index = consuming_pfn % page_data_num; > >>>>+ /* > >>>>+ * page_flag_buf is in circular linked list. > >>>>+ * The array info->page_flag_buf[] records the current page_flag_buf in each thread's > >>>>+ * page_flag_buf list. > >>>>+ * consuming is used for recording in which thread the pfn is the smallest. > >>>>+ * current_pfn is used for recording the value of pfn when checking the pfn. > >>>>+ */ > >>>>+ for (i = 0; i < info->num_threads; i++) { > >>>>+ if (info->page_flag_buf[i]->ready == FLAG_UNUSED) > >>>>+ continue; > >>>>+ temp_pfn = info->page_flag_buf[i]->pfn; > >>>> > >>>>- gettimeofday(&new, NULL); > >>>>- if (new.tv_sec - last.tv_sec > WAIT_TIME) { > >>>>- ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); > >>>>- goto out; > >>>>- } > >>>>+ /* > >>>>+ * count how many threads have reached the end. 
> >>>>+ */ > >>>>+ if (temp_pfn >= end_pfn) { > >>>>+ info->page_flag_buf[i]->ready = FLAG_UNUSED; > >>>>+ end_count++; > >>>>+ continue; > >>>>+ } > >>>> > >>>>- /* > >>>>- * check pfn first without mutex locked to reduce the time > >>>>- * trying to lock the mutex > >>>>- */ > >>>>- if (page_data_buf[index].pfn != consuming_pfn) > >>>>- continue; > >>>>+ if (current_pfn < temp_pfn) > >>>>+ continue; > >>>> > >>>>- if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) > >>>>- continue; > >>>>+ check_count++; > >>>>+ consuming = i; > >>>>+ current_pfn = temp_pfn; > >>>>+ } > >>>> > >>>>- /* check whether the found one is ready to be consumed */ > >>>>- if (page_data_buf[index].pfn != consuming_pfn || > >>>>- page_data_buf[index].ready != 1) { > >>>>- goto unlock; > >>>>+ /* > >>>>+ * If all the threads have reached the end, we will finish writing. > >>>>+ */ > >>>>+ if (end_count >= info->num_threads) > >>>>+ goto finish; > >>>>+ > >>>>+ /* > >>>>+ * If the page_flag_buf is not ready, the pfn recorded may be changed. > >>>>+ * So we should recheck. 
> >>>>+ */ > >>>>+ if (info->page_flag_buf[consuming]->ready != FLAG_READY) { > >>>>+ gettimeofday(&new, NULL); > >>>>+ if (new.tv_sec - last.tv_sec > WAIT_TIME) { > >>>>+ ERRMSG("Can't get data of pfn.\n"); > >>>>+ goto out; > >>>>+ } > >>>>+ continue; > >>>>+ } > >>>>+ > >>>>+ if (current_pfn == info->page_flag_buf[consuming]->pfn) > >>>>+ break; > >>>> } > >>>> > >>>> if ((num_dumped % per) == 0) > >>>> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); > >>>> > >>>>- /* next pfn is found, refresh last here */ > >>>>- last = new; > >>>>- consuming_pfn++; > >>>>- info->consumed_pfn++; > >>>>- page_data_buf[index].ready = 0; > >>>>- > >>>>- if (page_data_buf[index].dumpable == FALSE) > >>>>- goto unlock; > >>>>- > >>>> num_dumped++; > >>>> > >>>>- if (page_data_buf[index].zero == TRUE) { > >>>>+ > >>>>+ if (info->page_flag_buf[consuming]->zero == TRUE) { > >>>> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) > >>>> goto out; > >>>> pfn_zero++; > >>>> } else { > >>>>+ index = info->page_flag_buf[consuming]->index; > >>>> pd.flags = page_data_buf[index].flags; > >>>> pd.size = page_data_buf[index].size; > >>>> pd.page_flags = 0; > >>>>@@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >>>> */ > >>>> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) > >>>> goto out; > >>>>- > >>>>+ page_data_buf[index].used = FALSE; > >>>> } > >>>>-unlock: > >>>>- pthread_mutex_unlock(&page_data_buf[index].mutex); > >>>>+ info->page_flag_buf[consuming]->ready = FLAG_UNUSED; > >>>>+ info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; > >>>> } > >>>>- > >>>>+finish: > >>>> ret = TRUE; > >>>> /* > >>>> * print [100 %] > >>>>@@ -7463,15 +7532,9 @@ out: > >>>> } > >>>> } > >>>> > >>>>- if (page_data_buf != NULL) { > >>>>- for (i = 0; i < page_data_num; i++) { > >>>>- pthread_mutex_destroy(&page_data_buf[i].mutex); > >>>>- } > >>>>- } > >>>>- > >>>>+ sem_destroy(&info->page_flag_buf_sem); > >>>> 
pthread_rwlock_destroy(&info->usemmap_rwlock); > >>>> pthread_mutex_destroy(&info->filter_mutex); > >>>>- pthread_mutex_destroy(&info->consumed_pfn_mutex); > >>>> pthread_mutex_destroy(&info->current_pfn_mutex); > >>>> > >>>> return ret; > >>>>@@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag > >>>> num_dumped++; > >>>> if (!read_pfn(pfn, buf)) > >>>> goto out; > >>>>+ > >>>> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); > >>>> > >>>> /* > >>>>diff --git a/makedumpfile.h b/makedumpfile.h > >>>>index e0b5bbf..4b315c0 100644 > >>>>--- a/makedumpfile.h > >>>>+++ b/makedumpfile.h > >>>>@@ -44,6 +44,7 @@ > >>>> #include "print_info.h" > >>>> #include "sadump_mod.h" > >>>> #include <pthread.h> > >>>>+#include <semaphore.h> > >>>> > >>>> /* > >>>> * Result of command > >>>>@@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; > >>>> #define PAGE_DATA_NUM (50) > >>>> #define WAIT_TIME (60 * 10) > >>>> #define PTHREAD_FAIL ((void *)-2) > >>>>-#define NUM_BUFFERS (50) > >>>>+#define NUM_BUFFERS (20) > >>>> > >>>> struct mmap_cache { > >>>> char *mmap_buf; > >>>>@@ -985,28 +986,33 @@ struct mmap_cache { > >>>> off_t mmap_end_offset; > >>>> }; > >>>> > >>>>+enum { > >>>>+ FLAG_UNUSED, > >>>>+ FLAG_READY, > >>>>+ FLAG_FILLING > >>>>+}; > >>>>+struct page_flag { > >>>>+ mdf_pfn_t pfn; > >>>>+ char zero; > >>>>+ char ready; > >>>>+ short index; > >>>>+ struct page_flag *next; > >>>>+}; > >>>>+ > >>>> struct page_data > >>>> { > >>>>- mdf_pfn_t pfn; > >>>>- int dumpable; > >>>>- int zero; > >>>>- unsigned int flags; > >>>> long size; > >>>> unsigned char *buf; > >>>>- pthread_mutex_t mutex; > >>>>- /* > >>>>- * whether the page_data is ready to be consumed > >>>>- */ > >>>>- int ready; > >>>>+ int flags; > >>>>+ int used; > >>>> }; > >>>> > >>>> struct thread_args { > >>>> int thread_num; > >>>> unsigned long len_buf_out; > >>>>- mdf_pfn_t start_pfn, end_pfn; > >>>>- int page_data_num; > >>>> struct cycle 
*cycle; > >>>> struct page_data *page_data_buf; > >>>>+ struct page_flag *page_flag_buf; > >>>> }; > >>>> > >>>> /* > >>>>@@ -1295,11 +1301,12 @@ struct DumpInfo { > >>>> pthread_t **threads; > >>>> struct thread_args *kdump_thread_args; > >>>> struct page_data *page_data_buf; > >>>>+ struct page_flag **page_flag_buf; > >>>>+ sem_t page_flag_buf_sem; > >>>> pthread_rwlock_t usemmap_rwlock; > >>>> mdf_pfn_t current_pfn; > >>>> pthread_mutex_t current_pfn_mutex; > >>>>- mdf_pfn_t consumed_pfn; > >>>>- pthread_mutex_t consumed_pfn_mutex; > >>>>+ pthread_mutex_t page_data_mutex; > >>>> pthread_mutex_t filter_mutex; > >>>> }; > >>>> extern struct DumpInfo *info; > >>>>-- > >>>>1.8.3.1 > >>>> > >>>> > >>>> > >>>> > >>>>_______________________________________________ > >>>>kexec mailing list > >>>>kexec@lists.infradead.org > >>>>http://lists.infradead.org/mailman/listinfo/kexec > >>> > >>> > >> > >> > >> > >>_______________________________________________ > >>kexec mailing list > >>kexec@lists.infradead.org > >>http://lists.infradead.org/mailman/listinfo/kexec > > > _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-16 8:04 ` Minfei Huang @ 2016-03-16 8:24 ` Minfei Huang 2016-03-16 8:26 ` "Zhou, Wenjian/周文剑" 1 sibling, 0 replies; 33+ messages in thread From: Minfei Huang @ 2016-03-16 8:24 UTC (permalink / raw) To: "Zhou, Wenjian/周文剑"; +Cc: kexec On 03/16/16 at 04:04pm, Minfei Huang wrote: > On 03/16/16 at 09:55am, "Zhou, Wenjian/周文剑" wrote: > > Hi Minfei, > > > > I have some questions. > > > > If the value of num-threads is 8, > > 1. How much is the free memory before running makedumpfile failed? > > Hmm, this machine is reserved by other, I have no access to take a look > about reserved memory. All of the configuration are set by default. > Maybe it's about 420M. > > > > > 2. How much is the free memory before running makedumpfile success? > > I don't memtion this during testing it. I don't notice the issue during test it. > > > > > > > And the following result is very strange if all cache has been dropped. > > makedumpfile --num-threads 30 -d 31 > > real 0m0.006s > > user 0m0.002s > > sys 0m0.004s > > For this case, makedumpfile fails to dump vmcore with option > --num-threads 30. > > I suspect the following output from strace. > > > >1313 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) > > >1314 mmap(NULL, 18446744048584523776, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) > > >1315 mmap(NULL, 134217728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x7f5927bb2000 > > Thanks > Minfei > > > > > -- > > Thanks > > Zhou > > > > On 03/15/2016 05:33 PM, Minfei Huang wrote: > > >On 03/15/16 at 03:12pm, "Zhou, Wenjian/周文剑" wrote: > > >>Hello Minfei, > > >> > > >>I guess the result is affected by the caches. > > >>How about executing the following command before running makedumpfile each time? > > >># echo 3 > /proc/sys/vm/drop_caches > > > > > >Hi, Zhou. 
> > > > > >Seem there is a bug during dumping vmcore with option num-threads. > > > > > >1307 open("/proc/meminfo", O_RDONLY) = 4 > > >1308 fstat(4, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 > > >1309 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f59322d3000 > > >1310 read(4, "MemTotal: 385452 kB\nMemF"..., 1024) = 1024 > > >1311 close(4) = 0 > > >1312 munmap(0x7f59322d3000, 4096) = 0 > > >1313 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) > > >1314 mmap(NULL, 18446744048584523776, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) > > >1315 mmap(NULL, 134217728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x7f5927bb2000 > > >1316 munmap(0x7f5927bb2000, 4513792) = 0 > > >1317 munmap(0x7f592c000000, 62595072) = 0 > > >1318 mprotect(0x7f5928000000, 135168, PROT_READ|PROT_WRITE) = 0 > > >1319 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) > > > > > >Thanks > > >Minfei > > > > > >> > > >>-- > > >>Thanks > > >>Zhou > > >> > > >>On 03/15/2016 02:34 PM, Minfei Huang wrote: > > >>>Hi, Zhou. > > >>> > > >>>I have applied this patch base on 1.5.9. There are several testcases I > > >>>have tested. 
> > >>> > > >>>- makedumpfile --num-threads 64 -d 31 > > >>> real 0m0.010s > > >>> user 0m0.002s > > >>> sys 0m0.009s > > >>> > > >>>- makedumpfile --num-threads 31 -d 31 > > >>> real 2m40.915s > > >>> user 10m50.900s > > >>> sys 23m9.664s > > >>> > > >>>makedumpfile --num-threads 30 -d 31 > > >>> real 0m0.006s > > >>> user 0m0.002s > > >>> sys 0m0.004s > > >>> > > >>>makedumpfile --num-threads 32 -d 31 > > >>> real 0m0.007s > > >>> user 0m0.002s > > >>> sys 0m0.005s > > >>> > > >>>- makedumpfile --num-threads 8 -d 31 > > >>> real 2m32.692s > > >>> user 7m4.630s > > >>> sys 2m0.369s > > >>> > > >>>- makedumpfile --num-threads 1 -d 31 > > >>> real 4m42.423s > > >>> user 7m27.153s > > >>> sys 0m22.490s > > >>> > > >>>- makedumpfile.orig -d 31 > > >>> real 4m1.297s > > >>> user 3m39.696s > > >>> sys 0m15.200s > > >>> > > >>>This patch has a huge increment to the filter performance under 31. But > > >>>it is not stable, since makedumpfile fails to dump vmcore intermittently. > > >>>You can find the above test result, makedumpfile fails to dump vmcore > > >>>with option --num-threads 64, also it may occur with option > > >>>--number-threads 8. > > >>> > > >>>Thanks > > >>>Minfei > > >>> > > >>>On 03/09/16 at 08:27am, Zhou Wenjian wrote: > > >>>>v4: > > >>>> 1. fix a bug caused by the logic > > >>>>v3: > > >>>> 1. remove some unused variables > > >>>> 2. fix a bug caused by the wrong logic > > >>>> 3. fix a bug caused by optimising > > >>>> 4. improve more performance by using Minoru Usui's code > > >>>> > > >>>>multi-threads implementation will introduce extra cost when handling > > >>>>each page. The origin implementation will also do the extra work for > > >>>>filtered pages. So there is a big performance degradation in > > >>>>--num-threads -d 31. > > >>>>The new implementation won't do the extra work for filtered pages any > > >>>>more. So the performance of -d 31 is close to that of serial processing. 
> > >>>> > > >>>>The new implementation is just like the following: > > >>>> * The basic idea is producer producing page and consumer writing page. > > >>>> * Each producer have a page_flag_buf list which is used for storing > > >>>> page's description. > > >>>> * The size of page_flag_buf is little so it won't take too much memory. > > >>>> * And all producers will share a page_data_buf array which is > > >>>> used for storing page's compressed data. > > >>>> * The main thread is the consumer. It will find the next pfn and write > > >>>> it into file. > > >>>> * The next pfn is smallest pfn in all page_flag_buf. > > >>>> > > >>>>Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> > > >>>>Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> > > >>>>--- > > >>>> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- > > >>>> makedumpfile.h | 35 ++++--- > > >>>> 2 files changed, 202 insertions(+), 131 deletions(-) > > >>>> > > >>>>diff --git a/makedumpfile.c b/makedumpfile.c > > >>>>index fa0b779..2b0864a 100644 > > >>>>--- a/makedumpfile.c > > >>>>+++ b/makedumpfile.c > > >>>>@@ -3483,7 +3483,8 @@ initial_for_parallel() > > >>>> unsigned long page_data_buf_size; > > >>>> unsigned long limit_size; > > >>>> int page_data_num; > > >>>>- int i; > > >>>>+ struct page_flag *current; > > >>>>+ int i, j; > > >>>> > > >>>> len_buf_out = calculate_len_buf_out(info->page_size); > > >>>> > > >>>>@@ -3560,10 +3561,16 @@ initial_for_parallel() > > >>>> > > >>>> limit_size = (get_free_memory_size() > > >>>> - MAP_REGION * info->num_threads) * 0.6; > > >>>>+ if (limit_size < 0) { > > >>>>+ MSG("Free memory is not enough for multi-threads\n"); > > >>>>+ return FALSE; > > >>>>+ } > > >>>> > > >>>> page_data_num = limit_size / page_data_buf_size; > > >>>>+ info->num_buffers = 3 * info->num_threads; > > >>>> > > >>>>- info->num_buffers = MIN(NUM_BUFFERS, page_data_num); > > >>>>+ info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); > > >>>>+ 
info->num_buffers = MIN(info->num_buffers, page_data_num); > > >>>> > > >>>> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", > > >>>> info->num_buffers); > > >>>>@@ -3588,6 +3595,36 @@ initial_for_parallel() > > >>>> } > > >>>> > > >>>> /* > > >>>>+ * initial page_flag for each thread > > >>>>+ */ > > >>>>+ if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) > > >>>>+ == NULL) { > > >>>>+ MSG("Can't allocate memory for page_flag_buf. %s\n", > > >>>>+ strerror(errno)); > > >>>>+ return FALSE; > > >>>>+ } > > >>>>+ memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); > > >>>>+ > > >>>>+ for (i = 0; i < info->num_threads; i++) { > > >>>>+ if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { > > >>>>+ MSG("Can't allocate memory for page_flag. %s\n", > > >>>>+ strerror(errno)); > > >>>>+ return FALSE; > > >>>>+ } > > >>>>+ current = info->page_flag_buf[i]; > > >>>>+ > > >>>>+ for (j = 1; j < NUM_BUFFERS; j++) { > > >>>>+ if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { > > >>>>+ MSG("Can't allocate memory for page_flag. 
%s\n", > > >>>>+ strerror(errno)); > > >>>>+ return FALSE; > > >>>>+ } > > >>>>+ current = current->next; > > >>>>+ } > > >>>>+ current->next = info->page_flag_buf[i]; > > >>>>+ } > > >>>>+ > > >>>>+ /* > > >>>> * initial fd_memory for threads > > >>>> */ > > >>>> for (i = 0; i < info->num_threads; i++) { > > >>>>@@ -3612,7 +3649,8 @@ initial_for_parallel() > > >>>> void > > >>>> free_for_parallel() > > >>>> { > > >>>>- int i; > > >>>>+ int i, j; > > >>>>+ struct page_flag *current; > > >>>> > > >>>> if (info->threads != NULL) { > > >>>> for (i = 0; i < info->num_threads; i++) { > > >>>>@@ -3655,6 +3693,19 @@ free_for_parallel() > > >>>> free(info->page_data_buf); > > >>>> } > > >>>> > > >>>>+ if (info->page_flag_buf != NULL) { > > >>>>+ for (i = 0; i < info->num_threads; i++) { > > >>>>+ for (j = 0; j < NUM_BUFFERS; j++) { > > >>>>+ if (info->page_flag_buf[i] != NULL) { > > >>>>+ current = info->page_flag_buf[i]; > > >>>>+ info->page_flag_buf[i] = current->next; > > >>>>+ free(current); > > >>>>+ } > > >>>>+ } > > >>>>+ } > > >>>>+ free(info->page_flag_buf); > > >>>>+ } > > >>>>+ > > >>>> if (info->parallel_info == NULL) > > >>>> return; > > >>>> > > >>>>@@ -7075,11 +7126,11 @@ void * > > >>>> kdump_thread_function_cyclic(void *arg) { > > >>>> void *retval = PTHREAD_FAIL; > > >>>> struct thread_args *kdump_thread_args = (struct thread_args *)arg; > > >>>>- struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > > >>>>+ volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > > >>>>+ volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; > > >>>> struct cycle *cycle = kdump_thread_args->cycle; > > >>>>- int page_data_num = kdump_thread_args->page_data_num; > > >>>>- mdf_pfn_t pfn; > > >>>>- int index; > > >>>>+ mdf_pfn_t pfn = cycle->start_pfn; > > >>>>+ int index = kdump_thread_args->thread_num; > > >>>> int buf_ready; > > >>>> int dumpable; > > >>>> int fd_memory = 0; > > >>>>@@ -7125,47 +7176,48 @@ 
kdump_thread_function_cyclic(void *arg) { > > >>>> kdump_thread_args->thread_num); > > >>>> } > > >>>> > > >>>>- while (1) { > > >>>>- /* get next pfn */ > > >>>>- pthread_mutex_lock(&info->current_pfn_mutex); > > >>>>- pfn = info->current_pfn; > > >>>>- info->current_pfn++; > > >>>>- pthread_mutex_unlock(&info->current_pfn_mutex); > > >>>>- > > >>>>- if (pfn >= kdump_thread_args->end_pfn) > > >>>>- break; > > >>>>- > > >>>>- index = -1; > > >>>>+ /* > > >>>>+ * filtered page won't take anything > > >>>>+ * unfiltered zero page will only take a page_flag_buf > > >>>>+ * unfiltered non-zero page will take a page_flag_buf and a page_data_buf > > >>>>+ */ > > >>>>+ while (pfn < cycle->end_pfn) { > > >>>> buf_ready = FALSE; > > >>>> > > >>>>+ pthread_mutex_lock(&info->page_data_mutex); > > >>>>+ while (page_data_buf[index].used != FALSE) { > > >>>>+ index = (index + 1) % info->num_buffers; > > >>>>+ } > > >>>>+ page_data_buf[index].used = TRUE; > > >>>>+ pthread_mutex_unlock(&info->page_data_mutex); > > >>>>+ > > >>>> while (buf_ready == FALSE) { > > >>>> pthread_testcancel(); > > >>>>- > > >>>>- index = pfn % page_data_num; > > >>>>- > > >>>>- if (pfn - info->consumed_pfn > info->num_buffers) > > >>>>+ if (page_flag_buf->ready == FLAG_READY) > > >>>> continue; > > >>>> > > >>>>- if (page_data_buf[index].ready != 0) > > >>>>- continue; > > >>>>- > > >>>>- pthread_mutex_lock(&page_data_buf[index].mutex); > > >>>>- > > >>>>- if (page_data_buf[index].ready != 0) > > >>>>- goto unlock; > > >>>>- > > >>>>- buf_ready = TRUE; > > >>>>+ /* get next dumpable pfn */ > > >>>>+ pthread_mutex_lock(&info->current_pfn_mutex); > > >>>>+ for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { > > >>>>+ dumpable = is_dumpable( > > >>>>+ info->fd_bitmap ? 
&bitmap_parallel : info->bitmap2, > > >>>>+ pfn, > > >>>>+ cycle); > > >>>>+ if (dumpable) > > >>>>+ break; > > >>>>+ } > > >>>>+ info->current_pfn = pfn + 1; > > >>>> > > >>>>- page_data_buf[index].pfn = pfn; > > >>>>- page_data_buf[index].ready = 1; > > >>>>+ page_flag_buf->pfn = pfn; > > >>>>+ page_flag_buf->ready = FLAG_FILLING; > > >>>>+ pthread_mutex_unlock(&info->current_pfn_mutex); > > >>>>+ sem_post(&info->page_flag_buf_sem); > > >>>> > > >>>>- dumpable = is_dumpable( > > >>>>- info->fd_bitmap ? &bitmap_parallel : info->bitmap2, > > >>>>- pfn, > > >>>>- cycle); > > >>>>- page_data_buf[index].dumpable = dumpable; > > >>>>- if (!dumpable) > > >>>>- goto unlock; > > >>>>+ if (pfn >= cycle->end_pfn) { > > >>>>+ info->current_pfn = cycle->end_pfn; > > >>>>+ page_data_buf[index].used = FALSE; > > >>>>+ break; > > >>>>+ } > > >>>> > > >>>> if (!read_pfn_parallel(fd_memory, pfn, buf, > > >>>> &bitmap_memory_parallel, > > >>>>@@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { > > >>>> > > >>>> if ((info->dump_level & DL_EXCLUDE_ZERO) > > >>>> && is_zero_page(buf, info->page_size)) { > > >>>>- page_data_buf[index].zero = TRUE; > > >>>>- goto unlock; > > >>>>+ page_flag_buf->zero = TRUE; > > >>>>+ goto next; > > >>>> } > > >>>> > > >>>>- page_data_buf[index].zero = FALSE; > > >>>>+ page_flag_buf->zero = FALSE; > > >>>> > > >>>> /* > > >>>> * Compress the page data. 
> > >>>>@@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { > > >>>> page_data_buf[index].flags = > > >>>> DUMP_DH_COMPRESSED_LZO; > > >>>> page_data_buf[index].size = size_out; > > >>>>+ > > >>>> memcpy(page_data_buf[index].buf, buf_out, size_out); > > >>>> #endif > > >>>> #ifdef USESNAPPY > > >>>>@@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { > > >>>> page_data_buf[index].size = info->page_size; > > >>>> memcpy(page_data_buf[index].buf, buf, info->page_size); > > >>>> } > > >>>>-unlock: > > >>>>- pthread_mutex_unlock(&page_data_buf[index].mutex); > > >>>>+ page_flag_buf->index = index; > > >>>>+ buf_ready = TRUE; > > >>>>+next: > > >>>>+ page_flag_buf->ready = FLAG_READY; > > >>>>+ page_flag_buf = page_flag_buf->next; > > >>>> > > >>>> } > > >>>> } > > >>>>- > > >>>> retval = NULL; > > >>>> > > >>>> fail: > > >>>>@@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > >>>> struct page_desc pd; > > >>>> struct timeval tv_start; > > >>>> struct timeval last, new; > > >>>>- unsigned long long consuming_pfn; > > >>>> pthread_t **threads = NULL; > > >>>> struct thread_args *kdump_thread_args = NULL; > > >>>> void *thread_result; > > >>>>- int page_data_num; > > >>>>+ int page_buf_num; > > >>>> struct page_data *page_data_buf = NULL; > > >>>> int i; > > >>>> int index; > > >>>>+ int end_count, consuming, check_count; > > >>>>+ mdf_pfn_t current_pfn, temp_pfn; > > >>>> > > >>>> if (info->flag_elf_dumpfile) > > >>>> return FALSE; > > >>>>@@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > >>>> goto out; > > >>>> } > > >>>> > > >>>>- res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); > > >>>>- if (res != 0) { > > >>>>- ERRMSG("Can't initialize consumed_pfn_mutex. 
%s\n", > > >>>>- strerror(res)); > > >>>>- goto out; > > >>>>- } > > >>>>- > > >>>> res = pthread_mutex_init(&info->filter_mutex, NULL); > > >>>> if (res != 0) { > > >>>> ERRMSG("Can't initialize filter_mutex. %s\n", strerror(res)); > > >>>>@@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > >>>> end_pfn = cycle->end_pfn; > > >>>> > > >>>> info->current_pfn = start_pfn; > > >>>>- info->consumed_pfn = start_pfn - 1; > > >>>> > > >>>> threads = info->threads; > > >>>> kdump_thread_args = info->kdump_thread_args; > > >>>> > > >>>>- page_data_num = info->num_buffers; > > >>>>+ page_buf_num = info->num_buffers; > > >>>> page_data_buf = info->page_data_buf; > > >>>>+ pthread_mutex_init(&info->page_data_mutex, NULL); > > >>>>+ sem_init(&info->page_flag_buf_sem, 0, 0); > > >>>> > > >>>>- for (i = 0; i < page_data_num; i++) { > > >>>>- /* > > >>>>- * producer will use pfn in page_data_buf to decide the > > >>>>- * consumed pfn > > >>>>- */ > > >>>>- page_data_buf[i].pfn = start_pfn - 1; > > >>>>- page_data_buf[i].ready = 0; > > >>>>- res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); > > >>>>- if (res != 0) { > > >>>>- ERRMSG("Can't initialize mutex of page_data_buf. 
%s\n", > > >>>>- strerror(res)); > > >>>>- goto out; > > >>>>- } > > >>>>- } > > >>>>+ for (i = 0; i < page_buf_num; i++) > > >>>>+ page_data_buf[i].used = FALSE; > > >>>> > > >>>> for (i = 0; i < info->num_threads; i++) { > > >>>> kdump_thread_args[i].thread_num = i; > > >>>> kdump_thread_args[i].len_buf_out = len_buf_out; > > >>>>- kdump_thread_args[i].start_pfn = start_pfn; > > >>>>- kdump_thread_args[i].end_pfn = end_pfn; > > >>>>- kdump_thread_args[i].page_data_num = page_data_num; > > >>>> kdump_thread_args[i].page_data_buf = page_data_buf; > > >>>>+ kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; > > >>>> kdump_thread_args[i].cycle = cycle; > > >>>> > > >>>> res = pthread_create(threads[i], NULL, > > >>>>@@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > >>>> } > > >>>> } > > >>>> > > >>>>- consuming_pfn = start_pfn; > > >>>>- index = -1; > > >>>>+ end_count = 0; > > >>>>+ while (1) { > > >>>>+ consuming = 0; > > >>>>+ check_count = 0; > > >>>> > > >>>>- gettimeofday(&last, NULL); > > >>>>+ /* > > >>>>+ * The basic idea is producer producing page and consumer writing page. > > >>>>+ * Each producer have a page_flag_buf list which is used for storing page's description. > > >>>>+ * The size of page_flag_buf is little so it won't take too much memory. > > >>>>+ * And all producers will share a page_data_buf array which is used for storing page's compressed data. > > >>>>+ * The main thread is the consumer. It will find the next pfn and write it into file. > > >>>>+ * The next pfn is smallest pfn in all page_flag_buf. > > >>>>+ */ > > >>>>+ sem_wait(&info->page_flag_buf_sem); > > >>>>+ gettimeofday(&last, NULL); > > >>>>+ while (1) { > > >>>>+ current_pfn = end_pfn; > > >>>> > > >>>>- while (consuming_pfn < end_pfn) { > > >>>>- index = consuming_pfn % page_data_num; > > >>>>+ /* > > >>>>+ * page_flag_buf is in circular linked list. 
> > >>>>+ * The array info->page_flag_buf[] records the current page_flag_buf in each thread's > > >>>>+ * page_flag_buf list. > > >>>>+ * consuming is used for recording in which thread the pfn is the smallest. > > >>>>+ * current_pfn is used for recording the value of pfn when checking the pfn. > > >>>>+ */ > > >>>>+ for (i = 0; i < info->num_threads; i++) { > > >>>>+ if (info->page_flag_buf[i]->ready == FLAG_UNUSED) > > >>>>+ continue; > > >>>>+ temp_pfn = info->page_flag_buf[i]->pfn; > > >>>> > > >>>>- gettimeofday(&new, NULL); > > >>>>- if (new.tv_sec - last.tv_sec > WAIT_TIME) { > > >>>>- ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); > > >>>>- goto out; > > >>>>- } > > >>>>+ /* > > >>>>+ * count how many threads have reached the end. > > >>>>+ */ > > >>>>+ if (temp_pfn >= end_pfn) { > > >>>>+ info->page_flag_buf[i]->ready = FLAG_UNUSED; > > >>>>+ end_count++; > > >>>>+ continue; > > >>>>+ } > > >>>> > > >>>>- /* > > >>>>- * check pfn first without mutex locked to reduce the time > > >>>>- * trying to lock the mutex > > >>>>- */ > > >>>>- if (page_data_buf[index].pfn != consuming_pfn) > > >>>>- continue; > > >>>>+ if (current_pfn < temp_pfn) > > >>>>+ continue; > > >>>> > > >>>>- if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) > > >>>>- continue; > > >>>>+ check_count++; > > >>>>+ consuming = i; > > >>>>+ current_pfn = temp_pfn; > > >>>>+ } > > >>>> > > >>>>- /* check whether the found one is ready to be consumed */ > > >>>>- if (page_data_buf[index].pfn != consuming_pfn || > > >>>>- page_data_buf[index].ready != 1) { > > >>>>- goto unlock; > > >>>>+ /* > > >>>>+ * If all the threads have reached the end, we will finish writing. > > >>>>+ */ > > >>>>+ if (end_count >= info->num_threads) > > >>>>+ goto finish; > > >>>>+ > > >>>>+ /* > > >>>>+ * If the page_flag_buf is not ready, the pfn recorded may be changed. > > >>>>+ * So we should recheck. 
> > >>>>+ */ > > >>>>+ if (info->page_flag_buf[consuming]->ready != FLAG_READY) { > > >>>>+ gettimeofday(&new, NULL); > > >>>>+ if (new.tv_sec - last.tv_sec > WAIT_TIME) { > > >>>>+ ERRMSG("Can't get data of pfn.\n"); > > >>>>+ goto out; > > >>>>+ } > > >>>>+ continue; > > >>>>+ } > > >>>>+ > > >>>>+ if (current_pfn == info->page_flag_buf[consuming]->pfn) > > >>>>+ break; > > >>>> } > > >>>> > > >>>> if ((num_dumped % per) == 0) > > >>>> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); > > >>>> > > >>>>- /* next pfn is found, refresh last here */ > > >>>>- last = new; > > >>>>- consuming_pfn++; > > >>>>- info->consumed_pfn++; > > >>>>- page_data_buf[index].ready = 0; > > >>>>- > > >>>>- if (page_data_buf[index].dumpable == FALSE) > > >>>>- goto unlock; > > >>>>- > > >>>> num_dumped++; > > >>>> > > >>>>- if (page_data_buf[index].zero == TRUE) { > > >>>>+ > > >>>>+ if (info->page_flag_buf[consuming]->zero == TRUE) { > > >>>> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) > > >>>> goto out; > > >>>> pfn_zero++; > > >>>> } else { > > >>>>+ index = info->page_flag_buf[consuming]->index; > > >>>> pd.flags = page_data_buf[index].flags; > > >>>> pd.size = page_data_buf[index].size; > > >>>> pd.page_flags = 0; > > >>>>@@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > > >>>> */ > > >>>> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) > > >>>> goto out; > > >>>>- > > >>>>+ page_data_buf[index].used = FALSE; > > >>>> } > > >>>>-unlock: > > >>>>- pthread_mutex_unlock(&page_data_buf[index].mutex); > > >>>>+ info->page_flag_buf[consuming]->ready = FLAG_UNUSED; > > >>>>+ info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; > > >>>> } > > >>>>- > > >>>>+finish: > > >>>> ret = TRUE; > > >>>> /* > > >>>> * print [100 %] > > >>>>@@ -7463,15 +7532,9 @@ out: > > >>>> } > > >>>> } > > >>>> > > >>>>- if (page_data_buf != NULL) { > > >>>>- for (i = 0; i < page_data_num; i++) { > > >>>>- 
pthread_mutex_destroy(&page_data_buf[i].mutex); > > >>>>- } > > >>>>- } > > >>>>- > > >>>>+ sem_destroy(&info->page_flag_buf_sem); > > >>>> pthread_rwlock_destroy(&info->usemmap_rwlock); > > >>>> pthread_mutex_destroy(&info->filter_mutex); > > >>>>- pthread_mutex_destroy(&info->consumed_pfn_mutex); > > >>>> pthread_mutex_destroy(&info->current_pfn_mutex); > > >>>> > > >>>> return ret; > > >>>>@@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag > > >>>> num_dumped++; > > >>>> if (!read_pfn(pfn, buf)) > > >>>> goto out; > > >>>>+ > > >>>> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); > > >>>> > > >>>> /* > > >>>>diff --git a/makedumpfile.h b/makedumpfile.h > > >>>>index e0b5bbf..4b315c0 100644 > > >>>>--- a/makedumpfile.h > > >>>>+++ b/makedumpfile.h > > >>>>@@ -44,6 +44,7 @@ > > >>>> #include "print_info.h" > > >>>> #include "sadump_mod.h" > > >>>> #include <pthread.h> > > >>>>+#include <semaphore.h> > > >>>> > > >>>> /* > > >>>> * Result of command > > >>>>@@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; > > >>>> #define PAGE_DATA_NUM (50) > > >>>> #define WAIT_TIME (60 * 10) > > >>>> #define PTHREAD_FAIL ((void *)-2) > > >>>>-#define NUM_BUFFERS (50) > > >>>>+#define NUM_BUFFERS (20) > > >>>> > > >>>> struct mmap_cache { > > >>>> char *mmap_buf; > > >>>>@@ -985,28 +986,33 @@ struct mmap_cache { > > >>>> off_t mmap_end_offset; > > >>>> }; > > >>>> > > >>>>+enum { > > >>>>+ FLAG_UNUSED, > > >>>>+ FLAG_READY, > > >>>>+ FLAG_FILLING > > >>>>+}; > > >>>>+struct page_flag { > > >>>>+ mdf_pfn_t pfn; > > >>>>+ char zero; > > >>>>+ char ready; > > >>>>+ short index; > > >>>>+ struct page_flag *next; > > >>>>+}; > > >>>>+ > > >>>> struct page_data > > >>>> { > > >>>>- mdf_pfn_t pfn; > > >>>>- int dumpable; > > >>>>- int zero; > > >>>>- unsigned int flags; > > >>>> long size; > > >>>> unsigned char *buf; > > >>>>- pthread_mutex_t mutex; > > >>>>- /* > > >>>>- * whether the page_data is ready to be 
consumed > > >>>>- */ > > >>>>- int ready; > > >>>>+ int flags; > > >>>>+ int used; > > >>>> }; > > >>>> > > >>>> struct thread_args { > > >>>> int thread_num; > > >>>> unsigned long len_buf_out; > > >>>>- mdf_pfn_t start_pfn, end_pfn; > > >>>>- int page_data_num; > > >>>> struct cycle *cycle; > > >>>> struct page_data *page_data_buf; > > >>>>+ struct page_flag *page_flag_buf; > > >>>> }; > > >>>> > > >>>> /* > > >>>>@@ -1295,11 +1301,12 @@ struct DumpInfo { > > >>>> pthread_t **threads; > > >>>> struct thread_args *kdump_thread_args; > > >>>> struct page_data *page_data_buf; > > >>>>+ struct page_flag **page_flag_buf; > > >>>>+ sem_t page_flag_buf_sem; > > >>>> pthread_rwlock_t usemmap_rwlock; > > >>>> mdf_pfn_t current_pfn; > > >>>> pthread_mutex_t current_pfn_mutex; > > >>>>- mdf_pfn_t consumed_pfn; > > >>>>- pthread_mutex_t consumed_pfn_mutex; > > >>>>+ pthread_mutex_t page_data_mutex; > > >>>> pthread_mutex_t filter_mutex; > > >>>> }; > > >>>> extern struct DumpInfo *info; > > >>>>-- > > >>>>1.8.3.1 > > >>>> > > >>>> > > >>>> > > >>>> > > >>>>_______________________________________________ > > >>>>kexec mailing list > > >>>>kexec@lists.infradead.org > > >>>>http://lists.infradead.org/mailman/listinfo/kexec > > >>> > > >>> > > >> > > >> > > >> > > >>_______________________________________________ > > >>kexec mailing list > > >>kexec@lists.infradead.org > > >>http://lists.infradead.org/mailman/listinfo/kexec > > > > > > _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-16 8:04 ` Minfei Huang 2016-03-16 8:24 ` Minfei Huang @ 2016-03-16 8:26 ` "Zhou, Wenjian/周文剑" [not found] ` <B049E864-7426-4817-96FA-8E3CCA59CA24@redhat.com> 1 sibling, 1 reply; 33+ messages in thread From: "Zhou, Wenjian/周文剑" @ 2016-03-16 8:26 UTC (permalink / raw) To: Minfei Huang; +Cc: kexec On 03/16/2016 04:04 PM, Minfei Huang wrote: > On 03/16/16 at 09:55am, "Zhou, Wenjian/周文剑" wrote: >> Hi Minfei, >> >> I have some questions. >> >> If the value of num-threads is 8, >> 1. How much is the free memory before running makedumpfile failed? > > Hmm, this machine is reserved by other, I have no access to take a look > about reserved memory. All of the configuration are set by default. > Maybe it's about 420M. > I don't mean the reserved memory. I mean the free memory. >> >> 2. How much is the free memory before running makedumpfile success? > > I don't memtion this during testing it. > >> >> >> And the following result is very strange if all cache has been dropped. >> makedumpfile --num-threads 30 -d 31 >> real 0m0.006s >> user 0m0.002s >> sys 0m0.004s > > For this case, makedumpfile fails to dump vmcore with option > --num-threads 30. > > I suspect the following output from strace. > >>> 1313 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) >>> 1314 mmap(NULL, 18446744048584523776, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) >>> 1315 mmap(NULL, 134217728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x7f5927bb2000 I see. Is there any error messages? Such as "out of memory"? How about it without the patch? Will it occur if double the reserved memory? BTW, can it be reproduced in other machines? I haven't get such result in my machine yet. In my machine, the number of free memory will not always the same after executing makedumpfile each time. 
So if there is not enough memory, makedumpfile will fail sometimes. But I'm not sure whether they are the same issue. -- Thanks Zhou > > Thanks > Minfei > >> >> -- >> Thanks >> Zhou >> >> On 03/15/2016 05:33 PM, Minfei Huang wrote: >>> On 03/15/16 at 03:12pm, "Zhou, Wenjian/周文剑" wrote: >>>> Hello Minfei, >>>> >>>> I guess the result is affected by the caches. >>>> How about executing the following command before running makedumpfile each time? >>>> # echo 3 > /proc/sys/vm/drop_caches >>> >>> Hi, Zhou. >>> >>> Seem there is a bug during dumping vmcore with option num-threads. >>> >>> 1307 open("/proc/meminfo", O_RDONLY) = 4 >>> 1308 fstat(4, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 >>> 1309 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f59322d3000 >>> 1310 read(4, "MemTotal: 385452 kB\nMemF"..., 1024) = 1024 >>> 1311 close(4) = 0 >>> 1312 munmap(0x7f59322d3000, 4096) = 0 >>> 1313 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) >>> 1314 mmap(NULL, 18446744048584523776, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) >>> 1315 mmap(NULL, 134217728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x7f5927bb2000 >>> 1316 munmap(0x7f5927bb2000, 4513792) = 0 >>> 1317 munmap(0x7f592c000000, 62595072) = 0 >>> 1318 mprotect(0x7f5928000000, 135168, PROT_READ|PROT_WRITE) = 0 >>> 1319 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) >>> >>> Thanks >>> Minfei >>> >>>> >>>> -- >>>> Thanks >>>> Zhou >>>> >>>> On 03/15/2016 02:34 PM, Minfei Huang wrote: >>>>> Hi, Zhou. >>>>> >>>>> I have applied this patch base on 1.5.9. There are several testcases I >>>>> have tested. 
>>>>> >>>>> - makedumpfile --num-threads 64 -d 31 >>>>> real 0m0.010s >>>>> user 0m0.002s >>>>> sys 0m0.009s >>>>> >>>>> - makedumpfile --num-threads 31 -d 31 >>>>> real 2m40.915s >>>>> user 10m50.900s >>>>> sys 23m9.664s >>>>> >>>>> makedumpfile --num-threads 30 -d 31 >>>>> real 0m0.006s >>>>> user 0m0.002s >>>>> sys 0m0.004s >>>>> >>>>> makedumpfile --num-threads 32 -d 31 >>>>> real 0m0.007s >>>>> user 0m0.002s >>>>> sys 0m0.005s >>>>> >>>>> - makedumpfile --num-threads 8 -d 31 >>>>> real 2m32.692s >>>>> user 7m4.630s >>>>> sys 2m0.369s >>>>> >>>>> - makedumpfile --num-threads 1 -d 31 >>>>> real 4m42.423s >>>>> user 7m27.153s >>>>> sys 0m22.490s >>>>> >>>>> - makedumpfile.orig -d 31 >>>>> real 4m1.297s >>>>> user 3m39.696s >>>>> sys 0m15.200s >>>>> >>>>> This patch has a huge increment to the filter performance under 31. But >>>>> it is not stable, since makedumpfile fails to dump vmcore intermittently. >>>>> You can find the above test result, makedumpfile fails to dump vmcore >>>>> with option --num-threads 64, also it may occur with option >>>>> --number-threads 8. >>>>> >>>>> Thanks >>>>> Minfei >>>>> >>>>> On 03/09/16 at 08:27am, Zhou Wenjian wrote: >>>>>> v4: >>>>>> 1. fix a bug caused by the logic >>>>>> v3: >>>>>> 1. remove some unused variables >>>>>> 2. fix a bug caused by the wrong logic >>>>>> 3. fix a bug caused by optimising >>>>>> 4. improve more performance by using Minoru Usui's code >>>>>> >>>>>> multi-threads implementation will introduce extra cost when handling >>>>>> each page. The origin implementation will also do the extra work for >>>>>> filtered pages. So there is a big performance degradation in >>>>>> --num-threads -d 31. >>>>>> The new implementation won't do the extra work for filtered pages any >>>>>> more. So the performance of -d 31 is close to that of serial processing. >>>>>> >>>>>> The new implementation is just like the following: >>>>>> * The basic idea is producer producing page and consumer writing page. 
>>>>>> * Each producer have a page_flag_buf list which is used for storing >>>>>> page's description. >>>>>> * The size of page_flag_buf is little so it won't take too much memory. >>>>>> * And all producers will share a page_data_buf array which is >>>>>> used for storing page's compressed data. >>>>>> * The main thread is the consumer. It will find the next pfn and write >>>>>> it into file. >>>>>> * The next pfn is smallest pfn in all page_flag_buf. >>>>>> >>>>>> Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> >>>>>> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> >>>>>> --- >>>>>> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- >>>>>> makedumpfile.h | 35 ++++--- >>>>>> 2 files changed, 202 insertions(+), 131 deletions(-) >>>>>> >>>>>> diff --git a/makedumpfile.c b/makedumpfile.c >>>>>> index fa0b779..2b0864a 100644 >>>>>> --- a/makedumpfile.c >>>>>> +++ b/makedumpfile.c >>>>>> @@ -3483,7 +3483,8 @@ initial_for_parallel() >>>>>> unsigned long page_data_buf_size; >>>>>> unsigned long limit_size; >>>>>> int page_data_num; >>>>>> - int i; >>>>>> + struct page_flag *current; >>>>>> + int i, j; >>>>>> >>>>>> len_buf_out = calculate_len_buf_out(info->page_size); >>>>>> >>>>>> @@ -3560,10 +3561,16 @@ initial_for_parallel() >>>>>> >>>>>> limit_size = (get_free_memory_size() >>>>>> - MAP_REGION * info->num_threads) * 0.6; >>>>>> + if (limit_size < 0) { >>>>>> + MSG("Free memory is not enough for multi-threads\n"); >>>>>> + return FALSE; >>>>>> + } >>>>>> >>>>>> page_data_num = limit_size / page_data_buf_size; >>>>>> + info->num_buffers = 3 * info->num_threads; >>>>>> >>>>>> - info->num_buffers = MIN(NUM_BUFFERS, page_data_num); >>>>>> + info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); >>>>>> + info->num_buffers = MIN(info->num_buffers, page_data_num); >>>>>> >>>>>> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", >>>>>> info->num_buffers); >>>>>> @@ -3588,6 +3595,36 @@ initial_for_parallel() >>>>>> } 
>>>>>> >>>>>> /* >>>>>> + * initial page_flag for each thread >>>>>> + */ >>>>>> + if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) >>>>>> + == NULL) { >>>>>> + MSG("Can't allocate memory for page_flag_buf. %s\n", >>>>>> + strerror(errno)); >>>>>> + return FALSE; >>>>>> + } >>>>>> + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); >>>>>> + >>>>>> + for (i = 0; i < info->num_threads; i++) { >>>>>> + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { >>>>>> + MSG("Can't allocate memory for page_flag. %s\n", >>>>>> + strerror(errno)); >>>>>> + return FALSE; >>>>>> + } >>>>>> + current = info->page_flag_buf[i]; >>>>>> + >>>>>> + for (j = 1; j < NUM_BUFFERS; j++) { >>>>>> + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { >>>>>> + MSG("Can't allocate memory for page_flag. %s\n", >>>>>> + strerror(errno)); >>>>>> + return FALSE; >>>>>> + } >>>>>> + current = current->next; >>>>>> + } >>>>>> + current->next = info->page_flag_buf[i]; >>>>>> + } >>>>>> + >>>>>> + /* >>>>>> * initial fd_memory for threads >>>>>> */ >>>>>> for (i = 0; i < info->num_threads; i++) { >>>>>> @@ -3612,7 +3649,8 @@ initial_for_parallel() >>>>>> void >>>>>> free_for_parallel() >>>>>> { >>>>>> - int i; >>>>>> + int i, j; >>>>>> + struct page_flag *current; >>>>>> >>>>>> if (info->threads != NULL) { >>>>>> for (i = 0; i < info->num_threads; i++) { >>>>>> @@ -3655,6 +3693,19 @@ free_for_parallel() >>>>>> free(info->page_data_buf); >>>>>> } >>>>>> >>>>>> + if (info->page_flag_buf != NULL) { >>>>>> + for (i = 0; i < info->num_threads; i++) { >>>>>> + for (j = 0; j < NUM_BUFFERS; j++) { >>>>>> + if (info->page_flag_buf[i] != NULL) { >>>>>> + current = info->page_flag_buf[i]; >>>>>> + info->page_flag_buf[i] = current->next; >>>>>> + free(current); >>>>>> + } >>>>>> + } >>>>>> + } >>>>>> + free(info->page_flag_buf); >>>>>> + } >>>>>> + >>>>>> if (info->parallel_info == NULL) >>>>>> return; >>>>>> >>>>>> @@ -7075,11 
+7126,11 @@ void * >>>>>> kdump_thread_function_cyclic(void *arg) { >>>>>> void *retval = PTHREAD_FAIL; >>>>>> struct thread_args *kdump_thread_args = (struct thread_args *)arg; >>>>>> - struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>>>>> + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>>>>> + volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; >>>>>> struct cycle *cycle = kdump_thread_args->cycle; >>>>>> - int page_data_num = kdump_thread_args->page_data_num; >>>>>> - mdf_pfn_t pfn; >>>>>> - int index; >>>>>> + mdf_pfn_t pfn = cycle->start_pfn; >>>>>> + int index = kdump_thread_args->thread_num; >>>>>> int buf_ready; >>>>>> int dumpable; >>>>>> int fd_memory = 0; >>>>>> @@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { >>>>>> kdump_thread_args->thread_num); >>>>>> } >>>>>> >>>>>> - while (1) { >>>>>> - /* get next pfn */ >>>>>> - pthread_mutex_lock(&info->current_pfn_mutex); >>>>>> - pfn = info->current_pfn; >>>>>> - info->current_pfn++; >>>>>> - pthread_mutex_unlock(&info->current_pfn_mutex); >>>>>> - >>>>>> - if (pfn >= kdump_thread_args->end_pfn) >>>>>> - break; >>>>>> - >>>>>> - index = -1; >>>>>> + /* >>>>>> + * filtered page won't take anything >>>>>> + * unfiltered zero page will only take a page_flag_buf >>>>>> + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf >>>>>> + */ >>>>>> + while (pfn < cycle->end_pfn) { >>>>>> buf_ready = FALSE; >>>>>> >>>>>> + pthread_mutex_lock(&info->page_data_mutex); >>>>>> + while (page_data_buf[index].used != FALSE) { >>>>>> + index = (index + 1) % info->num_buffers; >>>>>> + } >>>>>> + page_data_buf[index].used = TRUE; >>>>>> + pthread_mutex_unlock(&info->page_data_mutex); >>>>>> + >>>>>> while (buf_ready == FALSE) { >>>>>> pthread_testcancel(); >>>>>> - >>>>>> - index = pfn % page_data_num; >>>>>> - >>>>>> - if (pfn - info->consumed_pfn > info->num_buffers) >>>>>> + if (page_flag_buf->ready == FLAG_READY) 
>>>>>> continue; >>>>>> >>>>>> - if (page_data_buf[index].ready != 0) >>>>>> - continue; >>>>>> - >>>>>> - pthread_mutex_lock(&page_data_buf[index].mutex); >>>>>> - >>>>>> - if (page_data_buf[index].ready != 0) >>>>>> - goto unlock; >>>>>> - >>>>>> - buf_ready = TRUE; >>>>>> + /* get next dumpable pfn */ >>>>>> + pthread_mutex_lock(&info->current_pfn_mutex); >>>>>> + for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { >>>>>> + dumpable = is_dumpable( >>>>>> + info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >>>>>> + pfn, >>>>>> + cycle); >>>>>> + if (dumpable) >>>>>> + break; >>>>>> + } >>>>>> + info->current_pfn = pfn + 1; >>>>>> >>>>>> - page_data_buf[index].pfn = pfn; >>>>>> - page_data_buf[index].ready = 1; >>>>>> + page_flag_buf->pfn = pfn; >>>>>> + page_flag_buf->ready = FLAG_FILLING; >>>>>> + pthread_mutex_unlock(&info->current_pfn_mutex); >>>>>> + sem_post(&info->page_flag_buf_sem); >>>>>> >>>>>> - dumpable = is_dumpable( >>>>>> - info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >>>>>> - pfn, >>>>>> - cycle); >>>>>> - page_data_buf[index].dumpable = dumpable; >>>>>> - if (!dumpable) >>>>>> - goto unlock; >>>>>> + if (pfn >= cycle->end_pfn) { >>>>>> + info->current_pfn = cycle->end_pfn; >>>>>> + page_data_buf[index].used = FALSE; >>>>>> + break; >>>>>> + } >>>>>> >>>>>> if (!read_pfn_parallel(fd_memory, pfn, buf, >>>>>> &bitmap_memory_parallel, >>>>>> @@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { >>>>>> >>>>>> if ((info->dump_level & DL_EXCLUDE_ZERO) >>>>>> && is_zero_page(buf, info->page_size)) { >>>>>> - page_data_buf[index].zero = TRUE; >>>>>> - goto unlock; >>>>>> + page_flag_buf->zero = TRUE; >>>>>> + goto next; >>>>>> } >>>>>> >>>>>> - page_data_buf[index].zero = FALSE; >>>>>> + page_flag_buf->zero = FALSE; >>>>>> >>>>>> /* >>>>>> * Compress the page data. 
>>>>>> @@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { >>>>>> page_data_buf[index].flags = >>>>>> DUMP_DH_COMPRESSED_LZO; >>>>>> page_data_buf[index].size = size_out; >>>>>> + >>>>>> memcpy(page_data_buf[index].buf, buf_out, size_out); >>>>>> #endif >>>>>> #ifdef USESNAPPY >>>>>> @@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { >>>>>> page_data_buf[index].size = info->page_size; >>>>>> memcpy(page_data_buf[index].buf, buf, info->page_size); >>>>>> } >>>>>> -unlock: >>>>>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>>>>> + page_flag_buf->index = index; >>>>>> + buf_ready = TRUE; >>>>>> +next: >>>>>> + page_flag_buf->ready = FLAG_READY; >>>>>> + page_flag_buf = page_flag_buf->next; >>>>>> >>>>>> } >>>>>> } >>>>>> - >>>>>> retval = NULL; >>>>>> >>>>>> fail: >>>>>> @@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>> struct page_desc pd; >>>>>> struct timeval tv_start; >>>>>> struct timeval last, new; >>>>>> - unsigned long long consuming_pfn; >>>>>> pthread_t **threads = NULL; >>>>>> struct thread_args *kdump_thread_args = NULL; >>>>>> void *thread_result; >>>>>> - int page_data_num; >>>>>> + int page_buf_num; >>>>>> struct page_data *page_data_buf = NULL; >>>>>> int i; >>>>>> int index; >>>>>> + int end_count, consuming, check_count; >>>>>> + mdf_pfn_t current_pfn, temp_pfn; >>>>>> >>>>>> if (info->flag_elf_dumpfile) >>>>>> return FALSE; >>>>>> @@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>> goto out; >>>>>> } >>>>>> >>>>>> - res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); >>>>>> - if (res != 0) { >>>>>> - ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", >>>>>> - strerror(res)); >>>>>> - goto out; >>>>>> - } >>>>>> - >>>>>> res = pthread_mutex_init(&info->filter_mutex, NULL); >>>>>> if (res != 0) { >>>>>> ERRMSG("Can't initialize filter_mutex. 
%s\n", strerror(res)); >>>>>> @@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>> end_pfn = cycle->end_pfn; >>>>>> >>>>>> info->current_pfn = start_pfn; >>>>>> - info->consumed_pfn = start_pfn - 1; >>>>>> >>>>>> threads = info->threads; >>>>>> kdump_thread_args = info->kdump_thread_args; >>>>>> >>>>>> - page_data_num = info->num_buffers; >>>>>> + page_buf_num = info->num_buffers; >>>>>> page_data_buf = info->page_data_buf; >>>>>> + pthread_mutex_init(&info->page_data_mutex, NULL); >>>>>> + sem_init(&info->page_flag_buf_sem, 0, 0); >>>>>> >>>>>> - for (i = 0; i < page_data_num; i++) { >>>>>> - /* >>>>>> - * producer will use pfn in page_data_buf to decide the >>>>>> - * consumed pfn >>>>>> - */ >>>>>> - page_data_buf[i].pfn = start_pfn - 1; >>>>>> - page_data_buf[i].ready = 0; >>>>>> - res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); >>>>>> - if (res != 0) { >>>>>> - ERRMSG("Can't initialize mutex of page_data_buf. %s\n", >>>>>> - strerror(res)); >>>>>> - goto out; >>>>>> - } >>>>>> - } >>>>>> + for (i = 0; i < page_buf_num; i++) >>>>>> + page_data_buf[i].used = FALSE; >>>>>> >>>>>> for (i = 0; i < info->num_threads; i++) { >>>>>> kdump_thread_args[i].thread_num = i; >>>>>> kdump_thread_args[i].len_buf_out = len_buf_out; >>>>>> - kdump_thread_args[i].start_pfn = start_pfn; >>>>>> - kdump_thread_args[i].end_pfn = end_pfn; >>>>>> - kdump_thread_args[i].page_data_num = page_data_num; >>>>>> kdump_thread_args[i].page_data_buf = page_data_buf; >>>>>> + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; >>>>>> kdump_thread_args[i].cycle = cycle; >>>>>> >>>>>> res = pthread_create(threads[i], NULL, >>>>>> @@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>> } >>>>>> } >>>>>> >>>>>> - consuming_pfn = start_pfn; >>>>>> - index = -1; >>>>>> + end_count = 0; >>>>>> + while (1) { >>>>>> + consuming = 0; >>>>>> + check_count = 0; >>>>>> >>>>>> - gettimeofday(&last, NULL); 
>>>>>> + /* >>>>>> + * The basic idea is producer producing page and consumer writing page. >>>>>> + * Each producer have a page_flag_buf list which is used for storing page's description. >>>>>> + * The size of page_flag_buf is little so it won't take too much memory. >>>>>> + * And all producers will share a page_data_buf array which is used for storing page's compressed data. >>>>>> + * The main thread is the consumer. It will find the next pfn and write it into file. >>>>>> + * The next pfn is smallest pfn in all page_flag_buf. >>>>>> + */ >>>>>> + sem_wait(&info->page_flag_buf_sem); >>>>>> + gettimeofday(&last, NULL); >>>>>> + while (1) { >>>>>> + current_pfn = end_pfn; >>>>>> >>>>>> - while (consuming_pfn < end_pfn) { >>>>>> - index = consuming_pfn % page_data_num; >>>>>> + /* >>>>>> + * page_flag_buf is in circular linked list. >>>>>> + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's >>>>>> + * page_flag_buf list. >>>>>> + * consuming is used for recording in which thread the pfn is the smallest. >>>>>> + * current_pfn is used for recording the value of pfn when checking the pfn. >>>>>> + */ >>>>>> + for (i = 0; i < info->num_threads; i++) { >>>>>> + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) >>>>>> + continue; >>>>>> + temp_pfn = info->page_flag_buf[i]->pfn; >>>>>> >>>>>> - gettimeofday(&new, NULL); >>>>>> - if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>>>>> - ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); >>>>>> - goto out; >>>>>> - } >>>>>> + /* >>>>>> + * count how many threads have reached the end. 
>>>>>> + */ >>>>>> + if (temp_pfn >= end_pfn) { >>>>>> + info->page_flag_buf[i]->ready = FLAG_UNUSED; >>>>>> + end_count++; >>>>>> + continue; >>>>>> + } >>>>>> >>>>>> - /* >>>>>> - * check pfn first without mutex locked to reduce the time >>>>>> - * trying to lock the mutex >>>>>> - */ >>>>>> - if (page_data_buf[index].pfn != consuming_pfn) >>>>>> - continue; >>>>>> + if (current_pfn < temp_pfn) >>>>>> + continue; >>>>>> >>>>>> - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) >>>>>> - continue; >>>>>> + check_count++; >>>>>> + consuming = i; >>>>>> + current_pfn = temp_pfn; >>>>>> + } >>>>>> >>>>>> - /* check whether the found one is ready to be consumed */ >>>>>> - if (page_data_buf[index].pfn != consuming_pfn || >>>>>> - page_data_buf[index].ready != 1) { >>>>>> - goto unlock; >>>>>> + /* >>>>>> + * If all the threads have reached the end, we will finish writing. >>>>>> + */ >>>>>> + if (end_count >= info->num_threads) >>>>>> + goto finish; >>>>>> + >>>>>> + /* >>>>>> + * If the page_flag_buf is not ready, the pfn recorded may be changed. >>>>>> + * So we should recheck. 
>>>>>> + */ >>>>>> + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { >>>>>> + gettimeofday(&new, NULL); >>>>>> + if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>>>>> + ERRMSG("Can't get data of pfn.\n"); >>>>>> + goto out; >>>>>> + } >>>>>> + continue; >>>>>> + } >>>>>> + >>>>>> + if (current_pfn == info->page_flag_buf[consuming]->pfn) >>>>>> + break; >>>>>> } >>>>>> >>>>>> if ((num_dumped % per) == 0) >>>>>> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); >>>>>> >>>>>> - /* next pfn is found, refresh last here */ >>>>>> - last = new; >>>>>> - consuming_pfn++; >>>>>> - info->consumed_pfn++; >>>>>> - page_data_buf[index].ready = 0; >>>>>> - >>>>>> - if (page_data_buf[index].dumpable == FALSE) >>>>>> - goto unlock; >>>>>> - >>>>>> num_dumped++; >>>>>> >>>>>> - if (page_data_buf[index].zero == TRUE) { >>>>>> + >>>>>> + if (info->page_flag_buf[consuming]->zero == TRUE) { >>>>>> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) >>>>>> goto out; >>>>>> pfn_zero++; >>>>>> } else { >>>>>> + index = info->page_flag_buf[consuming]->index; >>>>>> pd.flags = page_data_buf[index].flags; >>>>>> pd.size = page_data_buf[index].size; >>>>>> pd.page_flags = 0; >>>>>> @@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>> */ >>>>>> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) >>>>>> goto out; >>>>>> - >>>>>> + page_data_buf[index].used = FALSE; >>>>>> } >>>>>> -unlock: >>>>>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>>>>> + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; >>>>>> + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; >>>>>> } >>>>>> - >>>>>> +finish: >>>>>> ret = TRUE; >>>>>> /* >>>>>> * print [100 %] >>>>>> @@ -7463,15 +7532,9 @@ out: >>>>>> } >>>>>> } >>>>>> >>>>>> - if (page_data_buf != NULL) { >>>>>> - for (i = 0; i < page_data_num; i++) { >>>>>> - pthread_mutex_destroy(&page_data_buf[i].mutex); >>>>>> - } >>>>>> - } >>>>>> - >>>>>> + 
sem_destroy(&info->page_flag_buf_sem); >>>>>> pthread_rwlock_destroy(&info->usemmap_rwlock); >>>>>> pthread_mutex_destroy(&info->filter_mutex); >>>>>> - pthread_mutex_destroy(&info->consumed_pfn_mutex); >>>>>> pthread_mutex_destroy(&info->current_pfn_mutex); >>>>>> >>>>>> return ret; >>>>>> @@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag >>>>>> num_dumped++; >>>>>> if (!read_pfn(pfn, buf)) >>>>>> goto out; >>>>>> + >>>>>> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); >>>>>> >>>>>> /* >>>>>> diff --git a/makedumpfile.h b/makedumpfile.h >>>>>> index e0b5bbf..4b315c0 100644 >>>>>> --- a/makedumpfile.h >>>>>> +++ b/makedumpfile.h >>>>>> @@ -44,6 +44,7 @@ >>>>>> #include "print_info.h" >>>>>> #include "sadump_mod.h" >>>>>> #include <pthread.h> >>>>>> +#include <semaphore.h> >>>>>> >>>>>> /* >>>>>> * Result of command >>>>>> @@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; >>>>>> #define PAGE_DATA_NUM (50) >>>>>> #define WAIT_TIME (60 * 10) >>>>>> #define PTHREAD_FAIL ((void *)-2) >>>>>> -#define NUM_BUFFERS (50) >>>>>> +#define NUM_BUFFERS (20) >>>>>> >>>>>> struct mmap_cache { >>>>>> char *mmap_buf; >>>>>> @@ -985,28 +986,33 @@ struct mmap_cache { >>>>>> off_t mmap_end_offset; >>>>>> }; >>>>>> >>>>>> +enum { >>>>>> + FLAG_UNUSED, >>>>>> + FLAG_READY, >>>>>> + FLAG_FILLING >>>>>> +}; >>>>>> +struct page_flag { >>>>>> + mdf_pfn_t pfn; >>>>>> + char zero; >>>>>> + char ready; >>>>>> + short index; >>>>>> + struct page_flag *next; >>>>>> +}; >>>>>> + >>>>>> struct page_data >>>>>> { >>>>>> - mdf_pfn_t pfn; >>>>>> - int dumpable; >>>>>> - int zero; >>>>>> - unsigned int flags; >>>>>> long size; >>>>>> unsigned char *buf; >>>>>> - pthread_mutex_t mutex; >>>>>> - /* >>>>>> - * whether the page_data is ready to be consumed >>>>>> - */ >>>>>> - int ready; >>>>>> + int flags; >>>>>> + int used; >>>>>> }; >>>>>> >>>>>> struct thread_args { >>>>>> int thread_num; >>>>>> unsigned long len_buf_out; 
>>>>>> - mdf_pfn_t start_pfn, end_pfn; >>>>>> - int page_data_num; >>>>>> struct cycle *cycle; >>>>>> struct page_data *page_data_buf; >>>>>> + struct page_flag *page_flag_buf; >>>>>> }; >>>>>> >>>>>> /* >>>>>> @@ -1295,11 +1301,12 @@ struct DumpInfo { >>>>>> pthread_t **threads; >>>>>> struct thread_args *kdump_thread_args; >>>>>> struct page_data *page_data_buf; >>>>>> + struct page_flag **page_flag_buf; >>>>>> + sem_t page_flag_buf_sem; >>>>>> pthread_rwlock_t usemmap_rwlock; >>>>>> mdf_pfn_t current_pfn; >>>>>> pthread_mutex_t current_pfn_mutex; >>>>>> - mdf_pfn_t consumed_pfn; >>>>>> - pthread_mutex_t consumed_pfn_mutex; >>>>>> + pthread_mutex_t page_data_mutex; >>>>>> pthread_mutex_t filter_mutex; >>>>>> }; >>>>>> extern struct DumpInfo *info; >>>>>> -- >>>>>> 1.8.3.1 >>>>>> >>>>>> >>>>>> >>>>>> >>>>>> _______________________________________________ >>>>>> kexec mailing list >>>>>> kexec@lists.infradead.org >>>>>> http://lists.infradead.org/mailman/listinfo/kexec >>>>> >>>>> >>>> >>>> >>>> >>>> _______________________________________________ >>>> kexec mailing list >>>> kexec@lists.infradead.org >>>> http://lists.infradead.org/mailman/listinfo/kexec >> >> >> _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
[parent not found: <B049E864-7426-4817-96FA-8E3CCA59CA24@redhat.com>]
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 [not found] ` <B049E864-7426-4817-96FA-8E3CCA59CA24@redhat.com> @ 2016-03-16 8:59 ` "Zhou, Wenjian/周文剑" 2016-03-16 9:30 ` Minfei Huang 0 siblings, 1 reply; 33+ messages in thread From: "Zhou, Wenjian/周文剑" @ 2016-03-16 8:59 UTC (permalink / raw) To: Minfei Huang; +Cc: kexec Hi Minfei, Thanks a lot for your information! According to your description and strace log, it seems there is something wrong in initial_for_parallel(). I reviewed the relevant code, but haven't got any idea. And I have one more question. Does it happen every time with the same command? -- Thanks Zhou On 03/16/2016 04:32 PM, Minfei Huang wrote: > >> On Mar 16, 2016, at 16:26, Zhou, Wenjian/周文剑 <zhouwj-fnst@cn.fujitsu.com> wrote: >> >> On 03/16/2016 04:04 PM, Minfei Huang wrote: >>> On 03/16/16 at 09:55am, "Zhou, Wenjian/周文剑" wrote: >>>> Hi Minfei, >>>> >>>> I have some questions. >>>> >>>> If the value of num-threads is 8, >>>> 1. How much is the free memory before running makedumpfile failed? >>> >>> Hmm, this machine is reserved by other, I have no access to take a look >>> about reserved memory. All of the configuration are set by default. >>> Maybe it's about 420M. >>> >> >> I don't mean the reserved memory. >> I mean the free memory. > > Sorry, there is no record about such info. > >> >>>> >>>> 2. How much is the free memory before running makedumpfile success? >>> >>> I don't memtion this during testing it. >>> >>>> >>>> >>>> And the following result is very strange if all cache has been dropped. >>>> makedumpfile --num-threads 30 -d 31 >>>> real 0m0.006s >>>> user 0m0.002s >>>> sys 0m0.004s >>> >>> For this case, makedumpfile fails to dump vmcore with option >>> --num-threads 30. >>> >>> I suspect the following output from strace. 
>>> >>>>> 1313 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) >>>>> 1314 mmap(NULL, 18446744048584523776, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) >>>>> 1315 mmap(NULL, 134217728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x7f5927bb2000 >> >> I see. >> >> Is there any error messages? >> Such as "out of memory”? > > the allocated memory is too large 18446744048584388608? > >> >> How about it without the patch? > > It works well without this patch from my test. > >> >> Will it occur if double the reserved memory? > > No. I just tested all of the test cases. > >> >> BTW, can it be reproduced in other machines? > > No, I have only one with such large memory. > >> I haven't get such result in my machine yet. >> >> In my machine, the number of free memory will not always the same >> after executing makedumpfile each time. >> So if there is not enough memory, makedumpfile will fail sometimes. >> But I'm not sure whether they are the same issue. >> >> -- >> Thanks >> Zhou >> >>> >>> Thanks >>> Minfei >>> >>>> >>>> -- >>>> Thanks >>>> Zhou >>>> >>>> On 03/15/2016 05:33 PM, Minfei Huang wrote: >>>>> On 03/15/16 at 03:12pm, "Zhou, Wenjian/周文剑" wrote: >>>>>> Hello Minfei, >>>>>> >>>>>> I guess the result is affected by the caches. >>>>>> How about executing the following command before running makedumpfile each time? >>>>>> # echo 3 > /proc/sys/vm/drop_caches >>>>> >>>>> Hi, Zhou. >>>>> >>>>> Seem there is a bug during dumping vmcore with option num-threads. 
>>>>> >>>>> 1307 open("/proc/meminfo", O_RDONLY) = 4 >>>>> 1308 fstat(4, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 >>>>> 1309 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f59322d3000 >>>>> 1310 read(4, "MemTotal: 385452 kB\nMemF"..., 1024) = 1024 >>>>> 1311 close(4) = 0 >>>>> 1312 munmap(0x7f59322d3000, 4096) = 0 >>>>> 1313 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) >>>>> 1314 mmap(NULL, 18446744048584523776, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) >>>>> 1315 mmap(NULL, 134217728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x7f5927bb2000 >>>>> 1316 munmap(0x7f5927bb2000, 4513792) = 0 >>>>> 1317 munmap(0x7f592c000000, 62595072) = 0 >>>>> 1318 mprotect(0x7f5928000000, 135168, PROT_READ|PROT_WRITE) = 0 >>>>> 1319 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) >>>>> >>>>> Thanks >>>>> Minfei >>>>> >>>>>> >>>>>> -- >>>>>> Thanks >>>>>> Zhou >>>>>> >>>>>> On 03/15/2016 02:34 PM, Minfei Huang wrote: >>>>>>> Hi, Zhou. >>>>>>> >>>>>>> I have applied this patch base on 1.5.9. There are several testcases I >>>>>>> have tested. 
>>>>>>> >>>>>>> - makedumpfile --num-threads 64 -d 31 >>>>>>> real 0m0.010s >>>>>>> user 0m0.002s >>>>>>> sys 0m0.009s >>>>>>> >>>>>>> - makedumpfile --num-threads 31 -d 31 >>>>>>> real 2m40.915s >>>>>>> user 10m50.900s >>>>>>> sys 23m9.664s >>>>>>> >>>>>>> makedumpfile --num-threads 30 -d 31 >>>>>>> real 0m0.006s >>>>>>> user 0m0.002s >>>>>>> sys 0m0.004s >>>>>>> >>>>>>> makedumpfile --num-threads 32 -d 31 >>>>>>> real 0m0.007s >>>>>>> user 0m0.002s >>>>>>> sys 0m0.005s >>>>>>> >>>>>>> - makedumpfile --num-threads 8 -d 31 >>>>>>> real 2m32.692s >>>>>>> user 7m4.630s >>>>>>> sys 2m0.369s >>>>>>> >>>>>>> - makedumpfile --num-threads 1 -d 31 >>>>>>> real 4m42.423s >>>>>>> user 7m27.153s >>>>>>> sys 0m22.490s >>>>>>> >>>>>>> - makedumpfile.orig -d 31 >>>>>>> real 4m1.297s >>>>>>> user 3m39.696s >>>>>>> sys 0m15.200s >>>>>>> >>>>>>> This patch has a huge increment to the filter performance under 31. But >>>>>>> it is not stable, since makedumpfile fails to dump vmcore intermittently. >>>>>>> You can find the above test result, makedumpfile fails to dump vmcore >>>>>>> with option --num-threads 64, also it may occur with option >>>>>>> --number-threads 8. >>>>>>> >>>>>>> Thanks >>>>>>> Minfei >>>>>>> >>>>>>> On 03/09/16 at 08:27am, Zhou Wenjian wrote: >>>>>>>> v4: >>>>>>>> 1. fix a bug caused by the logic >>>>>>>> v3: >>>>>>>> 1. remove some unused variables >>>>>>>> 2. fix a bug caused by the wrong logic >>>>>>>> 3. fix a bug caused by optimising >>>>>>>> 4. improve more performance by using Minoru Usui's code >>>>>>>> >>>>>>>> multi-threads implementation will introduce extra cost when handling >>>>>>>> each page. The origin implementation will also do the extra work for >>>>>>>> filtered pages. So there is a big performance degradation in >>>>>>>> --num-threads -d 31. >>>>>>>> The new implementation won't do the extra work for filtered pages any >>>>>>>> more. So the performance of -d 31 is close to that of serial processing. 
>>>>>>>> >>>>>>>> The new implementation is just like the following: >>>>>>>> * The basic idea is producer producing page and consumer writing page. >>>>>>>> * Each producer have a page_flag_buf list which is used for storing >>>>>>>> page's description. >>>>>>>> * The size of page_flag_buf is little so it won't take too much memory. >>>>>>>> * And all producers will share a page_data_buf array which is >>>>>>>> used for storing page's compressed data. >>>>>>>> * The main thread is the consumer. It will find the next pfn and write >>>>>>>> it into file. >>>>>>>> * The next pfn is smallest pfn in all page_flag_buf. >>>>>>>> >>>>>>>> Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> >>>>>>>> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> >>>>>>>> --- >>>>>>>> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- >>>>>>>> makedumpfile.h | 35 ++++--- >>>>>>>> 2 files changed, 202 insertions(+), 131 deletions(-) >>>>>>>> >>>>>>>> diff --git a/makedumpfile.c b/makedumpfile.c >>>>>>>> index fa0b779..2b0864a 100644 >>>>>>>> --- a/makedumpfile.c >>>>>>>> +++ b/makedumpfile.c >>>>>>>> @@ -3483,7 +3483,8 @@ initial_for_parallel() >>>>>>>> unsigned long page_data_buf_size; >>>>>>>> unsigned long limit_size; >>>>>>>> int page_data_num; >>>>>>>> - int i; >>>>>>>> + struct page_flag *current; >>>>>>>> + int i, j; >>>>>>>> >>>>>>>> len_buf_out = calculate_len_buf_out(info->page_size); >>>>>>>> >>>>>>>> @@ -3560,10 +3561,16 @@ initial_for_parallel() >>>>>>>> >>>>>>>> limit_size = (get_free_memory_size() >>>>>>>> - MAP_REGION * info->num_threads) * 0.6; >>>>>>>> + if (limit_size < 0) { >>>>>>>> + MSG("Free memory is not enough for multi-threads\n"); >>>>>>>> + return FALSE; >>>>>>>> + } >>>>>>>> >>>>>>>> page_data_num = limit_size / page_data_buf_size; >>>>>>>> + info->num_buffers = 3 * info->num_threads; >>>>>>>> >>>>>>>> - info->num_buffers = MIN(NUM_BUFFERS, page_data_num); >>>>>>>> + info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); 
>>>>>>>> + info->num_buffers = MIN(info->num_buffers, page_data_num); >>>>>>>> >>>>>>>> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", >>>>>>>> info->num_buffers); >>>>>>>> @@ -3588,6 +3595,36 @@ initial_for_parallel() >>>>>>>> } >>>>>>>> >>>>>>>> /* >>>>>>>> + * initial page_flag for each thread >>>>>>>> + */ >>>>>>>> + if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) >>>>>>>> + == NULL) { >>>>>>>> + MSG("Can't allocate memory for page_flag_buf. %s\n", >>>>>>>> + strerror(errno)); >>>>>>>> + return FALSE; >>>>>>>> + } >>>>>>>> + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); >>>>>>>> + >>>>>>>> + for (i = 0; i < info->num_threads; i++) { >>>>>>>> + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { >>>>>>>> + MSG("Can't allocate memory for page_flag. %s\n", >>>>>>>> + strerror(errno)); >>>>>>>> + return FALSE; >>>>>>>> + } >>>>>>>> + current = info->page_flag_buf[i]; >>>>>>>> + >>>>>>>> + for (j = 1; j < NUM_BUFFERS; j++) { >>>>>>>> + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { >>>>>>>> + MSG("Can't allocate memory for page_flag. 
%s\n", >>>>>>>> + strerror(errno)); >>>>>>>> + return FALSE; >>>>>>>> + } >>>>>>>> + current = current->next; >>>>>>>> + } >>>>>>>> + current->next = info->page_flag_buf[i]; >>>>>>>> + } >>>>>>>> + >>>>>>>> + /* >>>>>>>> * initial fd_memory for threads >>>>>>>> */ >>>>>>>> for (i = 0; i < info->num_threads; i++) { >>>>>>>> @@ -3612,7 +3649,8 @@ initial_for_parallel() >>>>>>>> void >>>>>>>> free_for_parallel() >>>>>>>> { >>>>>>>> - int i; >>>>>>>> + int i, j; >>>>>>>> + struct page_flag *current; >>>>>>>> >>>>>>>> if (info->threads != NULL) { >>>>>>>> for (i = 0; i < info->num_threads; i++) { >>>>>>>> @@ -3655,6 +3693,19 @@ free_for_parallel() >>>>>>>> free(info->page_data_buf); >>>>>>>> } >>>>>>>> >>>>>>>> + if (info->page_flag_buf != NULL) { >>>>>>>> + for (i = 0; i < info->num_threads; i++) { >>>>>>>> + for (j = 0; j < NUM_BUFFERS; j++) { >>>>>>>> + if (info->page_flag_buf[i] != NULL) { >>>>>>>> + current = info->page_flag_buf[i]; >>>>>>>> + info->page_flag_buf[i] = current->next; >>>>>>>> + free(current); >>>>>>>> + } >>>>>>>> + } >>>>>>>> + } >>>>>>>> + free(info->page_flag_buf); >>>>>>>> + } >>>>>>>> + >>>>>>>> if (info->parallel_info == NULL) >>>>>>>> return; >>>>>>>> >>>>>>>> @@ -7075,11 +7126,11 @@ void * >>>>>>>> kdump_thread_function_cyclic(void *arg) { >>>>>>>> void *retval = PTHREAD_FAIL; >>>>>>>> struct thread_args *kdump_thread_args = (struct thread_args *)arg; >>>>>>>> - struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>>>>>>> + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>>>>>>> + volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; >>>>>>>> struct cycle *cycle = kdump_thread_args->cycle; >>>>>>>> - int page_data_num = kdump_thread_args->page_data_num; >>>>>>>> - mdf_pfn_t pfn; >>>>>>>> - int index; >>>>>>>> + mdf_pfn_t pfn = cycle->start_pfn; >>>>>>>> + int index = kdump_thread_args->thread_num; >>>>>>>> int buf_ready; >>>>>>>> int dumpable; >>>>>>>> int fd_memory = 0; 
>>>>>>>> @@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { >>>>>>>> kdump_thread_args->thread_num); >>>>>>>> } >>>>>>>> >>>>>>>> - while (1) { >>>>>>>> - /* get next pfn */ >>>>>>>> - pthread_mutex_lock(&info->current_pfn_mutex); >>>>>>>> - pfn = info->current_pfn; >>>>>>>> - info->current_pfn++; >>>>>>>> - pthread_mutex_unlock(&info->current_pfn_mutex); >>>>>>>> - >>>>>>>> - if (pfn >= kdump_thread_args->end_pfn) >>>>>>>> - break; >>>>>>>> - >>>>>>>> - index = -1; >>>>>>>> + /* >>>>>>>> + * filtered page won't take anything >>>>>>>> + * unfiltered zero page will only take a page_flag_buf >>>>>>>> + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf >>>>>>>> + */ >>>>>>>> + while (pfn < cycle->end_pfn) { >>>>>>>> buf_ready = FALSE; >>>>>>>> >>>>>>>> + pthread_mutex_lock(&info->page_data_mutex); >>>>>>>> + while (page_data_buf[index].used != FALSE) { >>>>>>>> + index = (index + 1) % info->num_buffers; >>>>>>>> + } >>>>>>>> + page_data_buf[index].used = TRUE; >>>>>>>> + pthread_mutex_unlock(&info->page_data_mutex); >>>>>>>> + >>>>>>>> while (buf_ready == FALSE) { >>>>>>>> pthread_testcancel(); >>>>>>>> - >>>>>>>> - index = pfn % page_data_num; >>>>>>>> - >>>>>>>> - if (pfn - info->consumed_pfn > info->num_buffers) >>>>>>>> + if (page_flag_buf->ready == FLAG_READY) >>>>>>>> continue; >>>>>>>> >>>>>>>> - if (page_data_buf[index].ready != 0) >>>>>>>> - continue; >>>>>>>> - >>>>>>>> - pthread_mutex_lock(&page_data_buf[index].mutex); >>>>>>>> - >>>>>>>> - if (page_data_buf[index].ready != 0) >>>>>>>> - goto unlock; >>>>>>>> - >>>>>>>> - buf_ready = TRUE; >>>>>>>> + /* get next dumpable pfn */ >>>>>>>> + pthread_mutex_lock(&info->current_pfn_mutex); >>>>>>>> + for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { >>>>>>>> + dumpable = is_dumpable( >>>>>>>> + info->fd_bitmap ? 
&bitmap_parallel : info->bitmap2, >>>>>>>> + pfn, >>>>>>>> + cycle); >>>>>>>> + if (dumpable) >>>>>>>> + break; >>>>>>>> + } >>>>>>>> + info->current_pfn = pfn + 1; >>>>>>>> >>>>>>>> - page_data_buf[index].pfn = pfn; >>>>>>>> - page_data_buf[index].ready = 1; >>>>>>>> + page_flag_buf->pfn = pfn; >>>>>>>> + page_flag_buf->ready = FLAG_FILLING; >>>>>>>> + pthread_mutex_unlock(&info->current_pfn_mutex); >>>>>>>> + sem_post(&info->page_flag_buf_sem); >>>>>>>> >>>>>>>> - dumpable = is_dumpable( >>>>>>>> - info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >>>>>>>> - pfn, >>>>>>>> - cycle); >>>>>>>> - page_data_buf[index].dumpable = dumpable; >>>>>>>> - if (!dumpable) >>>>>>>> - goto unlock; >>>>>>>> + if (pfn >= cycle->end_pfn) { >>>>>>>> + info->current_pfn = cycle->end_pfn; >>>>>>>> + page_data_buf[index].used = FALSE; >>>>>>>> + break; >>>>>>>> + } >>>>>>>> >>>>>>>> if (!read_pfn_parallel(fd_memory, pfn, buf, >>>>>>>> &bitmap_memory_parallel, >>>>>>>> @@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { >>>>>>>> >>>>>>>> if ((info->dump_level & DL_EXCLUDE_ZERO) >>>>>>>> && is_zero_page(buf, info->page_size)) { >>>>>>>> - page_data_buf[index].zero = TRUE; >>>>>>>> - goto unlock; >>>>>>>> + page_flag_buf->zero = TRUE; >>>>>>>> + goto next; >>>>>>>> } >>>>>>>> >>>>>>>> - page_data_buf[index].zero = FALSE; >>>>>>>> + page_flag_buf->zero = FALSE; >>>>>>>> >>>>>>>> /* >>>>>>>> * Compress the page data. 
>>>>>>>> @@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { >>>>>>>> page_data_buf[index].flags = >>>>>>>> DUMP_DH_COMPRESSED_LZO; >>>>>>>> page_data_buf[index].size = size_out; >>>>>>>> + >>>>>>>> memcpy(page_data_buf[index].buf, buf_out, size_out); >>>>>>>> #endif >>>>>>>> #ifdef USESNAPPY >>>>>>>> @@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { >>>>>>>> page_data_buf[index].size = info->page_size; >>>>>>>> memcpy(page_data_buf[index].buf, buf, info->page_size); >>>>>>>> } >>>>>>>> -unlock: >>>>>>>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>>>>>>> + page_flag_buf->index = index; >>>>>>>> + buf_ready = TRUE; >>>>>>>> +next: >>>>>>>> + page_flag_buf->ready = FLAG_READY; >>>>>>>> + page_flag_buf = page_flag_buf->next; >>>>>>>> >>>>>>>> } >>>>>>>> } >>>>>>>> - >>>>>>>> retval = NULL; >>>>>>>> >>>>>>>> fail: >>>>>>>> @@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>>>> struct page_desc pd; >>>>>>>> struct timeval tv_start; >>>>>>>> struct timeval last, new; >>>>>>>> - unsigned long long consuming_pfn; >>>>>>>> pthread_t **threads = NULL; >>>>>>>> struct thread_args *kdump_thread_args = NULL; >>>>>>>> void *thread_result; >>>>>>>> - int page_data_num; >>>>>>>> + int page_buf_num; >>>>>>>> struct page_data *page_data_buf = NULL; >>>>>>>> int i; >>>>>>>> int index; >>>>>>>> + int end_count, consuming, check_count; >>>>>>>> + mdf_pfn_t current_pfn, temp_pfn; >>>>>>>> >>>>>>>> if (info->flag_elf_dumpfile) >>>>>>>> return FALSE; >>>>>>>> @@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>>>> goto out; >>>>>>>> } >>>>>>>> >>>>>>>> - res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); >>>>>>>> - if (res != 0) { >>>>>>>> - ERRMSG("Can't initialize consumed_pfn_mutex. 
%s\n", >>>>>>>> - strerror(res)); >>>>>>>> - goto out; >>>>>>>> - } >>>>>>>> - >>>>>>>> res = pthread_mutex_init(&info->filter_mutex, NULL); >>>>>>>> if (res != 0) { >>>>>>>> ERRMSG("Can't initialize filter_mutex. %s\n", strerror(res)); >>>>>>>> @@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>>>> end_pfn = cycle->end_pfn; >>>>>>>> >>>>>>>> info->current_pfn = start_pfn; >>>>>>>> - info->consumed_pfn = start_pfn - 1; >>>>>>>> >>>>>>>> threads = info->threads; >>>>>>>> kdump_thread_args = info->kdump_thread_args; >>>>>>>> >>>>>>>> - page_data_num = info->num_buffers; >>>>>>>> + page_buf_num = info->num_buffers; >>>>>>>> page_data_buf = info->page_data_buf; >>>>>>>> + pthread_mutex_init(&info->page_data_mutex, NULL); >>>>>>>> + sem_init(&info->page_flag_buf_sem, 0, 0); >>>>>>>> >>>>>>>> - for (i = 0; i < page_data_num; i++) { >>>>>>>> - /* >>>>>>>> - * producer will use pfn in page_data_buf to decide the >>>>>>>> - * consumed pfn >>>>>>>> - */ >>>>>>>> - page_data_buf[i].pfn = start_pfn - 1; >>>>>>>> - page_data_buf[i].ready = 0; >>>>>>>> - res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); >>>>>>>> - if (res != 0) { >>>>>>>> - ERRMSG("Can't initialize mutex of page_data_buf. 
%s\n", >>>>>>>> - strerror(res)); >>>>>>>> - goto out; >>>>>>>> - } >>>>>>>> - } >>>>>>>> + for (i = 0; i < page_buf_num; i++) >>>>>>>> + page_data_buf[i].used = FALSE; >>>>>>>> >>>>>>>> for (i = 0; i < info->num_threads; i++) { >>>>>>>> kdump_thread_args[i].thread_num = i; >>>>>>>> kdump_thread_args[i].len_buf_out = len_buf_out; >>>>>>>> - kdump_thread_args[i].start_pfn = start_pfn; >>>>>>>> - kdump_thread_args[i].end_pfn = end_pfn; >>>>>>>> - kdump_thread_args[i].page_data_num = page_data_num; >>>>>>>> kdump_thread_args[i].page_data_buf = page_data_buf; >>>>>>>> + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; >>>>>>>> kdump_thread_args[i].cycle = cycle; >>>>>>>> >>>>>>>> res = pthread_create(threads[i], NULL, >>>>>>>> @@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>>>> } >>>>>>>> } >>>>>>>> >>>>>>>> - consuming_pfn = start_pfn; >>>>>>>> - index = -1; >>>>>>>> + end_count = 0; >>>>>>>> + while (1) { >>>>>>>> + consuming = 0; >>>>>>>> + check_count = 0; >>>>>>>> >>>>>>>> - gettimeofday(&last, NULL); >>>>>>>> + /* >>>>>>>> + * The basic idea is producer producing page and consumer writing page. >>>>>>>> + * Each producer have a page_flag_buf list which is used for storing page's description. >>>>>>>> + * The size of page_flag_buf is little so it won't take too much memory. >>>>>>>> + * And all producers will share a page_data_buf array which is used for storing page's compressed data. >>>>>>>> + * The main thread is the consumer. It will find the next pfn and write it into file. >>>>>>>> + * The next pfn is smallest pfn in all page_flag_buf. >>>>>>>> + */ >>>>>>>> + sem_wait(&info->page_flag_buf_sem); >>>>>>>> + gettimeofday(&last, NULL); >>>>>>>> + while (1) { >>>>>>>> + current_pfn = end_pfn; >>>>>>>> >>>>>>>> - while (consuming_pfn < end_pfn) { >>>>>>>> - index = consuming_pfn % page_data_num; >>>>>>>> + /* >>>>>>>> + * page_flag_buf is in circular linked list. 
>>>>>>>> + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's >>>>>>>> + * page_flag_buf list. >>>>>>>> + * consuming is used for recording in which thread the pfn is the smallest. >>>>>>>> + * current_pfn is used for recording the value of pfn when checking the pfn. >>>>>>>> + */ >>>>>>>> + for (i = 0; i < info->num_threads; i++) { >>>>>>>> + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) >>>>>>>> + continue; >>>>>>>> + temp_pfn = info->page_flag_buf[i]->pfn; >>>>>>>> >>>>>>>> - gettimeofday(&new, NULL); >>>>>>>> - if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>>>>>>> - ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); >>>>>>>> - goto out; >>>>>>>> - } >>>>>>>> + /* >>>>>>>> + * count how many threads have reached the end. >>>>>>>> + */ >>>>>>>> + if (temp_pfn >= end_pfn) { >>>>>>>> + info->page_flag_buf[i]->ready = FLAG_UNUSED; >>>>>>>> + end_count++; >>>>>>>> + continue; >>>>>>>> + } >>>>>>>> >>>>>>>> - /* >>>>>>>> - * check pfn first without mutex locked to reduce the time >>>>>>>> - * trying to lock the mutex >>>>>>>> - */ >>>>>>>> - if (page_data_buf[index].pfn != consuming_pfn) >>>>>>>> - continue; >>>>>>>> + if (current_pfn < temp_pfn) >>>>>>>> + continue; >>>>>>>> >>>>>>>> - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) >>>>>>>> - continue; >>>>>>>> + check_count++; >>>>>>>> + consuming = i; >>>>>>>> + current_pfn = temp_pfn; >>>>>>>> + } >>>>>>>> >>>>>>>> - /* check whether the found one is ready to be consumed */ >>>>>>>> - if (page_data_buf[index].pfn != consuming_pfn || >>>>>>>> - page_data_buf[index].ready != 1) { >>>>>>>> - goto unlock; >>>>>>>> + /* >>>>>>>> + * If all the threads have reached the end, we will finish writing. >>>>>>>> + */ >>>>>>>> + if (end_count >= info->num_threads) >>>>>>>> + goto finish; >>>>>>>> + >>>>>>>> + /* >>>>>>>> + * If the page_flag_buf is not ready, the pfn recorded may be changed. >>>>>>>> + * So we should recheck. 
>>>>>>>> + */ >>>>>>>> + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { >>>>>>>> + gettimeofday(&new, NULL); >>>>>>>> + if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>>>>>>> + ERRMSG("Can't get data of pfn.\n"); >>>>>>>> + goto out; >>>>>>>> + } >>>>>>>> + continue; >>>>>>>> + } >>>>>>>> + >>>>>>>> + if (current_pfn == info->page_flag_buf[consuming]->pfn) >>>>>>>> + break; >>>>>>>> } >>>>>>>> >>>>>>>> if ((num_dumped % per) == 0) >>>>>>>> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); >>>>>>>> >>>>>>>> - /* next pfn is found, refresh last here */ >>>>>>>> - last = new; >>>>>>>> - consuming_pfn++; >>>>>>>> - info->consumed_pfn++; >>>>>>>> - page_data_buf[index].ready = 0; >>>>>>>> - >>>>>>>> - if (page_data_buf[index].dumpable == FALSE) >>>>>>>> - goto unlock; >>>>>>>> - >>>>>>>> num_dumped++; >>>>>>>> >>>>>>>> - if (page_data_buf[index].zero == TRUE) { >>>>>>>> + >>>>>>>> + if (info->page_flag_buf[consuming]->zero == TRUE) { >>>>>>>> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) >>>>>>>> goto out; >>>>>>>> pfn_zero++; >>>>>>>> } else { >>>>>>>> + index = info->page_flag_buf[consuming]->index; >>>>>>>> pd.flags = page_data_buf[index].flags; >>>>>>>> pd.size = page_data_buf[index].size; >>>>>>>> pd.page_flags = 0; >>>>>>>> @@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>>>> */ >>>>>>>> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) >>>>>>>> goto out; >>>>>>>> - >>>>>>>> + page_data_buf[index].used = FALSE; >>>>>>>> } >>>>>>>> -unlock: >>>>>>>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>>>>>>> + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; >>>>>>>> + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; >>>>>>>> } >>>>>>>> - >>>>>>>> +finish: >>>>>>>> ret = TRUE; >>>>>>>> /* >>>>>>>> * print [100 %] >>>>>>>> @@ -7463,15 +7532,9 @@ out: >>>>>>>> } >>>>>>>> } >>>>>>>> >>>>>>>> - if (page_data_buf != NULL) { >>>>>>>> - for (i = 0; i < 
page_data_num; i++) { >>>>>>>> - pthread_mutex_destroy(&page_data_buf[i].mutex); >>>>>>>> - } >>>>>>>> - } >>>>>>>> - >>>>>>>> + sem_destroy(&info->page_flag_buf_sem); >>>>>>>> pthread_rwlock_destroy(&info->usemmap_rwlock); >>>>>>>> pthread_mutex_destroy(&info->filter_mutex); >>>>>>>> - pthread_mutex_destroy(&info->consumed_pfn_mutex); >>>>>>>> pthread_mutex_destroy(&info->current_pfn_mutex); >>>>>>>> >>>>>>>> return ret; >>>>>>>> @@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag >>>>>>>> num_dumped++; >>>>>>>> if (!read_pfn(pfn, buf)) >>>>>>>> goto out; >>>>>>>> + >>>>>>>> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); >>>>>>>> >>>>>>>> /* >>>>>>>> diff --git a/makedumpfile.h b/makedumpfile.h >>>>>>>> index e0b5bbf..4b315c0 100644 >>>>>>>> --- a/makedumpfile.h >>>>>>>> +++ b/makedumpfile.h >>>>>>>> @@ -44,6 +44,7 @@ >>>>>>>> #include "print_info.h" >>>>>>>> #include "sadump_mod.h" >>>>>>>> #include <pthread.h> >>>>>>>> +#include <semaphore.h> >>>>>>>> >>>>>>>> /* >>>>>>>> * Result of command >>>>>>>> @@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; >>>>>>>> #define PAGE_DATA_NUM (50) >>>>>>>> #define WAIT_TIME (60 * 10) >>>>>>>> #define PTHREAD_FAIL ((void *)-2) >>>>>>>> -#define NUM_BUFFERS (50) >>>>>>>> +#define NUM_BUFFERS (20) >>>>>>>> >>>>>>>> struct mmap_cache { >>>>>>>> char *mmap_buf; >>>>>>>> @@ -985,28 +986,33 @@ struct mmap_cache { >>>>>>>> off_t mmap_end_offset; >>>>>>>> }; >>>>>>>> >>>>>>>> +enum { >>>>>>>> + FLAG_UNUSED, >>>>>>>> + FLAG_READY, >>>>>>>> + FLAG_FILLING >>>>>>>> +}; >>>>>>>> +struct page_flag { >>>>>>>> + mdf_pfn_t pfn; >>>>>>>> + char zero; >>>>>>>> + char ready; >>>>>>>> + short index; >>>>>>>> + struct page_flag *next; >>>>>>>> +}; >>>>>>>> + >>>>>>>> struct page_data >>>>>>>> { >>>>>>>> - mdf_pfn_t pfn; >>>>>>>> - int dumpable; >>>>>>>> - int zero; >>>>>>>> - unsigned int flags; >>>>>>>> long size; >>>>>>>> unsigned char *buf; >>>>>>>> - pthread_mutex_t 
mutex; >>>>>>>> - /* >>>>>>>> - * whether the page_data is ready to be consumed >>>>>>>> - */ >>>>>>>> - int ready; >>>>>>>> + int flags; >>>>>>>> + int used; >>>>>>>> }; >>>>>>>> >>>>>>>> struct thread_args { >>>>>>>> int thread_num; >>>>>>>> unsigned long len_buf_out; >>>>>>>> - mdf_pfn_t start_pfn, end_pfn; >>>>>>>> - int page_data_num; >>>>>>>> struct cycle *cycle; >>>>>>>> struct page_data *page_data_buf; >>>>>>>> + struct page_flag *page_flag_buf; >>>>>>>> }; >>>>>>>> >>>>>>>> /* >>>>>>>> @@ -1295,11 +1301,12 @@ struct DumpInfo { >>>>>>>> pthread_t **threads; >>>>>>>> struct thread_args *kdump_thread_args; >>>>>>>> struct page_data *page_data_buf; >>>>>>>> + struct page_flag **page_flag_buf; >>>>>>>> + sem_t page_flag_buf_sem; >>>>>>>> pthread_rwlock_t usemmap_rwlock; >>>>>>>> mdf_pfn_t current_pfn; >>>>>>>> pthread_mutex_t current_pfn_mutex; >>>>>>>> - mdf_pfn_t consumed_pfn; >>>>>>>> - pthread_mutex_t consumed_pfn_mutex; >>>>>>>> + pthread_mutex_t page_data_mutex; >>>>>>>> pthread_mutex_t filter_mutex; >>>>>>>> }; >>>>>>>> extern struct DumpInfo *info; >>>>>>>> -- >>>>>>>> 1.8.3.1 >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> >>>>>>>> _______________________________________________ >>>>>>>> kexec mailing list >>>>>>>> kexec@lists.infradead.org >>>>>>>> http://lists.infradead.org/mailman/listinfo/kexec >>>>>>> >>>>>>> >>>>>> >>>>>> >>>>>> >>>>>> _______________________________________________ >>>>>> kexec mailing list >>>>>> kexec@lists.infradead.org >>>>>> http://lists.infradead.org/mailman/listinfo/kexec _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-16 8:59 ` "Zhou, Wenjian/周文剑" @ 2016-03-16 9:30 ` Minfei Huang 0 siblings, 0 replies; 33+ messages in thread From: Minfei Huang @ 2016-03-16 9:30 UTC (permalink / raw) To: "Zhou, Wenjian/周文剑"; +Cc: kexec > On Mar 16, 2016, at 16:59, Zhou, Wenjian/周文剑 <zhouwj-fnst@cn.fujitsu.com> wrote: > > Hi Minfei, > > Thanks a lot for your information! > > According to your description and strace log, > it seems there is something wrong in initial_for_parallel(). > > I reviewed the relevant code, but haven't got any idea. > And I have one more question. > Does it happen every time with the same command? Yes. it always fail with option —num-threads 64. Thanks Minfei > > -- > Thanks > Zhou > > On 03/16/2016 04:32 PM, Minfei Huang wrote: >> >>> On Mar 16, 2016, at 16:26, Zhou, Wenjian/周文剑 <zhouwj-fnst@cn.fujitsu.com> wrote: >>> >>> On 03/16/2016 04:04 PM, Minfei Huang wrote: >>>> On 03/16/16 at 09:55am, "Zhou, Wenjian/周文剑" wrote: >>>>> Hi Minfei, >>>>> >>>>> I have some questions. >>>>> >>>>> If the value of num-threads is 8, >>>>> 1. How much is the free memory before running makedumpfile failed? >>>> >>>> Hmm, this machine is reserved by other, I have no access to take a look >>>> about reserved memory. All of the configuration are set by default. >>>> Maybe it's about 420M. >>>> >>> >>> I don't mean the reserved memory. >>> I mean the free memory. >> >> Sorry, there is no record about such info. >> >>> >>>>> >>>>> 2. How much is the free memory before running makedumpfile success? >>>> >>>> I don't memtion this during testing it. >>>> >>>>> >>>>> >>>>> And the following result is very strange if all cache has been dropped. >>>>> makedumpfile --num-threads 30 -d 31 >>>>> real 0m0.006s >>>>> user 0m0.002s >>>>> sys 0m0.004s >>>> >>>> For this case, makedumpfile fails to dump vmcore with option >>>> --num-threads 30. >>>> >>>> I suspect the following output from strace. 
>>>> >>>>>> 1313 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) >>>>>> 1314 mmap(NULL, 18446744048584523776, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) >>>>>> 1315 mmap(NULL, 134217728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x7f5927bb2000 >>> >>> I see. >>> >>> Is there any error messages? >>> Such as "out of memory”? >> >> the allocated memory is too large 18446744048584388608? >> >>> >>> How about it without the patch? >> >> It works well without this patch from my test. >> >>> >>> Will it occur if double the reserved memory? >> >> No. I just tested all of the test cases. >> >>> >>> BTW, can it be reproduced in other machines? >> >> No, I have only one with such large memory. >> >>> I haven't get such result in my machine yet. >>> >>> In my machine, the number of free memory will not always the same >>> after executing makedumpfile each time. >>> So if there is not enough memory, makedumpfile will fail sometimes. >>> But I'm not sure whether they are the same issue. >>> >>> -- >>> Thanks >>> Zhou >>> >>>> >>>> Thanks >>>> Minfei >>>> >>>>> >>>>> -- >>>>> Thanks >>>>> Zhou >>>>> >>>>> On 03/15/2016 05:33 PM, Minfei Huang wrote: >>>>>> On 03/15/16 at 03:12pm, "Zhou, Wenjian/周文剑" wrote: >>>>>>> Hello Minfei, >>>>>>> >>>>>>> I guess the result is affected by the caches. >>>>>>> How about executing the following command before running makedumpfile each time? >>>>>>> # echo 3 > /proc/sys/vm/drop_caches >>>>>> >>>>>> Hi, Zhou. >>>>>> >>>>>> Seem there is a bug during dumping vmcore with option num-threads. 
>>>>>> >>>>>> 1307 open("/proc/meminfo", O_RDONLY) = 4 >>>>>> 1308 fstat(4, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 >>>>>> 1309 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f59322d3000 >>>>>> 1310 read(4, "MemTotal: 385452 kB\nMemF"..., 1024) = 1024 >>>>>> 1311 close(4) = 0 >>>>>> 1312 munmap(0x7f59322d3000, 4096) = 0 >>>>>> 1313 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) >>>>>> 1314 mmap(NULL, 18446744048584523776, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) >>>>>> 1315 mmap(NULL, 134217728, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS|MAP_NORESERVE, -1, 0) = 0x7f5927bb2000 >>>>>> 1316 munmap(0x7f5927bb2000, 4513792) = 0 >>>>>> 1317 munmap(0x7f592c000000, 62595072) = 0 >>>>>> 1318 mprotect(0x7f5928000000, 135168, PROT_READ|PROT_WRITE) = 0 >>>>>> 1319 mmap(NULL, 18446744048584388608, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = -1 ENOMEM (Cannot allocate memory) >>>>>> >>>>>> Thanks >>>>>> Minfei >>>>>> >>>>>>> >>>>>>> -- >>>>>>> Thanks >>>>>>> Zhou >>>>>>> >>>>>>> On 03/15/2016 02:34 PM, Minfei Huang wrote: >>>>>>>> Hi, Zhou. >>>>>>>> >>>>>>>> I have applied this patch base on 1.5.9. There are several testcases I >>>>>>>> have tested. 
>>>>>>>> >>>>>>>> - makedumpfile --num-threads 64 -d 31 >>>>>>>> real 0m0.010s >>>>>>>> user 0m0.002s >>>>>>>> sys 0m0.009s >>>>>>>> >>>>>>>> - makedumpfile --num-threads 31 -d 31 >>>>>>>> real 2m40.915s >>>>>>>> user 10m50.900s >>>>>>>> sys 23m9.664s >>>>>>>> >>>>>>>> makedumpfile --num-threads 30 -d 31 >>>>>>>> real 0m0.006s >>>>>>>> user 0m0.002s >>>>>>>> sys 0m0.004s >>>>>>>> >>>>>>>> makedumpfile --num-threads 32 -d 31 >>>>>>>> real 0m0.007s >>>>>>>> user 0m0.002s >>>>>>>> sys 0m0.005s >>>>>>>> >>>>>>>> - makedumpfile --num-threads 8 -d 31 >>>>>>>> real 2m32.692s >>>>>>>> user 7m4.630s >>>>>>>> sys 2m0.369s >>>>>>>> >>>>>>>> - makedumpfile --num-threads 1 -d 31 >>>>>>>> real 4m42.423s >>>>>>>> user 7m27.153s >>>>>>>> sys 0m22.490s >>>>>>>> >>>>>>>> - makedumpfile.orig -d 31 >>>>>>>> real 4m1.297s >>>>>>>> user 3m39.696s >>>>>>>> sys 0m15.200s >>>>>>>> >>>>>>>> This patch has a huge increment to the filter performance under 31. But >>>>>>>> it is not stable, since makedumpfile fails to dump vmcore intermittently. >>>>>>>> You can find the above test result, makedumpfile fails to dump vmcore >>>>>>>> with option --num-threads 64, also it may occur with option >>>>>>>> --number-threads 8. >>>>>>>> >>>>>>>> Thanks >>>>>>>> Minfei >>>>>>>> >>>>>>>> On 03/09/16 at 08:27am, Zhou Wenjian wrote: >>>>>>>>> v4: >>>>>>>>> 1. fix a bug caused by the logic >>>>>>>>> v3: >>>>>>>>> 1. remove some unused variables >>>>>>>>> 2. fix a bug caused by the wrong logic >>>>>>>>> 3. fix a bug caused by optimising >>>>>>>>> 4. improve more performance by using Minoru Usui's code >>>>>>>>> >>>>>>>>> multi-threads implementation will introduce extra cost when handling >>>>>>>>> each page. The origin implementation will also do the extra work for >>>>>>>>> filtered pages. So there is a big performance degradation in >>>>>>>>> --num-threads -d 31. >>>>>>>>> The new implementation won't do the extra work for filtered pages any >>>>>>>>> more. 
So the performance of -d 31 is close to that of serial processing. >>>>>>>>> >>>>>>>>> The new implementation is just like the following: >>>>>>>>> * The basic idea is producer producing page and consumer writing page. >>>>>>>>> * Each producer have a page_flag_buf list which is used for storing >>>>>>>>> page's description. >>>>>>>>> * The size of page_flag_buf is little so it won't take too much memory. >>>>>>>>> * And all producers will share a page_data_buf array which is >>>>>>>>> used for storing page's compressed data. >>>>>>>>> * The main thread is the consumer. It will find the next pfn and write >>>>>>>>> it into file. >>>>>>>>> * The next pfn is smallest pfn in all page_flag_buf. >>>>>>>>> >>>>>>>>> Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> >>>>>>>>> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> >>>>>>>>> --- >>>>>>>>> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- >>>>>>>>> makedumpfile.h | 35 ++++--- >>>>>>>>> 2 files changed, 202 insertions(+), 131 deletions(-) >>>>>>>>> >>>>>>>>> diff --git a/makedumpfile.c b/makedumpfile.c >>>>>>>>> index fa0b779..2b0864a 100644 >>>>>>>>> --- a/makedumpfile.c >>>>>>>>> +++ b/makedumpfile.c >>>>>>>>> @@ -3483,7 +3483,8 @@ initial_for_parallel() >>>>>>>>> unsigned long page_data_buf_size; >>>>>>>>> unsigned long limit_size; >>>>>>>>> int page_data_num; >>>>>>>>> - int i; >>>>>>>>> + struct page_flag *current; >>>>>>>>> + int i, j; >>>>>>>>> >>>>>>>>> len_buf_out = calculate_len_buf_out(info->page_size); >>>>>>>>> >>>>>>>>> @@ -3560,10 +3561,16 @@ initial_for_parallel() >>>>>>>>> >>>>>>>>> limit_size = (get_free_memory_size() >>>>>>>>> - MAP_REGION * info->num_threads) * 0.6; >>>>>>>>> + if (limit_size < 0) { >>>>>>>>> + MSG("Free memory is not enough for multi-threads\n"); >>>>>>>>> + return FALSE; >>>>>>>>> + } >>>>>>>>> >>>>>>>>> page_data_num = limit_size / page_data_buf_size; >>>>>>>>> + info->num_buffers = 3 * info->num_threads; >>>>>>>>> >>>>>>>>> - 
info->num_buffers = MIN(NUM_BUFFERS, page_data_num); >>>>>>>>> + info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); >>>>>>>>> + info->num_buffers = MIN(info->num_buffers, page_data_num); >>>>>>>>> >>>>>>>>> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", >>>>>>>>> info->num_buffers); >>>>>>>>> @@ -3588,6 +3595,36 @@ initial_for_parallel() >>>>>>>>> } >>>>>>>>> >>>>>>>>> /* >>>>>>>>> + * initial page_flag for each thread >>>>>>>>> + */ >>>>>>>>> + if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) >>>>>>>>> + == NULL) { >>>>>>>>> + MSG("Can't allocate memory for page_flag_buf. %s\n", >>>>>>>>> + strerror(errno)); >>>>>>>>> + return FALSE; >>>>>>>>> + } >>>>>>>>> + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); >>>>>>>>> + >>>>>>>>> + for (i = 0; i < info->num_threads; i++) { >>>>>>>>> + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { >>>>>>>>> + MSG("Can't allocate memory for page_flag. %s\n", >>>>>>>>> + strerror(errno)); >>>>>>>>> + return FALSE; >>>>>>>>> + } >>>>>>>>> + current = info->page_flag_buf[i]; >>>>>>>>> + >>>>>>>>> + for (j = 1; j < NUM_BUFFERS; j++) { >>>>>>>>> + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { >>>>>>>>> + MSG("Can't allocate memory for page_flag. 
%s\n", >>>>>>>>> + strerror(errno)); >>>>>>>>> + return FALSE; >>>>>>>>> + } >>>>>>>>> + current = current->next; >>>>>>>>> + } >>>>>>>>> + current->next = info->page_flag_buf[i]; >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> + /* >>>>>>>>> * initial fd_memory for threads >>>>>>>>> */ >>>>>>>>> for (i = 0; i < info->num_threads; i++) { >>>>>>>>> @@ -3612,7 +3649,8 @@ initial_for_parallel() >>>>>>>>> void >>>>>>>>> free_for_parallel() >>>>>>>>> { >>>>>>>>> - int i; >>>>>>>>> + int i, j; >>>>>>>>> + struct page_flag *current; >>>>>>>>> >>>>>>>>> if (info->threads != NULL) { >>>>>>>>> for (i = 0; i < info->num_threads; i++) { >>>>>>>>> @@ -3655,6 +3693,19 @@ free_for_parallel() >>>>>>>>> free(info->page_data_buf); >>>>>>>>> } >>>>>>>>> >>>>>>>>> + if (info->page_flag_buf != NULL) { >>>>>>>>> + for (i = 0; i < info->num_threads; i++) { >>>>>>>>> + for (j = 0; j < NUM_BUFFERS; j++) { >>>>>>>>> + if (info->page_flag_buf[i] != NULL) { >>>>>>>>> + current = info->page_flag_buf[i]; >>>>>>>>> + info->page_flag_buf[i] = current->next; >>>>>>>>> + free(current); >>>>>>>>> + } >>>>>>>>> + } >>>>>>>>> + } >>>>>>>>> + free(info->page_flag_buf); >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> if (info->parallel_info == NULL) >>>>>>>>> return; >>>>>>>>> >>>>>>>>> @@ -7075,11 +7126,11 @@ void * >>>>>>>>> kdump_thread_function_cyclic(void *arg) { >>>>>>>>> void *retval = PTHREAD_FAIL; >>>>>>>>> struct thread_args *kdump_thread_args = (struct thread_args *)arg; >>>>>>>>> - struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>>>>>>>> + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>>>>>>>> + volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; >>>>>>>>> struct cycle *cycle = kdump_thread_args->cycle; >>>>>>>>> - int page_data_num = kdump_thread_args->page_data_num; >>>>>>>>> - mdf_pfn_t pfn; >>>>>>>>> - int index; >>>>>>>>> + mdf_pfn_t pfn = cycle->start_pfn; >>>>>>>>> + int index = kdump_thread_args->thread_num; >>>>>>>>> int 
buf_ready; >>>>>>>>> int dumpable; >>>>>>>>> int fd_memory = 0; >>>>>>>>> @@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { >>>>>>>>> kdump_thread_args->thread_num); >>>>>>>>> } >>>>>>>>> >>>>>>>>> - while (1) { >>>>>>>>> - /* get next pfn */ >>>>>>>>> - pthread_mutex_lock(&info->current_pfn_mutex); >>>>>>>>> - pfn = info->current_pfn; >>>>>>>>> - info->current_pfn++; >>>>>>>>> - pthread_mutex_unlock(&info->current_pfn_mutex); >>>>>>>>> - >>>>>>>>> - if (pfn >= kdump_thread_args->end_pfn) >>>>>>>>> - break; >>>>>>>>> - >>>>>>>>> - index = -1; >>>>>>>>> + /* >>>>>>>>> + * filtered page won't take anything >>>>>>>>> + * unfiltered zero page will only take a page_flag_buf >>>>>>>>> + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf >>>>>>>>> + */ >>>>>>>>> + while (pfn < cycle->end_pfn) { >>>>>>>>> buf_ready = FALSE; >>>>>>>>> >>>>>>>>> + pthread_mutex_lock(&info->page_data_mutex); >>>>>>>>> + while (page_data_buf[index].used != FALSE) { >>>>>>>>> + index = (index + 1) % info->num_buffers; >>>>>>>>> + } >>>>>>>>> + page_data_buf[index].used = TRUE; >>>>>>>>> + pthread_mutex_unlock(&info->page_data_mutex); >>>>>>>>> + >>>>>>>>> while (buf_ready == FALSE) { >>>>>>>>> pthread_testcancel(); >>>>>>>>> - >>>>>>>>> - index = pfn % page_data_num; >>>>>>>>> - >>>>>>>>> - if (pfn - info->consumed_pfn > info->num_buffers) >>>>>>>>> + if (page_flag_buf->ready == FLAG_READY) >>>>>>>>> continue; >>>>>>>>> >>>>>>>>> - if (page_data_buf[index].ready != 0) >>>>>>>>> - continue; >>>>>>>>> - >>>>>>>>> - pthread_mutex_lock(&page_data_buf[index].mutex); >>>>>>>>> - >>>>>>>>> - if (page_data_buf[index].ready != 0) >>>>>>>>> - goto unlock; >>>>>>>>> - >>>>>>>>> - buf_ready = TRUE; >>>>>>>>> + /* get next dumpable pfn */ >>>>>>>>> + pthread_mutex_lock(&info->current_pfn_mutex); >>>>>>>>> + for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { >>>>>>>>> + dumpable = is_dumpable( >>>>>>>>> + info->fd_bitmap ? 
&bitmap_parallel : info->bitmap2, >>>>>>>>> + pfn, >>>>>>>>> + cycle); >>>>>>>>> + if (dumpable) >>>>>>>>> + break; >>>>>>>>> + } >>>>>>>>> + info->current_pfn = pfn + 1; >>>>>>>>> >>>>>>>>> - page_data_buf[index].pfn = pfn; >>>>>>>>> - page_data_buf[index].ready = 1; >>>>>>>>> + page_flag_buf->pfn = pfn; >>>>>>>>> + page_flag_buf->ready = FLAG_FILLING; >>>>>>>>> + pthread_mutex_unlock(&info->current_pfn_mutex); >>>>>>>>> + sem_post(&info->page_flag_buf_sem); >>>>>>>>> >>>>>>>>> - dumpable = is_dumpable( >>>>>>>>> - info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >>>>>>>>> - pfn, >>>>>>>>> - cycle); >>>>>>>>> - page_data_buf[index].dumpable = dumpable; >>>>>>>>> - if (!dumpable) >>>>>>>>> - goto unlock; >>>>>>>>> + if (pfn >= cycle->end_pfn) { >>>>>>>>> + info->current_pfn = cycle->end_pfn; >>>>>>>>> + page_data_buf[index].used = FALSE; >>>>>>>>> + break; >>>>>>>>> + } >>>>>>>>> >>>>>>>>> if (!read_pfn_parallel(fd_memory, pfn, buf, >>>>>>>>> &bitmap_memory_parallel, >>>>>>>>> @@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { >>>>>>>>> >>>>>>>>> if ((info->dump_level & DL_EXCLUDE_ZERO) >>>>>>>>> && is_zero_page(buf, info->page_size)) { >>>>>>>>> - page_data_buf[index].zero = TRUE; >>>>>>>>> - goto unlock; >>>>>>>>> + page_flag_buf->zero = TRUE; >>>>>>>>> + goto next; >>>>>>>>> } >>>>>>>>> >>>>>>>>> - page_data_buf[index].zero = FALSE; >>>>>>>>> + page_flag_buf->zero = FALSE; >>>>>>>>> >>>>>>>>> /* >>>>>>>>> * Compress the page data. 
>>>>>>>>> @@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { >>>>>>>>> page_data_buf[index].flags = >>>>>>>>> DUMP_DH_COMPRESSED_LZO; >>>>>>>>> page_data_buf[index].size = size_out; >>>>>>>>> + >>>>>>>>> memcpy(page_data_buf[index].buf, buf_out, size_out); >>>>>>>>> #endif >>>>>>>>> #ifdef USESNAPPY >>>>>>>>> @@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { >>>>>>>>> page_data_buf[index].size = info->page_size; >>>>>>>>> memcpy(page_data_buf[index].buf, buf, info->page_size); >>>>>>>>> } >>>>>>>>> -unlock: >>>>>>>>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>>>>>>>> + page_flag_buf->index = index; >>>>>>>>> + buf_ready = TRUE; >>>>>>>>> +next: >>>>>>>>> + page_flag_buf->ready = FLAG_READY; >>>>>>>>> + page_flag_buf = page_flag_buf->next; >>>>>>>>> >>>>>>>>> } >>>>>>>>> } >>>>>>>>> - >>>>>>>>> retval = NULL; >>>>>>>>> >>>>>>>>> fail: >>>>>>>>> @@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>>>>> struct page_desc pd; >>>>>>>>> struct timeval tv_start; >>>>>>>>> struct timeval last, new; >>>>>>>>> - unsigned long long consuming_pfn; >>>>>>>>> pthread_t **threads = NULL; >>>>>>>>> struct thread_args *kdump_thread_args = NULL; >>>>>>>>> void *thread_result; >>>>>>>>> - int page_data_num; >>>>>>>>> + int page_buf_num; >>>>>>>>> struct page_data *page_data_buf = NULL; >>>>>>>>> int i; >>>>>>>>> int index; >>>>>>>>> + int end_count, consuming, check_count; >>>>>>>>> + mdf_pfn_t current_pfn, temp_pfn; >>>>>>>>> >>>>>>>>> if (info->flag_elf_dumpfile) >>>>>>>>> return FALSE; >>>>>>>>> @@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>>>>> goto out; >>>>>>>>> } >>>>>>>>> >>>>>>>>> - res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); >>>>>>>>> - if (res != 0) { >>>>>>>>> - ERRMSG("Can't initialize consumed_pfn_mutex. 
%s\n", >>>>>>>>> - strerror(res)); >>>>>>>>> - goto out; >>>>>>>>> - } >>>>>>>>> - >>>>>>>>> res = pthread_mutex_init(&info->filter_mutex, NULL); >>>>>>>>> if (res != 0) { >>>>>>>>> ERRMSG("Can't initialize filter_mutex. %s\n", strerror(res)); >>>>>>>>> @@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>>>>> end_pfn = cycle->end_pfn; >>>>>>>>> >>>>>>>>> info->current_pfn = start_pfn; >>>>>>>>> - info->consumed_pfn = start_pfn - 1; >>>>>>>>> >>>>>>>>> threads = info->threads; >>>>>>>>> kdump_thread_args = info->kdump_thread_args; >>>>>>>>> >>>>>>>>> - page_data_num = info->num_buffers; >>>>>>>>> + page_buf_num = info->num_buffers; >>>>>>>>> page_data_buf = info->page_data_buf; >>>>>>>>> + pthread_mutex_init(&info->page_data_mutex, NULL); >>>>>>>>> + sem_init(&info->page_flag_buf_sem, 0, 0); >>>>>>>>> >>>>>>>>> - for (i = 0; i < page_data_num; i++) { >>>>>>>>> - /* >>>>>>>>> - * producer will use pfn in page_data_buf to decide the >>>>>>>>> - * consumed pfn >>>>>>>>> - */ >>>>>>>>> - page_data_buf[i].pfn = start_pfn - 1; >>>>>>>>> - page_data_buf[i].ready = 0; >>>>>>>>> - res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); >>>>>>>>> - if (res != 0) { >>>>>>>>> - ERRMSG("Can't initialize mutex of page_data_buf. 
%s\n", >>>>>>>>> - strerror(res)); >>>>>>>>> - goto out; >>>>>>>>> - } >>>>>>>>> - } >>>>>>>>> + for (i = 0; i < page_buf_num; i++) >>>>>>>>> + page_data_buf[i].used = FALSE; >>>>>>>>> >>>>>>>>> for (i = 0; i < info->num_threads; i++) { >>>>>>>>> kdump_thread_args[i].thread_num = i; >>>>>>>>> kdump_thread_args[i].len_buf_out = len_buf_out; >>>>>>>>> - kdump_thread_args[i].start_pfn = start_pfn; >>>>>>>>> - kdump_thread_args[i].end_pfn = end_pfn; >>>>>>>>> - kdump_thread_args[i].page_data_num = page_data_num; >>>>>>>>> kdump_thread_args[i].page_data_buf = page_data_buf; >>>>>>>>> + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; >>>>>>>>> kdump_thread_args[i].cycle = cycle; >>>>>>>>> >>>>>>>>> res = pthread_create(threads[i], NULL, >>>>>>>>> @@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>>>>> } >>>>>>>>> } >>>>>>>>> >>>>>>>>> - consuming_pfn = start_pfn; >>>>>>>>> - index = -1; >>>>>>>>> + end_count = 0; >>>>>>>>> + while (1) { >>>>>>>>> + consuming = 0; >>>>>>>>> + check_count = 0; >>>>>>>>> >>>>>>>>> - gettimeofday(&last, NULL); >>>>>>>>> + /* >>>>>>>>> + * The basic idea is producer producing page and consumer writing page. >>>>>>>>> + * Each producer have a page_flag_buf list which is used for storing page's description. >>>>>>>>> + * The size of page_flag_buf is little so it won't take too much memory. >>>>>>>>> + * And all producers will share a page_data_buf array which is used for storing page's compressed data. >>>>>>>>> + * The main thread is the consumer. It will find the next pfn and write it into file. >>>>>>>>> + * The next pfn is smallest pfn in all page_flag_buf. 
>>>>>>>>> + */ >>>>>>>>> + sem_wait(&info->page_flag_buf_sem); >>>>>>>>> + gettimeofday(&last, NULL); >>>>>>>>> + while (1) { >>>>>>>>> + current_pfn = end_pfn; >>>>>>>>> >>>>>>>>> - while (consuming_pfn < end_pfn) { >>>>>>>>> - index = consuming_pfn % page_data_num; >>>>>>>>> + /* >>>>>>>>> + * page_flag_buf is in circular linked list. >>>>>>>>> + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's >>>>>>>>> + * page_flag_buf list. >>>>>>>>> + * consuming is used for recording in which thread the pfn is the smallest. >>>>>>>>> + * current_pfn is used for recording the value of pfn when checking the pfn. >>>>>>>>> + */ >>>>>>>>> + for (i = 0; i < info->num_threads; i++) { >>>>>>>>> + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) >>>>>>>>> + continue; >>>>>>>>> + temp_pfn = info->page_flag_buf[i]->pfn; >>>>>>>>> >>>>>>>>> - gettimeofday(&new, NULL); >>>>>>>>> - if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>>>>>>>> - ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); >>>>>>>>> - goto out; >>>>>>>>> - } >>>>>>>>> + /* >>>>>>>>> + * count how many threads have reached the end. 
>>>>>>>>> + */ >>>>>>>>> + if (temp_pfn >= end_pfn) { >>>>>>>>> + info->page_flag_buf[i]->ready = FLAG_UNUSED; >>>>>>>>> + end_count++; >>>>>>>>> + continue; >>>>>>>>> + } >>>>>>>>> >>>>>>>>> - /* >>>>>>>>> - * check pfn first without mutex locked to reduce the time >>>>>>>>> - * trying to lock the mutex >>>>>>>>> - */ >>>>>>>>> - if (page_data_buf[index].pfn != consuming_pfn) >>>>>>>>> - continue; >>>>>>>>> + if (current_pfn < temp_pfn) >>>>>>>>> + continue; >>>>>>>>> >>>>>>>>> - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) >>>>>>>>> - continue; >>>>>>>>> + check_count++; >>>>>>>>> + consuming = i; >>>>>>>>> + current_pfn = temp_pfn; >>>>>>>>> + } >>>>>>>>> >>>>>>>>> - /* check whether the found one is ready to be consumed */ >>>>>>>>> - if (page_data_buf[index].pfn != consuming_pfn || >>>>>>>>> - page_data_buf[index].ready != 1) { >>>>>>>>> - goto unlock; >>>>>>>>> + /* >>>>>>>>> + * If all the threads have reached the end, we will finish writing. >>>>>>>>> + */ >>>>>>>>> + if (end_count >= info->num_threads) >>>>>>>>> + goto finish; >>>>>>>>> + >>>>>>>>> + /* >>>>>>>>> + * If the page_flag_buf is not ready, the pfn recorded may be changed. >>>>>>>>> + * So we should recheck. 
>>>>>>>>> + */ >>>>>>>>> + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { >>>>>>>>> + gettimeofday(&new, NULL); >>>>>>>>> + if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>>>>>>>> + ERRMSG("Can't get data of pfn.\n"); >>>>>>>>> + goto out; >>>>>>>>> + } >>>>>>>>> + continue; >>>>>>>>> + } >>>>>>>>> + >>>>>>>>> + if (current_pfn == info->page_flag_buf[consuming]->pfn) >>>>>>>>> + break; >>>>>>>>> } >>>>>>>>> >>>>>>>>> if ((num_dumped % per) == 0) >>>>>>>>> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); >>>>>>>>> >>>>>>>>> - /* next pfn is found, refresh last here */ >>>>>>>>> - last = new; >>>>>>>>> - consuming_pfn++; >>>>>>>>> - info->consumed_pfn++; >>>>>>>>> - page_data_buf[index].ready = 0; >>>>>>>>> - >>>>>>>>> - if (page_data_buf[index].dumpable == FALSE) >>>>>>>>> - goto unlock; >>>>>>>>> - >>>>>>>>> num_dumped++; >>>>>>>>> >>>>>>>>> - if (page_data_buf[index].zero == TRUE) { >>>>>>>>> + >>>>>>>>> + if (info->page_flag_buf[consuming]->zero == TRUE) { >>>>>>>>> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) >>>>>>>>> goto out; >>>>>>>>> pfn_zero++; >>>>>>>>> } else { >>>>>>>>> + index = info->page_flag_buf[consuming]->index; >>>>>>>>> pd.flags = page_data_buf[index].flags; >>>>>>>>> pd.size = page_data_buf[index].size; >>>>>>>>> pd.page_flags = 0; >>>>>>>>> @@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>>>>> */ >>>>>>>>> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) >>>>>>>>> goto out; >>>>>>>>> - >>>>>>>>> + page_data_buf[index].used = FALSE; >>>>>>>>> } >>>>>>>>> -unlock: >>>>>>>>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>>>>>>>> + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; >>>>>>>>> + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; >>>>>>>>> } >>>>>>>>> - >>>>>>>>> +finish: >>>>>>>>> ret = TRUE; >>>>>>>>> /* >>>>>>>>> * print [100 %] >>>>>>>>> @@ -7463,15 +7532,9 @@ out: >>>>>>>>> } >>>>>>>>> } >>>>>>>>> 
>>>>>>>>> - if (page_data_buf != NULL) { >>>>>>>>> - for (i = 0; i < page_data_num; i++) { >>>>>>>>> - pthread_mutex_destroy(&page_data_buf[i].mutex); >>>>>>>>> - } >>>>>>>>> - } >>>>>>>>> - >>>>>>>>> + sem_destroy(&info->page_flag_buf_sem); >>>>>>>>> pthread_rwlock_destroy(&info->usemmap_rwlock); >>>>>>>>> pthread_mutex_destroy(&info->filter_mutex); >>>>>>>>> - pthread_mutex_destroy(&info->consumed_pfn_mutex); >>>>>>>>> pthread_mutex_destroy(&info->current_pfn_mutex); >>>>>>>>> >>>>>>>>> return ret; >>>>>>>>> @@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag >>>>>>>>> num_dumped++; >>>>>>>>> if (!read_pfn(pfn, buf)) >>>>>>>>> goto out; >>>>>>>>> + >>>>>>>>> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); >>>>>>>>> >>>>>>>>> /* >>>>>>>>> diff --git a/makedumpfile.h b/makedumpfile.h >>>>>>>>> index e0b5bbf..4b315c0 100644 >>>>>>>>> --- a/makedumpfile.h >>>>>>>>> +++ b/makedumpfile.h >>>>>>>>> @@ -44,6 +44,7 @@ >>>>>>>>> #include "print_info.h" >>>>>>>>> #include "sadump_mod.h" >>>>>>>>> #include <pthread.h> >>>>>>>>> +#include <semaphore.h> >>>>>>>>> >>>>>>>>> /* >>>>>>>>> * Result of command >>>>>>>>> @@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; >>>>>>>>> #define PAGE_DATA_NUM (50) >>>>>>>>> #define WAIT_TIME (60 * 10) >>>>>>>>> #define PTHREAD_FAIL ((void *)-2) >>>>>>>>> -#define NUM_BUFFERS (50) >>>>>>>>> +#define NUM_BUFFERS (20) >>>>>>>>> >>>>>>>>> struct mmap_cache { >>>>>>>>> char *mmap_buf; >>>>>>>>> @@ -985,28 +986,33 @@ struct mmap_cache { >>>>>>>>> off_t mmap_end_offset; >>>>>>>>> }; >>>>>>>>> >>>>>>>>> +enum { >>>>>>>>> + FLAG_UNUSED, >>>>>>>>> + FLAG_READY, >>>>>>>>> + FLAG_FILLING >>>>>>>>> +}; >>>>>>>>> +struct page_flag { >>>>>>>>> + mdf_pfn_t pfn; >>>>>>>>> + char zero; >>>>>>>>> + char ready; >>>>>>>>> + short index; >>>>>>>>> + struct page_flag *next; >>>>>>>>> +}; >>>>>>>>> + >>>>>>>>> struct page_data >>>>>>>>> { >>>>>>>>> - mdf_pfn_t pfn; >>>>>>>>> - int 
dumpable; >>>>>>>>> - int zero; >>>>>>>>> - unsigned int flags; >>>>>>>>> long size; >>>>>>>>> unsigned char *buf; >>>>>>>>> - pthread_mutex_t mutex; >>>>>>>>> - /* >>>>>>>>> - * whether the page_data is ready to be consumed >>>>>>>>> - */ >>>>>>>>> - int ready; >>>>>>>>> + int flags; >>>>>>>>> + int used; >>>>>>>>> }; >>>>>>>>> >>>>>>>>> struct thread_args { >>>>>>>>> int thread_num; >>>>>>>>> unsigned long len_buf_out; >>>>>>>>> - mdf_pfn_t start_pfn, end_pfn; >>>>>>>>> - int page_data_num; >>>>>>>>> struct cycle *cycle; >>>>>>>>> struct page_data *page_data_buf; >>>>>>>>> + struct page_flag *page_flag_buf; >>>>>>>>> }; >>>>>>>>> >>>>>>>>> /* >>>>>>>>> @@ -1295,11 +1301,12 @@ struct DumpInfo { >>>>>>>>> pthread_t **threads; >>>>>>>>> struct thread_args *kdump_thread_args; >>>>>>>>> struct page_data *page_data_buf; >>>>>>>>> + struct page_flag **page_flag_buf; >>>>>>>>> + sem_t page_flag_buf_sem; >>>>>>>>> pthread_rwlock_t usemmap_rwlock; >>>>>>>>> mdf_pfn_t current_pfn; >>>>>>>>> pthread_mutex_t current_pfn_mutex; >>>>>>>>> - mdf_pfn_t consumed_pfn; >>>>>>>>> - pthread_mutex_t consumed_pfn_mutex; >>>>>>>>> + pthread_mutex_t page_data_mutex; >>>>>>>>> pthread_mutex_t filter_mutex; >>>>>>>>> }; >>>>>>>>> extern struct DumpInfo *info; >>>>>>>>> -- >>>>>>>>> 1.8.3.1 >>>>>>>>> >>>>>>>>> >>>>>>>>> >>>>>>>>> >>>>>>>>> _______________________________________________ >>>>>>>>> kexec mailing list >>>>>>>>> kexec@lists.infradead.org >>>>>>>>> http://lists.infradead.org/mailman/listinfo/kexec >>>>>>>> >>>>>>>> >>>>>>> >>>>>>> >>>>>>> >>>>>>> _______________________________________________ >>>>>>> kexec mailing list >>>>>>> kexec@lists.infradead.org >>>>>>> http://lists.infradead.org/mailman/listinfo/kexec > > > _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-15 6:34 ` Minfei Huang 2016-03-15 7:12 ` "Zhou, Wenjian/周文剑" @ 2016-03-15 8:35 ` "Zhou, Wenjian/周文剑" 2016-03-18 2:46 ` "Zhou, Wenjian/周文剑" 2 siblings, 0 replies; 33+ messages in thread From: "Zhou, Wenjian/周文剑" @ 2016-03-15 8:35 UTC (permalink / raw) To: Minfei Huang; +Cc: kexec Hi Minfei, What do you mean by "fails to dump vmcore" exactly? Do you mean makedumpfile was interrupted? Or just mean the performance is abnormal? -- Thanks Zhou On 03/15/2016 02:34 PM, Minfei Huang wrote: > Hi, Zhou. > > I have applied this patch base on 1.5.9. There are several testcases I > have tested. > > - makedumpfile --num-threads 64 -d 31 > real 0m0.010s > user 0m0.002s > sys 0m0.009s > > - makedumpfile --num-threads 31 -d 31 > real 2m40.915s > user 10m50.900s > sys 23m9.664s > > makedumpfile --num-threads 30 -d 31 > real 0m0.006s > user 0m0.002s > sys 0m0.004s > > makedumpfile --num-threads 32 -d 31 > real 0m0.007s > user 0m0.002s > sys 0m0.005s > > - makedumpfile --num-threads 8 -d 31 > real 2m32.692s > user 7m4.630s > sys 2m0.369s > > - makedumpfile --num-threads 1 -d 31 > real 4m42.423s > user 7m27.153s > sys 0m22.490s > > - makedumpfile.orig -d 31 > real 4m1.297s > user 3m39.696s > sys 0m15.200s > > This patch has a huge increment to the filter performance under 31. But > it is not stable, since makedumpfile fails to dump vmcore intermittently. > You can find the above test result, makedumpfile fails to dump vmcore > with option --num-threads 64, also it may occur with option > --number-threads 8. > > Thanks > Minfei > > On 03/09/16 at 08:27am, Zhou Wenjian wrote: >> v4: >> 1. fix a bug caused by the logic >> v3: >> 1. remove some unused variables >> 2. fix a bug caused by the wrong logic >> 3. fix a bug caused by optimising >> 4. improve more performance by using Minoru Usui's code >> >> multi-threads implementation will introduce extra cost when handling >> each page. 
The origin implementation will also do the extra work for >> filtered pages. So there is a big performance degradation in >> --num-threads -d 31. >> The new implementation won't do the extra work for filtered pages any >> more. So the performance of -d 31 is close to that of serial processing. >> >> The new implementation is just like the following: >> * The basic idea is producer producing page and consumer writing page. >> * Each producer have a page_flag_buf list which is used for storing >> page's description. >> * The size of page_flag_buf is little so it won't take too much memory. >> * And all producers will share a page_data_buf array which is >> used for storing page's compressed data. >> * The main thread is the consumer. It will find the next pfn and write >> it into file. >> * The next pfn is smallest pfn in all page_flag_buf. >> >> Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> >> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> >> --- >> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- >> makedumpfile.h | 35 ++++--- >> 2 files changed, 202 insertions(+), 131 deletions(-) >> >> diff --git a/makedumpfile.c b/makedumpfile.c >> index fa0b779..2b0864a 100644 >> --- a/makedumpfile.c >> +++ b/makedumpfile.c >> @@ -3483,7 +3483,8 @@ initial_for_parallel() >> unsigned long page_data_buf_size; >> unsigned long limit_size; >> int page_data_num; >> - int i; >> + struct page_flag *current; >> + int i, j; >> >> len_buf_out = calculate_len_buf_out(info->page_size); >> >> @@ -3560,10 +3561,16 @@ initial_for_parallel() >> >> limit_size = (get_free_memory_size() >> - MAP_REGION * info->num_threads) * 0.6; >> + if (limit_size < 0) { >> + MSG("Free memory is not enough for multi-threads\n"); >> + return FALSE; >> + } >> >> page_data_num = limit_size / page_data_buf_size; >> + info->num_buffers = 3 * info->num_threads; >> >> - info->num_buffers = MIN(NUM_BUFFERS, page_data_num); >> + info->num_buffers = MAX(info->num_buffers, 
NUM_BUFFERS); >> + info->num_buffers = MIN(info->num_buffers, page_data_num); >> >> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", >> info->num_buffers); >> @@ -3588,6 +3595,36 @@ initial_for_parallel() >> } >> >> /* >> + * initial page_flag for each thread >> + */ >> + if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) >> + == NULL) { >> + MSG("Can't allocate memory for page_flag_buf. %s\n", >> + strerror(errno)); >> + return FALSE; >> + } >> + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); >> + >> + for (i = 0; i < info->num_threads; i++) { >> + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { >> + MSG("Can't allocate memory for page_flag. %s\n", >> + strerror(errno)); >> + return FALSE; >> + } >> + current = info->page_flag_buf[i]; >> + >> + for (j = 1; j < NUM_BUFFERS; j++) { >> + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { >> + MSG("Can't allocate memory for page_flag. %s\n", >> + strerror(errno)); >> + return FALSE; >> + } >> + current = current->next; >> + } >> + current->next = info->page_flag_buf[i]; >> + } >> + >> + /* >> * initial fd_memory for threads >> */ >> for (i = 0; i < info->num_threads; i++) { >> @@ -3612,7 +3649,8 @@ initial_for_parallel() >> void >> free_for_parallel() >> { >> - int i; >> + int i, j; >> + struct page_flag *current; >> >> if (info->threads != NULL) { >> for (i = 0; i < info->num_threads; i++) { >> @@ -3655,6 +3693,19 @@ free_for_parallel() >> free(info->page_data_buf); >> } >> >> + if (info->page_flag_buf != NULL) { >> + for (i = 0; i < info->num_threads; i++) { >> + for (j = 0; j < NUM_BUFFERS; j++) { >> + if (info->page_flag_buf[i] != NULL) { >> + current = info->page_flag_buf[i]; >> + info->page_flag_buf[i] = current->next; >> + free(current); >> + } >> + } >> + } >> + free(info->page_flag_buf); >> + } >> + >> if (info->parallel_info == NULL) >> return; >> >> @@ -7075,11 +7126,11 @@ void * >> 
kdump_thread_function_cyclic(void *arg) { >> void *retval = PTHREAD_FAIL; >> struct thread_args *kdump_thread_args = (struct thread_args *)arg; >> - struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >> + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >> + volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; >> struct cycle *cycle = kdump_thread_args->cycle; >> - int page_data_num = kdump_thread_args->page_data_num; >> - mdf_pfn_t pfn; >> - int index; >> + mdf_pfn_t pfn = cycle->start_pfn; >> + int index = kdump_thread_args->thread_num; >> int buf_ready; >> int dumpable; >> int fd_memory = 0; >> @@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { >> kdump_thread_args->thread_num); >> } >> >> - while (1) { >> - /* get next pfn */ >> - pthread_mutex_lock(&info->current_pfn_mutex); >> - pfn = info->current_pfn; >> - info->current_pfn++; >> - pthread_mutex_unlock(&info->current_pfn_mutex); >> - >> - if (pfn >= kdump_thread_args->end_pfn) >> - break; >> - >> - index = -1; >> + /* >> + * filtered page won't take anything >> + * unfiltered zero page will only take a page_flag_buf >> + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf >> + */ >> + while (pfn < cycle->end_pfn) { >> buf_ready = FALSE; >> >> + pthread_mutex_lock(&info->page_data_mutex); >> + while (page_data_buf[index].used != FALSE) { >> + index = (index + 1) % info->num_buffers; >> + } >> + page_data_buf[index].used = TRUE; >> + pthread_mutex_unlock(&info->page_data_mutex); >> + >> while (buf_ready == FALSE) { >> pthread_testcancel(); >> - >> - index = pfn % page_data_num; >> - >> - if (pfn - info->consumed_pfn > info->num_buffers) >> + if (page_flag_buf->ready == FLAG_READY) >> continue; >> >> - if (page_data_buf[index].ready != 0) >> - continue; >> - >> - pthread_mutex_lock(&page_data_buf[index].mutex); >> - >> - if (page_data_buf[index].ready != 0) >> - goto unlock; >> - >> - buf_ready = TRUE; >> + 
/* get next dumpable pfn */ >> + pthread_mutex_lock(&info->current_pfn_mutex); >> + for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { >> + dumpable = is_dumpable( >> + info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >> + pfn, >> + cycle); >> + if (dumpable) >> + break; >> + } >> + info->current_pfn = pfn + 1; >> >> - page_data_buf[index].pfn = pfn; >> - page_data_buf[index].ready = 1; >> + page_flag_buf->pfn = pfn; >> + page_flag_buf->ready = FLAG_FILLING; >> + pthread_mutex_unlock(&info->current_pfn_mutex); >> + sem_post(&info->page_flag_buf_sem); >> >> - dumpable = is_dumpable( >> - info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >> - pfn, >> - cycle); >> - page_data_buf[index].dumpable = dumpable; >> - if (!dumpable) >> - goto unlock; >> + if (pfn >= cycle->end_pfn) { >> + info->current_pfn = cycle->end_pfn; >> + page_data_buf[index].used = FALSE; >> + break; >> + } >> >> if (!read_pfn_parallel(fd_memory, pfn, buf, >> &bitmap_memory_parallel, >> @@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { >> >> if ((info->dump_level & DL_EXCLUDE_ZERO) >> && is_zero_page(buf, info->page_size)) { >> - page_data_buf[index].zero = TRUE; >> - goto unlock; >> + page_flag_buf->zero = TRUE; >> + goto next; >> } >> >> - page_data_buf[index].zero = FALSE; >> + page_flag_buf->zero = FALSE; >> >> /* >> * Compress the page data. 
>> @@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { >> page_data_buf[index].flags = >> DUMP_DH_COMPRESSED_LZO; >> page_data_buf[index].size = size_out; >> + >> memcpy(page_data_buf[index].buf, buf_out, size_out); >> #endif >> #ifdef USESNAPPY >> @@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { >> page_data_buf[index].size = info->page_size; >> memcpy(page_data_buf[index].buf, buf, info->page_size); >> } >> -unlock: >> - pthread_mutex_unlock(&page_data_buf[index].mutex); >> + page_flag_buf->index = index; >> + buf_ready = TRUE; >> +next: >> + page_flag_buf->ready = FLAG_READY; >> + page_flag_buf = page_flag_buf->next; >> >> } >> } >> - >> retval = NULL; >> >> fail: >> @@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> struct page_desc pd; >> struct timeval tv_start; >> struct timeval last, new; >> - unsigned long long consuming_pfn; >> pthread_t **threads = NULL; >> struct thread_args *kdump_thread_args = NULL; >> void *thread_result; >> - int page_data_num; >> + int page_buf_num; >> struct page_data *page_data_buf = NULL; >> int i; >> int index; >> + int end_count, consuming, check_count; >> + mdf_pfn_t current_pfn, temp_pfn; >> >> if (info->flag_elf_dumpfile) >> return FALSE; >> @@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> goto out; >> } >> >> - res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); >> - if (res != 0) { >> - ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", >> - strerror(res)); >> - goto out; >> - } >> - >> res = pthread_mutex_init(&info->filter_mutex, NULL); >> if (res != 0) { >> ERRMSG("Can't initialize filter_mutex. 
%s\n", strerror(res)); >> @@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> end_pfn = cycle->end_pfn; >> >> info->current_pfn = start_pfn; >> - info->consumed_pfn = start_pfn - 1; >> >> threads = info->threads; >> kdump_thread_args = info->kdump_thread_args; >> >> - page_data_num = info->num_buffers; >> + page_buf_num = info->num_buffers; >> page_data_buf = info->page_data_buf; >> + pthread_mutex_init(&info->page_data_mutex, NULL); >> + sem_init(&info->page_flag_buf_sem, 0, 0); >> >> - for (i = 0; i < page_data_num; i++) { >> - /* >> - * producer will use pfn in page_data_buf to decide the >> - * consumed pfn >> - */ >> - page_data_buf[i].pfn = start_pfn - 1; >> - page_data_buf[i].ready = 0; >> - res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); >> - if (res != 0) { >> - ERRMSG("Can't initialize mutex of page_data_buf. %s\n", >> - strerror(res)); >> - goto out; >> - } >> - } >> + for (i = 0; i < page_buf_num; i++) >> + page_data_buf[i].used = FALSE; >> >> for (i = 0; i < info->num_threads; i++) { >> kdump_thread_args[i].thread_num = i; >> kdump_thread_args[i].len_buf_out = len_buf_out; >> - kdump_thread_args[i].start_pfn = start_pfn; >> - kdump_thread_args[i].end_pfn = end_pfn; >> - kdump_thread_args[i].page_data_num = page_data_num; >> kdump_thread_args[i].page_data_buf = page_data_buf; >> + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; >> kdump_thread_args[i].cycle = cycle; >> >> res = pthread_create(threads[i], NULL, >> @@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> } >> } >> >> - consuming_pfn = start_pfn; >> - index = -1; >> + end_count = 0; >> + while (1) { >> + consuming = 0; >> + check_count = 0; >> >> - gettimeofday(&last, NULL); >> + /* >> + * The basic idea is producer producing page and consumer writing page. >> + * Each producer have a page_flag_buf list which is used for storing page's description. 
>> + * The size of page_flag_buf is little so it won't take too much memory. >> + * And all producers will share a page_data_buf array which is used for storing page's compressed data. >> + * The main thread is the consumer. It will find the next pfn and write it into file. >> + * The next pfn is smallest pfn in all page_flag_buf. >> + */ >> + sem_wait(&info->page_flag_buf_sem); >> + gettimeofday(&last, NULL); >> + while (1) { >> + current_pfn = end_pfn; >> >> - while (consuming_pfn < end_pfn) { >> - index = consuming_pfn % page_data_num; >> + /* >> + * page_flag_buf is in circular linked list. >> + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's >> + * page_flag_buf list. >> + * consuming is used for recording in which thread the pfn is the smallest. >> + * current_pfn is used for recording the value of pfn when checking the pfn. >> + */ >> + for (i = 0; i < info->num_threads; i++) { >> + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) >> + continue; >> + temp_pfn = info->page_flag_buf[i]->pfn; >> >> - gettimeofday(&new, NULL); >> - if (new.tv_sec - last.tv_sec > WAIT_TIME) { >> - ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); >> - goto out; >> - } >> + /* >> + * count how many threads have reached the end. 
>> + */ >> + if (temp_pfn >= end_pfn) { >> + info->page_flag_buf[i]->ready = FLAG_UNUSED; >> + end_count++; >> + continue; >> + } >> >> - /* >> - * check pfn first without mutex locked to reduce the time >> - * trying to lock the mutex >> - */ >> - if (page_data_buf[index].pfn != consuming_pfn) >> - continue; >> + if (current_pfn < temp_pfn) >> + continue; >> >> - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) >> - continue; >> + check_count++; >> + consuming = i; >> + current_pfn = temp_pfn; >> + } >> >> - /* check whether the found one is ready to be consumed */ >> - if (page_data_buf[index].pfn != consuming_pfn || >> - page_data_buf[index].ready != 1) { >> - goto unlock; >> + /* >> + * If all the threads have reached the end, we will finish writing. >> + */ >> + if (end_count >= info->num_threads) >> + goto finish; >> + >> + /* >> + * If the page_flag_buf is not ready, the pfn recorded may be changed. >> + * So we should recheck. >> + */ >> + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { >> + gettimeofday(&new, NULL); >> + if (new.tv_sec - last.tv_sec > WAIT_TIME) { >> + ERRMSG("Can't get data of pfn.\n"); >> + goto out; >> + } >> + continue; >> + } >> + >> + if (current_pfn == info->page_flag_buf[consuming]->pfn) >> + break; >> } >> >> if ((num_dumped % per) == 0) >> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); >> >> - /* next pfn is found, refresh last here */ >> - last = new; >> - consuming_pfn++; >> - info->consumed_pfn++; >> - page_data_buf[index].ready = 0; >> - >> - if (page_data_buf[index].dumpable == FALSE) >> - goto unlock; >> - >> num_dumped++; >> >> - if (page_data_buf[index].zero == TRUE) { >> + >> + if (info->page_flag_buf[consuming]->zero == TRUE) { >> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) >> goto out; >> pfn_zero++; >> } else { >> + index = info->page_flag_buf[consuming]->index; >> pd.flags = page_data_buf[index].flags; >> pd.size = page_data_buf[index].size; >> pd.page_flags = 0; 
>> @@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> */ >> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) >> goto out; >> - >> + page_data_buf[index].used = FALSE; >> } >> -unlock: >> - pthread_mutex_unlock(&page_data_buf[index].mutex); >> + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; >> + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; >> } >> - >> +finish: >> ret = TRUE; >> /* >> * print [100 %] >> @@ -7463,15 +7532,9 @@ out: >> } >> } >> >> - if (page_data_buf != NULL) { >> - for (i = 0; i < page_data_num; i++) { >> - pthread_mutex_destroy(&page_data_buf[i].mutex); >> - } >> - } >> - >> + sem_destroy(&info->page_flag_buf_sem); >> pthread_rwlock_destroy(&info->usemmap_rwlock); >> pthread_mutex_destroy(&info->filter_mutex); >> - pthread_mutex_destroy(&info->consumed_pfn_mutex); >> pthread_mutex_destroy(&info->current_pfn_mutex); >> >> return ret; >> @@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag >> num_dumped++; >> if (!read_pfn(pfn, buf)) >> goto out; >> + >> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); >> >> /* >> diff --git a/makedumpfile.h b/makedumpfile.h >> index e0b5bbf..4b315c0 100644 >> --- a/makedumpfile.h >> +++ b/makedumpfile.h >> @@ -44,6 +44,7 @@ >> #include "print_info.h" >> #include "sadump_mod.h" >> #include <pthread.h> >> +#include <semaphore.h> >> >> /* >> * Result of command >> @@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; >> #define PAGE_DATA_NUM (50) >> #define WAIT_TIME (60 * 10) >> #define PTHREAD_FAIL ((void *)-2) >> -#define NUM_BUFFERS (50) >> +#define NUM_BUFFERS (20) >> >> struct mmap_cache { >> char *mmap_buf; >> @@ -985,28 +986,33 @@ struct mmap_cache { >> off_t mmap_end_offset; >> }; >> >> +enum { >> + FLAG_UNUSED, >> + FLAG_READY, >> + FLAG_FILLING >> +}; >> +struct page_flag { >> + mdf_pfn_t pfn; >> + char zero; >> + char ready; >> + short index; >> + struct 
page_flag *next; >> +}; >> + >> struct page_data >> { >> - mdf_pfn_t pfn; >> - int dumpable; >> - int zero; >> - unsigned int flags; >> long size; >> unsigned char *buf; >> - pthread_mutex_t mutex; >> - /* >> - * whether the page_data is ready to be consumed >> - */ >> - int ready; >> + int flags; >> + int used; >> }; >> >> struct thread_args { >> int thread_num; >> unsigned long len_buf_out; >> - mdf_pfn_t start_pfn, end_pfn; >> - int page_data_num; >> struct cycle *cycle; >> struct page_data *page_data_buf; >> + struct page_flag *page_flag_buf; >> }; >> >> /* >> @@ -1295,11 +1301,12 @@ struct DumpInfo { >> pthread_t **threads; >> struct thread_args *kdump_thread_args; >> struct page_data *page_data_buf; >> + struct page_flag **page_flag_buf; >> + sem_t page_flag_buf_sem; >> pthread_rwlock_t usemmap_rwlock; >> mdf_pfn_t current_pfn; >> pthread_mutex_t current_pfn_mutex; >> - mdf_pfn_t consumed_pfn; >> - pthread_mutex_t consumed_pfn_mutex; >> + pthread_mutex_t page_data_mutex; >> pthread_mutex_t filter_mutex; >> }; >> extern struct DumpInfo *info; >> -- >> 1.8.3.1 >> >> >> >> >> _______________________________________________ >> kexec mailing list >> kexec@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/kexec _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-15 6:34 ` Minfei Huang 2016-03-15 7:12 ` "Zhou, Wenjian/周文剑" 2016-03-15 8:35 ` "Zhou, Wenjian/周文剑" @ 2016-03-18 2:46 ` "Zhou, Wenjian/周文剑" 2016-03-18 4:16 ` Minfei Huang 2 siblings, 1 reply; 33+ messages in thread From: "Zhou, Wenjian/周文剑" @ 2016-03-18 2:46 UTC (permalink / raw) To: Minfei Huang; +Cc: kexec [-- Attachment #1: Type: text/plain, Size: 20609 bytes --] Hello Minfei, Since I can't reproduce the bug, I reviewed the patch and wrote an incremental patch. Though there are some bugs in the incremental patch, I wonder if the previous bug still exists with this patch. Could you help me confirm it? And I have another question. Did it only occur in patch v4? The previous patches almost have the same logic. -- Thanks Zhou On 03/15/2016 02:34 PM, Minfei Huang wrote: > Hi, Zhou. > > I have applied this patch base on 1.5.9. There are several testcases I > have tested. > > - makedumpfile --num-threads 64 -d 31 > real 0m0.010s > user 0m0.002s > sys 0m0.009s > > - makedumpfile --num-threads 31 -d 31 > real 2m40.915s > user 10m50.900s > sys 23m9.664s > > makedumpfile --num-threads 30 -d 31 > real 0m0.006s > user 0m0.002s > sys 0m0.004s > > makedumpfile --num-threads 32 -d 31 > real 0m0.007s > user 0m0.002s > sys 0m0.005s > > - makedumpfile --num-threads 8 -d 31 > real 2m32.692s > user 7m4.630s > sys 2m0.369s > > - makedumpfile --num-threads 1 -d 31 > real 4m42.423s > user 7m27.153s > sys 0m22.490s > > - makedumpfile.orig -d 31 > real 4m1.297s > user 3m39.696s > sys 0m15.200s > > This patch has a huge increment to the filter performance under 31. But > it is not stable, since makedumpfile fails to dump vmcore intermittently. > You can find the above test result, makedumpfile fails to dump vmcore > with option --num-threads 64, also it may occur with option > --number-threads 8. > > Thanks > Minfei > > On 03/09/16 at 08:27am, Zhou Wenjian wrote: >> v4: >> 1. fix a bug caused by the logic >> v3: >> 1. 
remove some unused variables >> 2. fix a bug caused by the wrong logic >> 3. fix a bug caused by optimising >> 4. improve more performance by using Minoru Usui's code >> >> multi-threads implementation will introduce extra cost when handling >> each page. The origin implementation will also do the extra work for >> filtered pages. So there is a big performance degradation in >> --num-threads -d 31. >> The new implementation won't do the extra work for filtered pages any >> more. So the performance of -d 31 is close to that of serial processing. >> >> The new implementation is just like the following: >> * The basic idea is producer producing page and consumer writing page. >> * Each producer have a page_flag_buf list which is used for storing >> page's description. >> * The size of page_flag_buf is little so it won't take too much memory. >> * And all producers will share a page_data_buf array which is >> used for storing page's compressed data. >> * The main thread is the consumer. It will find the next pfn and write >> it into file. >> * The next pfn is smallest pfn in all page_flag_buf. 
>> >> Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> >> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> >> --- >> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- >> makedumpfile.h | 35 ++++--- >> 2 files changed, 202 insertions(+), 131 deletions(-) >> >> diff --git a/makedumpfile.c b/makedumpfile.c >> index fa0b779..2b0864a 100644 >> --- a/makedumpfile.c >> +++ b/makedumpfile.c >> @@ -3483,7 +3483,8 @@ initial_for_parallel() >> unsigned long page_data_buf_size; >> unsigned long limit_size; >> int page_data_num; >> - int i; >> + struct page_flag *current; >> + int i, j; >> >> len_buf_out = calculate_len_buf_out(info->page_size); >> >> @@ -3560,10 +3561,16 @@ initial_for_parallel() >> >> limit_size = (get_free_memory_size() >> - MAP_REGION * info->num_threads) * 0.6; >> + if (limit_size < 0) { >> + MSG("Free memory is not enough for multi-threads\n"); >> + return FALSE; >> + } >> >> page_data_num = limit_size / page_data_buf_size; >> + info->num_buffers = 3 * info->num_threads; >> >> - info->num_buffers = MIN(NUM_BUFFERS, page_data_num); >> + info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); >> + info->num_buffers = MIN(info->num_buffers, page_data_num); >> >> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", >> info->num_buffers); >> @@ -3588,6 +3595,36 @@ initial_for_parallel() >> } >> >> /* >> + * initial page_flag for each thread >> + */ >> + if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) >> + == NULL) { >> + MSG("Can't allocate memory for page_flag_buf. %s\n", >> + strerror(errno)); >> + return FALSE; >> + } >> + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); >> + >> + for (i = 0; i < info->num_threads; i++) { >> + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { >> + MSG("Can't allocate memory for page_flag. 
%s\n", >> + strerror(errno)); >> + return FALSE; >> + } >> + current = info->page_flag_buf[i]; >> + >> + for (j = 1; j < NUM_BUFFERS; j++) { >> + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { >> + MSG("Can't allocate memory for page_flag. %s\n", >> + strerror(errno)); >> + return FALSE; >> + } >> + current = current->next; >> + } >> + current->next = info->page_flag_buf[i]; >> + } >> + >> + /* >> * initial fd_memory for threads >> */ >> for (i = 0; i < info->num_threads; i++) { >> @@ -3612,7 +3649,8 @@ initial_for_parallel() >> void >> free_for_parallel() >> { >> - int i; >> + int i, j; >> + struct page_flag *current; >> >> if (info->threads != NULL) { >> for (i = 0; i < info->num_threads; i++) { >> @@ -3655,6 +3693,19 @@ free_for_parallel() >> free(info->page_data_buf); >> } >> >> + if (info->page_flag_buf != NULL) { >> + for (i = 0; i < info->num_threads; i++) { >> + for (j = 0; j < NUM_BUFFERS; j++) { >> + if (info->page_flag_buf[i] != NULL) { >> + current = info->page_flag_buf[i]; >> + info->page_flag_buf[i] = current->next; >> + free(current); >> + } >> + } >> + } >> + free(info->page_flag_buf); >> + } >> + >> if (info->parallel_info == NULL) >> return; >> >> @@ -7075,11 +7126,11 @@ void * >> kdump_thread_function_cyclic(void *arg) { >> void *retval = PTHREAD_FAIL; >> struct thread_args *kdump_thread_args = (struct thread_args *)arg; >> - struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >> + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >> + volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; >> struct cycle *cycle = kdump_thread_args->cycle; >> - int page_data_num = kdump_thread_args->page_data_num; >> - mdf_pfn_t pfn; >> - int index; >> + mdf_pfn_t pfn = cycle->start_pfn; >> + int index = kdump_thread_args->thread_num; >> int buf_ready; >> int dumpable; >> int fd_memory = 0; >> @@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { >> 
kdump_thread_args->thread_num); >> } >> >> - while (1) { >> - /* get next pfn */ >> - pthread_mutex_lock(&info->current_pfn_mutex); >> - pfn = info->current_pfn; >> - info->current_pfn++; >> - pthread_mutex_unlock(&info->current_pfn_mutex); >> - >> - if (pfn >= kdump_thread_args->end_pfn) >> - break; >> - >> - index = -1; >> + /* >> + * filtered page won't take anything >> + * unfiltered zero page will only take a page_flag_buf >> + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf >> + */ >> + while (pfn < cycle->end_pfn) { >> buf_ready = FALSE; >> >> + pthread_mutex_lock(&info->page_data_mutex); >> + while (page_data_buf[index].used != FALSE) { >> + index = (index + 1) % info->num_buffers; >> + } >> + page_data_buf[index].used = TRUE; >> + pthread_mutex_unlock(&info->page_data_mutex); >> + >> while (buf_ready == FALSE) { >> pthread_testcancel(); >> - >> - index = pfn % page_data_num; >> - >> - if (pfn - info->consumed_pfn > info->num_buffers) >> + if (page_flag_buf->ready == FLAG_READY) >> continue; >> >> - if (page_data_buf[index].ready != 0) >> - continue; >> - >> - pthread_mutex_lock(&page_data_buf[index].mutex); >> - >> - if (page_data_buf[index].ready != 0) >> - goto unlock; >> - >> - buf_ready = TRUE; >> + /* get next dumpable pfn */ >> + pthread_mutex_lock(&info->current_pfn_mutex); >> + for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { >> + dumpable = is_dumpable( >> + info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >> + pfn, >> + cycle); >> + if (dumpable) >> + break; >> + } >> + info->current_pfn = pfn + 1; >> >> - page_data_buf[index].pfn = pfn; >> - page_data_buf[index].ready = 1; >> + page_flag_buf->pfn = pfn; >> + page_flag_buf->ready = FLAG_FILLING; >> + pthread_mutex_unlock(&info->current_pfn_mutex); >> + sem_post(&info->page_flag_buf_sem); >> >> - dumpable = is_dumpable( >> - info->fd_bitmap ? 
&bitmap_parallel : info->bitmap2, >> - pfn, >> - cycle); >> - page_data_buf[index].dumpable = dumpable; >> - if (!dumpable) >> - goto unlock; >> + if (pfn >= cycle->end_pfn) { >> + info->current_pfn = cycle->end_pfn; >> + page_data_buf[index].used = FALSE; >> + break; >> + } >> >> if (!read_pfn_parallel(fd_memory, pfn, buf, >> &bitmap_memory_parallel, >> @@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { >> >> if ((info->dump_level & DL_EXCLUDE_ZERO) >> && is_zero_page(buf, info->page_size)) { >> - page_data_buf[index].zero = TRUE; >> - goto unlock; >> + page_flag_buf->zero = TRUE; >> + goto next; >> } >> >> - page_data_buf[index].zero = FALSE; >> + page_flag_buf->zero = FALSE; >> >> /* >> * Compress the page data. >> @@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { >> page_data_buf[index].flags = >> DUMP_DH_COMPRESSED_LZO; >> page_data_buf[index].size = size_out; >> + >> memcpy(page_data_buf[index].buf, buf_out, size_out); >> #endif >> #ifdef USESNAPPY >> @@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { >> page_data_buf[index].size = info->page_size; >> memcpy(page_data_buf[index].buf, buf, info->page_size); >> } >> -unlock: >> - pthread_mutex_unlock(&page_data_buf[index].mutex); >> + page_flag_buf->index = index; >> + buf_ready = TRUE; >> +next: >> + page_flag_buf->ready = FLAG_READY; >> + page_flag_buf = page_flag_buf->next; >> >> } >> } >> - >> retval = NULL; >> >> fail: >> @@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> struct page_desc pd; >> struct timeval tv_start; >> struct timeval last, new; >> - unsigned long long consuming_pfn; >> pthread_t **threads = NULL; >> struct thread_args *kdump_thread_args = NULL; >> void *thread_result; >> - int page_data_num; >> + int page_buf_num; >> struct page_data *page_data_buf = NULL; >> int i; >> int index; >> + int end_count, consuming, check_count; >> + mdf_pfn_t current_pfn, temp_pfn; >> >> if (info->flag_elf_dumpfile) >> 
return FALSE; >> @@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> goto out; >> } >> >> - res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); >> - if (res != 0) { >> - ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", >> - strerror(res)); >> - goto out; >> - } >> - >> res = pthread_mutex_init(&info->filter_mutex, NULL); >> if (res != 0) { >> ERRMSG("Can't initialize filter_mutex. %s\n", strerror(res)); >> @@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> end_pfn = cycle->end_pfn; >> >> info->current_pfn = start_pfn; >> - info->consumed_pfn = start_pfn - 1; >> >> threads = info->threads; >> kdump_thread_args = info->kdump_thread_args; >> >> - page_data_num = info->num_buffers; >> + page_buf_num = info->num_buffers; >> page_data_buf = info->page_data_buf; >> + pthread_mutex_init(&info->page_data_mutex, NULL); >> + sem_init(&info->page_flag_buf_sem, 0, 0); >> >> - for (i = 0; i < page_data_num; i++) { >> - /* >> - * producer will use pfn in page_data_buf to decide the >> - * consumed pfn >> - */ >> - page_data_buf[i].pfn = start_pfn - 1; >> - page_data_buf[i].ready = 0; >> - res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); >> - if (res != 0) { >> - ERRMSG("Can't initialize mutex of page_data_buf. 
%s\n", >> - strerror(res)); >> - goto out; >> - } >> - } >> + for (i = 0; i < page_buf_num; i++) >> + page_data_buf[i].used = FALSE; >> >> for (i = 0; i < info->num_threads; i++) { >> kdump_thread_args[i].thread_num = i; >> kdump_thread_args[i].len_buf_out = len_buf_out; >> - kdump_thread_args[i].start_pfn = start_pfn; >> - kdump_thread_args[i].end_pfn = end_pfn; >> - kdump_thread_args[i].page_data_num = page_data_num; >> kdump_thread_args[i].page_data_buf = page_data_buf; >> + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; >> kdump_thread_args[i].cycle = cycle; >> >> res = pthread_create(threads[i], NULL, >> @@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> } >> } >> >> - consuming_pfn = start_pfn; >> - index = -1; >> + end_count = 0; >> + while (1) { >> + consuming = 0; >> + check_count = 0; >> >> - gettimeofday(&last, NULL); >> + /* >> + * The basic idea is producer producing page and consumer writing page. >> + * Each producer have a page_flag_buf list which is used for storing page's description. >> + * The size of page_flag_buf is little so it won't take too much memory. >> + * And all producers will share a page_data_buf array which is used for storing page's compressed data. >> + * The main thread is the consumer. It will find the next pfn and write it into file. >> + * The next pfn is smallest pfn in all page_flag_buf. >> + */ >> + sem_wait(&info->page_flag_buf_sem); >> + gettimeofday(&last, NULL); >> + while (1) { >> + current_pfn = end_pfn; >> >> - while (consuming_pfn < end_pfn) { >> - index = consuming_pfn % page_data_num; >> + /* >> + * page_flag_buf is in circular linked list. >> + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's >> + * page_flag_buf list. >> + * consuming is used for recording in which thread the pfn is the smallest. >> + * current_pfn is used for recording the value of pfn when checking the pfn. 
>> + */ >> + for (i = 0; i < info->num_threads; i++) { >> + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) >> + continue; >> + temp_pfn = info->page_flag_buf[i]->pfn; >> >> - gettimeofday(&new, NULL); >> - if (new.tv_sec - last.tv_sec > WAIT_TIME) { >> - ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); >> - goto out; >> - } >> + /* >> + * count how many threads have reached the end. >> + */ >> + if (temp_pfn >= end_pfn) { >> + info->page_flag_buf[i]->ready = FLAG_UNUSED; >> + end_count++; >> + continue; >> + } >> >> - /* >> - * check pfn first without mutex locked to reduce the time >> - * trying to lock the mutex >> - */ >> - if (page_data_buf[index].pfn != consuming_pfn) >> - continue; >> + if (current_pfn < temp_pfn) >> + continue; >> >> - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) >> - continue; >> + check_count++; >> + consuming = i; >> + current_pfn = temp_pfn; >> + } >> >> - /* check whether the found one is ready to be consumed */ >> - if (page_data_buf[index].pfn != consuming_pfn || >> - page_data_buf[index].ready != 1) { >> - goto unlock; >> + /* >> + * If all the threads have reached the end, we will finish writing. >> + */ >> + if (end_count >= info->num_threads) >> + goto finish; >> + >> + /* >> + * If the page_flag_buf is not ready, the pfn recorded may be changed. >> + * So we should recheck. 
>> + */ >> + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { >> + gettimeofday(&new, NULL); >> + if (new.tv_sec - last.tv_sec > WAIT_TIME) { >> + ERRMSG("Can't get data of pfn.\n"); >> + goto out; >> + } >> + continue; >> + } >> + >> + if (current_pfn == info->page_flag_buf[consuming]->pfn) >> + break; >> } >> >> if ((num_dumped % per) == 0) >> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); >> >> - /* next pfn is found, refresh last here */ >> - last = new; >> - consuming_pfn++; >> - info->consumed_pfn++; >> - page_data_buf[index].ready = 0; >> - >> - if (page_data_buf[index].dumpable == FALSE) >> - goto unlock; >> - >> num_dumped++; >> >> - if (page_data_buf[index].zero == TRUE) { >> + >> + if (info->page_flag_buf[consuming]->zero == TRUE) { >> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) >> goto out; >> pfn_zero++; >> } else { >> + index = info->page_flag_buf[consuming]->index; >> pd.flags = page_data_buf[index].flags; >> pd.size = page_data_buf[index].size; >> pd.page_flags = 0; >> @@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> */ >> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) >> goto out; >> - >> + page_data_buf[index].used = FALSE; >> } >> -unlock: >> - pthread_mutex_unlock(&page_data_buf[index].mutex); >> + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; >> + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; >> } >> - >> +finish: >> ret = TRUE; >> /* >> * print [100 %] >> @@ -7463,15 +7532,9 @@ out: >> } >> } >> >> - if (page_data_buf != NULL) { >> - for (i = 0; i < page_data_num; i++) { >> - pthread_mutex_destroy(&page_data_buf[i].mutex); >> - } >> - } >> - >> + sem_destroy(&info->page_flag_buf_sem); >> pthread_rwlock_destroy(&info->usemmap_rwlock); >> pthread_mutex_destroy(&info->filter_mutex); >> - pthread_mutex_destroy(&info->consumed_pfn_mutex); >> pthread_mutex_destroy(&info->current_pfn_mutex); >> >> return ret; >> @@ -7564,6 
+7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag >> num_dumped++; >> if (!read_pfn(pfn, buf)) >> goto out; >> + >> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); >> >> /* >> diff --git a/makedumpfile.h b/makedumpfile.h >> index e0b5bbf..4b315c0 100644 >> --- a/makedumpfile.h >> +++ b/makedumpfile.h >> @@ -44,6 +44,7 @@ >> #include "print_info.h" >> #include "sadump_mod.h" >> #include <pthread.h> >> +#include <semaphore.h> >> >> /* >> * Result of command >> @@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; >> #define PAGE_DATA_NUM (50) >> #define WAIT_TIME (60 * 10) >> #define PTHREAD_FAIL ((void *)-2) >> -#define NUM_BUFFERS (50) >> +#define NUM_BUFFERS (20) >> >> struct mmap_cache { >> char *mmap_buf; >> @@ -985,28 +986,33 @@ struct mmap_cache { >> off_t mmap_end_offset; >> }; >> >> +enum { >> + FLAG_UNUSED, >> + FLAG_READY, >> + FLAG_FILLING >> +}; >> +struct page_flag { >> + mdf_pfn_t pfn; >> + char zero; >> + char ready; >> + short index; >> + struct page_flag *next; >> +}; >> + >> struct page_data >> { >> - mdf_pfn_t pfn; >> - int dumpable; >> - int zero; >> - unsigned int flags; >> long size; >> unsigned char *buf; >> - pthread_mutex_t mutex; >> - /* >> - * whether the page_data is ready to be consumed >> - */ >> - int ready; >> + int flags; >> + int used; >> }; >> >> struct thread_args { >> int thread_num; >> unsigned long len_buf_out; >> - mdf_pfn_t start_pfn, end_pfn; >> - int page_data_num; >> struct cycle *cycle; >> struct page_data *page_data_buf; >> + struct page_flag *page_flag_buf; >> }; >> >> /* >> @@ -1295,11 +1301,12 @@ struct DumpInfo { >> pthread_t **threads; >> struct thread_args *kdump_thread_args; >> struct page_data *page_data_buf; >> + struct page_flag **page_flag_buf; >> + sem_t page_flag_buf_sem; >> pthread_rwlock_t usemmap_rwlock; >> mdf_pfn_t current_pfn; >> pthread_mutex_t current_pfn_mutex; >> - mdf_pfn_t consumed_pfn; >> - pthread_mutex_t consumed_pfn_mutex; >> + 
pthread_mutex_t page_data_mutex; >> pthread_mutex_t filter_mutex; >> }; >> extern struct DumpInfo *info; >> -- >> 1.8.3.1 >> >> >> >> >> _______________________________________________ >> kexec mailing list >> kexec@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/kexec [-- Attachment #2: 0001-increment.patch --] [-- Type: text/x-patch, Size: 9431 bytes --] From 1d7ad5dbbc29efe58171b1023ab0df09eb2815bf Mon Sep 17 00:00:00 2001 From: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> Date: Fri, 18 Mar 2016 10:35:35 +0800 Subject: [PATCH] increment --- makedumpfile.c | 107 ++++++++++++++++++++++++++++++++++++++++++--------------- makedumpfile.h | 12 +++++-- 2 files changed, 89 insertions(+), 30 deletions(-) diff --git a/makedumpfile.c b/makedumpfile.c index 2b0864a..a304a61 100644 --- a/makedumpfile.c +++ b/makedumpfile.c @@ -3477,6 +3477,42 @@ calibrate_machdep_info(void) } int +initial_parallel_area(int page_data_buf_size) +{ + int i, j; + struct page_flag *current; + info->page_data_buf = info->parallel_area; + void *page_data_buf = info->parallel_area + sizeof(struct page_data) * info->num_buffers; + void *page_flag_list = page_data_buf + page_data_buf_size * info->num_buffers; + + for (i = 0; i < info->num_buffers; i++) { + info->page_data_buf[i].buf = page_data_buf + page_data_buf_size * i; + } + + + if ((info->page_flag_list = malloc(sizeof(struct page_flag_list) * info->num_threads)) + == NULL) { + MSG("Can't allocate memory for page_flag_buf. 
%s\n", + strerror(errno)); + return FALSE; + } + + for (i = 0; i < info->num_threads; i++) { + info->page_flag_list[i].header = page_flag_list + sizeof(struct page_flag) * info->num_buffers * i; + info->page_flag_list[i].current = 0; + /* + current = info->page_flag_buf[i]; + for (j = 1; j < info->num_buffers; j++) { + current->next = current + sizeof(struct page_flag); + current = current->next; + } + current->next = info->page_flag_buf[i]; +*/ + } + +} + +int initial_for_parallel() { unsigned long len_buf_out; @@ -3575,9 +3611,15 @@ initial_for_parallel() DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", info->num_buffers); - /* - * allocate memory for page_data - */ + if ((info->parallel_area = calloc(info->num_buffers, sizeof(struct page_data)+page_data_buf_size + sizeof(struct page_flag)*info->num_threads)) + ==NULL) { + MSG("Can't allocate memory for page_data_buf. %s\n", + strerror(errno)); + return FALSE; + } + + initial_parallel_area(page_data_buf_size); +/* if ((info->page_data_buf = malloc(sizeof(struct page_data) * info->num_buffers)) == NULL) { MSG("Can't allocate memory for page_data_buf. %s\n", @@ -3594,9 +3636,6 @@ initial_for_parallel() } } - /* - * initial page_flag for each thread - */ if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) == NULL) { MSG("Can't allocate memory for page_flag_buf. 
%s\n", @@ -3623,7 +3662,7 @@ initial_for_parallel() } current->next = info->page_flag_buf[i]; } - +*/ /* * initial fd_memory for threads */ @@ -3685,6 +3724,15 @@ free_for_parallel() if (info->kdump_thread_args != NULL) free(info->kdump_thread_args); + if (info->page_flag_list != NULL) { + free(info->page_flag_list); + } + + if (info->parallel_area != NULL) { + free(info->parallel_area); + } + +/* if (info->page_data_buf != NULL) { for (i = 0; i < info->num_buffers; i++) { if (info->page_data_buf[i].buf != NULL) @@ -3705,7 +3753,7 @@ free_for_parallel() } free(info->page_flag_buf); } - +*/ if (info->parallel_info == NULL) return; @@ -7122,12 +7170,14 @@ int finalize_zlib(z_stream *stream) return err; } +#define CURRENT_FLAG page_flag_header[current_page_flag] void * kdump_thread_function_cyclic(void *arg) { void *retval = PTHREAD_FAIL; struct thread_args *kdump_thread_args = (struct thread_args *)arg; volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; - volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; + struct page_flag *page_flag_header = kdump_thread_args->page_flag_list->header; + int current_page_flag = kdump_thread_args->page_flag_list->current; struct cycle *cycle = kdump_thread_args->cycle; mdf_pfn_t pfn = cycle->start_pfn; int index = kdump_thread_args->thread_num; @@ -7193,7 +7243,7 @@ kdump_thread_function_cyclic(void *arg) { while (buf_ready == FALSE) { pthread_testcancel(); - if (page_flag_buf->ready == FLAG_READY) + if (CURRENT_FLAG.ready == FLAG_READY) continue; /* get next dumpable pfn */ @@ -7208,8 +7258,8 @@ kdump_thread_function_cyclic(void *arg) { } info->current_pfn = pfn + 1; - page_flag_buf->pfn = pfn; - page_flag_buf->ready = FLAG_FILLING; + CURRENT_FLAG.pfn = pfn; + CURRENT_FLAG.ready = FLAG_FILLING; pthread_mutex_unlock(&info->current_pfn_mutex); sem_post(&info->page_flag_buf_sem); @@ -7230,11 +7280,11 @@ kdump_thread_function_cyclic(void *arg) { if ((info->dump_level & DL_EXCLUDE_ZERO) 
&& is_zero_page(buf, info->page_size)) { - page_flag_buf->zero = TRUE; + CURRENT_FLAG.zero = TRUE; goto next; } - page_flag_buf->zero = FALSE; + CURRENT_FLAG.zero = FALSE; /* * Compress the page data. @@ -7285,11 +7335,11 @@ kdump_thread_function_cyclic(void *arg) { page_data_buf[index].size = info->page_size; memcpy(page_data_buf[index].buf, buf, info->page_size); } - page_flag_buf->index = index; + CURRENT_FLAG.index = index; buf_ready = TRUE; next: - page_flag_buf->ready = FLAG_READY; - page_flag_buf = page_flag_buf->next; + CURRENT_FLAG.ready = FLAG_READY; + current_page_flag = (current_page_flag + 1) % info->num_buffers; } } @@ -7306,6 +7356,8 @@ fail: pthread_exit(retval); } +#define CURRENT_PAGE_FLAG(i) (info->page_flag_list[i].header)[info->page_flag_list[i].current] + int write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, struct cache_data *cd_page, @@ -7379,7 +7431,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, kdump_thread_args[i].thread_num = i; kdump_thread_args[i].len_buf_out = len_buf_out; kdump_thread_args[i].page_data_buf = page_data_buf; - kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; + kdump_thread_args[i].page_flag_list = &(info->page_flag_list[i]); kdump_thread_args[i].cycle = cycle; res = pthread_create(threads[i], NULL, @@ -7418,15 +7470,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, * current_pfn is used for recording the value of pfn when checking the pfn. */ for (i = 0; i < info->num_threads; i++) { - if (info->page_flag_buf[i]->ready == FLAG_UNUSED) + if (CURRENT_PAGE_FLAG(i).ready == FLAG_UNUSED) continue; - temp_pfn = info->page_flag_buf[i]->pfn; + temp_pfn = CURRENT_PAGE_FLAG(i).pfn; /* * count how many threads have reached the end. 
*/ if (temp_pfn >= end_pfn) { - info->page_flag_buf[i]->ready = FLAG_UNUSED; + CURRENT_PAGE_FLAG(i).ready = FLAG_UNUSED; end_count++; continue; } @@ -7449,7 +7501,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, * If the page_flag_buf is not ready, the pfn recorded may be changed. * So we should recheck. */ - if (info->page_flag_buf[consuming]->ready != FLAG_READY) { + if (CURRENT_PAGE_FLAG(consuming).ready != FLAG_READY) { gettimeofday(&new, NULL); if (new.tv_sec - last.tv_sec > WAIT_TIME) { ERRMSG("Can't get data of pfn.\n"); @@ -7458,7 +7510,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, continue; } - if (current_pfn == info->page_flag_buf[consuming]->pfn) + if (current_pfn == CURRENT_PAGE_FLAG(consuming).pfn) break; } @@ -7468,12 +7520,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, num_dumped++; - if (info->page_flag_buf[consuming]->zero == TRUE) { + if (CURRENT_PAGE_FLAG(consuming).zero == TRUE) { if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) goto out; pfn_zero++; } else { - index = info->page_flag_buf[consuming]->index; + index = CURRENT_PAGE_FLAG(consuming).index; pd.flags = page_data_buf[index].flags; pd.size = page_data_buf[index].size; pd.page_flags = 0; @@ -7491,8 +7543,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, goto out; page_data_buf[index].used = FALSE; } - info->page_flag_buf[consuming]->ready = FLAG_UNUSED; - info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; + CURRENT_PAGE_FLAG(consuming).ready = FLAG_UNUSED; + info->page_flag_list[consuming].current += 1; + info->page_flag_list[consuming].current %= info->num_buffers; } finish: ret = TRUE; diff --git a/makedumpfile.h b/makedumpfile.h index 4b315c0..8c4bc1a 100644 --- a/makedumpfile.h +++ b/makedumpfile.h @@ -996,7 +996,12 @@ struct page_flag { char zero; char ready; short index; - struct page_flag *next; +// struct page_flag *next; +}; + +struct page_flag_list { + struct 
page_flag *header; + int current; }; struct page_data @@ -1012,7 +1017,7 @@ struct thread_args { unsigned long len_buf_out; struct cycle *cycle; struct page_data *page_data_buf; - struct page_flag *page_flag_buf; + struct page_flag_list *page_flag_list; }; /* @@ -1298,10 +1303,11 @@ struct DumpInfo { */ int num_threads; int num_buffers; + void *parallel_area; pthread_t **threads; struct thread_args *kdump_thread_args; struct page_data *page_data_buf; - struct page_flag **page_flag_buf; + struct page_flag_list *page_flag_list; sem_t page_flag_buf_sem; pthread_rwlock_t usemmap_rwlock; mdf_pfn_t current_pfn; -- 1.8.3.1 [-- Attachment #3: Type: text/plain, Size: 143 bytes --] _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply related [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-18 2:46 ` "Zhou, Wenjian/周文剑" @ 2016-03-18 4:16 ` Minfei Huang 2016-03-18 5:48 ` "Zhou, Wenjian/周文剑" 0 siblings, 1 reply; 33+ messages in thread From: Minfei Huang @ 2016-03-18 4:16 UTC (permalink / raw) To: "Zhou, Wenjian/周文剑"; +Cc: kexec On 03/18/16 at 10:46am, "Zhou, Wenjian/周文剑" wrote: > Hello Minfei, > > Since I can't produce the bug, I reviewed the patch and wrote an increment patch. > Though there are some bugs in the increment patch, > I wonder if the previous bug still exists with this patch. > Could you help me confirm it? Ok. I will help verify this incremental patch. > > And I have another question. > Did it only occur in patch v4? This issue doesn't exist in v3. I have pasted the test result with --num-threads 32 in that thread. applied makedumpfile with option -d 31 --num-threads 32 real 3m3.533s Thanks Minfei > The previous patches almost have the same logic. > > -- > Thanks > Zhou > > On 03/15/2016 02:34 PM, Minfei Huang wrote: > >Hi, Zhou. > > > >I have applied this patch base on 1.5.9. There are several testcases I > >have tested.
> >You can find the above test result, makedumpfile fails to dump vmcore > >with option --num-threads 64, also it may occur with option > >--number-threads 8. > > > >Thanks > >Minfei > > > >On 03/09/16 at 08:27am, Zhou Wenjian wrote: > >>v4: > >> 1. fix a bug caused by the logic > >>v3: > >> 1. remove some unused variables > >> 2. fix a bug caused by the wrong logic > >> 3. fix a bug caused by optimising > >> 4. improve more performance by using Minoru Usui's code > >> > >>multi-threads implementation will introduce extra cost when handling > >>each page. The origin implementation will also do the extra work for > >>filtered pages. So there is a big performance degradation in > >>--num-threads -d 31. > >>The new implementation won't do the extra work for filtered pages any > >>more. So the performance of -d 31 is close to that of serial processing. > >> > >>The new implementation is just like the following: > >> * The basic idea is producer producing page and consumer writing page. > >> * Each producer have a page_flag_buf list which is used for storing > >> page's description. > >> * The size of page_flag_buf is little so it won't take too much memory. > >> * And all producers will share a page_data_buf array which is > >> used for storing page's compressed data. > >> * The main thread is the consumer. It will find the next pfn and write > >> it into file. > >> * The next pfn is smallest pfn in all page_flag_buf. 
> >> > >>Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> > >>Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> > >>--- > >> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- > >> makedumpfile.h | 35 ++++--- > >> 2 files changed, 202 insertions(+), 131 deletions(-) > >> > >>diff --git a/makedumpfile.c b/makedumpfile.c > >>index fa0b779..2b0864a 100644 > >>--- a/makedumpfile.c > >>+++ b/makedumpfile.c > >>@@ -3483,7 +3483,8 @@ initial_for_parallel() > >> unsigned long page_data_buf_size; > >> unsigned long limit_size; > >> int page_data_num; > >>- int i; > >>+ struct page_flag *current; > >>+ int i, j; > >> > >> len_buf_out = calculate_len_buf_out(info->page_size); > >> > >>@@ -3560,10 +3561,16 @@ initial_for_parallel() > >> > >> limit_size = (get_free_memory_size() > >> - MAP_REGION * info->num_threads) * 0.6; > >>+ if (limit_size < 0) { > >>+ MSG("Free memory is not enough for multi-threads\n"); > >>+ return FALSE; > >>+ } > >> > >> page_data_num = limit_size / page_data_buf_size; > >>+ info->num_buffers = 3 * info->num_threads; > >> > >>- info->num_buffers = MIN(NUM_BUFFERS, page_data_num); > >>+ info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); > >>+ info->num_buffers = MIN(info->num_buffers, page_data_num); > >> > >> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", > >> info->num_buffers); > >>@@ -3588,6 +3595,36 @@ initial_for_parallel() > >> } > >> > >> /* > >>+ * initial page_flag for each thread > >>+ */ > >>+ if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) > >>+ == NULL) { > >>+ MSG("Can't allocate memory for page_flag_buf. %s\n", > >>+ strerror(errno)); > >>+ return FALSE; > >>+ } > >>+ memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); > >>+ > >>+ for (i = 0; i < info->num_threads; i++) { > >>+ if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { > >>+ MSG("Can't allocate memory for page_flag. 
%s\n", > >>+ strerror(errno)); > >>+ return FALSE; > >>+ } > >>+ current = info->page_flag_buf[i]; > >>+ > >>+ for (j = 1; j < NUM_BUFFERS; j++) { > >>+ if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { > >>+ MSG("Can't allocate memory for page_flag. %s\n", > >>+ strerror(errno)); > >>+ return FALSE; > >>+ } > >>+ current = current->next; > >>+ } > >>+ current->next = info->page_flag_buf[i]; > >>+ } > >>+ > >>+ /* > >> * initial fd_memory for threads > >> */ > >> for (i = 0; i < info->num_threads; i++) { > >>@@ -3612,7 +3649,8 @@ initial_for_parallel() > >> void > >> free_for_parallel() > >> { > >>- int i; > >>+ int i, j; > >>+ struct page_flag *current; > >> > >> if (info->threads != NULL) { > >> for (i = 0; i < info->num_threads; i++) { > >>@@ -3655,6 +3693,19 @@ free_for_parallel() > >> free(info->page_data_buf); > >> } > >> > >>+ if (info->page_flag_buf != NULL) { > >>+ for (i = 0; i < info->num_threads; i++) { > >>+ for (j = 0; j < NUM_BUFFERS; j++) { > >>+ if (info->page_flag_buf[i] != NULL) { > >>+ current = info->page_flag_buf[i]; > >>+ info->page_flag_buf[i] = current->next; > >>+ free(current); > >>+ } > >>+ } > >>+ } > >>+ free(info->page_flag_buf); > >>+ } > >>+ > >> if (info->parallel_info == NULL) > >> return; > >> > >>@@ -7075,11 +7126,11 @@ void * > >> kdump_thread_function_cyclic(void *arg) { > >> void *retval = PTHREAD_FAIL; > >> struct thread_args *kdump_thread_args = (struct thread_args *)arg; > >>- struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > >>+ volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > >>+ volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; > >> struct cycle *cycle = kdump_thread_args->cycle; > >>- int page_data_num = kdump_thread_args->page_data_num; > >>- mdf_pfn_t pfn; > >>- int index; > >>+ mdf_pfn_t pfn = cycle->start_pfn; > >>+ int index = kdump_thread_args->thread_num; > >> int buf_ready; > >> int dumpable; > >> int fd_memory = 0; 
> >>@@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { > >> kdump_thread_args->thread_num); > >> } > >> > >>- while (1) { > >>- /* get next pfn */ > >>- pthread_mutex_lock(&info->current_pfn_mutex); > >>- pfn = info->current_pfn; > >>- info->current_pfn++; > >>- pthread_mutex_unlock(&info->current_pfn_mutex); > >>- > >>- if (pfn >= kdump_thread_args->end_pfn) > >>- break; > >>- > >>- index = -1; > >>+ /* > >>+ * filtered page won't take anything > >>+ * unfiltered zero page will only take a page_flag_buf > >>+ * unfiltered non-zero page will take a page_flag_buf and a page_data_buf > >>+ */ > >>+ while (pfn < cycle->end_pfn) { > >> buf_ready = FALSE; > >> > >>+ pthread_mutex_lock(&info->page_data_mutex); > >>+ while (page_data_buf[index].used != FALSE) { > >>+ index = (index + 1) % info->num_buffers; > >>+ } > >>+ page_data_buf[index].used = TRUE; > >>+ pthread_mutex_unlock(&info->page_data_mutex); > >>+ > >> while (buf_ready == FALSE) { > >> pthread_testcancel(); > >>- > >>- index = pfn % page_data_num; > >>- > >>- if (pfn - info->consumed_pfn > info->num_buffers) > >>+ if (page_flag_buf->ready == FLAG_READY) > >> continue; > >> > >>- if (page_data_buf[index].ready != 0) > >>- continue; > >>- > >>- pthread_mutex_lock(&page_data_buf[index].mutex); > >>- > >>- if (page_data_buf[index].ready != 0) > >>- goto unlock; > >>- > >>- buf_ready = TRUE; > >>+ /* get next dumpable pfn */ > >>+ pthread_mutex_lock(&info->current_pfn_mutex); > >>+ for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { > >>+ dumpable = is_dumpable( > >>+ info->fd_bitmap ? 
&bitmap_parallel : info->bitmap2, > >>+ pfn, > >>+ cycle); > >>+ if (dumpable) > >>+ break; > >>+ } > >>+ info->current_pfn = pfn + 1; > >> > >>- page_data_buf[index].pfn = pfn; > >>- page_data_buf[index].ready = 1; > >>+ page_flag_buf->pfn = pfn; > >>+ page_flag_buf->ready = FLAG_FILLING; > >>+ pthread_mutex_unlock(&info->current_pfn_mutex); > >>+ sem_post(&info->page_flag_buf_sem); > >> > >>- dumpable = is_dumpable( > >>- info->fd_bitmap ? &bitmap_parallel : info->bitmap2, > >>- pfn, > >>- cycle); > >>- page_data_buf[index].dumpable = dumpable; > >>- if (!dumpable) > >>- goto unlock; > >>+ if (pfn >= cycle->end_pfn) { > >>+ info->current_pfn = cycle->end_pfn; > >>+ page_data_buf[index].used = FALSE; > >>+ break; > >>+ } > >> > >> if (!read_pfn_parallel(fd_memory, pfn, buf, > >> &bitmap_memory_parallel, > >>@@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { > >> > >> if ((info->dump_level & DL_EXCLUDE_ZERO) > >> && is_zero_page(buf, info->page_size)) { > >>- page_data_buf[index].zero = TRUE; > >>- goto unlock; > >>+ page_flag_buf->zero = TRUE; > >>+ goto next; > >> } > >> > >>- page_data_buf[index].zero = FALSE; > >>+ page_flag_buf->zero = FALSE; > >> > >> /* > >> * Compress the page data. 
> >>@@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { > >> page_data_buf[index].flags = > >> DUMP_DH_COMPRESSED_LZO; > >> page_data_buf[index].size = size_out; > >>+ > >> memcpy(page_data_buf[index].buf, buf_out, size_out); > >> #endif > >> #ifdef USESNAPPY > >>@@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { > >> page_data_buf[index].size = info->page_size; > >> memcpy(page_data_buf[index].buf, buf, info->page_size); > >> } > >>-unlock: > >>- pthread_mutex_unlock(&page_data_buf[index].mutex); > >>+ page_flag_buf->index = index; > >>+ buf_ready = TRUE; > >>+next: > >>+ page_flag_buf->ready = FLAG_READY; > >>+ page_flag_buf = page_flag_buf->next; > >> > >> } > >> } > >>- > >> retval = NULL; > >> > >> fail: > >>@@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> struct page_desc pd; > >> struct timeval tv_start; > >> struct timeval last, new; > >>- unsigned long long consuming_pfn; > >> pthread_t **threads = NULL; > >> struct thread_args *kdump_thread_args = NULL; > >> void *thread_result; > >>- int page_data_num; > >>+ int page_buf_num; > >> struct page_data *page_data_buf = NULL; > >> int i; > >> int index; > >>+ int end_count, consuming, check_count; > >>+ mdf_pfn_t current_pfn, temp_pfn; > >> > >> if (info->flag_elf_dumpfile) > >> return FALSE; > >>@@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> goto out; > >> } > >> > >>- res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); > >>- if (res != 0) { > >>- ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", > >>- strerror(res)); > >>- goto out; > >>- } > >>- > >> res = pthread_mutex_init(&info->filter_mutex, NULL); > >> if (res != 0) { > >> ERRMSG("Can't initialize filter_mutex. 
%s\n", strerror(res)); > >>@@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> end_pfn = cycle->end_pfn; > >> > >> info->current_pfn = start_pfn; > >>- info->consumed_pfn = start_pfn - 1; > >> > >> threads = info->threads; > >> kdump_thread_args = info->kdump_thread_args; > >> > >>- page_data_num = info->num_buffers; > >>+ page_buf_num = info->num_buffers; > >> page_data_buf = info->page_data_buf; > >>+ pthread_mutex_init(&info->page_data_mutex, NULL); > >>+ sem_init(&info->page_flag_buf_sem, 0, 0); > >> > >>- for (i = 0; i < page_data_num; i++) { > >>- /* > >>- * producer will use pfn in page_data_buf to decide the > >>- * consumed pfn > >>- */ > >>- page_data_buf[i].pfn = start_pfn - 1; > >>- page_data_buf[i].ready = 0; > >>- res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); > >>- if (res != 0) { > >>- ERRMSG("Can't initialize mutex of page_data_buf. %s\n", > >>- strerror(res)); > >>- goto out; > >>- } > >>- } > >>+ for (i = 0; i < page_buf_num; i++) > >>+ page_data_buf[i].used = FALSE; > >> > >> for (i = 0; i < info->num_threads; i++) { > >> kdump_thread_args[i].thread_num = i; > >> kdump_thread_args[i].len_buf_out = len_buf_out; > >>- kdump_thread_args[i].start_pfn = start_pfn; > >>- kdump_thread_args[i].end_pfn = end_pfn; > >>- kdump_thread_args[i].page_data_num = page_data_num; > >> kdump_thread_args[i].page_data_buf = page_data_buf; > >>+ kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; > >> kdump_thread_args[i].cycle = cycle; > >> > >> res = pthread_create(threads[i], NULL, > >>@@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> } > >> } > >> > >>- consuming_pfn = start_pfn; > >>- index = -1; > >>+ end_count = 0; > >>+ while (1) { > >>+ consuming = 0; > >>+ check_count = 0; > >> > >>- gettimeofday(&last, NULL); > >>+ /* > >>+ * The basic idea is producer producing page and consumer writing page. 
> >>+ * Each producer have a page_flag_buf list which is used for storing page's description. > >>+ * The size of page_flag_buf is little so it won't take too much memory. > >>+ * And all producers will share a page_data_buf array which is used for storing page's compressed data. > >>+ * The main thread is the consumer. It will find the next pfn and write it into file. > >>+ * The next pfn is smallest pfn in all page_flag_buf. > >>+ */ > >>+ sem_wait(&info->page_flag_buf_sem); > >>+ gettimeofday(&last, NULL); > >>+ while (1) { > >>+ current_pfn = end_pfn; > >> > >>- while (consuming_pfn < end_pfn) { > >>- index = consuming_pfn % page_data_num; > >>+ /* > >>+ * page_flag_buf is in circular linked list. > >>+ * The array info->page_flag_buf[] records the current page_flag_buf in each thread's > >>+ * page_flag_buf list. > >>+ * consuming is used for recording in which thread the pfn is the smallest. > >>+ * current_pfn is used for recording the value of pfn when checking the pfn. > >>+ */ > >>+ for (i = 0; i < info->num_threads; i++) { > >>+ if (info->page_flag_buf[i]->ready == FLAG_UNUSED) > >>+ continue; > >>+ temp_pfn = info->page_flag_buf[i]->pfn; > >> > >>- gettimeofday(&new, NULL); > >>- if (new.tv_sec - last.tv_sec > WAIT_TIME) { > >>- ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); > >>- goto out; > >>- } > >>+ /* > >>+ * count how many threads have reached the end. 
> >>+ */ > >>+ if (temp_pfn >= end_pfn) { > >>+ info->page_flag_buf[i]->ready = FLAG_UNUSED; > >>+ end_count++; > >>+ continue; > >>+ } > >> > >>- /* > >>- * check pfn first without mutex locked to reduce the time > >>- * trying to lock the mutex > >>- */ > >>- if (page_data_buf[index].pfn != consuming_pfn) > >>- continue; > >>+ if (current_pfn < temp_pfn) > >>+ continue; > >> > >>- if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) > >>- continue; > >>+ check_count++; > >>+ consuming = i; > >>+ current_pfn = temp_pfn; > >>+ } > >> > >>- /* check whether the found one is ready to be consumed */ > >>- if (page_data_buf[index].pfn != consuming_pfn || > >>- page_data_buf[index].ready != 1) { > >>- goto unlock; > >>+ /* > >>+ * If all the threads have reached the end, we will finish writing. > >>+ */ > >>+ if (end_count >= info->num_threads) > >>+ goto finish; > >>+ > >>+ /* > >>+ * If the page_flag_buf is not ready, the pfn recorded may be changed. > >>+ * So we should recheck. > >>+ */ > >>+ if (info->page_flag_buf[consuming]->ready != FLAG_READY) { > >>+ gettimeofday(&new, NULL); > >>+ if (new.tv_sec - last.tv_sec > WAIT_TIME) { > >>+ ERRMSG("Can't get data of pfn.\n"); > >>+ goto out; > >>+ } > >>+ continue; > >>+ } > >>+ > >>+ if (current_pfn == info->page_flag_buf[consuming]->pfn) > >>+ break; > >> } > >> > >> if ((num_dumped % per) == 0) > >> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); > >> > >>- /* next pfn is found, refresh last here */ > >>- last = new; > >>- consuming_pfn++; > >>- info->consumed_pfn++; > >>- page_data_buf[index].ready = 0; > >>- > >>- if (page_data_buf[index].dumpable == FALSE) > >>- goto unlock; > >>- > >> num_dumped++; > >> > >>- if (page_data_buf[index].zero == TRUE) { > >>+ > >>+ if (info->page_flag_buf[consuming]->zero == TRUE) { > >> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) > >> goto out; > >> pfn_zero++; > >> } else { > >>+ index = info->page_flag_buf[consuming]->index; > >> pd.flags = 
page_data_buf[index].flags; > >> pd.size = page_data_buf[index].size; > >> pd.page_flags = 0; > >>@@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> */ > >> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) > >> goto out; > >>- > >>+ page_data_buf[index].used = FALSE; > >> } > >>-unlock: > >>- pthread_mutex_unlock(&page_data_buf[index].mutex); > >>+ info->page_flag_buf[consuming]->ready = FLAG_UNUSED; > >>+ info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; > >> } > >>- > >>+finish: > >> ret = TRUE; > >> /* > >> * print [100 %] > >>@@ -7463,15 +7532,9 @@ out: > >> } > >> } > >> > >>- if (page_data_buf != NULL) { > >>- for (i = 0; i < page_data_num; i++) { > >>- pthread_mutex_destroy(&page_data_buf[i].mutex); > >>- } > >>- } > >>- > >>+ sem_destroy(&info->page_flag_buf_sem); > >> pthread_rwlock_destroy(&info->usemmap_rwlock); > >> pthread_mutex_destroy(&info->filter_mutex); > >>- pthread_mutex_destroy(&info->consumed_pfn_mutex); > >> pthread_mutex_destroy(&info->current_pfn_mutex); > >> > >> return ret; > >>@@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag > >> num_dumped++; > >> if (!read_pfn(pfn, buf)) > >> goto out; > >>+ > >> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); > >> > >> /* > >>diff --git a/makedumpfile.h b/makedumpfile.h > >>index e0b5bbf..4b315c0 100644 > >>--- a/makedumpfile.h > >>+++ b/makedumpfile.h > >>@@ -44,6 +44,7 @@ > >> #include "print_info.h" > >> #include "sadump_mod.h" > >> #include <pthread.h> > >>+#include <semaphore.h> > >> > >> /* > >> * Result of command > >>@@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; > >> #define PAGE_DATA_NUM (50) > >> #define WAIT_TIME (60 * 10) > >> #define PTHREAD_FAIL ((void *)-2) > >>-#define NUM_BUFFERS (50) > >>+#define NUM_BUFFERS (20) > >> > >> struct mmap_cache { > >> char *mmap_buf; > >>@@ -985,28 +986,33 @@ struct mmap_cache { > >> off_t 
mmap_end_offset; > >> }; > >> > >>+enum { > >>+ FLAG_UNUSED, > >>+ FLAG_READY, > >>+ FLAG_FILLING > >>+}; > >>+struct page_flag { > >>+ mdf_pfn_t pfn; > >>+ char zero; > >>+ char ready; > >>+ short index; > >>+ struct page_flag *next; > >>+}; > >>+ > >> struct page_data > >> { > >>- mdf_pfn_t pfn; > >>- int dumpable; > >>- int zero; > >>- unsigned int flags; > >> long size; > >> unsigned char *buf; > >>- pthread_mutex_t mutex; > >>- /* > >>- * whether the page_data is ready to be consumed > >>- */ > >>- int ready; > >>+ int flags; > >>+ int used; > >> }; > >> > >> struct thread_args { > >> int thread_num; > >> unsigned long len_buf_out; > >>- mdf_pfn_t start_pfn, end_pfn; > >>- int page_data_num; > >> struct cycle *cycle; > >> struct page_data *page_data_buf; > >>+ struct page_flag *page_flag_buf; > >> }; > >> > >> /* > >>@@ -1295,11 +1301,12 @@ struct DumpInfo { > >> pthread_t **threads; > >> struct thread_args *kdump_thread_args; > >> struct page_data *page_data_buf; > >>+ struct page_flag **page_flag_buf; > >>+ sem_t page_flag_buf_sem; > >> pthread_rwlock_t usemmap_rwlock; > >> mdf_pfn_t current_pfn; > >> pthread_mutex_t current_pfn_mutex; > >>- mdf_pfn_t consumed_pfn; > >>- pthread_mutex_t consumed_pfn_mutex; > >>+ pthread_mutex_t page_data_mutex; > >> pthread_mutex_t filter_mutex; > >> }; > >> extern struct DumpInfo *info; > >>-- > >>1.8.3.1 > >> > >> > >> > >> > >>_______________________________________________ > >>kexec mailing list > >>kexec@lists.infradead.org > >>http://lists.infradead.org/mailman/listinfo/kexec > > > > From 1d7ad5dbbc29efe58171b1023ab0df09eb2815bf Mon Sep 17 00:00:00 2001 > From: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> > Date: Fri, 18 Mar 2016 10:35:35 +0800 > Subject: [PATCH] increment > > --- > makedumpfile.c | 107 ++++++++++++++++++++++++++++++++++++++++++--------------- > makedumpfile.h | 12 +++++-- > 2 files changed, 89 insertions(+), 30 deletions(-) > > diff --git a/makedumpfile.c b/makedumpfile.c > index 2b0864a..a304a61 100644 
> --- a/makedumpfile.c > +++ b/makedumpfile.c > @@ -3477,6 +3477,42 @@ calibrate_machdep_info(void) > } > > int > +initial_parallel_area(int page_data_buf_size) > +{ > + int i, j; > + struct page_flag *current; > + info->page_data_buf = info->parallel_area; > + void *page_data_buf = info->parallel_area + sizeof(struct page_data) * info->num_buffers; > + void *page_flag_list = page_data_buf + page_data_buf_size * info->num_buffers; > + > + for (i = 0; i < info->num_buffers; i++) { > + info->page_data_buf[i].buf = page_data_buf + page_data_buf_size * i; > + } > + > + > + if ((info->page_flag_list = malloc(sizeof(struct page_flag_list) * info->num_threads)) > + == NULL) { > + MSG("Can't allocate memory for page_flag_buf. %s\n", > + strerror(errno)); > + return FALSE; > + } > + > + for (i = 0; i < info->num_threads; i++) { > + info->page_flag_list[i].header = page_flag_list + sizeof(struct page_flag) * info->num_buffers * i; > + info->page_flag_list[i].current = 0; > + /* > + current = info->page_flag_buf[i]; > + for (j = 1; j < info->num_buffers; j++) { > + current->next = current + sizeof(struct page_flag); > + current = current->next; > + } > + current->next = info->page_flag_buf[i]; > +*/ > + } > + > +} > + > +int > initial_for_parallel() > { > unsigned long len_buf_out; > @@ -3575,9 +3611,15 @@ initial_for_parallel() > DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", > info->num_buffers); > > - /* > - * allocate memory for page_data > - */ > + if ((info->parallel_area = calloc(info->num_buffers, sizeof(struct page_data)+page_data_buf_size + sizeof(struct page_flag)*info->num_threads)) > + ==NULL) { > + MSG("Can't allocate memory for page_data_buf. %s\n", > + strerror(errno)); > + return FALSE; > + } > + > + initial_parallel_area(page_data_buf_size); > +/* > if ((info->page_data_buf = malloc(sizeof(struct page_data) * info->num_buffers)) > == NULL) { > MSG("Can't allocate memory for page_data_buf. 
%s\n", > @@ -3594,9 +3636,6 @@ initial_for_parallel() > } > } > > - /* > - * initial page_flag for each thread > - */ > if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) > == NULL) { > MSG("Can't allocate memory for page_flag_buf. %s\n", > @@ -3623,7 +3662,7 @@ initial_for_parallel() > } > current->next = info->page_flag_buf[i]; > } > - > +*/ > /* > * initial fd_memory for threads > */ > @@ -3685,6 +3724,15 @@ free_for_parallel() > if (info->kdump_thread_args != NULL) > free(info->kdump_thread_args); > > + if (info->page_flag_list != NULL) { > + free(info->page_flag_list); > + } > + > + if (info->parallel_area != NULL) { > + free(info->parallel_area); > + } > + > +/* > if (info->page_data_buf != NULL) { > for (i = 0; i < info->num_buffers; i++) { > if (info->page_data_buf[i].buf != NULL) > @@ -3705,7 +3753,7 @@ free_for_parallel() > } > free(info->page_flag_buf); > } > - > +*/ > if (info->parallel_info == NULL) > return; > > @@ -7122,12 +7170,14 @@ int finalize_zlib(z_stream *stream) > return err; > } > > +#define CURRENT_FLAG page_flag_header[current_page_flag] > void * > kdump_thread_function_cyclic(void *arg) { > void *retval = PTHREAD_FAIL; > struct thread_args *kdump_thread_args = (struct thread_args *)arg; > volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > - volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; > + struct page_flag *page_flag_header = kdump_thread_args->page_flag_list->header; > + int current_page_flag = kdump_thread_args->page_flag_list->current; > struct cycle *cycle = kdump_thread_args->cycle; > mdf_pfn_t pfn = cycle->start_pfn; > int index = kdump_thread_args->thread_num; > @@ -7193,7 +7243,7 @@ kdump_thread_function_cyclic(void *arg) { > > while (buf_ready == FALSE) { > pthread_testcancel(); > - if (page_flag_buf->ready == FLAG_READY) > + if (CURRENT_FLAG.ready == FLAG_READY) > continue; > > /* get next dumpable pfn */ > @@ -7208,8 +7258,8 @@ 
kdump_thread_function_cyclic(void *arg) { > } > info->current_pfn = pfn + 1; > > - page_flag_buf->pfn = pfn; > - page_flag_buf->ready = FLAG_FILLING; > + CURRENT_FLAG.pfn = pfn; > + CURRENT_FLAG.ready = FLAG_FILLING; > pthread_mutex_unlock(&info->current_pfn_mutex); > sem_post(&info->page_flag_buf_sem); > > @@ -7230,11 +7280,11 @@ kdump_thread_function_cyclic(void *arg) { > > if ((info->dump_level & DL_EXCLUDE_ZERO) > && is_zero_page(buf, info->page_size)) { > - page_flag_buf->zero = TRUE; > + CURRENT_FLAG.zero = TRUE; > goto next; > } > > - page_flag_buf->zero = FALSE; > + CURRENT_FLAG.zero = FALSE; > > /* > * Compress the page data. > @@ -7285,11 +7335,11 @@ kdump_thread_function_cyclic(void *arg) { > page_data_buf[index].size = info->page_size; > memcpy(page_data_buf[index].buf, buf, info->page_size); > } > - page_flag_buf->index = index; > + CURRENT_FLAG.index = index; > buf_ready = TRUE; > next: > - page_flag_buf->ready = FLAG_READY; > - page_flag_buf = page_flag_buf->next; > + CURRENT_FLAG.ready = FLAG_READY; > + current_page_flag = (current_page_flag + 1) % info->num_buffers; > > } > } > @@ -7306,6 +7356,8 @@ fail: > pthread_exit(retval); > } > > +#define CURRENT_PAGE_FLAG(i) (info->page_flag_list[i].header)[info->page_flag_list[i].current] > + > int > write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > struct cache_data *cd_page, > @@ -7379,7 +7431,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > kdump_thread_args[i].thread_num = i; > kdump_thread_args[i].len_buf_out = len_buf_out; > kdump_thread_args[i].page_data_buf = page_data_buf; > - kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; > + kdump_thread_args[i].page_flag_list = &(info->page_flag_list[i]); > kdump_thread_args[i].cycle = cycle; > > res = pthread_create(threads[i], NULL, > @@ -7418,15 +7470,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > * current_pfn is used for recording the value of pfn when checking the pfn. 
> */ > for (i = 0; i < info->num_threads; i++) { > - if (info->page_flag_buf[i]->ready == FLAG_UNUSED) > + if (CURRENT_PAGE_FLAG(i).ready == FLAG_UNUSED) > continue; > - temp_pfn = info->page_flag_buf[i]->pfn; > + temp_pfn = CURRENT_PAGE_FLAG(i).pfn; > > /* > * count how many threads have reached the end. > */ > if (temp_pfn >= end_pfn) { > - info->page_flag_buf[i]->ready = FLAG_UNUSED; > + CURRENT_PAGE_FLAG(i).ready = FLAG_UNUSED; > end_count++; > continue; > } > @@ -7449,7 +7501,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > * If the page_flag_buf is not ready, the pfn recorded may be changed. > * So we should recheck. > */ > - if (info->page_flag_buf[consuming]->ready != FLAG_READY) { > + if (CURRENT_PAGE_FLAG(consuming).ready != FLAG_READY) { > gettimeofday(&new, NULL); > if (new.tv_sec - last.tv_sec > WAIT_TIME) { > ERRMSG("Can't get data of pfn.\n"); > @@ -7458,7 +7510,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > continue; > } > > - if (current_pfn == info->page_flag_buf[consuming]->pfn) > + if (current_pfn == CURRENT_PAGE_FLAG(consuming).pfn) > break; > } > > @@ -7468,12 +7520,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > num_dumped++; > > > - if (info->page_flag_buf[consuming]->zero == TRUE) { > + if (CURRENT_PAGE_FLAG(consuming).zero == TRUE) { > if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) > goto out; > pfn_zero++; > } else { > - index = info->page_flag_buf[consuming]->index; > + index = CURRENT_PAGE_FLAG(consuming).index; > pd.flags = page_data_buf[index].flags; > pd.size = page_data_buf[index].size; > pd.page_flags = 0; > @@ -7491,8 +7543,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > goto out; > page_data_buf[index].used = FALSE; > } > - info->page_flag_buf[consuming]->ready = FLAG_UNUSED; > - info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; > + CURRENT_PAGE_FLAG(consuming).ready = FLAG_UNUSED; > + 
info->page_flag_list[consuming].current += 1; > + info->page_flag_list[consuming].current %= info->num_buffers; > } > finish: > ret = TRUE; > diff --git a/makedumpfile.h b/makedumpfile.h > index 4b315c0..8c4bc1a 100644 > --- a/makedumpfile.h > +++ b/makedumpfile.h > @@ -996,7 +996,12 @@ struct page_flag { > char zero; > char ready; > short index; > - struct page_flag *next; > +// struct page_flag *next; > +}; > + > +struct page_flag_list { > + struct page_flag *header; > + int current; > }; > > struct page_data > @@ -1012,7 +1017,7 @@ struct thread_args { > unsigned long len_buf_out; > struct cycle *cycle; > struct page_data *page_data_buf; > - struct page_flag *page_flag_buf; > + struct page_flag_list *page_flag_list; > }; > > /* > @@ -1298,10 +1303,11 @@ struct DumpInfo { > */ > int num_threads; > int num_buffers; > + void *parallel_area; > pthread_t **threads; > struct thread_args *kdump_thread_args; > struct page_data *page_data_buf; > - struct page_flag **page_flag_buf; > + struct page_flag_list *page_flag_list; > sem_t page_flag_buf_sem; > pthread_rwlock_t usemmap_rwlock; > mdf_pfn_t current_pfn; > -- > 1.8.3.1 > _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-18 4:16 ` Minfei Huang @ 2016-03-18 5:48 ` "Zhou, Wenjian/周文剑" 2016-03-24 5:28 ` "Zhou, Wenjian/周文剑" 2016-03-31 8:38 ` Minfei Huang 0 siblings, 2 replies; 33+ messages in thread From: "Zhou, Wenjian/周文剑" @ 2016-03-18 5:48 UTC (permalink / raw) To: Minfei Huang; +Cc: kexec [-- Attachment #1: Type: text/plain, Size: 33094 bytes --] On 03/18/2016 12:16 PM, Minfei Huang wrote: > On 03/18/16 at 10:46am, "Zhou, Wenjian/周文剑" wrote: >> Hello Minfei, >> >> Since I can't produce the bug, I reviewed the patch and wrote an increment patch. >> Though there are some bugs in the increment patch, >> I wonder if the previous bug still exists with this patch. >> Could you help me confirm it? > > Ok. I will help verify this increasing patch. > Thank you very much. >> >> And I have another question. >> Did it only occur in patch v4? > > This issue doesn't exist in v3. I have pasted the test result with > --num-thread 32 in that thread. > > applied makedumpfile with option -d 31 --num-threads 32 > real 3m3.533s Oh, then the patch in the previous mail may not work. I'm appreciated if you can also test the patch in this letter. I introduced semaphore to fix the bug in the v3. So I want to know if it is this which affects the result. The attached patch is based on v4, used to remove semaohore. -- Thanks Zhou > > Thanks > Minfei > >> The previous patches almost have the same logic. >> >> -- >> Thanks >> Zhou >> >> On 03/15/2016 02:34 PM, Minfei Huang wrote: >>> Hi, Zhou. >>> >>> I have applied this patch base on 1.5.9. There are several testcases I >>> have tested. 
>>> >>> - makedumpfile --num-threads 64 -d 31 >>> real 0m0.010s >>> user 0m0.002s >>> sys 0m0.009s >>> >>> - makedumpfile --num-threads 31 -d 31 >>> real 2m40.915s >>> user 10m50.900s >>> sys 23m9.664s >>> >>> makedumpfile --num-threads 30 -d 31 >>> real 0m0.006s >>> user 0m0.002s >>> sys 0m0.004s >>> >>> makedumpfile --num-threads 32 -d 31 >>> real 0m0.007s >>> user 0m0.002s >>> sys 0m0.005s >>> >>> - makedumpfile --num-threads 8 -d 31 >>> real 2m32.692s >>> user 7m4.630s >>> sys 2m0.369s >>> >>> - makedumpfile --num-threads 1 -d 31 >>> real 4m42.423s >>> user 7m27.153s >>> sys 0m22.490s >>> >>> - makedumpfile.orig -d 31 >>> real 4m1.297s >>> user 3m39.696s >>> sys 0m15.200s >>> >>> This patch has a huge increment to the filter performance under 31. But >>> it is not stable, since makedumpfile fails to dump vmcore intermittently. >>> You can find the above test result, makedumpfile fails to dump vmcore >>> with option --num-threads 64, also it may occur with option >>> --number-threads 8. >>> >>> Thanks >>> Minfei >>> >>> On 03/09/16 at 08:27am, Zhou Wenjian wrote: >>>> v4: >>>> 1. fix a bug caused by the logic >>>> v3: >>>> 1. remove some unused variables >>>> 2. fix a bug caused by the wrong logic >>>> 3. fix a bug caused by optimising >>>> 4. improve more performance by using Minoru Usui's code >>>> >>>> multi-threads implementation will introduce extra cost when handling >>>> each page. The origin implementation will also do the extra work for >>>> filtered pages. So there is a big performance degradation in >>>> --num-threads -d 31. >>>> The new implementation won't do the extra work for filtered pages any >>>> more. So the performance of -d 31 is close to that of serial processing. >>>> >>>> The new implementation is just like the following: >>>> * The basic idea is producer producing page and consumer writing page. >>>> * Each producer have a page_flag_buf list which is used for storing >>>> page's description. 
>>>> * The size of page_flag_buf is little so it won't take too much memory. >>>> * And all producers will share a page_data_buf array which is >>>> used for storing page's compressed data. >>>> * The main thread is the consumer. It will find the next pfn and write >>>> it into file. >>>> * The next pfn is smallest pfn in all page_flag_buf. >>>> >>>> Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> >>>> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> >>>> --- >>>> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- >>>> makedumpfile.h | 35 ++++--- >>>> 2 files changed, 202 insertions(+), 131 deletions(-) >>>> >>>> diff --git a/makedumpfile.c b/makedumpfile.c >>>> index fa0b779..2b0864a 100644 >>>> --- a/makedumpfile.c >>>> +++ b/makedumpfile.c >>>> @@ -3483,7 +3483,8 @@ initial_for_parallel() >>>> unsigned long page_data_buf_size; >>>> unsigned long limit_size; >>>> int page_data_num; >>>> - int i; >>>> + struct page_flag *current; >>>> + int i, j; >>>> >>>> len_buf_out = calculate_len_buf_out(info->page_size); >>>> >>>> @@ -3560,10 +3561,16 @@ initial_for_parallel() >>>> >>>> limit_size = (get_free_memory_size() >>>> - MAP_REGION * info->num_threads) * 0.6; >>>> + if (limit_size < 0) { >>>> + MSG("Free memory is not enough for multi-threads\n"); >>>> + return FALSE; >>>> + } >>>> >>>> page_data_num = limit_size / page_data_buf_size; >>>> + info->num_buffers = 3 * info->num_threads; >>>> >>>> - info->num_buffers = MIN(NUM_BUFFERS, page_data_num); >>>> + info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); >>>> + info->num_buffers = MIN(info->num_buffers, page_data_num); >>>> >>>> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", >>>> info->num_buffers); >>>> @@ -3588,6 +3595,36 @@ initial_for_parallel() >>>> } >>>> >>>> /* >>>> + * initial page_flag for each thread >>>> + */ >>>> + if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) >>>> + == NULL) { >>>> + MSG("Can't allocate memory for 
page_flag_buf. %s\n", >>>> + strerror(errno)); >>>> + return FALSE; >>>> + } >>>> + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); >>>> + >>>> + for (i = 0; i < info->num_threads; i++) { >>>> + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { >>>> + MSG("Can't allocate memory for page_flag. %s\n", >>>> + strerror(errno)); >>>> + return FALSE; >>>> + } >>>> + current = info->page_flag_buf[i]; >>>> + >>>> + for (j = 1; j < NUM_BUFFERS; j++) { >>>> + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { >>>> + MSG("Can't allocate memory for page_flag. %s\n", >>>> + strerror(errno)); >>>> + return FALSE; >>>> + } >>>> + current = current->next; >>>> + } >>>> + current->next = info->page_flag_buf[i]; >>>> + } >>>> + >>>> + /* >>>> * initial fd_memory for threads >>>> */ >>>> for (i = 0; i < info->num_threads; i++) { >>>> @@ -3612,7 +3649,8 @@ initial_for_parallel() >>>> void >>>> free_for_parallel() >>>> { >>>> - int i; >>>> + int i, j; >>>> + struct page_flag *current; >>>> >>>> if (info->threads != NULL) { >>>> for (i = 0; i < info->num_threads; i++) { >>>> @@ -3655,6 +3693,19 @@ free_for_parallel() >>>> free(info->page_data_buf); >>>> } >>>> >>>> + if (info->page_flag_buf != NULL) { >>>> + for (i = 0; i < info->num_threads; i++) { >>>> + for (j = 0; j < NUM_BUFFERS; j++) { >>>> + if (info->page_flag_buf[i] != NULL) { >>>> + current = info->page_flag_buf[i]; >>>> + info->page_flag_buf[i] = current->next; >>>> + free(current); >>>> + } >>>> + } >>>> + } >>>> + free(info->page_flag_buf); >>>> + } >>>> + >>>> if (info->parallel_info == NULL) >>>> return; >>>> >>>> @@ -7075,11 +7126,11 @@ void * >>>> kdump_thread_function_cyclic(void *arg) { >>>> void *retval = PTHREAD_FAIL; >>>> struct thread_args *kdump_thread_args = (struct thread_args *)arg; >>>> - struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>>> + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>>> + 
volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; >>>> struct cycle *cycle = kdump_thread_args->cycle; >>>> - int page_data_num = kdump_thread_args->page_data_num; >>>> - mdf_pfn_t pfn; >>>> - int index; >>>> + mdf_pfn_t pfn = cycle->start_pfn; >>>> + int index = kdump_thread_args->thread_num; >>>> int buf_ready; >>>> int dumpable; >>>> int fd_memory = 0; >>>> @@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { >>>> kdump_thread_args->thread_num); >>>> } >>>> >>>> - while (1) { >>>> - /* get next pfn */ >>>> - pthread_mutex_lock(&info->current_pfn_mutex); >>>> - pfn = info->current_pfn; >>>> - info->current_pfn++; >>>> - pthread_mutex_unlock(&info->current_pfn_mutex); >>>> - >>>> - if (pfn >= kdump_thread_args->end_pfn) >>>> - break; >>>> - >>>> - index = -1; >>>> + /* >>>> + * filtered page won't take anything >>>> + * unfiltered zero page will only take a page_flag_buf >>>> + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf >>>> + */ >>>> + while (pfn < cycle->end_pfn) { >>>> buf_ready = FALSE; >>>> >>>> + pthread_mutex_lock(&info->page_data_mutex); >>>> + while (page_data_buf[index].used != FALSE) { >>>> + index = (index + 1) % info->num_buffers; >>>> + } >>>> + page_data_buf[index].used = TRUE; >>>> + pthread_mutex_unlock(&info->page_data_mutex); >>>> + >>>> while (buf_ready == FALSE) { >>>> pthread_testcancel(); >>>> - >>>> - index = pfn % page_data_num; >>>> - >>>> - if (pfn - info->consumed_pfn > info->num_buffers) >>>> + if (page_flag_buf->ready == FLAG_READY) >>>> continue; >>>> >>>> - if (page_data_buf[index].ready != 0) >>>> - continue; >>>> - >>>> - pthread_mutex_lock(&page_data_buf[index].mutex); >>>> - >>>> - if (page_data_buf[index].ready != 0) >>>> - goto unlock; >>>> - >>>> - buf_ready = TRUE; >>>> + /* get next dumpable pfn */ >>>> + pthread_mutex_lock(&info->current_pfn_mutex); >>>> + for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { >>>> + dumpable = is_dumpable( >>>> + 
info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >>>> + pfn, >>>> + cycle); >>>> + if (dumpable) >>>> + break; >>>> + } >>>> + info->current_pfn = pfn + 1; >>>> >>>> - page_data_buf[index].pfn = pfn; >>>> - page_data_buf[index].ready = 1; >>>> + page_flag_buf->pfn = pfn; >>>> + page_flag_buf->ready = FLAG_FILLING; >>>> + pthread_mutex_unlock(&info->current_pfn_mutex); >>>> + sem_post(&info->page_flag_buf_sem); >>>> >>>> - dumpable = is_dumpable( >>>> - info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >>>> - pfn, >>>> - cycle); >>>> - page_data_buf[index].dumpable = dumpable; >>>> - if (!dumpable) >>>> - goto unlock; >>>> + if (pfn >= cycle->end_pfn) { >>>> + info->current_pfn = cycle->end_pfn; >>>> + page_data_buf[index].used = FALSE; >>>> + break; >>>> + } >>>> >>>> if (!read_pfn_parallel(fd_memory, pfn, buf, >>>> &bitmap_memory_parallel, >>>> @@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { >>>> >>>> if ((info->dump_level & DL_EXCLUDE_ZERO) >>>> && is_zero_page(buf, info->page_size)) { >>>> - page_data_buf[index].zero = TRUE; >>>> - goto unlock; >>>> + page_flag_buf->zero = TRUE; >>>> + goto next; >>>> } >>>> >>>> - page_data_buf[index].zero = FALSE; >>>> + page_flag_buf->zero = FALSE; >>>> >>>> /* >>>> * Compress the page data. 
>>>> @@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { >>>> page_data_buf[index].flags = >>>> DUMP_DH_COMPRESSED_LZO; >>>> page_data_buf[index].size = size_out; >>>> + >>>> memcpy(page_data_buf[index].buf, buf_out, size_out); >>>> #endif >>>> #ifdef USESNAPPY >>>> @@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { >>>> page_data_buf[index].size = info->page_size; >>>> memcpy(page_data_buf[index].buf, buf, info->page_size); >>>> } >>>> -unlock: >>>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>>> + page_flag_buf->index = index; >>>> + buf_ready = TRUE; >>>> +next: >>>> + page_flag_buf->ready = FLAG_READY; >>>> + page_flag_buf = page_flag_buf->next; >>>> >>>> } >>>> } >>>> - >>>> retval = NULL; >>>> >>>> fail: >>>> @@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> struct page_desc pd; >>>> struct timeval tv_start; >>>> struct timeval last, new; >>>> - unsigned long long consuming_pfn; >>>> pthread_t **threads = NULL; >>>> struct thread_args *kdump_thread_args = NULL; >>>> void *thread_result; >>>> - int page_data_num; >>>> + int page_buf_num; >>>> struct page_data *page_data_buf = NULL; >>>> int i; >>>> int index; >>>> + int end_count, consuming, check_count; >>>> + mdf_pfn_t current_pfn, temp_pfn; >>>> >>>> if (info->flag_elf_dumpfile) >>>> return FALSE; >>>> @@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> goto out; >>>> } >>>> >>>> - res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); >>>> - if (res != 0) { >>>> - ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", >>>> - strerror(res)); >>>> - goto out; >>>> - } >>>> - >>>> res = pthread_mutex_init(&info->filter_mutex, NULL); >>>> if (res != 0) { >>>> ERRMSG("Can't initialize filter_mutex. 
%s\n", strerror(res)); >>>> @@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> end_pfn = cycle->end_pfn; >>>> >>>> info->current_pfn = start_pfn; >>>> - info->consumed_pfn = start_pfn - 1; >>>> >>>> threads = info->threads; >>>> kdump_thread_args = info->kdump_thread_args; >>>> >>>> - page_data_num = info->num_buffers; >>>> + page_buf_num = info->num_buffers; >>>> page_data_buf = info->page_data_buf; >>>> + pthread_mutex_init(&info->page_data_mutex, NULL); >>>> + sem_init(&info->page_flag_buf_sem, 0, 0); >>>> >>>> - for (i = 0; i < page_data_num; i++) { >>>> - /* >>>> - * producer will use pfn in page_data_buf to decide the >>>> - * consumed pfn >>>> - */ >>>> - page_data_buf[i].pfn = start_pfn - 1; >>>> - page_data_buf[i].ready = 0; >>>> - res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); >>>> - if (res != 0) { >>>> - ERRMSG("Can't initialize mutex of page_data_buf. %s\n", >>>> - strerror(res)); >>>> - goto out; >>>> - } >>>> - } >>>> + for (i = 0; i < page_buf_num; i++) >>>> + page_data_buf[i].used = FALSE; >>>> >>>> for (i = 0; i < info->num_threads; i++) { >>>> kdump_thread_args[i].thread_num = i; >>>> kdump_thread_args[i].len_buf_out = len_buf_out; >>>> - kdump_thread_args[i].start_pfn = start_pfn; >>>> - kdump_thread_args[i].end_pfn = end_pfn; >>>> - kdump_thread_args[i].page_data_num = page_data_num; >>>> kdump_thread_args[i].page_data_buf = page_data_buf; >>>> + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; >>>> kdump_thread_args[i].cycle = cycle; >>>> >>>> res = pthread_create(threads[i], NULL, >>>> @@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> } >>>> } >>>> >>>> - consuming_pfn = start_pfn; >>>> - index = -1; >>>> + end_count = 0; >>>> + while (1) { >>>> + consuming = 0; >>>> + check_count = 0; >>>> >>>> - gettimeofday(&last, NULL); >>>> + /* >>>> + * The basic idea is producer producing page and consumer writing page. 
>>>> + * Each producer have a page_flag_buf list which is used for storing page's description. >>>> + * The size of page_flag_buf is little so it won't take too much memory. >>>> + * And all producers will share a page_data_buf array which is used for storing page's compressed data. >>>> + * The main thread is the consumer. It will find the next pfn and write it into file. >>>> + * The next pfn is smallest pfn in all page_flag_buf. >>>> + */ >>>> + sem_wait(&info->page_flag_buf_sem); >>>> + gettimeofday(&last, NULL); >>>> + while (1) { >>>> + current_pfn = end_pfn; >>>> >>>> - while (consuming_pfn < end_pfn) { >>>> - index = consuming_pfn % page_data_num; >>>> + /* >>>> + * page_flag_buf is in circular linked list. >>>> + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's >>>> + * page_flag_buf list. >>>> + * consuming is used for recording in which thread the pfn is the smallest. >>>> + * current_pfn is used for recording the value of pfn when checking the pfn. >>>> + */ >>>> + for (i = 0; i < info->num_threads; i++) { >>>> + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) >>>> + continue; >>>> + temp_pfn = info->page_flag_buf[i]->pfn; >>>> >>>> - gettimeofday(&new, NULL); >>>> - if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>>> - ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); >>>> - goto out; >>>> - } >>>> + /* >>>> + * count how many threads have reached the end. 
>>>> + */ >>>> + if (temp_pfn >= end_pfn) { >>>> + info->page_flag_buf[i]->ready = FLAG_UNUSED; >>>> + end_count++; >>>> + continue; >>>> + } >>>> >>>> - /* >>>> - * check pfn first without mutex locked to reduce the time >>>> - * trying to lock the mutex >>>> - */ >>>> - if (page_data_buf[index].pfn != consuming_pfn) >>>> - continue; >>>> + if (current_pfn < temp_pfn) >>>> + continue; >>>> >>>> - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) >>>> - continue; >>>> + check_count++; >>>> + consuming = i; >>>> + current_pfn = temp_pfn; >>>> + } >>>> >>>> - /* check whether the found one is ready to be consumed */ >>>> - if (page_data_buf[index].pfn != consuming_pfn || >>>> - page_data_buf[index].ready != 1) { >>>> - goto unlock; >>>> + /* >>>> + * If all the threads have reached the end, we will finish writing. >>>> + */ >>>> + if (end_count >= info->num_threads) >>>> + goto finish; >>>> + >>>> + /* >>>> + * If the page_flag_buf is not ready, the pfn recorded may be changed. >>>> + * So we should recheck. 
>>>> + */ >>>> + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { >>>> + gettimeofday(&new, NULL); >>>> + if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>>> + ERRMSG("Can't get data of pfn.\n"); >>>> + goto out; >>>> + } >>>> + continue; >>>> + } >>>> + >>>> + if (current_pfn == info->page_flag_buf[consuming]->pfn) >>>> + break; >>>> } >>>> >>>> if ((num_dumped % per) == 0) >>>> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); >>>> >>>> - /* next pfn is found, refresh last here */ >>>> - last = new; >>>> - consuming_pfn++; >>>> - info->consumed_pfn++; >>>> - page_data_buf[index].ready = 0; >>>> - >>>> - if (page_data_buf[index].dumpable == FALSE) >>>> - goto unlock; >>>> - >>>> num_dumped++; >>>> >>>> - if (page_data_buf[index].zero == TRUE) { >>>> + >>>> + if (info->page_flag_buf[consuming]->zero == TRUE) { >>>> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) >>>> goto out; >>>> pfn_zero++; >>>> } else { >>>> + index = info->page_flag_buf[consuming]->index; >>>> pd.flags = page_data_buf[index].flags; >>>> pd.size = page_data_buf[index].size; >>>> pd.page_flags = 0; >>>> @@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> */ >>>> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) >>>> goto out; >>>> - >>>> + page_data_buf[index].used = FALSE; >>>> } >>>> -unlock: >>>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>>> + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; >>>> + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; >>>> } >>>> - >>>> +finish: >>>> ret = TRUE; >>>> /* >>>> * print [100 %] >>>> @@ -7463,15 +7532,9 @@ out: >>>> } >>>> } >>>> >>>> - if (page_data_buf != NULL) { >>>> - for (i = 0; i < page_data_num; i++) { >>>> - pthread_mutex_destroy(&page_data_buf[i].mutex); >>>> - } >>>> - } >>>> - >>>> + sem_destroy(&info->page_flag_buf_sem); >>>> pthread_rwlock_destroy(&info->usemmap_rwlock); >>>> pthread_mutex_destroy(&info->filter_mutex); 
>>>> - pthread_mutex_destroy(&info->consumed_pfn_mutex); >>>> pthread_mutex_destroy(&info->current_pfn_mutex); >>>> >>>> return ret; >>>> @@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag >>>> num_dumped++; >>>> if (!read_pfn(pfn, buf)) >>>> goto out; >>>> + >>>> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); >>>> >>>> /* >>>> diff --git a/makedumpfile.h b/makedumpfile.h >>>> index e0b5bbf..4b315c0 100644 >>>> --- a/makedumpfile.h >>>> +++ b/makedumpfile.h >>>> @@ -44,6 +44,7 @@ >>>> #include "print_info.h" >>>> #include "sadump_mod.h" >>>> #include <pthread.h> >>>> +#include <semaphore.h> >>>> >>>> /* >>>> * Result of command >>>> @@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; >>>> #define PAGE_DATA_NUM (50) >>>> #define WAIT_TIME (60 * 10) >>>> #define PTHREAD_FAIL ((void *)-2) >>>> -#define NUM_BUFFERS (50) >>>> +#define NUM_BUFFERS (20) >>>> >>>> struct mmap_cache { >>>> char *mmap_buf; >>>> @@ -985,28 +986,33 @@ struct mmap_cache { >>>> off_t mmap_end_offset; >>>> }; >>>> >>>> +enum { >>>> + FLAG_UNUSED, >>>> + FLAG_READY, >>>> + FLAG_FILLING >>>> +}; >>>> +struct page_flag { >>>> + mdf_pfn_t pfn; >>>> + char zero; >>>> + char ready; >>>> + short index; >>>> + struct page_flag *next; >>>> +}; >>>> + >>>> struct page_data >>>> { >>>> - mdf_pfn_t pfn; >>>> - int dumpable; >>>> - int zero; >>>> - unsigned int flags; >>>> long size; >>>> unsigned char *buf; >>>> - pthread_mutex_t mutex; >>>> - /* >>>> - * whether the page_data is ready to be consumed >>>> - */ >>>> - int ready; >>>> + int flags; >>>> + int used; >>>> }; >>>> >>>> struct thread_args { >>>> int thread_num; >>>> unsigned long len_buf_out; >>>> - mdf_pfn_t start_pfn, end_pfn; >>>> - int page_data_num; >>>> struct cycle *cycle; >>>> struct page_data *page_data_buf; >>>> + struct page_flag *page_flag_buf; >>>> }; >>>> >>>> /* >>>> @@ -1295,11 +1301,12 @@ struct DumpInfo { >>>> pthread_t **threads; >>>> struct thread_args 
*kdump_thread_args; >>>> struct page_data *page_data_buf; >>>> + struct page_flag **page_flag_buf; >>>> + sem_t page_flag_buf_sem; >>>> pthread_rwlock_t usemmap_rwlock; >>>> mdf_pfn_t current_pfn; >>>> pthread_mutex_t current_pfn_mutex; >>>> - mdf_pfn_t consumed_pfn; >>>> - pthread_mutex_t consumed_pfn_mutex; >>>> + pthread_mutex_t page_data_mutex; >>>> pthread_mutex_t filter_mutex; >>>> }; >>>> extern struct DumpInfo *info; >>>> -- >>>> 1.8.3.1 >>>> >>>> >>>> >>>> >>>> _______________________________________________ >>>> kexec mailing list >>>> kexec@lists.infradead.org >>>> http://lists.infradead.org/mailman/listinfo/kexec >> >> >> > >> From 1d7ad5dbbc29efe58171b1023ab0df09eb2815bf Mon Sep 17 00:00:00 2001 >> From: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> >> Date: Fri, 18 Mar 2016 10:35:35 +0800 >> Subject: [PATCH] increment >> >> --- >> makedumpfile.c | 107 ++++++++++++++++++++++++++++++++++++++++++--------------- >> makedumpfile.h | 12 +++++-- >> 2 files changed, 89 insertions(+), 30 deletions(-) >> >> diff --git a/makedumpfile.c b/makedumpfile.c >> index 2b0864a..a304a61 100644 >> --- a/makedumpfile.c >> +++ b/makedumpfile.c >> @@ -3477,6 +3477,42 @@ calibrate_machdep_info(void) >> } >> >> int >> +initial_parallel_area(int page_data_buf_size) >> +{ >> + int i, j; >> + struct page_flag *current; >> + info->page_data_buf = info->parallel_area; >> + void *page_data_buf = info->parallel_area + sizeof(struct page_data) * info->num_buffers; >> + void *page_flag_list = page_data_buf + page_data_buf_size * info->num_buffers; >> + >> + for (i = 0; i < info->num_buffers; i++) { >> + info->page_data_buf[i].buf = page_data_buf + page_data_buf_size * i; >> + } >> + >> + >> + if ((info->page_flag_list = malloc(sizeof(struct page_flag_list) * info->num_threads)) >> + == NULL) { >> + MSG("Can't allocate memory for page_flag_buf. 
%s\n", >> + strerror(errno)); >> + return FALSE; >> + } >> + >> + for (i = 0; i < info->num_threads; i++) { >> + info->page_flag_list[i].header = page_flag_list + sizeof(struct page_flag) * info->num_buffers * i; >> + info->page_flag_list[i].current = 0; >> + /* >> + current = info->page_flag_buf[i]; >> + for (j = 1; j < info->num_buffers; j++) { >> + current->next = current + sizeof(struct page_flag); >> + current = current->next; >> + } >> + current->next = info->page_flag_buf[i]; >> +*/ >> + } >> + >> +} >> + >> +int >> initial_for_parallel() >> { >> unsigned long len_buf_out; >> @@ -3575,9 +3611,15 @@ initial_for_parallel() >> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", >> info->num_buffers); >> >> - /* >> - * allocate memory for page_data >> - */ >> + if ((info->parallel_area = calloc(info->num_buffers, sizeof(struct page_data)+page_data_buf_size + sizeof(struct page_flag)*info->num_threads)) >> + ==NULL) { >> + MSG("Can't allocate memory for page_data_buf. %s\n", >> + strerror(errno)); >> + return FALSE; >> + } >> + >> + initial_parallel_area(page_data_buf_size); >> +/* >> if ((info->page_data_buf = malloc(sizeof(struct page_data) * info->num_buffers)) >> == NULL) { >> MSG("Can't allocate memory for page_data_buf. %s\n", >> @@ -3594,9 +3636,6 @@ initial_for_parallel() >> } >> } >> >> - /* >> - * initial page_flag for each thread >> - */ >> if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) >> == NULL) { >> MSG("Can't allocate memory for page_flag_buf. 
%s\n", >> @@ -3623,7 +3662,7 @@ initial_for_parallel() >> } >> current->next = info->page_flag_buf[i]; >> } >> - >> +*/ >> /* >> * initial fd_memory for threads >> */ >> @@ -3685,6 +3724,15 @@ free_for_parallel() >> if (info->kdump_thread_args != NULL) >> free(info->kdump_thread_args); >> >> + if (info->page_flag_list != NULL) { >> + free(info->page_flag_list); >> + } >> + >> + if (info->parallel_area != NULL) { >> + free(info->parallel_area); >> + } >> + >> +/* >> if (info->page_data_buf != NULL) { >> for (i = 0; i < info->num_buffers; i++) { >> if (info->page_data_buf[i].buf != NULL) >> @@ -3705,7 +3753,7 @@ free_for_parallel() >> } >> free(info->page_flag_buf); >> } >> - >> +*/ >> if (info->parallel_info == NULL) >> return; >> >> @@ -7122,12 +7170,14 @@ int finalize_zlib(z_stream *stream) >> return err; >> } >> >> +#define CURRENT_FLAG page_flag_header[current_page_flag] >> void * >> kdump_thread_function_cyclic(void *arg) { >> void *retval = PTHREAD_FAIL; >> struct thread_args *kdump_thread_args = (struct thread_args *)arg; >> volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >> - volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; >> + struct page_flag *page_flag_header = kdump_thread_args->page_flag_list->header; >> + int current_page_flag = kdump_thread_args->page_flag_list->current; >> struct cycle *cycle = kdump_thread_args->cycle; >> mdf_pfn_t pfn = cycle->start_pfn; >> int index = kdump_thread_args->thread_num; >> @@ -7193,7 +7243,7 @@ kdump_thread_function_cyclic(void *arg) { >> >> while (buf_ready == FALSE) { >> pthread_testcancel(); >> - if (page_flag_buf->ready == FLAG_READY) >> + if (CURRENT_FLAG.ready == FLAG_READY) >> continue; >> >> /* get next dumpable pfn */ >> @@ -7208,8 +7258,8 @@ kdump_thread_function_cyclic(void *arg) { >> } >> info->current_pfn = pfn + 1; >> >> - page_flag_buf->pfn = pfn; >> - page_flag_buf->ready = FLAG_FILLING; >> + CURRENT_FLAG.pfn = pfn; >> + CURRENT_FLAG.ready = 
FLAG_FILLING; >> pthread_mutex_unlock(&info->current_pfn_mutex); >> sem_post(&info->page_flag_buf_sem); >> >> @@ -7230,11 +7280,11 @@ kdump_thread_function_cyclic(void *arg) { >> >> if ((info->dump_level & DL_EXCLUDE_ZERO) >> && is_zero_page(buf, info->page_size)) { >> - page_flag_buf->zero = TRUE; >> + CURRENT_FLAG.zero = TRUE; >> goto next; >> } >> >> - page_flag_buf->zero = FALSE; >> + CURRENT_FLAG.zero = FALSE; >> >> /* >> * Compress the page data. >> @@ -7285,11 +7335,11 @@ kdump_thread_function_cyclic(void *arg) { >> page_data_buf[index].size = info->page_size; >> memcpy(page_data_buf[index].buf, buf, info->page_size); >> } >> - page_flag_buf->index = index; >> + CURRENT_FLAG.index = index; >> buf_ready = TRUE; >> next: >> - page_flag_buf->ready = FLAG_READY; >> - page_flag_buf = page_flag_buf->next; >> + CURRENT_FLAG.ready = FLAG_READY; >> + current_page_flag = (current_page_flag + 1) % info->num_buffers; >> >> } >> } >> @@ -7306,6 +7356,8 @@ fail: >> pthread_exit(retval); >> } >> >> +#define CURRENT_PAGE_FLAG(i) (info->page_flag_list[i].header)[info->page_flag_list[i].current] >> + >> int >> write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> struct cache_data *cd_page, >> @@ -7379,7 +7431,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> kdump_thread_args[i].thread_num = i; >> kdump_thread_args[i].len_buf_out = len_buf_out; >> kdump_thread_args[i].page_data_buf = page_data_buf; >> - kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; >> + kdump_thread_args[i].page_flag_list = &(info->page_flag_list[i]); >> kdump_thread_args[i].cycle = cycle; >> >> res = pthread_create(threads[i], NULL, >> @@ -7418,15 +7470,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> * current_pfn is used for recording the value of pfn when checking the pfn. 
>> */ >> for (i = 0; i < info->num_threads; i++) { >> - if (info->page_flag_buf[i]->ready == FLAG_UNUSED) >> + if (CURRENT_PAGE_FLAG(i).ready == FLAG_UNUSED) >> continue; >> - temp_pfn = info->page_flag_buf[i]->pfn; >> + temp_pfn = CURRENT_PAGE_FLAG(i).pfn; >> >> /* >> * count how many threads have reached the end. >> */ >> if (temp_pfn >= end_pfn) { >> - info->page_flag_buf[i]->ready = FLAG_UNUSED; >> + CURRENT_PAGE_FLAG(i).ready = FLAG_UNUSED; >> end_count++; >> continue; >> } >> @@ -7449,7 +7501,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> * If the page_flag_buf is not ready, the pfn recorded may be changed. >> * So we should recheck. >> */ >> - if (info->page_flag_buf[consuming]->ready != FLAG_READY) { >> + if (CURRENT_PAGE_FLAG(consuming).ready != FLAG_READY) { >> gettimeofday(&new, NULL); >> if (new.tv_sec - last.tv_sec > WAIT_TIME) { >> ERRMSG("Can't get data of pfn.\n"); >> @@ -7458,7 +7510,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> continue; >> } >> >> - if (current_pfn == info->page_flag_buf[consuming]->pfn) >> + if (current_pfn == CURRENT_PAGE_FLAG(consuming).pfn) >> break; >> } >> >> @@ -7468,12 +7520,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> num_dumped++; >> >> >> - if (info->page_flag_buf[consuming]->zero == TRUE) { >> + if (CURRENT_PAGE_FLAG(consuming).zero == TRUE) { >> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) >> goto out; >> pfn_zero++; >> } else { >> - index = info->page_flag_buf[consuming]->index; >> + index = CURRENT_PAGE_FLAG(consuming).index; >> pd.flags = page_data_buf[index].flags; >> pd.size = page_data_buf[index].size; >> pd.page_flags = 0; >> @@ -7491,8 +7543,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> goto out; >> page_data_buf[index].used = FALSE; >> } >> - info->page_flag_buf[consuming]->ready = FLAG_UNUSED; >> - info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; >> + 
CURRENT_PAGE_FLAG(consuming).ready = FLAG_UNUSED; >> + info->page_flag_list[consuming].current += 1; >> + info->page_flag_list[consuming].current %= info->num_buffers; >> } >> finish: >> ret = TRUE; >> diff --git a/makedumpfile.h b/makedumpfile.h >> index 4b315c0..8c4bc1a 100644 >> --- a/makedumpfile.h >> +++ b/makedumpfile.h >> @@ -996,7 +996,12 @@ struct page_flag { >> char zero; >> char ready; >> short index; >> - struct page_flag *next; >> +// struct page_flag *next; >> +}; >> + >> +struct page_flag_list { >> + struct page_flag *header; >> + int current; >> }; >> >> struct page_data >> @@ -1012,7 +1017,7 @@ struct thread_args { >> unsigned long len_buf_out; >> struct cycle *cycle; >> struct page_data *page_data_buf; >> - struct page_flag *page_flag_buf; >> + struct page_flag_list *page_flag_list; >> }; >> >> /* >> @@ -1298,10 +1303,11 @@ struct DumpInfo { >> */ >> int num_threads; >> int num_buffers; >> + void *parallel_area; >> pthread_t **threads; >> struct thread_args *kdump_thread_args; >> struct page_data *page_data_buf; >> - struct page_flag **page_flag_buf; >> + struct page_flag_list *page_flag_list; >> sem_t page_flag_buf_sem; >> pthread_rwlock_t usemmap_rwlock; >> mdf_pfn_t current_pfn; >> -- >> 1.8.3.1 >> [-- Attachment #2: 0001-remove-sem.patch --] [-- Type: text/x-patch, Size: 2395 bytes --] From 1dbf68c21a2bbc7b454c1a742c1e3ff00bb85829 Mon Sep 17 00:00:00 2001 From: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> Date: Fri, 18 Mar 2016 13:36:57 +0800 Subject: [PATCH] remove sem --- makedumpfile.c | 8 ++++---- makedumpfile.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/makedumpfile.c b/makedumpfile.c index 2b0864a..8de5e1d 100644 --- a/makedumpfile.c +++ b/makedumpfile.c @@ -7211,7 +7211,7 @@ kdump_thread_function_cyclic(void *arg) { page_flag_buf->pfn = pfn; page_flag_buf->ready = FLAG_FILLING; pthread_mutex_unlock(&info->current_pfn_mutex); - sem_post(&info->page_flag_buf_sem); +// sem_post(&info->page_flag_buf_sem); if (pfn >= 
cycle->end_pfn) { info->current_pfn = cycle->end_pfn; @@ -7370,7 +7370,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, page_buf_num = info->num_buffers; page_data_buf = info->page_data_buf; pthread_mutex_init(&info->page_data_mutex, NULL); - sem_init(&info->page_flag_buf_sem, 0, 0); +// sem_init(&info->page_flag_buf_sem, 0, 0); for (i = 0; i < page_buf_num; i++) page_data_buf[i].used = FALSE; @@ -7405,7 +7405,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, * The main thread is the consumer. It will find the next pfn and write it into file. * The next pfn is smallest pfn in all page_flag_buf. */ - sem_wait(&info->page_flag_buf_sem); +// sem_wait(&info->page_flag_buf_sem); gettimeofday(&last, NULL); while (1) { current_pfn = end_pfn; @@ -7532,7 +7532,7 @@ out: } } - sem_destroy(&info->page_flag_buf_sem); +// sem_destroy(&info->page_flag_buf_sem); pthread_rwlock_destroy(&info->usemmap_rwlock); pthread_mutex_destroy(&info->filter_mutex); pthread_mutex_destroy(&info->current_pfn_mutex); diff --git a/makedumpfile.h b/makedumpfile.h index 4b315c0..7627286 100644 --- a/makedumpfile.h +++ b/makedumpfile.h @@ -44,7 +44,7 @@ #include "print_info.h" #include "sadump_mod.h" #include <pthread.h> -#include <semaphore.h> +//#include <semaphore.h> /* * Result of command @@ -1302,7 +1302,7 @@ struct DumpInfo { struct thread_args *kdump_thread_args; struct page_data *page_data_buf; struct page_flag **page_flag_buf; - sem_t page_flag_buf_sem; +// sem_t page_flag_buf_sem; pthread_rwlock_t usemmap_rwlock; mdf_pfn_t current_pfn; pthread_mutex_t current_pfn_mutex; -- 1.8.3.1 [-- Attachment #3: Type: text/plain, Size: 143 bytes --] _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply related [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-18 5:48 ` "Zhou, Wenjian/周文剑" @ 2016-03-24 5:28 ` "Zhou, Wenjian/周文剑" 2016-03-24 5:39 ` Minfei Huang 2016-03-31 8:38 ` Minfei Huang 1 sibling, 1 reply; 33+ messages in thread From: "Zhou, Wenjian/周文剑" @ 2016-03-24 5:28 UTC (permalink / raw) To: Minfei Huang; +Cc: kexec@lists.infradead.org Hello Minfei, How do these two patches work? -- Thanks Zhou On 03/18/2016 01:48 PM, "Zhou, Wenjian/周文剑" wrote: > On 03/18/2016 12:16 PM, Minfei Huang wrote: >> On 03/18/16 at 10:46am, "Zhou, Wenjian/周文剑" wrote: >>> Hello Minfei, >>> >>> Since I can't produce the bug, I reviewed the patch and wrote an increment patch. >>> Though there are some bugs in the increment patch, >>> I wonder if the previous bug still exists with this patch. >>> Could you help me confirm it? >> >> Ok. I will help verify this increasing patch. >> > > Thank you very much. > >>> >>> And I have another question. >>> Did it only occur in patch v4? >> >> This issue doesn't exist in v3. I have pasted the test result with >> --num-thread 32 in that thread. >> >> applied makedumpfile with option -d 31 --num-threads 32 >> real 3m3.533s > > Oh, then the patch in the previous mail may not work. > > I'd appreciate it if you can also test the patch in this letter. > > I introduced semaphore to fix the bug in the v3. > So I want to know if it is this which affects the result. > The attached patch is based on v4, used to remove semaphore. > > > > _______________________________________________ > kexec mailing list > kexec@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/kexec > _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-24 5:28 ` "Zhou, Wenjian/周文剑" @ 2016-03-24 5:39 ` Minfei Huang 2016-03-25 2:57 ` Atsushi Kumagai 0 siblings, 1 reply; 33+ messages in thread From: Minfei Huang @ 2016-03-24 5:39 UTC (permalink / raw) To: Zhou, Wenjian/周文剑 ; +Cc: kexec@lists.infradead.org Hi, Zhou. I'm on holiday now, you can ask other people to help test, if necessary. Thanks Minfei > 在 2016年3月24日,12:29,Zhou, Wenjian/周文剑 <zhouwj-fnst@cn.fujitsu.com> 写道: > > Hello Minfei, > > How do these two patches work? > > -- > Thanks > Zhou > >> On 03/18/2016 01:48 PM, "Zhou, Wenjian/周文剑" wrote: >>> On 03/18/2016 12:16 PM, Minfei Huang wrote: >>>> On 03/18/16 at 10:46am, "Zhou, Wenjian/周文剑" wrote: >>>> Hello Minfei, >>>> >>>> Since I can't produce the bug, I reviewed the patch and wrote an increment patch. >>>> Though there are some bugs in the increment patch, >>>> I wonder if the previous bug still exists with this patch. >>>> Could you help me confirm it? >>> >>> Ok. I will help verify this increasing patch. >> >> Thank you very much. >> >>>> >>>> And I have another question. >>>> Did it only occur in patch v4? >>> >>> This issue doesn't exist in v3. I have pasted the test result with >>> --num-thread 32 in that thread. >>> >>> applied makedumpfile with option -d 31 --num-threads 32 >>> real 3m3.533s >> >> Oh, then the patch in the previous mail may not work. >> >> I'm appreciated if you can also test the patch in this letter. >> >> I introduced semaphore to fix the bug in the v3. >> So I want to know if it is this which affects the result. >> The attached patch is based on v4, used to remove semaohore. 
>> >> >> >> _______________________________________________ >> kexec mailing list >> kexec@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/kexec > > _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-24 5:39 ` Minfei Huang @ 2016-03-25 2:57 ` Atsushi Kumagai 2016-03-28 1:23 ` "Zhou, Wenjian/周文剑" 0 siblings, 1 reply; 33+ messages in thread From: Atsushi Kumagai @ 2016-03-25 2:57 UTC (permalink / raw) To: Minfei Huang, Zhou, Wenjian/周文剑 Cc: kexec@lists.infradead.org Hello, This is just a quick note to inform you. I measured the memory consumption with -d31 by VmHWM in /proc/PID/status and compared them between v3 and v4 since Minfei said the problem only occurs in v4. | VmHWM[kB] num-thread | v3 v4 ------------+-------------------------- 1 | 20,516 20,516 2 | 20,624 20,628 4 | 20,832 20,832 8 | 21,292 21,288 16 | 22,240 22,236 32 | 24,096 24,100 64 | 27,900 27,888 According to this result, the problem we face seems not just any lack of memory issue. BTW, the memory consumption increases depending on num-thread, I think it should be considered in the calculate_cyclic_buffer_size(). Thanks, Atsushi Kumagai diff --git a/makedumpfile.c b/makedumpfile.c index 4075f3e..d5626f9 100644 --- a/makedumpfile.c +++ b/makedumpfile.c @@ -44,6 +44,14 @@ extern int find_vmemmap(); char filename_stdout[] = FILENAME_STDOUT; +void +print_VmHWM(void) +{ + char command[64]; + sprintf(command, "grep VmHWM /proc/%d/status", getpid()); + system(command); +} + /* Cache statistics */ static unsigned long long cache_hit; static unsigned long long cache_miss; @@ -11185,5 +11193,7 @@ out: } free_elf_info(); + print_VmHWM(); + return retcd; } >Hi, Zhou. > >I'm on holiday now, you can ask other people to help test, if necessary. > >Thanks >Minfei > >> 在 2016年3月24日,12:29,Zhou, Wenjian/周文剑 <zhouwj-fnst@cn.fujitsu.com> 写道: >> >> Hello Minfei, >> >> How do these two patches work? 
>> >> -- >> Thanks >> Zhou >> >>> On 03/18/2016 01:48 PM, "Zhou, Wenjian/周文剑" wrote: >>>> On 03/18/2016 12:16 PM, Minfei Huang wrote: >>>>> On 03/18/16 at 10:46am, "Zhou, Wenjian/周文剑" wrote: >>>>> Hello Minfei, >>>>> >>>>> Since I can't produce the bug, I reviewed the patch and wrote an increment patch. >>>>> Though there are some bugs in the increment patch, >>>>> I wonder if the previous bug still exists with this patch. >>>>> Could you help me confirm it? >>>> >>>> Ok. I will help verify this increasing patch. >>> >>> Thank you very much. >>> >>>>> >>>>> And I have another question. >>>>> Did it only occur in patch v4? >>>> >>>> This issue doesn't exist in v3. I have pasted the test result with >>>> --num-thread 32 in that thread. >>>> >>>> applied makedumpfile with option -d 31 --num-threads 32 >>>> real 3m3.533s >>> >>> Oh, then the patch in the previous mail may not work. >>> >>> I'm appreciated if you can also test the patch in this letter. >>> >>> I introduced semaphore to fix the bug in the v3. >>> So I want to know if it is this which affects the result. >>> The attached patch is based on v4, used to remove semaohore. >>> >>> >>> >>> _______________________________________________ >>> kexec mailing list >>> kexec@lists.infradead.org >>> http://lists.infradead.org/mailman/listinfo/kexec >> >> >_______________________________________________ >kexec mailing list >kexec@lists.infradead.org >http://lists.infradead.org/mailman/listinfo/kexec _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply related [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-25 2:57 ` Atsushi Kumagai @ 2016-03-28 1:23 ` "Zhou, Wenjian/周文剑" 2016-03-28 5:43 ` Atsushi Kumagai 0 siblings, 1 reply; 33+ messages in thread From: "Zhou, Wenjian/周文剑" @ 2016-03-28 1:23 UTC (permalink / raw) To: Atsushi Kumagai, Minfei Huang; +Cc: kexec@lists.infradead.org On 03/25/2016 10:57 AM, Atsushi Kumagai wrote: > Hello, > > This is just a quick note to inform you. > I measured the memory consumption with -d31 by VmHWM in > /proc/PID/status and compared them between v3 and v4 since > Minfei said the problem only occurs in v4. > > | VmHWM[kB] > num-thread | v3 v4 > ------------+-------------------------- > 1 | 20,516 20,516 > 2 | 20,624 20,628 > 4 | 20,832 20,832 > 8 | 21,292 21,288 > 16 | 22,240 22,236 > 32 | 24,096 24,100 > 64 | 27,900 27,888 > > According to this result, the problem we face seems not just > any lack of memory issue. > Yes, I had realized it, for there isn't much difference between v3 and v4. And it is hardly to some further investigation, until get Minfei's result. BTW, can you reproduce the bug? > BTW, the memory consumption increases depending on num-thread, > I think it should be considered in the calculate_cyclic_buffer_size(). > I will think about it. -- Thanks Zhou > > Thanks, > Atsushi Kumagai > > diff --git a/makedumpfile.c b/makedumpfile.c > index 4075f3e..d5626f9 100644 > --- a/makedumpfile.c > +++ b/makedumpfile.c > @@ -44,6 +44,14 @@ extern int find_vmemmap(); > > char filename_stdout[] = FILENAME_STDOUT; > > +void > +print_VmHWM(void) > +{ > + char command[64]; > + sprintf(command, "grep VmHWM /proc/%d/status", getpid()); > + system(command); > +} > + > /* Cache statistics */ > static unsigned long long cache_hit; > static unsigned long long cache_miss; > @@ -11185,5 +11193,7 @@ out: > } > free_elf_info(); > > + print_VmHWM(); > + > return retcd; > } > > >> Hi, Zhou. >> >> I'm on holiday now, you can ask other people to help test, if necessary. 
>> >> Thanks >> Minfei >> >>> 在 2016年3月24日,12:29,Zhou, Wenjian/周文剑 <zhouwj-fnst@cn.fujitsu.com> 写道: >>> >>> Hello Minfei, >>> >>> How do these two patches work? >>> >>> -- >>> Thanks >>> Zhou >>> >>>> On 03/18/2016 01:48 PM, "Zhou, Wenjian/周文剑" wrote: >>>>> On 03/18/2016 12:16 PM, Minfei Huang wrote: >>>>>> On 03/18/16 at 10:46am, "Zhou, Wenjian/周文剑" wrote: >>>>>> Hello Minfei, >>>>>> >>>>>> Since I can't produce the bug, I reviewed the patch and wrote an increment patch. >>>>>> Though there are some bugs in the increment patch, >>>>>> I wonder if the previous bug still exists with this patch. >>>>>> Could you help me confirm it? >>>>> >>>>> Ok. I will help verify this increasing patch. >>>> >>>> Thank you very much. >>>> >>>>>> >>>>>> And I have another question. >>>>>> Did it only occur in patch v4? >>>>> >>>>> This issue doesn't exist in v3. I have pasted the test result with >>>>> --num-thread 32 in that thread. >>>>> >>>>> applied makedumpfile with option -d 31 --num-threads 32 >>>>> real 3m3.533s >>>> >>>> Oh, then the patch in the previous mail may not work. >>>> >>>> I'm appreciated if you can also test the patch in this letter. >>>> >>>> I introduced semaphore to fix the bug in the v3. >>>> So I want to know if it is this which affects the result. >>>> The attached patch is based on v4, used to remove semaohore. >>>> >>>> >>>> >>>> _______________________________________________ >>>> kexec mailing list >>>> kexec@lists.infradead.org >>>> http://lists.infradead.org/mailman/listinfo/kexec >>> >>> >> _______________________________________________ >> kexec mailing list >> kexec@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/kexec > > _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-28 1:23 ` "Zhou, Wenjian/周文剑" @ 2016-03-28 5:43 ` Atsushi Kumagai 0 siblings, 0 replies; 33+ messages in thread From: Atsushi Kumagai @ 2016-03-28 5:43 UTC (permalink / raw) To: "Zhou, Wenjian/周文剑", Minfei Huang Cc: kexec@lists.infradead.org >> Hello, >> >> This is just a quick note to inform you. >> I measured the memory consumption with -d31 by VmHWM in >> /proc/PID/status and compared them between v3 and v4 since >> Minfei said the problem only occurs in v4. >> >> | VmHWM[kB] >> num-thread | v3 v4 >> ------------+-------------------------- >> 1 | 20,516 20,516 >> 2 | 20,624 20,628 >> 4 | 20,832 20,832 >> 8 | 21,292 21,288 >> 16 | 22,240 22,236 >> 32 | 24,096 24,100 >> 64 | 27,900 27,888 >> >> According to this result, the problem we face seems not just >> any lack of memory issue. >> > >Yes, I had realized it, for there isn't much difference between v3 and v4. >And it is hardly to some further investigation, until get Minfei's result. > >BTW, can you reproduce the bug? Unfortunately, I can't reproduce it yet. I'm also waiting for Minfei's result. Thanks, Atsushi Kumagai >> BTW, the memory consumption increases depending on num-thread, >> I think it should be considered in the calculate_cyclic_buffer_size(). >> > >I will think about it. 
> >-- >Thanks >Zhou > >> >> Thanks, >> Atsushi Kumagai >> >> diff --git a/makedumpfile.c b/makedumpfile.c >> index 4075f3e..d5626f9 100644 >> --- a/makedumpfile.c >> +++ b/makedumpfile.c >> @@ -44,6 +44,14 @@ extern int find_vmemmap(); >> >> char filename_stdout[] = FILENAME_STDOUT; >> >> +void >> +print_VmHWM(void) >> +{ >> + char command[64]; >> + sprintf(command, "grep VmHWM /proc/%d/status", getpid()); >> + system(command); >> +} >> + >> /* Cache statistics */ >> static unsigned long long cache_hit; >> static unsigned long long cache_miss; >> @@ -11185,5 +11193,7 @@ out: >> } >> free_elf_info(); >> >> + print_VmHWM(); >> + >> return retcd; >> } >> >> >>> Hi, Zhou. >>> >>> I'm on holiday now, you can ask other people to help test, if necessary. >>> >>> Thanks >>> Minfei >>> >>>> 在 2016年3月24日,12:29,Zhou, Wenjian/周文剑 <zhouwj-fnst@cn.fujitsu.com> 写道: >>>> >>>> Hello Minfei, >>>> >>>> How do these two patches work? >>>> >>>> -- >>>> Thanks >>>> Zhou >>>> >>>>> On 03/18/2016 01:48 PM, "Zhou, Wenjian/周文剑" wrote: >>>>>> On 03/18/2016 12:16 PM, Minfei Huang wrote: >>>>>>> On 03/18/16 at 10:46am, "Zhou, Wenjian/周文剑" wrote: >>>>>>> Hello Minfei, >>>>>>> >>>>>>> Since I can't produce the bug, I reviewed the patch and wrote an increment patch. >>>>>>> Though there are some bugs in the increment patch, >>>>>>> I wonder if the previous bug still exists with this patch. >>>>>>> Could you help me confirm it? >>>>>> >>>>>> Ok. I will help verify this increasing patch. >>>>> >>>>> Thank you very much. >>>>> >>>>>>> >>>>>>> And I have another question. >>>>>>> Did it only occur in patch v4? >>>>>> >>>>>> This issue doesn't exist in v3. I have pasted the test result with >>>>>> --num-thread 32 in that thread. >>>>>> >>>>>> applied makedumpfile with option -d 31 --num-threads 32 >>>>>> real 3m3.533s >>>>> >>>>> Oh, then the patch in the previous mail may not work. >>>>> >>>>> I'm appreciated if you can also test the patch in this letter. 
>>>>> >>>>> I introduced semaphore to fix the bug in the v3. >>>>> So I want to know if it is this which affects the result. >>>>> The attached patch is based on v4, used to remove semaphore. >>>>> >>>>> >>>>> >>>>> _______________________________________________ >>>>> kexec mailing list >>>>> kexec@lists.infradead.org >>>>> http://lists.infradead.org/mailman/listinfo/kexec >>>> >>>> >>> _______________________________________________ >>> kexec mailing list >>> kexec@lists.infradead.org >>> http://lists.infradead.org/mailman/listinfo/kexec >> >> > _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-18 5:48 ` "Zhou, Wenjian/周文剑" 2016-03-24 5:28 ` "Zhou, Wenjian/周文剑" @ 2016-03-31 8:38 ` Minfei Huang 2016-03-31 9:09 ` "Zhou, Wenjian/周文剑" 1 sibling, 1 reply; 33+ messages in thread From: Minfei Huang @ 2016-03-31 8:38 UTC (permalink / raw) To: "Zhou, Wenjian/周文剑"; +Cc: kexec Hi, Zhou. I have tested the increasing patch on 4T memory machine. makedumpfile fails to dump vmcore, if there are about 384M memory in 2nd kernel which is reserved by crashkernel=auto. But once the reserved memory is enlarged up to 10G, makedumpfile can dump vmcore successfully. The cache should be dropped before testing, otherwise makedumpfile will fail to dump vmcore. echo 3 > /proc/sys/vm/drop_caches Maybe there is something cleanup we can do to avoid this. Following is the result with different parameter for option --num-threads. makedumpfile -l --num-threads 128 --message-level 1 -d 31 /proc/vmcore a.128 real 5m34.116s user 103m42.531s sys 86m12.586s makedumpfile -l --num-threads 64 --message-level 1 -d 31 /proc/vmcore a.64 real 3m29.544s user 27m14.674s sys 53m8.030s makedumpfile -l --num-threads 32 --message-level 1 -d 31 /proc/vmcore a.32 real 2m50.565s user 10m52.662s sys 32m0.446s makedumpfile -l --num-threads 16 --message-level 1 -d 31 /proc/vmcore a.16 real 2m27.999s user 8m18.650s sys 12m54.151s makedumpfile -l --num-threads 8 --message-level 1 -d 31 /proc/vmcore a.8 real 2m27.136s user 8m4.009s sys 3m12.090s makedumpfile -l --num-threads 4 --message-level 1 -d 31 /proc/vmcore a.4 real 2m43.739s user 7m39.689s sys 1m28.504s makedumpfile -l --num-threads 0 --message-level 1 -d 31 /proc/vmcore a.0 real 3m46.531s user 3m29.371s sys 0m16.909s makedumpfile.back -l --message-level 1 -d 31 /proc/vmcore a real 3m55.712s user 3m39.254s sys 0m16.287s Once the reserved memory is enlarged, makedumpfile works well with or without this increaseing patch. But there is an another issue I found during testing. 
makedumpfile may hang in about 24%. And with option --num-threads 64, this issue is also occured. makedumpfile -l --num-threads 128 --message-level 1 -d 31 /proc/vmcore a.128 Excluding unnecessary pages : [100.0 %] | Excluding unnecessary pages : [100.0 %] / Excluding unnecessary pages : [100.0 %] - Copying data : [ 11.2 %] | Copying data : [ 12.4 %] - Excluding unnecessary pages : [100.0 %] \ Excluding unnecessary pages : [100.0 %] | Copying data : [ 23.6 %] - Copying data : [ 24.4 %] / Thanks Minfei On 03/18/16 at 01:48pm, "Zhou, Wenjian/周文剑" wrote: > On 03/18/2016 12:16 PM, Minfei Huang wrote: > >On 03/18/16 at 10:46am, "Zhou, Wenjian/周文剑" wrote: > >>Hello Minfei, > >> > >>Since I can't produce the bug, I reviewed the patch and wrote an increment patch. > >>Though there are some bugs in the increment patch, > >>I wonder if the previous bug still exists with this patch. > >>Could you help me confirm it? > > > >Ok. I will help verify this increasing patch. > > > > Thank you very much. > > >> > >>And I have another question. > >>Did it only occur in patch v4? > > > >This issue doesn't exist in v3. I have pasted the test result with > >--num-thread 32 in that thread. > > > >applied makedumpfile with option -d 31 --num-threads 32 > >real 3m3.533s > > Oh, then the patch in the previous mail may not work. > > I'm appreciated if you can also test the patch in this letter. > > I introduced semaphore to fix the bug in the v3. > So I want to know if it is this which affects the result. > The attached patch is based on v4, used to remove semaohore. > > -- > Thanks > Zhou > > > > >Thanks > >Minfei > > > >>The previous patches almost have the same logic. > >> > >>-- > >>Thanks > >>Zhou > >> > >>On 03/15/2016 02:34 PM, Minfei Huang wrote: > >>>Hi, Zhou. > >>> > >>>I have applied this patch base on 1.5.9. There are several testcases I > >>>have tested. 
> >>> > >>>- makedumpfile --num-threads 64 -d 31 > >>> real 0m0.010s > >>> user 0m0.002s > >>> sys 0m0.009s > >>> > >>>- makedumpfile --num-threads 31 -d 31 > >>> real 2m40.915s > >>> user 10m50.900s > >>> sys 23m9.664s > >>> > >>>makedumpfile --num-threads 30 -d 31 > >>> real 0m0.006s > >>> user 0m0.002s > >>> sys 0m0.004s > >>> > >>>makedumpfile --num-threads 32 -d 31 > >>> real 0m0.007s > >>> user 0m0.002s > >>> sys 0m0.005s > >>> > >>>- makedumpfile --num-threads 8 -d 31 > >>> real 2m32.692s > >>> user 7m4.630s > >>> sys 2m0.369s > >>> > >>>- makedumpfile --num-threads 1 -d 31 > >>> real 4m42.423s > >>> user 7m27.153s > >>> sys 0m22.490s > >>> > >>>- makedumpfile.orig -d 31 > >>> real 4m1.297s > >>> user 3m39.696s > >>> sys 0m15.200s > >>> > >>>This patch has a huge increment to the filter performance under 31. But > >>>it is not stable, since makedumpfile fails to dump vmcore intermittently. > >>>You can find the above test result, makedumpfile fails to dump vmcore > >>>with option --num-threads 64, also it may occur with option > >>>--number-threads 8. > >>> > >>>Thanks > >>>Minfei > >>> > >>>On 03/09/16 at 08:27am, Zhou Wenjian wrote: > >>>>v4: > >>>> 1. fix a bug caused by the logic > >>>>v3: > >>>> 1. remove some unused variables > >>>> 2. fix a bug caused by the wrong logic > >>>> 3. fix a bug caused by optimising > >>>> 4. improve more performance by using Minoru Usui's code > >>>> > >>>>multi-threads implementation will introduce extra cost when handling > >>>>each page. The origin implementation will also do the extra work for > >>>>filtered pages. So there is a big performance degradation in > >>>>--num-threads -d 31. > >>>>The new implementation won't do the extra work for filtered pages any > >>>>more. So the performance of -d 31 is close to that of serial processing. > >>>> > >>>>The new implementation is just like the following: > >>>> * The basic idea is producer producing page and consumer writing page. 
> >>>> * Each producer have a page_flag_buf list which is used for storing > >>>> page's description. > >>>> * The size of page_flag_buf is little so it won't take too much memory. > >>>> * And all producers will share a page_data_buf array which is > >>>> used for storing page's compressed data. > >>>> * The main thread is the consumer. It will find the next pfn and write > >>>> it into file. > >>>> * The next pfn is smallest pfn in all page_flag_buf. > >>>> > >>>>Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> > >>>>Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> > >>>>--- > >>>> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- > >>>> makedumpfile.h | 35 ++++--- > >>>> 2 files changed, 202 insertions(+), 131 deletions(-) > >>>> > >>>>diff --git a/makedumpfile.c b/makedumpfile.c > >>>>index fa0b779..2b0864a 100644 > >>>>--- a/makedumpfile.c > >>>>+++ b/makedumpfile.c > >>>>@@ -3483,7 +3483,8 @@ initial_for_parallel() > >>>> unsigned long page_data_buf_size; > >>>> unsigned long limit_size; > >>>> int page_data_num; > >>>>- int i; > >>>>+ struct page_flag *current; > >>>>+ int i, j; > >>>> > >>>> len_buf_out = calculate_len_buf_out(info->page_size); > >>>> > >>>>@@ -3560,10 +3561,16 @@ initial_for_parallel() > >>>> > >>>> limit_size = (get_free_memory_size() > >>>> - MAP_REGION * info->num_threads) * 0.6; > >>>>+ if (limit_size < 0) { > >>>>+ MSG("Free memory is not enough for multi-threads\n"); > >>>>+ return FALSE; > >>>>+ } > >>>> > >>>> page_data_num = limit_size / page_data_buf_size; > >>>>+ info->num_buffers = 3 * info->num_threads; > >>>> > >>>>- info->num_buffers = MIN(NUM_BUFFERS, page_data_num); > >>>>+ info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); > >>>>+ info->num_buffers = MIN(info->num_buffers, page_data_num); > >>>> > >>>> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", > >>>> info->num_buffers); > >>>>@@ -3588,6 +3595,36 @@ initial_for_parallel() > >>>> } > >>>> > >>>> /* > 
>>>>+ * initial page_flag for each thread > >>>>+ */ > >>>>+ if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) > >>>>+ == NULL) { > >>>>+ MSG("Can't allocate memory for page_flag_buf. %s\n", > >>>>+ strerror(errno)); > >>>>+ return FALSE; > >>>>+ } > >>>>+ memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); > >>>>+ > >>>>+ for (i = 0; i < info->num_threads; i++) { > >>>>+ if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { > >>>>+ MSG("Can't allocate memory for page_flag. %s\n", > >>>>+ strerror(errno)); > >>>>+ return FALSE; > >>>>+ } > >>>>+ current = info->page_flag_buf[i]; > >>>>+ > >>>>+ for (j = 1; j < NUM_BUFFERS; j++) { > >>>>+ if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { > >>>>+ MSG("Can't allocate memory for page_flag. %s\n", > >>>>+ strerror(errno)); > >>>>+ return FALSE; > >>>>+ } > >>>>+ current = current->next; > >>>>+ } > >>>>+ current->next = info->page_flag_buf[i]; > >>>>+ } > >>>>+ > >>>>+ /* > >>>> * initial fd_memory for threads > >>>> */ > >>>> for (i = 0; i < info->num_threads; i++) { > >>>>@@ -3612,7 +3649,8 @@ initial_for_parallel() > >>>> void > >>>> free_for_parallel() > >>>> { > >>>>- int i; > >>>>+ int i, j; > >>>>+ struct page_flag *current; > >>>> > >>>> if (info->threads != NULL) { > >>>> for (i = 0; i < info->num_threads; i++) { > >>>>@@ -3655,6 +3693,19 @@ free_for_parallel() > >>>> free(info->page_data_buf); > >>>> } > >>>> > >>>>+ if (info->page_flag_buf != NULL) { > >>>>+ for (i = 0; i < info->num_threads; i++) { > >>>>+ for (j = 0; j < NUM_BUFFERS; j++) { > >>>>+ if (info->page_flag_buf[i] != NULL) { > >>>>+ current = info->page_flag_buf[i]; > >>>>+ info->page_flag_buf[i] = current->next; > >>>>+ free(current); > >>>>+ } > >>>>+ } > >>>>+ } > >>>>+ free(info->page_flag_buf); > >>>>+ } > >>>>+ > >>>> if (info->parallel_info == NULL) > >>>> return; > >>>> > >>>>@@ -7075,11 +7126,11 @@ void * > >>>> kdump_thread_function_cyclic(void *arg) { > 
>>>> void *retval = PTHREAD_FAIL; > >>>> struct thread_args *kdump_thread_args = (struct thread_args *)arg; > >>>>- struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > >>>>+ volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > >>>>+ volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; > >>>> struct cycle *cycle = kdump_thread_args->cycle; > >>>>- int page_data_num = kdump_thread_args->page_data_num; > >>>>- mdf_pfn_t pfn; > >>>>- int index; > >>>>+ mdf_pfn_t pfn = cycle->start_pfn; > >>>>+ int index = kdump_thread_args->thread_num; > >>>> int buf_ready; > >>>> int dumpable; > >>>> int fd_memory = 0; > >>>>@@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { > >>>> kdump_thread_args->thread_num); > >>>> } > >>>> > >>>>- while (1) { > >>>>- /* get next pfn */ > >>>>- pthread_mutex_lock(&info->current_pfn_mutex); > >>>>- pfn = info->current_pfn; > >>>>- info->current_pfn++; > >>>>- pthread_mutex_unlock(&info->current_pfn_mutex); > >>>>- > >>>>- if (pfn >= kdump_thread_args->end_pfn) > >>>>- break; > >>>>- > >>>>- index = -1; > >>>>+ /* > >>>>+ * filtered page won't take anything > >>>>+ * unfiltered zero page will only take a page_flag_buf > >>>>+ * unfiltered non-zero page will take a page_flag_buf and a page_data_buf > >>>>+ */ > >>>>+ while (pfn < cycle->end_pfn) { > >>>> buf_ready = FALSE; > >>>> > >>>>+ pthread_mutex_lock(&info->page_data_mutex); > >>>>+ while (page_data_buf[index].used != FALSE) { > >>>>+ index = (index + 1) % info->num_buffers; > >>>>+ } > >>>>+ page_data_buf[index].used = TRUE; > >>>>+ pthread_mutex_unlock(&info->page_data_mutex); > >>>>+ > >>>> while (buf_ready == FALSE) { > >>>> pthread_testcancel(); > >>>>- > >>>>- index = pfn % page_data_num; > >>>>- > >>>>- if (pfn - info->consumed_pfn > info->num_buffers) > >>>>+ if (page_flag_buf->ready == FLAG_READY) > >>>> continue; > >>>> > >>>>- if (page_data_buf[index].ready != 0) > >>>>- continue; > >>>>- > >>>>- 
pthread_mutex_lock(&page_data_buf[index].mutex); > >>>>- > >>>>- if (page_data_buf[index].ready != 0) > >>>>- goto unlock; > >>>>- > >>>>- buf_ready = TRUE; > >>>>+ /* get next dumpable pfn */ > >>>>+ pthread_mutex_lock(&info->current_pfn_mutex); > >>>>+ for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { > >>>>+ dumpable = is_dumpable( > >>>>+ info->fd_bitmap ? &bitmap_parallel : info->bitmap2, > >>>>+ pfn, > >>>>+ cycle); > >>>>+ if (dumpable) > >>>>+ break; > >>>>+ } > >>>>+ info->current_pfn = pfn + 1; > >>>> > >>>>- page_data_buf[index].pfn = pfn; > >>>>- page_data_buf[index].ready = 1; > >>>>+ page_flag_buf->pfn = pfn; > >>>>+ page_flag_buf->ready = FLAG_FILLING; > >>>>+ pthread_mutex_unlock(&info->current_pfn_mutex); > >>>>+ sem_post(&info->page_flag_buf_sem); > >>>> > >>>>- dumpable = is_dumpable( > >>>>- info->fd_bitmap ? &bitmap_parallel : info->bitmap2, > >>>>- pfn, > >>>>- cycle); > >>>>- page_data_buf[index].dumpable = dumpable; > >>>>- if (!dumpable) > >>>>- goto unlock; > >>>>+ if (pfn >= cycle->end_pfn) { > >>>>+ info->current_pfn = cycle->end_pfn; > >>>>+ page_data_buf[index].used = FALSE; > >>>>+ break; > >>>>+ } > >>>> > >>>> if (!read_pfn_parallel(fd_memory, pfn, buf, > >>>> &bitmap_memory_parallel, > >>>>@@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { > >>>> > >>>> if ((info->dump_level & DL_EXCLUDE_ZERO) > >>>> && is_zero_page(buf, info->page_size)) { > >>>>- page_data_buf[index].zero = TRUE; > >>>>- goto unlock; > >>>>+ page_flag_buf->zero = TRUE; > >>>>+ goto next; > >>>> } > >>>> > >>>>- page_data_buf[index].zero = FALSE; > >>>>+ page_flag_buf->zero = FALSE; > >>>> > >>>> /* > >>>> * Compress the page data. 
> >>>>@@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { > >>>> page_data_buf[index].flags = > >>>> DUMP_DH_COMPRESSED_LZO; > >>>> page_data_buf[index].size = size_out; > >>>>+ > >>>> memcpy(page_data_buf[index].buf, buf_out, size_out); > >>>> #endif > >>>> #ifdef USESNAPPY > >>>>@@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { > >>>> page_data_buf[index].size = info->page_size; > >>>> memcpy(page_data_buf[index].buf, buf, info->page_size); > >>>> } > >>>>-unlock: > >>>>- pthread_mutex_unlock(&page_data_buf[index].mutex); > >>>>+ page_flag_buf->index = index; > >>>>+ buf_ready = TRUE; > >>>>+next: > >>>>+ page_flag_buf->ready = FLAG_READY; > >>>>+ page_flag_buf = page_flag_buf->next; > >>>> > >>>> } > >>>> } > >>>>- > >>>> retval = NULL; > >>>> > >>>> fail: > >>>>@@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >>>> struct page_desc pd; > >>>> struct timeval tv_start; > >>>> struct timeval last, new; > >>>>- unsigned long long consuming_pfn; > >>>> pthread_t **threads = NULL; > >>>> struct thread_args *kdump_thread_args = NULL; > >>>> void *thread_result; > >>>>- int page_data_num; > >>>>+ int page_buf_num; > >>>> struct page_data *page_data_buf = NULL; > >>>> int i; > >>>> int index; > >>>>+ int end_count, consuming, check_count; > >>>>+ mdf_pfn_t current_pfn, temp_pfn; > >>>> > >>>> if (info->flag_elf_dumpfile) > >>>> return FALSE; > >>>>@@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >>>> goto out; > >>>> } > >>>> > >>>>- res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); > >>>>- if (res != 0) { > >>>>- ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", > >>>>- strerror(res)); > >>>>- goto out; > >>>>- } > >>>>- > >>>> res = pthread_mutex_init(&info->filter_mutex, NULL); > >>>> if (res != 0) { > >>>> ERRMSG("Can't initialize filter_mutex. 
%s\n", strerror(res)); > >>>>@@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >>>> end_pfn = cycle->end_pfn; > >>>> > >>>> info->current_pfn = start_pfn; > >>>>- info->consumed_pfn = start_pfn - 1; > >>>> > >>>> threads = info->threads; > >>>> kdump_thread_args = info->kdump_thread_args; > >>>> > >>>>- page_data_num = info->num_buffers; > >>>>+ page_buf_num = info->num_buffers; > >>>> page_data_buf = info->page_data_buf; > >>>>+ pthread_mutex_init(&info->page_data_mutex, NULL); > >>>>+ sem_init(&info->page_flag_buf_sem, 0, 0); > >>>> > >>>>- for (i = 0; i < page_data_num; i++) { > >>>>- /* > >>>>- * producer will use pfn in page_data_buf to decide the > >>>>- * consumed pfn > >>>>- */ > >>>>- page_data_buf[i].pfn = start_pfn - 1; > >>>>- page_data_buf[i].ready = 0; > >>>>- res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); > >>>>- if (res != 0) { > >>>>- ERRMSG("Can't initialize mutex of page_data_buf. %s\n", > >>>>- strerror(res)); > >>>>- goto out; > >>>>- } > >>>>- } > >>>>+ for (i = 0; i < page_buf_num; i++) > >>>>+ page_data_buf[i].used = FALSE; > >>>> > >>>> for (i = 0; i < info->num_threads; i++) { > >>>> kdump_thread_args[i].thread_num = i; > >>>> kdump_thread_args[i].len_buf_out = len_buf_out; > >>>>- kdump_thread_args[i].start_pfn = start_pfn; > >>>>- kdump_thread_args[i].end_pfn = end_pfn; > >>>>- kdump_thread_args[i].page_data_num = page_data_num; > >>>> kdump_thread_args[i].page_data_buf = page_data_buf; > >>>>+ kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; > >>>> kdump_thread_args[i].cycle = cycle; > >>>> > >>>> res = pthread_create(threads[i], NULL, > >>>>@@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >>>> } > >>>> } > >>>> > >>>>- consuming_pfn = start_pfn; > >>>>- index = -1; > >>>>+ end_count = 0; > >>>>+ while (1) { > >>>>+ consuming = 0; > >>>>+ check_count = 0; > >>>> > >>>>- gettimeofday(&last, NULL); > >>>>+ /* > >>>>+ * The basic idea is 
producer producing page and consumer writing page. > >>>>+ * Each producer have a page_flag_buf list which is used for storing page's description. > >>>>+ * The size of page_flag_buf is little so it won't take too much memory. > >>>>+ * And all producers will share a page_data_buf array which is used for storing page's compressed data. > >>>>+ * The main thread is the consumer. It will find the next pfn and write it into file. > >>>>+ * The next pfn is smallest pfn in all page_flag_buf. > >>>>+ */ > >>>>+ sem_wait(&info->page_flag_buf_sem); > >>>>+ gettimeofday(&last, NULL); > >>>>+ while (1) { > >>>>+ current_pfn = end_pfn; > >>>> > >>>>- while (consuming_pfn < end_pfn) { > >>>>- index = consuming_pfn % page_data_num; > >>>>+ /* > >>>>+ * page_flag_buf is in circular linked list. > >>>>+ * The array info->page_flag_buf[] records the current page_flag_buf in each thread's > >>>>+ * page_flag_buf list. > >>>>+ * consuming is used for recording in which thread the pfn is the smallest. > >>>>+ * current_pfn is used for recording the value of pfn when checking the pfn. > >>>>+ */ > >>>>+ for (i = 0; i < info->num_threads; i++) { > >>>>+ if (info->page_flag_buf[i]->ready == FLAG_UNUSED) > >>>>+ continue; > >>>>+ temp_pfn = info->page_flag_buf[i]->pfn; > >>>> > >>>>- gettimeofday(&new, NULL); > >>>>- if (new.tv_sec - last.tv_sec > WAIT_TIME) { > >>>>- ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); > >>>>- goto out; > >>>>- } > >>>>+ /* > >>>>+ * count how many threads have reached the end. 
> >>>>+ */ > >>>>+ if (temp_pfn >= end_pfn) { > >>>>+ info->page_flag_buf[i]->ready = FLAG_UNUSED; > >>>>+ end_count++; > >>>>+ continue; > >>>>+ } > >>>> > >>>>- /* > >>>>- * check pfn first without mutex locked to reduce the time > >>>>- * trying to lock the mutex > >>>>- */ > >>>>- if (page_data_buf[index].pfn != consuming_pfn) > >>>>- continue; > >>>>+ if (current_pfn < temp_pfn) > >>>>+ continue; > >>>> > >>>>- if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) > >>>>- continue; > >>>>+ check_count++; > >>>>+ consuming = i; > >>>>+ current_pfn = temp_pfn; > >>>>+ } > >>>> > >>>>- /* check whether the found one is ready to be consumed */ > >>>>- if (page_data_buf[index].pfn != consuming_pfn || > >>>>- page_data_buf[index].ready != 1) { > >>>>- goto unlock; > >>>>+ /* > >>>>+ * If all the threads have reached the end, we will finish writing. > >>>>+ */ > >>>>+ if (end_count >= info->num_threads) > >>>>+ goto finish; > >>>>+ > >>>>+ /* > >>>>+ * If the page_flag_buf is not ready, the pfn recorded may be changed. > >>>>+ * So we should recheck. 
> >>>>+ */ > >>>>+ if (info->page_flag_buf[consuming]->ready != FLAG_READY) { > >>>>+ gettimeofday(&new, NULL); > >>>>+ if (new.tv_sec - last.tv_sec > WAIT_TIME) { > >>>>+ ERRMSG("Can't get data of pfn.\n"); > >>>>+ goto out; > >>>>+ } > >>>>+ continue; > >>>>+ } > >>>>+ > >>>>+ if (current_pfn == info->page_flag_buf[consuming]->pfn) > >>>>+ break; > >>>> } > >>>> > >>>> if ((num_dumped % per) == 0) > >>>> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); > >>>> > >>>>- /* next pfn is found, refresh last here */ > >>>>- last = new; > >>>>- consuming_pfn++; > >>>>- info->consumed_pfn++; > >>>>- page_data_buf[index].ready = 0; > >>>>- > >>>>- if (page_data_buf[index].dumpable == FALSE) > >>>>- goto unlock; > >>>>- > >>>> num_dumped++; > >>>> > >>>>- if (page_data_buf[index].zero == TRUE) { > >>>>+ > >>>>+ if (info->page_flag_buf[consuming]->zero == TRUE) { > >>>> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) > >>>> goto out; > >>>> pfn_zero++; > >>>> } else { > >>>>+ index = info->page_flag_buf[consuming]->index; > >>>> pd.flags = page_data_buf[index].flags; > >>>> pd.size = page_data_buf[index].size; > >>>> pd.page_flags = 0; > >>>>@@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >>>> */ > >>>> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) > >>>> goto out; > >>>>- > >>>>+ page_data_buf[index].used = FALSE; > >>>> } > >>>>-unlock: > >>>>- pthread_mutex_unlock(&page_data_buf[index].mutex); > >>>>+ info->page_flag_buf[consuming]->ready = FLAG_UNUSED; > >>>>+ info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; > >>>> } > >>>>- > >>>>+finish: > >>>> ret = TRUE; > >>>> /* > >>>> * print [100 %] > >>>>@@ -7463,15 +7532,9 @@ out: > >>>> } > >>>> } > >>>> > >>>>- if (page_data_buf != NULL) { > >>>>- for (i = 0; i < page_data_num; i++) { > >>>>- pthread_mutex_destroy(&page_data_buf[i].mutex); > >>>>- } > >>>>- } > >>>>- > >>>>+ sem_destroy(&info->page_flag_buf_sem); > >>>> 
pthread_rwlock_destroy(&info->usemmap_rwlock); > >>>> pthread_mutex_destroy(&info->filter_mutex); > >>>>- pthread_mutex_destroy(&info->consumed_pfn_mutex); > >>>> pthread_mutex_destroy(&info->current_pfn_mutex); > >>>> > >>>> return ret; > >>>>@@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag > >>>> num_dumped++; > >>>> if (!read_pfn(pfn, buf)) > >>>> goto out; > >>>>+ > >>>> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); > >>>> > >>>> /* > >>>>diff --git a/makedumpfile.h b/makedumpfile.h > >>>>index e0b5bbf..4b315c0 100644 > >>>>--- a/makedumpfile.h > >>>>+++ b/makedumpfile.h > >>>>@@ -44,6 +44,7 @@ > >>>> #include "print_info.h" > >>>> #include "sadump_mod.h" > >>>> #include <pthread.h> > >>>>+#include <semaphore.h> > >>>> > >>>> /* > >>>> * Result of command > >>>>@@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; > >>>> #define PAGE_DATA_NUM (50) > >>>> #define WAIT_TIME (60 * 10) > >>>> #define PTHREAD_FAIL ((void *)-2) > >>>>-#define NUM_BUFFERS (50) > >>>>+#define NUM_BUFFERS (20) > >>>> > >>>> struct mmap_cache { > >>>> char *mmap_buf; > >>>>@@ -985,28 +986,33 @@ struct mmap_cache { > >>>> off_t mmap_end_offset; > >>>> }; > >>>> > >>>>+enum { > >>>>+ FLAG_UNUSED, > >>>>+ FLAG_READY, > >>>>+ FLAG_FILLING > >>>>+}; > >>>>+struct page_flag { > >>>>+ mdf_pfn_t pfn; > >>>>+ char zero; > >>>>+ char ready; > >>>>+ short index; > >>>>+ struct page_flag *next; > >>>>+}; > >>>>+ > >>>> struct page_data > >>>> { > >>>>- mdf_pfn_t pfn; > >>>>- int dumpable; > >>>>- int zero; > >>>>- unsigned int flags; > >>>> long size; > >>>> unsigned char *buf; > >>>>- pthread_mutex_t mutex; > >>>>- /* > >>>>- * whether the page_data is ready to be consumed > >>>>- */ > >>>>- int ready; > >>>>+ int flags; > >>>>+ int used; > >>>> }; > >>>> > >>>> struct thread_args { > >>>> int thread_num; > >>>> unsigned long len_buf_out; > >>>>- mdf_pfn_t start_pfn, end_pfn; > >>>>- int page_data_num; > >>>> struct cycle 
*cycle; > >>>> struct page_data *page_data_buf; > >>>>+ struct page_flag *page_flag_buf; > >>>> }; > >>>> > >>>> /* > >>>>@@ -1295,11 +1301,12 @@ struct DumpInfo { > >>>> pthread_t **threads; > >>>> struct thread_args *kdump_thread_args; > >>>> struct page_data *page_data_buf; > >>>>+ struct page_flag **page_flag_buf; > >>>>+ sem_t page_flag_buf_sem; > >>>> pthread_rwlock_t usemmap_rwlock; > >>>> mdf_pfn_t current_pfn; > >>>> pthread_mutex_t current_pfn_mutex; > >>>>- mdf_pfn_t consumed_pfn; > >>>>- pthread_mutex_t consumed_pfn_mutex; > >>>>+ pthread_mutex_t page_data_mutex; > >>>> pthread_mutex_t filter_mutex; > >>>> }; > >>>> extern struct DumpInfo *info; > >>>>-- > >>>>1.8.3.1 > >>>> > >>>> > >>>> > >>>> > >>>>_______________________________________________ > >>>>kexec mailing list > >>>>kexec@lists.infradead.org > >>>>http://lists.infradead.org/mailman/listinfo/kexec > >> > >> > >> > > > >> From 1d7ad5dbbc29efe58171b1023ab0df09eb2815bf Mon Sep 17 00:00:00 2001 > >>From: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> > >>Date: Fri, 18 Mar 2016 10:35:35 +0800 > >>Subject: [PATCH] increment > >> > >>--- > >> makedumpfile.c | 107 ++++++++++++++++++++++++++++++++++++++++++--------------- > >> makedumpfile.h | 12 +++++-- > >> 2 files changed, 89 insertions(+), 30 deletions(-) > >> > >>diff --git a/makedumpfile.c b/makedumpfile.c > >>index 2b0864a..a304a61 100644 > >>--- a/makedumpfile.c > >>+++ b/makedumpfile.c > >>@@ -3477,6 +3477,42 @@ calibrate_machdep_info(void) > >> } > >> > >> int > >>+initial_parallel_area(int page_data_buf_size) > >>+{ > >>+ int i, j; > >>+ struct page_flag *current; > >>+ info->page_data_buf = info->parallel_area; > >>+ void *page_data_buf = info->parallel_area + sizeof(struct page_data) * info->num_buffers; > >>+ void *page_flag_list = page_data_buf + page_data_buf_size * info->num_buffers; > >>+ > >>+ for (i = 0; i < info->num_buffers; i++) { > >>+ info->page_data_buf[i].buf = page_data_buf + page_data_buf_size * i; > >>+ } > >>+ > >>+ > >>+ if 
((info->page_flag_list = malloc(sizeof(struct page_flag_list) * info->num_threads)) > >>+ == NULL) { > >>+ MSG("Can't allocate memory for page_flag_buf. %s\n", > >>+ strerror(errno)); > >>+ return FALSE; > >>+ } > >>+ > >>+ for (i = 0; i < info->num_threads; i++) { > >>+ info->page_flag_list[i].header = page_flag_list + sizeof(struct page_flag) * info->num_buffers * i; > >>+ info->page_flag_list[i].current = 0; > >>+ /* > >>+ current = info->page_flag_buf[i]; > >>+ for (j = 1; j < info->num_buffers; j++) { > >>+ current->next = current + sizeof(struct page_flag); > >>+ current = current->next; > >>+ } > >>+ current->next = info->page_flag_buf[i]; > >>+*/ > >>+ } > >>+ > >>+} > >>+ > >>+int > >> initial_for_parallel() > >> { > >> unsigned long len_buf_out; > >>@@ -3575,9 +3611,15 @@ initial_for_parallel() > >> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", > >> info->num_buffers); > >> > >>- /* > >>- * allocate memory for page_data > >>- */ > >>+ if ((info->parallel_area = calloc(info->num_buffers, sizeof(struct page_data)+page_data_buf_size + sizeof(struct page_flag)*info->num_threads)) > >>+ ==NULL) { > >>+ MSG("Can't allocate memory for page_data_buf. %s\n", > >>+ strerror(errno)); > >>+ return FALSE; > >>+ } > >>+ > >>+ initial_parallel_area(page_data_buf_size); > >>+/* > >> if ((info->page_data_buf = malloc(sizeof(struct page_data) * info->num_buffers)) > >> == NULL) { > >> MSG("Can't allocate memory for page_data_buf. %s\n", > >>@@ -3594,9 +3636,6 @@ initial_for_parallel() > >> } > >> } > >> > >>- /* > >>- * initial page_flag for each thread > >>- */ > >> if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) > >> == NULL) { > >> MSG("Can't allocate memory for page_flag_buf. 
%s\n", > >>@@ -3623,7 +3662,7 @@ initial_for_parallel() > >> } > >> current->next = info->page_flag_buf[i]; > >> } > >>- > >>+*/ > >> /* > >> * initial fd_memory for threads > >> */ > >>@@ -3685,6 +3724,15 @@ free_for_parallel() > >> if (info->kdump_thread_args != NULL) > >> free(info->kdump_thread_args); > >> > >>+ if (info->page_flag_list != NULL) { > >>+ free(info->page_flag_list); > >>+ } > >>+ > >>+ if (info->parallel_area != NULL) { > >>+ free(info->parallel_area); > >>+ } > >>+ > >>+/* > >> if (info->page_data_buf != NULL) { > >> for (i = 0; i < info->num_buffers; i++) { > >> if (info->page_data_buf[i].buf != NULL) > >>@@ -3705,7 +3753,7 @@ free_for_parallel() > >> } > >> free(info->page_flag_buf); > >> } > >>- > >>+*/ > >> if (info->parallel_info == NULL) > >> return; > >> > >>@@ -7122,12 +7170,14 @@ int finalize_zlib(z_stream *stream) > >> return err; > >> } > >> > >>+#define CURRENT_FLAG page_flag_header[current_page_flag] > >> void * > >> kdump_thread_function_cyclic(void *arg) { > >> void *retval = PTHREAD_FAIL; > >> struct thread_args *kdump_thread_args = (struct thread_args *)arg; > >> volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; > >>- volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; > >>+ struct page_flag *page_flag_header = kdump_thread_args->page_flag_list->header; > >>+ int current_page_flag = kdump_thread_args->page_flag_list->current; > >> struct cycle *cycle = kdump_thread_args->cycle; > >> mdf_pfn_t pfn = cycle->start_pfn; > >> int index = kdump_thread_args->thread_num; > >>@@ -7193,7 +7243,7 @@ kdump_thread_function_cyclic(void *arg) { > >> > >> while (buf_ready == FALSE) { > >> pthread_testcancel(); > >>- if (page_flag_buf->ready == FLAG_READY) > >>+ if (CURRENT_FLAG.ready == FLAG_READY) > >> continue; > >> > >> /* get next dumpable pfn */ > >>@@ -7208,8 +7258,8 @@ kdump_thread_function_cyclic(void *arg) { > >> } > >> info->current_pfn = pfn + 1; > >> > >>- page_flag_buf->pfn = pfn; 
> >>- page_flag_buf->ready = FLAG_FILLING; > >>+ CURRENT_FLAG.pfn = pfn; > >>+ CURRENT_FLAG.ready = FLAG_FILLING; > >> pthread_mutex_unlock(&info->current_pfn_mutex); > >> sem_post(&info->page_flag_buf_sem); > >> > >>@@ -7230,11 +7280,11 @@ kdump_thread_function_cyclic(void *arg) { > >> > >> if ((info->dump_level & DL_EXCLUDE_ZERO) > >> && is_zero_page(buf, info->page_size)) { > >>- page_flag_buf->zero = TRUE; > >>+ CURRENT_FLAG.zero = TRUE; > >> goto next; > >> } > >> > >>- page_flag_buf->zero = FALSE; > >>+ CURRENT_FLAG.zero = FALSE; > >> > >> /* > >> * Compress the page data. > >>@@ -7285,11 +7335,11 @@ kdump_thread_function_cyclic(void *arg) { > >> page_data_buf[index].size = info->page_size; > >> memcpy(page_data_buf[index].buf, buf, info->page_size); > >> } > >>- page_flag_buf->index = index; > >>+ CURRENT_FLAG.index = index; > >> buf_ready = TRUE; > >> next: > >>- page_flag_buf->ready = FLAG_READY; > >>- page_flag_buf = page_flag_buf->next; > >>+ CURRENT_FLAG.ready = FLAG_READY; > >>+ current_page_flag = (current_page_flag + 1) % info->num_buffers; > >> > >> } > >> } > >>@@ -7306,6 +7356,8 @@ fail: > >> pthread_exit(retval); > >> } > >> > >>+#define CURRENT_PAGE_FLAG(i) (info->page_flag_list[i].header)[info->page_flag_list[i].current] > >>+ > >> int > >> write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> struct cache_data *cd_page, > >>@@ -7379,7 +7431,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> kdump_thread_args[i].thread_num = i; > >> kdump_thread_args[i].len_buf_out = len_buf_out; > >> kdump_thread_args[i].page_data_buf = page_data_buf; > >>- kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; > >>+ kdump_thread_args[i].page_flag_list = &(info->page_flag_list[i]); > >> kdump_thread_args[i].cycle = cycle; > >> > >> res = pthread_create(threads[i], NULL, > >>@@ -7418,15 +7470,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> * current_pfn is used for recording the value of pfn 
when checking the pfn. > >> */ > >> for (i = 0; i < info->num_threads; i++) { > >>- if (info->page_flag_buf[i]->ready == FLAG_UNUSED) > >>+ if (CURRENT_PAGE_FLAG(i).ready == FLAG_UNUSED) > >> continue; > >>- temp_pfn = info->page_flag_buf[i]->pfn; > >>+ temp_pfn = CURRENT_PAGE_FLAG(i).pfn; > >> > >> /* > >> * count how many threads have reached the end. > >> */ > >> if (temp_pfn >= end_pfn) { > >>- info->page_flag_buf[i]->ready = FLAG_UNUSED; > >>+ CURRENT_PAGE_FLAG(i).ready = FLAG_UNUSED; > >> end_count++; > >> continue; > >> } > >>@@ -7449,7 +7501,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> * If the page_flag_buf is not ready, the pfn recorded may be changed. > >> * So we should recheck. > >> */ > >>- if (info->page_flag_buf[consuming]->ready != FLAG_READY) { > >>+ if (CURRENT_PAGE_FLAG(consuming).ready != FLAG_READY) { > >> gettimeofday(&new, NULL); > >> if (new.tv_sec - last.tv_sec > WAIT_TIME) { > >> ERRMSG("Can't get data of pfn.\n"); > >>@@ -7458,7 +7510,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> continue; > >> } > >> > >>- if (current_pfn == info->page_flag_buf[consuming]->pfn) > >>+ if (current_pfn == CURRENT_PAGE_FLAG(consuming).pfn) > >> break; > >> } > >> > >>@@ -7468,12 +7520,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> num_dumped++; > >> > >> > >>- if (info->page_flag_buf[consuming]->zero == TRUE) { > >>+ if (CURRENT_PAGE_FLAG(consuming).zero == TRUE) { > >> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) > >> goto out; > >> pfn_zero++; > >> } else { > >>- index = info->page_flag_buf[consuming]->index; > >>+ index = CURRENT_PAGE_FLAG(consuming).index; > >> pd.flags = page_data_buf[index].flags; > >> pd.size = page_data_buf[index].size; > >> pd.page_flags = 0; > >>@@ -7491,8 +7543,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > >> goto out; > >> page_data_buf[index].used = FALSE; > >> } > >>- info->page_flag_buf[consuming]->ready 
= FLAG_UNUSED; > >>- info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; > >>+ CURRENT_PAGE_FLAG(consuming).ready = FLAG_UNUSED; > >>+ info->page_flag_list[consuming].current += 1; > >>+ info->page_flag_list[consuming].current %= info->num_buffers; > >> } > >> finish: > >> ret = TRUE; > >>diff --git a/makedumpfile.h b/makedumpfile.h > >>index 4b315c0..8c4bc1a 100644 > >>--- a/makedumpfile.h > >>+++ b/makedumpfile.h > >>@@ -996,7 +996,12 @@ struct page_flag { > >> char zero; > >> char ready; > >> short index; > >>- struct page_flag *next; > >>+// struct page_flag *next; > >>+}; > >>+ > >>+struct page_flag_list { > >>+ struct page_flag *header; > >>+ int current; > >> }; > >> > >> struct page_data > >>@@ -1012,7 +1017,7 @@ struct thread_args { > >> unsigned long len_buf_out; > >> struct cycle *cycle; > >> struct page_data *page_data_buf; > >>- struct page_flag *page_flag_buf; > >>+ struct page_flag_list *page_flag_list; > >> }; > >> > >> /* > >>@@ -1298,10 +1303,11 @@ struct DumpInfo { > >> */ > >> int num_threads; > >> int num_buffers; > >>+ void *parallel_area; > >> pthread_t **threads; > >> struct thread_args *kdump_thread_args; > >> struct page_data *page_data_buf; > >>- struct page_flag **page_flag_buf; > >>+ struct page_flag_list *page_flag_list; > >> sem_t page_flag_buf_sem; > >> pthread_rwlock_t usemmap_rwlock; > >> mdf_pfn_t current_pfn; > >>-- > >>1.8.3.1 > >> > > > > From 1dbf68c21a2bbc7b454c1a742c1e3ff00bb85829 Mon Sep 17 00:00:00 2001 > From: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> > Date: Fri, 18 Mar 2016 13:36:57 +0800 > Subject: [PATCH] remove sem > > --- > makedumpfile.c | 8 ++++---- > makedumpfile.h | 4 ++-- > 2 files changed, 6 insertions(+), 6 deletions(-) > > diff --git a/makedumpfile.c b/makedumpfile.c > index 2b0864a..8de5e1d 100644 > --- a/makedumpfile.c > +++ b/makedumpfile.c > @@ -7211,7 +7211,7 @@ kdump_thread_function_cyclic(void *arg) { > page_flag_buf->pfn = pfn; > page_flag_buf->ready = FLAG_FILLING; > 
pthread_mutex_unlock(&info->current_pfn_mutex); > - sem_post(&info->page_flag_buf_sem); > +// sem_post(&info->page_flag_buf_sem); > > if (pfn >= cycle->end_pfn) { > info->current_pfn = cycle->end_pfn; > @@ -7370,7 +7370,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > page_buf_num = info->num_buffers; > page_data_buf = info->page_data_buf; > pthread_mutex_init(&info->page_data_mutex, NULL); > - sem_init(&info->page_flag_buf_sem, 0, 0); > +// sem_init(&info->page_flag_buf_sem, 0, 0); > > for (i = 0; i < page_buf_num; i++) > page_data_buf[i].used = FALSE; > @@ -7405,7 +7405,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, > * The main thread is the consumer. It will find the next pfn and write it into file. > * The next pfn is smallest pfn in all page_flag_buf. > */ > - sem_wait(&info->page_flag_buf_sem); > +// sem_wait(&info->page_flag_buf_sem); > gettimeofday(&last, NULL); > while (1) { > current_pfn = end_pfn; > @@ -7532,7 +7532,7 @@ out: > } > } > > - sem_destroy(&info->page_flag_buf_sem); > +// sem_destroy(&info->page_flag_buf_sem); > pthread_rwlock_destroy(&info->usemmap_rwlock); > pthread_mutex_destroy(&info->filter_mutex); > pthread_mutex_destroy(&info->current_pfn_mutex); > diff --git a/makedumpfile.h b/makedumpfile.h > index 4b315c0..7627286 100644 > --- a/makedumpfile.h > +++ b/makedumpfile.h > @@ -44,7 +44,7 @@ > #include "print_info.h" > #include "sadump_mod.h" > #include <pthread.h> > -#include <semaphore.h> > +//#include <semaphore.h> > > /* > * Result of command > @@ -1302,7 +1302,7 @@ struct DumpInfo { > struct thread_args *kdump_thread_args; > struct page_data *page_data_buf; > struct page_flag **page_flag_buf; > - sem_t page_flag_buf_sem; > +// sem_t page_flag_buf_sem; > pthread_rwlock_t usemmap_rwlock; > mdf_pfn_t current_pfn; > pthread_mutex_t current_pfn_mutex; > -- > 1.8.3.1 > _______________________________________________ kexec mailing list kexec@lists.infradead.org 
http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-31 8:38 ` Minfei Huang @ 2016-03-31 9:09 ` "Zhou, Wenjian/周文剑" 2016-04-01 6:27 ` Minfei Huang 0 siblings, 1 reply; 33+ messages in thread From: "Zhou, Wenjian/周文剑" @ 2016-03-31 9:09 UTC (permalink / raw) To: Minfei Huang; +Cc: kexec Hello Minfei, Thanks for your results. And I have some questions. On 03/31/2016 04:38 PM, Minfei Huang wrote: > Hi, Zhou. > > I have tested the increasing patch on 4T memory machine. > > makedumpfile fails to dump vmcore, if there are about 384M memory in 2nd > kernel which is reserved by crashkernel=auto. But once the reserved > memory is enlarged up to 10G, makedumpfile can dump vmcore successfully. > Will it fail with patch v3? or just v4? I don't think it is a problem. If 128 cpus are enabled in second kernel, there won't be much memory left if total memory is 384M. And I think it will also work if the reserved memory is set to 1G. > The cache should be dropped before testing, otherwise makedumpfile will > fail to dump vmcore. > echo 3 > /proc/sys/vm/drop_caches > Maybe there is something cleanup we can do to avoid this. > > Following is the result with different parameter for option > --num-threads. 
> > makedumpfile -l --num-threads 128 --message-level 1 -d 31 /proc/vmcore a.128 > real 5m34.116s > user 103m42.531s > sys 86m12.586s > > makedumpfile -l --num-threads 64 --message-level 1 -d 31 /proc/vmcore a.64 > real 3m29.544s > user 27m14.674s > sys 53m8.030s > > makedumpfile -l --num-threads 32 --message-level 1 -d 31 /proc/vmcore a.32 > real 2m50.565s > user 10m52.662s > sys 32m0.446s > > makedumpfile -l --num-threads 16 --message-level 1 -d 31 /proc/vmcore a.16 > real 2m27.999s > user 8m18.650s > sys 12m54.151s > > makedumpfile -l --num-threads 8 --message-level 1 -d 31 /proc/vmcore a.8 > real 2m27.136s > user 8m4.009s > sys 3m12.090s > > makedumpfile -l --num-threads 4 --message-level 1 -d 31 /proc/vmcore a.4 > real 2m43.739s > user 7m39.689s > sys 1m28.504s > > makedumpfile -l --num-threads 0 --message-level 1 -d 31 /proc/vmcore a.0 > real 3m46.531s > user 3m29.371s > sys 0m16.909s > > makedumpfile.back -l --message-level 1 -d 31 /proc/vmcore a > real 3m55.712s > user 3m39.254s > sys 0m16.287s > > Once the reserved memory is enlarged, makedumpfile works well with or > without this increaseing patch. > > But there is an another issue I found during testing. makedumpfile may > hang in about 24%. And with option --num-threads 64, this issue is also > occured. > Will it occur with patch v3? If it not occurs, then neither of the previous two increasing patches will work? And did you test it with or without the increasing patch? > makedumpfile -l --num-threads 128 --message-level 1 -d 31 /proc/vmcore a.128 > Excluding unnecessary pages : [100.0 %] | > Excluding unnecessary pages : [100.0 %] / > Excluding unnecessary pages : [100.0 %] - > Copying data : [ 11.2 %] | > Copying data : [ 12.4 %] - > Excluding unnecessary pages : [100.0 %] \ > Excluding unnecessary pages : [100.0 %] | > Copying data : [ 23.6 %] - > Copying data : [ 24.4 %] / > Could you help me find which line of the code is running at when it hanging? 
makedumpfile may be in a loop and can't go out by some bugs. -- Thanks Zhou > Thanks > Minfei > > On 03/18/16 at 01:48pm, "Zhou, Wenjian/周文剑" wrote: >> On 03/18/2016 12:16 PM, Minfei Huang wrote: >>> On 03/18/16 at 10:46am, "Zhou, Wenjian/周文剑" wrote: >>>> Hello Minfei, >>>> >>>> Since I can't produce the bug, I reviewed the patch and wrote an increment patch. >>>> Though there are some bugs in the increment patch, >>>> I wonder if the previous bug still exists with this patch. >>>> Could you help me confirm it? >>> >>> Ok. I will help verify this increasing patch. >>> >> >> Thank you very much. >> >>>> >>>> And I have another question. >>>> Did it only occur in patch v4? >>> >>> This issue doesn't exist in v3. I have pasted the test result with >>> --num-thread 32 in that thread. >>> >>> applied makedumpfile with option -d 31 --num-threads 32 >>> real 3m3.533s >> >> Oh, then the patch in the previous mail may not work. >> >> I'm appreciated if you can also test the patch in this letter. >> >> I introduced semaphore to fix the bug in the v3. >> So I want to know if it is this which affects the result. >> The attached patch is based on v4, used to remove semaohore. >> >> -- >> Thanks >> Zhou >> >>> >>> Thanks >>> Minfei >>> >>>> The previous patches almost have the same logic. >>>> >>>> -- >>>> Thanks >>>> Zhou >>>> >>>> On 03/15/2016 02:34 PM, Minfei Huang wrote: >>>>> Hi, Zhou. >>>>> >>>>> I have applied this patch base on 1.5.9. There are several testcases I >>>>> have tested. 
>>>>> >>>>> - makedumpfile --num-threads 64 -d 31 >>>>> real 0m0.010s >>>>> user 0m0.002s >>>>> sys 0m0.009s >>>>> >>>>> - makedumpfile --num-threads 31 -d 31 >>>>> real 2m40.915s >>>>> user 10m50.900s >>>>> sys 23m9.664s >>>>> >>>>> makedumpfile --num-threads 30 -d 31 >>>>> real 0m0.006s >>>>> user 0m0.002s >>>>> sys 0m0.004s >>>>> >>>>> makedumpfile --num-threads 32 -d 31 >>>>> real 0m0.007s >>>>> user 0m0.002s >>>>> sys 0m0.005s >>>>> >>>>> - makedumpfile --num-threads 8 -d 31 >>>>> real 2m32.692s >>>>> user 7m4.630s >>>>> sys 2m0.369s >>>>> >>>>> - makedumpfile --num-threads 1 -d 31 >>>>> real 4m42.423s >>>>> user 7m27.153s >>>>> sys 0m22.490s >>>>> >>>>> - makedumpfile.orig -d 31 >>>>> real 4m1.297s >>>>> user 3m39.696s >>>>> sys 0m15.200s >>>>> >>>>> This patch has a huge increment to the filter performance under 31. But >>>>> it is not stable, since makedumpfile fails to dump vmcore intermittently. >>>>> You can find the above test result, makedumpfile fails to dump vmcore >>>>> with option --num-threads 64, also it may occur with option >>>>> --number-threads 8. >>>>> >>>>> Thanks >>>>> Minfei >>>>> >>>>> On 03/09/16 at 08:27am, Zhou Wenjian wrote: >>>>>> v4: >>>>>> 1. fix a bug caused by the logic >>>>>> v3: >>>>>> 1. remove some unused variables >>>>>> 2. fix a bug caused by the wrong logic >>>>>> 3. fix a bug caused by optimising >>>>>> 4. improve more performance by using Minoru Usui's code >>>>>> >>>>>> multi-threads implementation will introduce extra cost when handling >>>>>> each page. The origin implementation will also do the extra work for >>>>>> filtered pages. So there is a big performance degradation in >>>>>> --num-threads -d 31. >>>>>> The new implementation won't do the extra work for filtered pages any >>>>>> more. So the performance of -d 31 is close to that of serial processing. >>>>>> >>>>>> The new implementation is just like the following: >>>>>> * The basic idea is producer producing page and consumer writing page. 
>>>>>> * Each producer have a page_flag_buf list which is used for storing >>>>>> page's description. >>>>>> * The size of page_flag_buf is little so it won't take too much memory. >>>>>> * And all producers will share a page_data_buf array which is >>>>>> used for storing page's compressed data. >>>>>> * The main thread is the consumer. It will find the next pfn and write >>>>>> it into file. >>>>>> * The next pfn is smallest pfn in all page_flag_buf. >>>>>> >>>>>> Signed-off-by: Minoru Usui <min-usui@ti.jp.nec.com> >>>>>> Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> >>>>>> --- >>>>>> makedumpfile.c | 298 +++++++++++++++++++++++++++++++++++---------------------- >>>>>> makedumpfile.h | 35 ++++--- >>>>>> 2 files changed, 202 insertions(+), 131 deletions(-) >>>>>> >>>>>> diff --git a/makedumpfile.c b/makedumpfile.c >>>>>> index fa0b779..2b0864a 100644 >>>>>> --- a/makedumpfile.c >>>>>> +++ b/makedumpfile.c >>>>>> @@ -3483,7 +3483,8 @@ initial_for_parallel() >>>>>> unsigned long page_data_buf_size; >>>>>> unsigned long limit_size; >>>>>> int page_data_num; >>>>>> - int i; >>>>>> + struct page_flag *current; >>>>>> + int i, j; >>>>>> >>>>>> len_buf_out = calculate_len_buf_out(info->page_size); >>>>>> >>>>>> @@ -3560,10 +3561,16 @@ initial_for_parallel() >>>>>> >>>>>> limit_size = (get_free_memory_size() >>>>>> - MAP_REGION * info->num_threads) * 0.6; >>>>>> + if (limit_size < 0) { >>>>>> + MSG("Free memory is not enough for multi-threads\n"); >>>>>> + return FALSE; >>>>>> + } >>>>>> >>>>>> page_data_num = limit_size / page_data_buf_size; >>>>>> + info->num_buffers = 3 * info->num_threads; >>>>>> >>>>>> - info->num_buffers = MIN(NUM_BUFFERS, page_data_num); >>>>>> + info->num_buffers = MAX(info->num_buffers, NUM_BUFFERS); >>>>>> + info->num_buffers = MIN(info->num_buffers, page_data_num); >>>>>> >>>>>> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", >>>>>> info->num_buffers); >>>>>> @@ -3588,6 +3595,36 @@ initial_for_parallel() >>>>>> } 
>>>>>> >>>>>> /* >>>>>> + * initial page_flag for each thread >>>>>> + */ >>>>>> + if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) >>>>>> + == NULL) { >>>>>> + MSG("Can't allocate memory for page_flag_buf. %s\n", >>>>>> + strerror(errno)); >>>>>> + return FALSE; >>>>>> + } >>>>>> + memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads); >>>>>> + >>>>>> + for (i = 0; i < info->num_threads; i++) { >>>>>> + if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) { >>>>>> + MSG("Can't allocate memory for page_flag. %s\n", >>>>>> + strerror(errno)); >>>>>> + return FALSE; >>>>>> + } >>>>>> + current = info->page_flag_buf[i]; >>>>>> + >>>>>> + for (j = 1; j < NUM_BUFFERS; j++) { >>>>>> + if ((current->next = calloc(1, sizeof(struct page_flag))) == NULL) { >>>>>> + MSG("Can't allocate memory for page_flag. %s\n", >>>>>> + strerror(errno)); >>>>>> + return FALSE; >>>>>> + } >>>>>> + current = current->next; >>>>>> + } >>>>>> + current->next = info->page_flag_buf[i]; >>>>>> + } >>>>>> + >>>>>> + /* >>>>>> * initial fd_memory for threads >>>>>> */ >>>>>> for (i = 0; i < info->num_threads; i++) { >>>>>> @@ -3612,7 +3649,8 @@ initial_for_parallel() >>>>>> void >>>>>> free_for_parallel() >>>>>> { >>>>>> - int i; >>>>>> + int i, j; >>>>>> + struct page_flag *current; >>>>>> >>>>>> if (info->threads != NULL) { >>>>>> for (i = 0; i < info->num_threads; i++) { >>>>>> @@ -3655,6 +3693,19 @@ free_for_parallel() >>>>>> free(info->page_data_buf); >>>>>> } >>>>>> >>>>>> + if (info->page_flag_buf != NULL) { >>>>>> + for (i = 0; i < info->num_threads; i++) { >>>>>> + for (j = 0; j < NUM_BUFFERS; j++) { >>>>>> + if (info->page_flag_buf[i] != NULL) { >>>>>> + current = info->page_flag_buf[i]; >>>>>> + info->page_flag_buf[i] = current->next; >>>>>> + free(current); >>>>>> + } >>>>>> + } >>>>>> + } >>>>>> + free(info->page_flag_buf); >>>>>> + } >>>>>> + >>>>>> if (info->parallel_info == NULL) >>>>>> return; >>>>>> >>>>>> @@ -7075,11 
+7126,11 @@ void * >>>>>> kdump_thread_function_cyclic(void *arg) { >>>>>> void *retval = PTHREAD_FAIL; >>>>>> struct thread_args *kdump_thread_args = (struct thread_args *)arg; >>>>>> - struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>>>>> + volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>>>>> + volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; >>>>>> struct cycle *cycle = kdump_thread_args->cycle; >>>>>> - int page_data_num = kdump_thread_args->page_data_num; >>>>>> - mdf_pfn_t pfn; >>>>>> - int index; >>>>>> + mdf_pfn_t pfn = cycle->start_pfn; >>>>>> + int index = kdump_thread_args->thread_num; >>>>>> int buf_ready; >>>>>> int dumpable; >>>>>> int fd_memory = 0; >>>>>> @@ -7125,47 +7176,48 @@ kdump_thread_function_cyclic(void *arg) { >>>>>> kdump_thread_args->thread_num); >>>>>> } >>>>>> >>>>>> - while (1) { >>>>>> - /* get next pfn */ >>>>>> - pthread_mutex_lock(&info->current_pfn_mutex); >>>>>> - pfn = info->current_pfn; >>>>>> - info->current_pfn++; >>>>>> - pthread_mutex_unlock(&info->current_pfn_mutex); >>>>>> - >>>>>> - if (pfn >= kdump_thread_args->end_pfn) >>>>>> - break; >>>>>> - >>>>>> - index = -1; >>>>>> + /* >>>>>> + * filtered page won't take anything >>>>>> + * unfiltered zero page will only take a page_flag_buf >>>>>> + * unfiltered non-zero page will take a page_flag_buf and a page_data_buf >>>>>> + */ >>>>>> + while (pfn < cycle->end_pfn) { >>>>>> buf_ready = FALSE; >>>>>> >>>>>> + pthread_mutex_lock(&info->page_data_mutex); >>>>>> + while (page_data_buf[index].used != FALSE) { >>>>>> + index = (index + 1) % info->num_buffers; >>>>>> + } >>>>>> + page_data_buf[index].used = TRUE; >>>>>> + pthread_mutex_unlock(&info->page_data_mutex); >>>>>> + >>>>>> while (buf_ready == FALSE) { >>>>>> pthread_testcancel(); >>>>>> - >>>>>> - index = pfn % page_data_num; >>>>>> - >>>>>> - if (pfn - info->consumed_pfn > info->num_buffers) >>>>>> + if (page_flag_buf->ready == FLAG_READY) 
>>>>>> continue; >>>>>> >>>>>> - if (page_data_buf[index].ready != 0) >>>>>> - continue; >>>>>> - >>>>>> - pthread_mutex_lock(&page_data_buf[index].mutex); >>>>>> - >>>>>> - if (page_data_buf[index].ready != 0) >>>>>> - goto unlock; >>>>>> - >>>>>> - buf_ready = TRUE; >>>>>> + /* get next dumpable pfn */ >>>>>> + pthread_mutex_lock(&info->current_pfn_mutex); >>>>>> + for (pfn = info->current_pfn; pfn < cycle->end_pfn; pfn++) { >>>>>> + dumpable = is_dumpable( >>>>>> + info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >>>>>> + pfn, >>>>>> + cycle); >>>>>> + if (dumpable) >>>>>> + break; >>>>>> + } >>>>>> + info->current_pfn = pfn + 1; >>>>>> >>>>>> - page_data_buf[index].pfn = pfn; >>>>>> - page_data_buf[index].ready = 1; >>>>>> + page_flag_buf->pfn = pfn; >>>>>> + page_flag_buf->ready = FLAG_FILLING; >>>>>> + pthread_mutex_unlock(&info->current_pfn_mutex); >>>>>> + sem_post(&info->page_flag_buf_sem); >>>>>> >>>>>> - dumpable = is_dumpable( >>>>>> - info->fd_bitmap ? &bitmap_parallel : info->bitmap2, >>>>>> - pfn, >>>>>> - cycle); >>>>>> - page_data_buf[index].dumpable = dumpable; >>>>>> - if (!dumpable) >>>>>> - goto unlock; >>>>>> + if (pfn >= cycle->end_pfn) { >>>>>> + info->current_pfn = cycle->end_pfn; >>>>>> + page_data_buf[index].used = FALSE; >>>>>> + break; >>>>>> + } >>>>>> >>>>>> if (!read_pfn_parallel(fd_memory, pfn, buf, >>>>>> &bitmap_memory_parallel, >>>>>> @@ -7178,11 +7230,11 @@ kdump_thread_function_cyclic(void *arg) { >>>>>> >>>>>> if ((info->dump_level & DL_EXCLUDE_ZERO) >>>>>> && is_zero_page(buf, info->page_size)) { >>>>>> - page_data_buf[index].zero = TRUE; >>>>>> - goto unlock; >>>>>> + page_flag_buf->zero = TRUE; >>>>>> + goto next; >>>>>> } >>>>>> >>>>>> - page_data_buf[index].zero = FALSE; >>>>>> + page_flag_buf->zero = FALSE; >>>>>> >>>>>> /* >>>>>> * Compress the page data. 
>>>>>> @@ -7210,6 +7262,7 @@ kdump_thread_function_cyclic(void *arg) { >>>>>> page_data_buf[index].flags = >>>>>> DUMP_DH_COMPRESSED_LZO; >>>>>> page_data_buf[index].size = size_out; >>>>>> + >>>>>> memcpy(page_data_buf[index].buf, buf_out, size_out); >>>>>> #endif >>>>>> #ifdef USESNAPPY >>>>>> @@ -7232,12 +7285,14 @@ kdump_thread_function_cyclic(void *arg) { >>>>>> page_data_buf[index].size = info->page_size; >>>>>> memcpy(page_data_buf[index].buf, buf, info->page_size); >>>>>> } >>>>>> -unlock: >>>>>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>>>>> + page_flag_buf->index = index; >>>>>> + buf_ready = TRUE; >>>>>> +next: >>>>>> + page_flag_buf->ready = FLAG_READY; >>>>>> + page_flag_buf = page_flag_buf->next; >>>>>> >>>>>> } >>>>>> } >>>>>> - >>>>>> retval = NULL; >>>>>> >>>>>> fail: >>>>>> @@ -7265,14 +7320,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>> struct page_desc pd; >>>>>> struct timeval tv_start; >>>>>> struct timeval last, new; >>>>>> - unsigned long long consuming_pfn; >>>>>> pthread_t **threads = NULL; >>>>>> struct thread_args *kdump_thread_args = NULL; >>>>>> void *thread_result; >>>>>> - int page_data_num; >>>>>> + int page_buf_num; >>>>>> struct page_data *page_data_buf = NULL; >>>>>> int i; >>>>>> int index; >>>>>> + int end_count, consuming, check_count; >>>>>> + mdf_pfn_t current_pfn, temp_pfn; >>>>>> >>>>>> if (info->flag_elf_dumpfile) >>>>>> return FALSE; >>>>>> @@ -7284,13 +7340,6 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>> goto out; >>>>>> } >>>>>> >>>>>> - res = pthread_mutex_init(&info->consumed_pfn_mutex, NULL); >>>>>> - if (res != 0) { >>>>>> - ERRMSG("Can't initialize consumed_pfn_mutex. %s\n", >>>>>> - strerror(res)); >>>>>> - goto out; >>>>>> - } >>>>>> - >>>>>> res = pthread_mutex_init(&info->filter_mutex, NULL); >>>>>> if (res != 0) { >>>>>> ERRMSG("Can't initialize filter_mutex. 
%s\n", strerror(res)); >>>>>> @@ -7314,36 +7363,23 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>> end_pfn = cycle->end_pfn; >>>>>> >>>>>> info->current_pfn = start_pfn; >>>>>> - info->consumed_pfn = start_pfn - 1; >>>>>> >>>>>> threads = info->threads; >>>>>> kdump_thread_args = info->kdump_thread_args; >>>>>> >>>>>> - page_data_num = info->num_buffers; >>>>>> + page_buf_num = info->num_buffers; >>>>>> page_data_buf = info->page_data_buf; >>>>>> + pthread_mutex_init(&info->page_data_mutex, NULL); >>>>>> + sem_init(&info->page_flag_buf_sem, 0, 0); >>>>>> >>>>>> - for (i = 0; i < page_data_num; i++) { >>>>>> - /* >>>>>> - * producer will use pfn in page_data_buf to decide the >>>>>> - * consumed pfn >>>>>> - */ >>>>>> - page_data_buf[i].pfn = start_pfn - 1; >>>>>> - page_data_buf[i].ready = 0; >>>>>> - res = pthread_mutex_init(&page_data_buf[i].mutex, NULL); >>>>>> - if (res != 0) { >>>>>> - ERRMSG("Can't initialize mutex of page_data_buf. %s\n", >>>>>> - strerror(res)); >>>>>> - goto out; >>>>>> - } >>>>>> - } >>>>>> + for (i = 0; i < page_buf_num; i++) >>>>>> + page_data_buf[i].used = FALSE; >>>>>> >>>>>> for (i = 0; i < info->num_threads; i++) { >>>>>> kdump_thread_args[i].thread_num = i; >>>>>> kdump_thread_args[i].len_buf_out = len_buf_out; >>>>>> - kdump_thread_args[i].start_pfn = start_pfn; >>>>>> - kdump_thread_args[i].end_pfn = end_pfn; >>>>>> - kdump_thread_args[i].page_data_num = page_data_num; >>>>>> kdump_thread_args[i].page_data_buf = page_data_buf; >>>>>> + kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; >>>>>> kdump_thread_args[i].cycle = cycle; >>>>>> >>>>>> res = pthread_create(threads[i], NULL, >>>>>> @@ -7356,55 +7392,88 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>> } >>>>>> } >>>>>> >>>>>> - consuming_pfn = start_pfn; >>>>>> - index = -1; >>>>>> + end_count = 0; >>>>>> + while (1) { >>>>>> + consuming = 0; >>>>>> + check_count = 0; >>>>>> >>>>>> - gettimeofday(&last, NULL); 
>>>>>> + /* >>>>>> + * The basic idea is producer producing page and consumer writing page. >>>>>> + * Each producer have a page_flag_buf list which is used for storing page's description. >>>>>> + * The size of page_flag_buf is little so it won't take too much memory. >>>>>> + * And all producers will share a page_data_buf array which is used for storing page's compressed data. >>>>>> + * The main thread is the consumer. It will find the next pfn and write it into file. >>>>>> + * The next pfn is smallest pfn in all page_flag_buf. >>>>>> + */ >>>>>> + sem_wait(&info->page_flag_buf_sem); >>>>>> + gettimeofday(&last, NULL); >>>>>> + while (1) { >>>>>> + current_pfn = end_pfn; >>>>>> >>>>>> - while (consuming_pfn < end_pfn) { >>>>>> - index = consuming_pfn % page_data_num; >>>>>> + /* >>>>>> + * page_flag_buf is in circular linked list. >>>>>> + * The array info->page_flag_buf[] records the current page_flag_buf in each thread's >>>>>> + * page_flag_buf list. >>>>>> + * consuming is used for recording in which thread the pfn is the smallest. >>>>>> + * current_pfn is used for recording the value of pfn when checking the pfn. >>>>>> + */ >>>>>> + for (i = 0; i < info->num_threads; i++) { >>>>>> + if (info->page_flag_buf[i]->ready == FLAG_UNUSED) >>>>>> + continue; >>>>>> + temp_pfn = info->page_flag_buf[i]->pfn; >>>>>> >>>>>> - gettimeofday(&new, NULL); >>>>>> - if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>>>>> - ERRMSG("Can't get data of pfn %llx.\n", consuming_pfn); >>>>>> - goto out; >>>>>> - } >>>>>> + /* >>>>>> + * count how many threads have reached the end. 
>>>>>> + */ >>>>>> + if (temp_pfn >= end_pfn) { >>>>>> + info->page_flag_buf[i]->ready = FLAG_UNUSED; >>>>>> + end_count++; >>>>>> + continue; >>>>>> + } >>>>>> >>>>>> - /* >>>>>> - * check pfn first without mutex locked to reduce the time >>>>>> - * trying to lock the mutex >>>>>> - */ >>>>>> - if (page_data_buf[index].pfn != consuming_pfn) >>>>>> - continue; >>>>>> + if (current_pfn < temp_pfn) >>>>>> + continue; >>>>>> >>>>>> - if (pthread_mutex_trylock(&page_data_buf[index].mutex) != 0) >>>>>> - continue; >>>>>> + check_count++; >>>>>> + consuming = i; >>>>>> + current_pfn = temp_pfn; >>>>>> + } >>>>>> >>>>>> - /* check whether the found one is ready to be consumed */ >>>>>> - if (page_data_buf[index].pfn != consuming_pfn || >>>>>> - page_data_buf[index].ready != 1) { >>>>>> - goto unlock; >>>>>> + /* >>>>>> + * If all the threads have reached the end, we will finish writing. >>>>>> + */ >>>>>> + if (end_count >= info->num_threads) >>>>>> + goto finish; >>>>>> + >>>>>> + /* >>>>>> + * If the page_flag_buf is not ready, the pfn recorded may be changed. >>>>>> + * So we should recheck. 
>>>>>> + */ >>>>>> + if (info->page_flag_buf[consuming]->ready != FLAG_READY) { >>>>>> + gettimeofday(&new, NULL); >>>>>> + if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>>>>> + ERRMSG("Can't get data of pfn.\n"); >>>>>> + goto out; >>>>>> + } >>>>>> + continue; >>>>>> + } >>>>>> + >>>>>> + if (current_pfn == info->page_flag_buf[consuming]->pfn) >>>>>> + break; >>>>>> } >>>>>> >>>>>> if ((num_dumped % per) == 0) >>>>>> print_progress(PROGRESS_COPY, num_dumped, info->num_dumpable); >>>>>> >>>>>> - /* next pfn is found, refresh last here */ >>>>>> - last = new; >>>>>> - consuming_pfn++; >>>>>> - info->consumed_pfn++; >>>>>> - page_data_buf[index].ready = 0; >>>>>> - >>>>>> - if (page_data_buf[index].dumpable == FALSE) >>>>>> - goto unlock; >>>>>> - >>>>>> num_dumped++; >>>>>> >>>>>> - if (page_data_buf[index].zero == TRUE) { >>>>>> + >>>>>> + if (info->page_flag_buf[consuming]->zero == TRUE) { >>>>>> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) >>>>>> goto out; >>>>>> pfn_zero++; >>>>>> } else { >>>>>> + index = info->page_flag_buf[consuming]->index; >>>>>> pd.flags = page_data_buf[index].flags; >>>>>> pd.size = page_data_buf[index].size; >>>>>> pd.page_flags = 0; >>>>>> @@ -7420,12 +7489,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>>>> */ >>>>>> if (!write_cache(cd_page, page_data_buf[index].buf, pd.size)) >>>>>> goto out; >>>>>> - >>>>>> + page_data_buf[index].used = FALSE; >>>>>> } >>>>>> -unlock: >>>>>> - pthread_mutex_unlock(&page_data_buf[index].mutex); >>>>>> + info->page_flag_buf[consuming]->ready = FLAG_UNUSED; >>>>>> + info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; >>>>>> } >>>>>> - >>>>>> +finish: >>>>>> ret = TRUE; >>>>>> /* >>>>>> * print [100 %] >>>>>> @@ -7463,15 +7532,9 @@ out: >>>>>> } >>>>>> } >>>>>> >>>>>> - if (page_data_buf != NULL) { >>>>>> - for (i = 0; i < page_data_num; i++) { >>>>>> - pthread_mutex_destroy(&page_data_buf[i].mutex); >>>>>> - } >>>>>> - } >>>>>> - >>>>>> + 
sem_destroy(&info->page_flag_buf_sem); >>>>>> pthread_rwlock_destroy(&info->usemmap_rwlock); >>>>>> pthread_mutex_destroy(&info->filter_mutex); >>>>>> - pthread_mutex_destroy(&info->consumed_pfn_mutex); >>>>>> pthread_mutex_destroy(&info->current_pfn_mutex); >>>>>> >>>>>> return ret; >>>>>> @@ -7564,6 +7627,7 @@ write_kdump_pages_cyclic(struct cache_data *cd_header, struct cache_data *cd_pag >>>>>> num_dumped++; >>>>>> if (!read_pfn(pfn, buf)) >>>>>> goto out; >>>>>> + >>>>>> filter_data_buffer(buf, pfn_to_paddr(pfn), info->page_size); >>>>>> >>>>>> /* >>>>>> diff --git a/makedumpfile.h b/makedumpfile.h >>>>>> index e0b5bbf..4b315c0 100644 >>>>>> --- a/makedumpfile.h >>>>>> +++ b/makedumpfile.h >>>>>> @@ -44,6 +44,7 @@ >>>>>> #include "print_info.h" >>>>>> #include "sadump_mod.h" >>>>>> #include <pthread.h> >>>>>> +#include <semaphore.h> >>>>>> >>>>>> /* >>>>>> * Result of command >>>>>> @@ -977,7 +978,7 @@ typedef unsigned long long int ulonglong; >>>>>> #define PAGE_DATA_NUM (50) >>>>>> #define WAIT_TIME (60 * 10) >>>>>> #define PTHREAD_FAIL ((void *)-2) >>>>>> -#define NUM_BUFFERS (50) >>>>>> +#define NUM_BUFFERS (20) >>>>>> >>>>>> struct mmap_cache { >>>>>> char *mmap_buf; >>>>>> @@ -985,28 +986,33 @@ struct mmap_cache { >>>>>> off_t mmap_end_offset; >>>>>> }; >>>>>> >>>>>> +enum { >>>>>> + FLAG_UNUSED, >>>>>> + FLAG_READY, >>>>>> + FLAG_FILLING >>>>>> +}; >>>>>> +struct page_flag { >>>>>> + mdf_pfn_t pfn; >>>>>> + char zero; >>>>>> + char ready; >>>>>> + short index; >>>>>> + struct page_flag *next; >>>>>> +}; >>>>>> + >>>>>> struct page_data >>>>>> { >>>>>> - mdf_pfn_t pfn; >>>>>> - int dumpable; >>>>>> - int zero; >>>>>> - unsigned int flags; >>>>>> long size; >>>>>> unsigned char *buf; >>>>>> - pthread_mutex_t mutex; >>>>>> - /* >>>>>> - * whether the page_data is ready to be consumed >>>>>> - */ >>>>>> - int ready; >>>>>> + int flags; >>>>>> + int used; >>>>>> }; >>>>>> >>>>>> struct thread_args { >>>>>> int thread_num; >>>>>> unsigned long len_buf_out; 
>>>>>> - mdf_pfn_t start_pfn, end_pfn; >>>>>> - int page_data_num; >>>>>> struct cycle *cycle; >>>>>> struct page_data *page_data_buf; >>>>>> + struct page_flag *page_flag_buf; >>>>>> }; >>>>>> >>>>>> /* >>>>>> @@ -1295,11 +1301,12 @@ struct DumpInfo { >>>>>> pthread_t **threads; >>>>>> struct thread_args *kdump_thread_args; >>>>>> struct page_data *page_data_buf; >>>>>> + struct page_flag **page_flag_buf; >>>>>> + sem_t page_flag_buf_sem; >>>>>> pthread_rwlock_t usemmap_rwlock; >>>>>> mdf_pfn_t current_pfn; >>>>>> pthread_mutex_t current_pfn_mutex; >>>>>> - mdf_pfn_t consumed_pfn; >>>>>> - pthread_mutex_t consumed_pfn_mutex; >>>>>> + pthread_mutex_t page_data_mutex; >>>>>> pthread_mutex_t filter_mutex; >>>>>> }; >>>>>> extern struct DumpInfo *info; >>>>>> -- >>>>>> 1.8.3.1 >>>>>> >>>>>> >>>>>> >>>>>> >>>>>> _______________________________________________ >>>>>> kexec mailing list >>>>>> kexec@lists.infradead.org >>>>>> http://lists.infradead.org/mailman/listinfo/kexec >>>> >>>> >>>> >>> >>>> From 1d7ad5dbbc29efe58171b1023ab0df09eb2815bf Mon Sep 17 00:00:00 2001 >>>> From: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> >>>> Date: Fri, 18 Mar 2016 10:35:35 +0800 >>>> Subject: [PATCH] increment >>>> >>>> --- >>>> makedumpfile.c | 107 ++++++++++++++++++++++++++++++++++++++++++--------------- >>>> makedumpfile.h | 12 +++++-- >>>> 2 files changed, 89 insertions(+), 30 deletions(-) >>>> >>>> diff --git a/makedumpfile.c b/makedumpfile.c >>>> index 2b0864a..a304a61 100644 >>>> --- a/makedumpfile.c >>>> +++ b/makedumpfile.c >>>> @@ -3477,6 +3477,42 @@ calibrate_machdep_info(void) >>>> } >>>> >>>> int >>>> +initial_parallel_area(int page_data_buf_size) >>>> +{ >>>> + int i, j; >>>> + struct page_flag *current; >>>> + info->page_data_buf = info->parallel_area; >>>> + void *page_data_buf = info->parallel_area + sizeof(struct page_data) * info->num_buffers; >>>> + void *page_flag_list = page_data_buf + page_data_buf_size * info->num_buffers; >>>> + >>>> + for (i = 0; i < 
info->num_buffers; i++) { >>>> + info->page_data_buf[i].buf = page_data_buf + page_data_buf_size * i; >>>> + } >>>> + >>>> + >>>> + if ((info->page_flag_list = malloc(sizeof(struct page_flag_list) * info->num_threads)) >>>> + == NULL) { >>>> + MSG("Can't allocate memory for page_flag_buf. %s\n", >>>> + strerror(errno)); >>>> + return FALSE; >>>> + } >>>> + >>>> + for (i = 0; i < info->num_threads; i++) { >>>> + info->page_flag_list[i].header = page_flag_list + sizeof(struct page_flag) * info->num_buffers * i; >>>> + info->page_flag_list[i].current = 0; >>>> + /* >>>> + current = info->page_flag_buf[i]; >>>> + for (j = 1; j < info->num_buffers; j++) { >>>> + current->next = current + sizeof(struct page_flag); >>>> + current = current->next; >>>> + } >>>> + current->next = info->page_flag_buf[i]; >>>> +*/ >>>> + } >>>> + >>>> +} >>>> + >>>> +int >>>> initial_for_parallel() >>>> { >>>> unsigned long len_buf_out; >>>> @@ -3575,9 +3611,15 @@ initial_for_parallel() >>>> DEBUG_MSG("Number of struct page_data for produce/consume: %d\n", >>>> info->num_buffers); >>>> >>>> - /* >>>> - * allocate memory for page_data >>>> - */ >>>> + if ((info->parallel_area = calloc(info->num_buffers, sizeof(struct page_data)+page_data_buf_size + sizeof(struct page_flag)*info->num_threads)) >>>> + ==NULL) { >>>> + MSG("Can't allocate memory for page_data_buf. %s\n", >>>> + strerror(errno)); >>>> + return FALSE; >>>> + } >>>> + >>>> + initial_parallel_area(page_data_buf_size); >>>> +/* >>>> if ((info->page_data_buf = malloc(sizeof(struct page_data) * info->num_buffers)) >>>> == NULL) { >>>> MSG("Can't allocate memory for page_data_buf. %s\n", >>>> @@ -3594,9 +3636,6 @@ initial_for_parallel() >>>> } >>>> } >>>> >>>> - /* >>>> - * initial page_flag for each thread >>>> - */ >>>> if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads)) >>>> == NULL) { >>>> MSG("Can't allocate memory for page_flag_buf. 
%s\n", >>>> @@ -3623,7 +3662,7 @@ initial_for_parallel() >>>> } >>>> current->next = info->page_flag_buf[i]; >>>> } >>>> - >>>> +*/ >>>> /* >>>> * initial fd_memory for threads >>>> */ >>>> @@ -3685,6 +3724,15 @@ free_for_parallel() >>>> if (info->kdump_thread_args != NULL) >>>> free(info->kdump_thread_args); >>>> >>>> + if (info->page_flag_list != NULL) { >>>> + free(info->page_flag_list); >>>> + } >>>> + >>>> + if (info->parallel_area != NULL) { >>>> + free(info->parallel_area); >>>> + } >>>> + >>>> +/* >>>> if (info->page_data_buf != NULL) { >>>> for (i = 0; i < info->num_buffers; i++) { >>>> if (info->page_data_buf[i].buf != NULL) >>>> @@ -3705,7 +3753,7 @@ free_for_parallel() >>>> } >>>> free(info->page_flag_buf); >>>> } >>>> - >>>> +*/ >>>> if (info->parallel_info == NULL) >>>> return; >>>> >>>> @@ -7122,12 +7170,14 @@ int finalize_zlib(z_stream *stream) >>>> return err; >>>> } >>>> >>>> +#define CURRENT_FLAG page_flag_header[current_page_flag] >>>> void * >>>> kdump_thread_function_cyclic(void *arg) { >>>> void *retval = PTHREAD_FAIL; >>>> struct thread_args *kdump_thread_args = (struct thread_args *)arg; >>>> volatile struct page_data *page_data_buf = kdump_thread_args->page_data_buf; >>>> - volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf; >>>> + struct page_flag *page_flag_header = kdump_thread_args->page_flag_list->header; >>>> + int current_page_flag = kdump_thread_args->page_flag_list->current; >>>> struct cycle *cycle = kdump_thread_args->cycle; >>>> mdf_pfn_t pfn = cycle->start_pfn; >>>> int index = kdump_thread_args->thread_num; >>>> @@ -7193,7 +7243,7 @@ kdump_thread_function_cyclic(void *arg) { >>>> >>>> while (buf_ready == FALSE) { >>>> pthread_testcancel(); >>>> - if (page_flag_buf->ready == FLAG_READY) >>>> + if (CURRENT_FLAG.ready == FLAG_READY) >>>> continue; >>>> >>>> /* get next dumpable pfn */ >>>> @@ -7208,8 +7258,8 @@ kdump_thread_function_cyclic(void *arg) { >>>> } >>>> info->current_pfn = pfn + 1; >>>> >>>> - 
page_flag_buf->pfn = pfn; >>>> - page_flag_buf->ready = FLAG_FILLING; >>>> + CURRENT_FLAG.pfn = pfn; >>>> + CURRENT_FLAG.ready = FLAG_FILLING; >>>> pthread_mutex_unlock(&info->current_pfn_mutex); >>>> sem_post(&info->page_flag_buf_sem); >>>> >>>> @@ -7230,11 +7280,11 @@ kdump_thread_function_cyclic(void *arg) { >>>> >>>> if ((info->dump_level & DL_EXCLUDE_ZERO) >>>> && is_zero_page(buf, info->page_size)) { >>>> - page_flag_buf->zero = TRUE; >>>> + CURRENT_FLAG.zero = TRUE; >>>> goto next; >>>> } >>>> >>>> - page_flag_buf->zero = FALSE; >>>> + CURRENT_FLAG.zero = FALSE; >>>> >>>> /* >>>> * Compress the page data. >>>> @@ -7285,11 +7335,11 @@ kdump_thread_function_cyclic(void *arg) { >>>> page_data_buf[index].size = info->page_size; >>>> memcpy(page_data_buf[index].buf, buf, info->page_size); >>>> } >>>> - page_flag_buf->index = index; >>>> + CURRENT_FLAG.index = index; >>>> buf_ready = TRUE; >>>> next: >>>> - page_flag_buf->ready = FLAG_READY; >>>> - page_flag_buf = page_flag_buf->next; >>>> + CURRENT_FLAG.ready = FLAG_READY; >>>> + current_page_flag = (current_page_flag + 1) % info->num_buffers; >>>> >>>> } >>>> } >>>> @@ -7306,6 +7356,8 @@ fail: >>>> pthread_exit(retval); >>>> } >>>> >>>> +#define CURRENT_PAGE_FLAG(i) (info->page_flag_list[i].header)[info->page_flag_list[i].current] >>>> + >>>> int >>>> write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> struct cache_data *cd_page, >>>> @@ -7379,7 +7431,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> kdump_thread_args[i].thread_num = i; >>>> kdump_thread_args[i].len_buf_out = len_buf_out; >>>> kdump_thread_args[i].page_data_buf = page_data_buf; >>>> - kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i]; >>>> + kdump_thread_args[i].page_flag_list = &(info->page_flag_list[i]); >>>> kdump_thread_args[i].cycle = cycle; >>>> >>>> res = pthread_create(threads[i], NULL, >>>> @@ -7418,15 +7470,15 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> * 
current_pfn is used for recording the value of pfn when checking the pfn. >>>> */ >>>> for (i = 0; i < info->num_threads; i++) { >>>> - if (info->page_flag_buf[i]->ready == FLAG_UNUSED) >>>> + if (CURRENT_PAGE_FLAG(i).ready == FLAG_UNUSED) >>>> continue; >>>> - temp_pfn = info->page_flag_buf[i]->pfn; >>>> + temp_pfn = CURRENT_PAGE_FLAG(i).pfn; >>>> >>>> /* >>>> * count how many threads have reached the end. >>>> */ >>>> if (temp_pfn >= end_pfn) { >>>> - info->page_flag_buf[i]->ready = FLAG_UNUSED; >>>> + CURRENT_PAGE_FLAG(i).ready = FLAG_UNUSED; >>>> end_count++; >>>> continue; >>>> } >>>> @@ -7449,7 +7501,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> * If the page_flag_buf is not ready, the pfn recorded may be changed. >>>> * So we should recheck. >>>> */ >>>> - if (info->page_flag_buf[consuming]->ready != FLAG_READY) { >>>> + if (CURRENT_PAGE_FLAG(consuming).ready != FLAG_READY) { >>>> gettimeofday(&new, NULL); >>>> if (new.tv_sec - last.tv_sec > WAIT_TIME) { >>>> ERRMSG("Can't get data of pfn.\n"); >>>> @@ -7458,7 +7510,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> continue; >>>> } >>>> >>>> - if (current_pfn == info->page_flag_buf[consuming]->pfn) >>>> + if (current_pfn == CURRENT_PAGE_FLAG(consuming).pfn) >>>> break; >>>> } >>>> >>>> @@ -7468,12 +7520,12 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> num_dumped++; >>>> >>>> >>>> - if (info->page_flag_buf[consuming]->zero == TRUE) { >>>> + if (CURRENT_PAGE_FLAG(consuming).zero == TRUE) { >>>> if (!write_cache(cd_header, pd_zero, sizeof(page_desc_t))) >>>> goto out; >>>> pfn_zero++; >>>> } else { >>>> - index = info->page_flag_buf[consuming]->index; >>>> + index = CURRENT_PAGE_FLAG(consuming).index; >>>> pd.flags = page_data_buf[index].flags; >>>> pd.size = page_data_buf[index].size; >>>> pd.page_flags = 0; >>>> @@ -7491,8 +7543,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >>>> goto out; >>>> 
page_data_buf[index].used = FALSE; >>>> } >>>> - info->page_flag_buf[consuming]->ready = FLAG_UNUSED; >>>> - info->page_flag_buf[consuming] = info->page_flag_buf[consuming]->next; >>>> + CURRENT_PAGE_FLAG(consuming).ready = FLAG_UNUSED; >>>> + info->page_flag_list[consuming].current += 1; >>>> + info->page_flag_list[consuming].current %= info->num_buffers; >>>> } >>>> finish: >>>> ret = TRUE; >>>> diff --git a/makedumpfile.h b/makedumpfile.h >>>> index 4b315c0..8c4bc1a 100644 >>>> --- a/makedumpfile.h >>>> +++ b/makedumpfile.h >>>> @@ -996,7 +996,12 @@ struct page_flag { >>>> char zero; >>>> char ready; >>>> short index; >>>> - struct page_flag *next; >>>> +// struct page_flag *next; >>>> +}; >>>> + >>>> +struct page_flag_list { >>>> + struct page_flag *header; >>>> + int current; >>>> }; >>>> >>>> struct page_data >>>> @@ -1012,7 +1017,7 @@ struct thread_args { >>>> unsigned long len_buf_out; >>>> struct cycle *cycle; >>>> struct page_data *page_data_buf; >>>> - struct page_flag *page_flag_buf; >>>> + struct page_flag_list *page_flag_list; >>>> }; >>>> >>>> /* >>>> @@ -1298,10 +1303,11 @@ struct DumpInfo { >>>> */ >>>> int num_threads; >>>> int num_buffers; >>>> + void *parallel_area; >>>> pthread_t **threads; >>>> struct thread_args *kdump_thread_args; >>>> struct page_data *page_data_buf; >>>> - struct page_flag **page_flag_buf; >>>> + struct page_flag_list *page_flag_list; >>>> sem_t page_flag_buf_sem; >>>> pthread_rwlock_t usemmap_rwlock; >>>> mdf_pfn_t current_pfn; >>>> -- >>>> 1.8.3.1 >>>> >> >> >> > >> From 1dbf68c21a2bbc7b454c1a742c1e3ff00bb85829 Mon Sep 17 00:00:00 2001 >> From: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com> >> Date: Fri, 18 Mar 2016 13:36:57 +0800 >> Subject: [PATCH] remove sem >> >> --- >> makedumpfile.c | 8 ++++---- >> makedumpfile.h | 4 ++-- >> 2 files changed, 6 insertions(+), 6 deletions(-) >> >> diff --git a/makedumpfile.c b/makedumpfile.c >> index 2b0864a..8de5e1d 100644 >> --- a/makedumpfile.c >> +++ b/makedumpfile.c >> @@ -7211,7 
+7211,7 @@ kdump_thread_function_cyclic(void *arg) { >> page_flag_buf->pfn = pfn; >> page_flag_buf->ready = FLAG_FILLING; >> pthread_mutex_unlock(&info->current_pfn_mutex); >> - sem_post(&info->page_flag_buf_sem); >> +// sem_post(&info->page_flag_buf_sem); >> >> if (pfn >= cycle->end_pfn) { >> info->current_pfn = cycle->end_pfn; >> @@ -7370,7 +7370,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> page_buf_num = info->num_buffers; >> page_data_buf = info->page_data_buf; >> pthread_mutex_init(&info->page_data_mutex, NULL); >> - sem_init(&info->page_flag_buf_sem, 0, 0); >> +// sem_init(&info->page_flag_buf_sem, 0, 0); >> >> for (i = 0; i < page_buf_num; i++) >> page_data_buf[i].used = FALSE; >> @@ -7405,7 +7405,7 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header, >> * The main thread is the consumer. It will find the next pfn and write it into file. >> * The next pfn is smallest pfn in all page_flag_buf. >> */ >> - sem_wait(&info->page_flag_buf_sem); >> +// sem_wait(&info->page_flag_buf_sem); >> gettimeofday(&last, NULL); >> while (1) { >> current_pfn = end_pfn; >> @@ -7532,7 +7532,7 @@ out: >> } >> } >> >> - sem_destroy(&info->page_flag_buf_sem); >> +// sem_destroy(&info->page_flag_buf_sem); >> pthread_rwlock_destroy(&info->usemmap_rwlock); >> pthread_mutex_destroy(&info->filter_mutex); >> pthread_mutex_destroy(&info->current_pfn_mutex); >> diff --git a/makedumpfile.h b/makedumpfile.h >> index 4b315c0..7627286 100644 >> --- a/makedumpfile.h >> +++ b/makedumpfile.h >> @@ -44,7 +44,7 @@ >> #include "print_info.h" >> #include "sadump_mod.h" >> #include <pthread.h> >> -#include <semaphore.h> >> +//#include <semaphore.h> >> >> /* >> * Result of command >> @@ -1302,7 +1302,7 @@ struct DumpInfo { >> struct thread_args *kdump_thread_args; >> struct page_data *page_data_buf; >> struct page_flag **page_flag_buf; >> - sem_t page_flag_buf_sem; >> +// sem_t page_flag_buf_sem; >> pthread_rwlock_t usemmap_rwlock; >> mdf_pfn_t current_pfn; >> 
pthread_mutex_t current_pfn_mutex; >> -- >> 1.8.3.1 >> > > > _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-03-31 9:09 ` "Zhou, Wenjian/周文剑" @ 2016-04-01 6:27 ` Minfei Huang 2016-04-01 11:21 ` "Zhou, Wenjian/周文剑" 0 siblings, 1 reply; 33+ messages in thread From: Minfei Huang @ 2016-04-01 6:27 UTC (permalink / raw) To: "Zhou, Wenjian/周文剑"; +Cc: kexec On 03/31/16 at 05:09pm, "Zhou, Wenjian/周文剑" wrote: > Hello Minfei, > > Thanks for your results. > And I have some questions. > > On 03/31/2016 04:38 PM, Minfei Huang wrote: > >Hi, Zhou. > > > >I have tested the increasing patch on 4T memory machine. > > > >makedumpfile fails to dump vmcore, if there are about 384M memory in 2nd > >kernel which is reserved by crashkernel=auto. But once the reserved > >memory is enlarged up to 10G, makedumpfile can dump vmcore successfully. > > > > Will it fail with patch v3? or just v4? Both v3 and v4 can work well, once reserved memory is enlarged manually. > I don't think it is a problem. > If 128 cpus are enabled in second kernel, there won't be much memory left if total memory is 384M. Enable 128 CPUs with 1GB reserved memory. kdump:/# /sysroot/bin/free -m total used free shared buff/cache available Mem: 976 97 732 6 146 774 Enable 1 CPU with 1GB reserved memory. kdump:/# /sysroot/bin/free -m total used free shared buff/cache available Mem: 991 32 873 6 85 909 Extra enabled 127 CPUs will consume 65MB. So I think it is acceptable in kdump kernel. The major memory is consumed by makedumpfile from the test result. crashkernel=auto doesn't work any more, if option --num-threads is set. Even more, there is no warning to let user enlarge the reserved memory. > > And I think it will also work if the reserved memory is set to 1G. Yes, makedumpfile can work well under 1GB reserved memory. > > >The cache should be dropped before testing, otherwise makedumpfile will > >fail to dump vmcore. > >echo 3 > /proc/sys/vm/drop_caches > >Maybe there is something cleanup we can do to avoid this. 
> > > >Following is the result with different parameter for option > >--num-threads. > > > >makedumpfile -l --num-threads 128 --message-level 1 -d 31 /proc/vmcore a.128 > >real 5m34.116s > >user 103m42.531s > >sys 86m12.586s [ snip ] > >makedumpfile -l --num-threads 0 --message-level 1 -d 31 /proc/vmcore a.0 > >real 3m46.531s > >user 3m29.371s > >sys 0m16.909s > > > >makedumpfile.back -l --message-level 1 -d 31 /proc/vmcore a > >real 3m55.712s > >user 3m39.254s > >sys 0m16.287s > > > >Once the reserved memory is enlarged, makedumpfile works well with or > >without this increaseing patch. > > > >But there is an another issue I found during testing. makedumpfile may > >hang in about 24%. And with option --num-threads 64, this issue is also > >occured. > > > > Will it occur with patch v3? > If it not occurs, then neither of the previous two increasing patches will work? > > And did you test it with or without the increasing patch? without this increasing patch, v4 works well. > > >makedumpfile -l --num-threads 128 --message-level 1 -d 31 /proc/vmcore a.128 > >Excluding unnecessary pages : [100.0 %] | > >Excluding unnecessary pages : [100.0 %] / > >Excluding unnecessary pages : [100.0 %] - > >Copying data : [ 11.2 %] | > >Copying data : [ 12.4 %] - > >Excluding unnecessary pages : [100.0 %] \ > >Excluding unnecessary pages : [100.0 %] | > >Copying data : [ 23.6 %] - > >Copying data : [ 24.4 %] / > > > > Could you help me find which line of the code is running at when it hanging? > makedumpfile may be in a loop and can't go out by some bugs. This issue happens very occasionally. I can update it once meet it. Thanks Minfei _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-04-01 6:27 ` Minfei Huang @ 2016-04-01 11:21 ` "Zhou, Wenjian/周文剑" 2016-04-01 13:15 ` Minfei Huang 0 siblings, 1 reply; 33+ messages in thread From: "Zhou, Wenjian/周文剑" @ 2016-04-01 11:21 UTC (permalink / raw) To: Minfei Huang; +Cc: kexec On 04/01/2016 02:27 PM, Minfei Huang wrote: > On 03/31/16 at 05:09pm, "Zhou, Wenjian/周文剑" wrote: >> Hello Minfei, >> >> Thanks for your results. >> And I have some questions. >> >> On 03/31/2016 04:38 PM, Minfei Huang wrote: >>> Hi, Zhou. >>> >>> I have tested the increasing patch on 4T memory machine. >>> >>> makedumpfile fails to dump vmcore, if there are about 384M memory in 2nd >>> kernel which is reserved by crashkernel=auto. But once the reserved >>> memory is enlarged up to 10G, makedumpfile can dump vmcore successfully. >>> >> >> Will it fail with patch v3? or just v4? > > Both v3 and v4 can work well, once reserved memory is enlarged manually. > >> I don't think it is a problem. >> If 128 cpus are enabled in second kernel, there won't be much memory left if total memory is 384M. > > Enable 128 CPUs with 1GB reserved memory. > kdump:/# /sysroot/bin/free -m > total used free shared buff/cache available > Mem: 976 97 732 6 146 774 > > Enable 1 CPU with 1GB reserved memory. > kdump:/# /sysroot/bin/free -m > total used free shared buff/cache available > Mem: 991 32 873 6 85 909 > > Extra enabled 127 CPUs will consume 65MB. So I think it is acceptable > in kdump kernel. > > The major memory is consumed by makedumpfile from the test result. > crashkernel=auto doesn't work any more, if option --num-threads is > set. Even more, there is no warning to let user enlarge the reserved > memory. > Yes, we should remind user if they want to use too much threads. >> >> And I think it will also work if the reserved memory is set to 1G. > > Yes, makedumpfile can work well under 1GB reserved memory. 
> >> >>> The cache should be dropped before testing, otherwise makedumpfile will >>> fail to dump vmcore. >>> echo 3 > /proc/sys/vm/drop_caches >>> Maybe there is something cleanup we can do to avoid this. >>> >>> Following is the result with different parameter for option >>> --num-threads. >>> >>> makedumpfile -l --num-threads 128 --message-level 1 -d 31 /proc/vmcore a.128 >>> real 5m34.116s >>> user 103m42.531s >>> sys 86m12.586s > [ snip ] >>> makedumpfile -l --num-threads 0 --message-level 1 -d 31 /proc/vmcore a.0 >>> real 3m46.531s >>> user 3m29.371s >>> sys 0m16.909s >>> >>> makedumpfile.back -l --message-level 1 -d 31 /proc/vmcore a >>> real 3m55.712s >>> user 3m39.254s >>> sys 0m16.287s >>> >>> Once the reserved memory is enlarged, makedumpfile works well with or >>> without this increaseing patch. >>> >>> But there is an another issue I found during testing. makedumpfile may >>> hang in about 24%. And with option --num-threads 64, this issue is also >>> occured. >>> >> >> Will it occur with patch v3? >> If it not occurs, then neither of the previous two increasing patches will work? >> >> And did you test it with or without the increasing patch? > > without this increasing patch, v4 works well. > Do you mean makedumpfile won't hang without the increasing patch? -- Thanks Zhou >> >>> makedumpfile -l --num-threads 128 --message-level 1 -d 31 /proc/vmcore a.128 >>> Excluding unnecessary pages : [100.0 %] | >>> Excluding unnecessary pages : [100.0 %] / >>> Excluding unnecessary pages : [100.0 %] - >>> Copying data : [ 11.2 %] | >>> Copying data : [ 12.4 %] - >>> Excluding unnecessary pages : [100.0 %] \ >>> Excluding unnecessary pages : [100.0 %] | >>> Copying data : [ 23.6 %] - >>> Copying data : [ 24.4 %] / >>> >> >> Could you help me find which line of the code is running at when it hanging? >> makedumpfile may be in a loop and can't go out by some bugs. > > This issue happens very occasionally. I can update it once meet it. 
> > Thanks > Minfei > > _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-04-01 11:21 ` "Zhou, Wenjian/周文剑" @ 2016-04-01 13:15 ` Minfei Huang 2016-04-04 5:46 ` Atsushi Kumagai 0 siblings, 1 reply; 33+ messages in thread From: Minfei Huang @ 2016-04-01 13:15 UTC (permalink / raw) To: "Zhou, Wenjian/周文剑"; +Cc: kexec On 04/01/16 at 07:21pm, "Zhou, Wenjian/周文剑" wrote: > On 04/01/2016 02:27 PM, Minfei Huang wrote: > >On 03/31/16 at 05:09pm, "Zhou, Wenjian/周文剑" wrote: > >>Hello Minfei, > >> > >>Thanks for your results. > >>And I have some questions. > >> > >>On 03/31/2016 04:38 PM, Minfei Huang wrote: > >>>Hi, Zhou. > >>> > >>>I have tested the increasing patch on 4T memory machine. > >>> > >>>makedumpfile fails to dump vmcore, if there are about 384M memory in 2nd > >>>kernel which is reserved by crashkernel=auto. But once the reserved > >>>memory is enlarged up to 10G, makedumpfile can dump vmcore successfully. > >>> > >> > >>Will it fail with patch v3? or just v4? > > > >Both v3 and v4 can work well, once reserved memory is enlarged manually. > > > >>I don't think it is a problem. > >>If 128 cpus are enabled in second kernel, there won't be much memory left if total memory is 384M. > > > >Enable 128 CPUs with 1GB reserved memory. > >kdump:/# /sysroot/bin/free -m > > total used free shared buff/cache available > >Mem: 976 97 732 6 146 774 > > > >Enable 1 CPU with 1GB reserved memory. > >kdump:/# /sysroot/bin/free -m > > total used free shared buff/cache available > >Mem: 991 32 873 6 85 909 > > > >Extra enabled 127 CPUs will consume 65MB. So I think it is acceptable > >in kdump kernel. > > > >The major memory is consumed by makedumpfile from the test result. > >crashkernel=auto doesn't work any more, if option --num-threads is > >set. Even more, there is no warning to let user enlarge the reserved > >memory. > > > > Yes, we should remind user if they want to use too much threads. This new feature multi-threads will consume more memory during dumping vmcore in 2nd kernel. 
Is it possible to improve it? > > >> > >>And I think it will also work if the reserved memory is set to 1G. > > > >Yes, makedumpfile can work well under 1GB reserved memory. > > > >> > >>>The cache should be dropped before testing, otherwise makedumpfile will > >>>fail to dump vmcore. > >>>echo 3 > /proc/sys/vm/drop_caches > >>>Maybe there is something cleanup we can do to avoid this. > >>> > >>>Following is the result with different parameter for option > >>>--num-threads. > >>> > >>>makedumpfile -l --num-threads 128 --message-level 1 -d 31 /proc/vmcore a.128 > >>>real 5m34.116s > >>>user 103m42.531s > >>>sys 86m12.586s > >[ snip ] > >>>makedumpfile -l --num-threads 0 --message-level 1 -d 31 /proc/vmcore a.0 > >>>real 3m46.531s > >>>user 3m29.371s > >>>sys 0m16.909s > >>> > >>>makedumpfile.back -l --message-level 1 -d 31 /proc/vmcore a > >>>real 3m55.712s > >>>user 3m39.254s > >>>sys 0m16.287s > >>> > >>>Once the reserved memory is enlarged, makedumpfile works well with or > >>>without this increaseing patch. > >>> > >>>But there is an another issue I found during testing. makedumpfile may > >>>hang in about 24%. And with option --num-threads 64, this issue is also > >>>occured. > >>> > >> > >>Will it occur with patch v3? > >>If it not occurs, then neither of the previous two increasing patches will work? > >> > >>And did you test it with or without the increasing patch? > > > >without this increasing patch, v4 works well. > > > > Do you mean makedumpfile won't hang without the increasing patch? Seem that, but I cann't confirm it, since this issue occurs very occasionally. 
Thanks Minfei > > -- > Thanks > Zhou > >> > >>>makedumpfile -l --num-threads 128 --message-level 1 -d 31 /proc/vmcore a.128 > >>>Excluding unnecessary pages : [100.0 %] | > >>>Excluding unnecessary pages : [100.0 %] / > >>>Excluding unnecessary pages : [100.0 %] - > >>>Copying data : [ 11.2 %] | > >>>Copying data : [ 12.4 %] - > >>>Excluding unnecessary pages : [100.0 %] \ > >>>Excluding unnecessary pages : [100.0 %] | > >>>Copying data : [ 23.6 %] - > >>>Copying data : [ 24.4 %] / > >>> > >> > >>Could you help me find which line of the code is running at when it hanging? > >>makedumpfile may be in a loop and can't go out by some bugs. > > > >This issue happens very occasionally. I can update it once meet it. > > > >Thanks > >Minfei > > > > > > > > > _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* RE: [PATCH v4] Improve the performance of --num-threads -d 31 2016-04-01 13:15 ` Minfei Huang @ 2016-04-04 5:46 ` Atsushi Kumagai 2016-04-05 9:18 ` Minfei Huang 0 siblings, 1 reply; 33+ messages in thread From: Atsushi Kumagai @ 2016-04-04 5:46 UTC (permalink / raw) To: Minfei Huang, "Zhou, Wenjian/周文剑" Cc: kexec@lists.infradead.org Hello, Let me get this straight. >> >>Hello Minfei, >> >> >> >>Thanks for your results. >> >>And I have some questions. >> >> >> >>On 03/31/2016 04:38 PM, Minfei Huang wrote: >> >>>Hi, Zhou. >> >>> >> >>>I have tested the increasing patch on 4T memory machine. >> >>> >> >>>makedumpfile fails to dump vmcore, if there are about 384M memory in 2nd >> >>>kernel which is reserved by crashkernel=auto. But once the reserved >> >>>memory is enlarged up to 10G, makedumpfile can dump vmcore successfully. >> >>> >> >> >> >>Will it fail with patch v3? or just v4? >> > >> >Both v3 and v4 can work well, once reserved memory is enlarged manually. >> > >> >>I don't think it is a problem. >> >>If 128 cpus are enabled in second kernel, there won't be much memory left if total memory is 384M. >> > >> >Enable 128 CPUs with 1GB reserved memory. >> >kdump:/# /sysroot/bin/free -m >> > total used free shared buff/cache available >> >Mem: 976 97 732 6 146 774 >> > >> >Enable 1 CPU with 1GB reserved memory. >> >kdump:/# /sysroot/bin/free -m >> > total used free shared buff/cache available >> >Mem: 991 32 873 6 85 909 >> > >> >Extra enabled 127 CPUs will consume 65MB. So I think it is acceptable >> >in kdump kernel. >> > >> >The major memory is consumed by makedumpfile from the test result. >> >crashkernel=auto doesn't work any more, if option --num-threads is >> >set. Even more, there is no warning to let user enlarge the reserved >> >memory. >> > >> >> Yes, we should remind user if they want to use too much threads. After all, the ENOMEM issue isn't peculiar to a specific patch but just a matter of course "more threads consume more memory" ? 
(At least I expect so.) >This new feature multi-threads will consume more memory during dumping >vmcore in 2nd kernel. Is it possible to improve it? makedumpfile is designed to work in limited memory space, but the extra memory consumption for each thread isn't considered. I think it can be estimated by checking code and measured by actual test. e.g. If we find that each thread requires 100KB more memory, 100KB * num-thread should be reserved before filtering processing. As I said to Zhou, the logic should be inserted into the cyclic buffer allocation method (calculate_cyclic_buffer_size()). >> >>And I think it will also work if the reserved memory is set to 1G. >> > >> >Yes, makedumpfile can work well under 1GB reserved memory. >> > >> >> >> >>>The cache should be dropped before testing, otherwise makedumpfile will >> >>>fail to dump vmcore. >> >>>echo 3 > /proc/sys/vm/drop_caches >> >>>Maybe there is something cleanup we can do to avoid this. >> >>> >> >>>Following is the result with different parameter for option >> >>>--num-threads. >> >>> >> >>>makedumpfile -l --num-threads 128 --message-level 1 -d 31 /proc/vmcore a.128 >> >>>real 5m34.116s >> >>>user 103m42.531s >> >>>sys 86m12.586s >> >[ snip ] >> >>>makedumpfile -l --num-threads 0 --message-level 1 -d 31 /proc/vmcore a.0 >> >>>real 3m46.531s >> >>>user 3m29.371s >> >>>sys 0m16.909s >> >>> >> >>>makedumpfile.back -l --message-level 1 -d 31 /proc/vmcore a >> >>>real 3m55.712s >> >>>user 3m39.254s >> >>>sys 0m16.287s >> >>> >> >>>Once the reserved memory is enlarged, makedumpfile works well with or >> >>>without this increaseing patch. >> >>> >> >>>But there is an another issue I found during testing. makedumpfile may >> >>>hang in about 24%. And with option --num-threads 64, this issue is also >> >>>occured. >> >>> >> >> >> >>Will it occur with patch v3? >> >>If it not occurs, then neither of the previous two increasing patches will work? >> >> >> >>And did you test it with or without the increasing patch? 
>> > >> >without this increasing patch, v4 works well. >> > >> >> Do you mean makedumpfile won't hang without the increasing patch? > >Seem that, but I cann't confirm it, since this issue occurs very >occasionally. I want to know how many times were the two cases (v4 and v4+inc) tested and how often did them hang. Thanks, Atsushi Kumagai >Thanks >Minfei > >> >> -- >> Thanks >> Zhou >> >> >> >>>makedumpfile -l --num-threads 128 --message-level 1 -d 31 /proc/vmcore a.128 >> >>>Excluding unnecessary pages : [100.0 %] | >> >>>Excluding unnecessary pages : [100.0 %] / >> >>>Excluding unnecessary pages : [100.0 %] - >> >>>Copying data : [ 11.2 %] | >> >>>Copying data : [ 12.4 %] - >> >>>Excluding unnecessary pages : [100.0 %] \ >> >>>Excluding unnecessary pages : [100.0 %] | >> >>>Copying data : [ 23.6 %] - >> >>>Copying data : [ 24.4 %] / >> >>> >> >> >> >>Could you help me find which line of the code is running at when it hanging? >> >>makedumpfile may be in a loop and can't go out by some bugs. >> > >> >This issue happens very occasionally. I can update it once meet it. >> > >> >Thanks >> >Minfei >> > >> > >> >> >> >> >> > >_______________________________________________ >kexec mailing list >kexec@lists.infradead.org >http://lists.infradead.org/mailman/listinfo/kexec _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
* Re: [PATCH v4] Improve the performance of --num-threads -d 31 2016-04-04 5:46 ` Atsushi Kumagai @ 2016-04-05 9:18 ` Minfei Huang 0 siblings, 0 replies; 33+ messages in thread From: Minfei Huang @ 2016-04-05 9:18 UTC (permalink / raw) To: Atsushi Kumagai Cc: "Zhou, Wenjian/周文剑", kexec@lists.infradead.org On 04/04/16 at 05:46am, Atsushi Kumagai wrote: > Hello, > > Let me get this straight. > >> >The major memory is consumed by makedumpfile from the test result. > >> >crashkernel=auto doesn't work any more, if option --num-threads is > >> >set. Even more, there is no warning to let user enlarge the reserved > >> >memory. > >> > > >> > >> Yes, we should remind user if they want to use too much threads. > > After all, the ENOMEM issue isn't peculiar to a specific patch but > just a matter of course "more threads consume more memory" ? > (At least I expect so.) Hi, Atsushi. Yes, it is, because ENOMEM never happen once the reserved memory is enlarged manually from my testing. Maybe there should be a generic formular to estimate how much memory will be consumed by specific makedumpfile config. > > >This new feature multi-threads will consume more memory during dumping > >vmcore in 2nd kernel. Is it possible to improve it? > > makedumpfile is designed to work in limited memory space, but the extra > memory consumption for each thread isn't considered. > I think it can be estimated by checking code and measured by actual test. > > e.g. If we find that each thread requires 100KB more memory, 100KB * num-thread > should be reserved before filtering processing. > As I said to Zhou, the logic should be inserted into the cyclic buffer > allocation method (calculate_cyclic_buffer_size()). Agree. > > >> >>And I think it will also work if the reserved memory is set to 1G. > >> > > >> >Yes, makedumpfile can work well under 1GB reserved memory. > >> > > >> >> > >> >>>The cache should be dropped before testing, otherwise makedumpfile will > >> >>>fail to dump vmcore. 
> >> >>>echo 3 > /proc/sys/vm/drop_caches > >> >>>Maybe there is something cleanup we can do to avoid this. > >> >>> > >> >>>Following is the result with different parameter for option > >> >>>--num-threads. > >> >>> > >> >>>makedumpfile -l --num-threads 128 --message-level 1 -d 31 /proc/vmcore a.128 > >> >>>real 5m34.116s > >> >>>user 103m42.531s > >> >>>sys 86m12.586s > >> >[ snip ] > >> >>>makedumpfile -l --num-threads 0 --message-level 1 -d 31 /proc/vmcore a.0 > >> >>>real 3m46.531s > >> >>>user 3m29.371s > >> >>>sys 0m16.909s > >> >>> > >> >>>makedumpfile.back -l --message-level 1 -d 31 /proc/vmcore a > >> >>>real 3m55.712s > >> >>>user 3m39.254s > >> >>>sys 0m16.287s > >> >>> > >> >>>Once the reserved memory is enlarged, makedumpfile works well with or > >> >>>without this increaseing patch. > >> >>> > >> >>>But there is an another issue I found during testing. makedumpfile may > >> >>>hang in about 24%. And with option --num-threads 64, this issue is also > >> >>>occured. > >> >>> > >> >> > >> >>Will it occur with patch v3? > >> >>If it not occurs, then neither of the previous two increasing patches will work? > >> >> > >> >>And did you test it with or without the increasing patch? > >> > > >> >without this increasing patch, v4 works well. > >> > > >> > >> Do you mean makedumpfile won't hang without the increasing patch? > > > >Seem that, but I cann't confirm it, since this issue occurs very > >occasionally. > > I want to know how many times were the two cases (v4 and v4+inc) tested > and how often did them hang. I have test it more than twenty times with patch v4, and there is no hang during testing. Appending option --num-threads 32 or bigger thread number, makedumpfile will hang in about every five times with increasing patch based on patch v4. 
Thanks Minfei > > > Thanks, > Atsushi Kumagai > > >Thanks > >Minfei _______________________________________________ kexec mailing list kexec@lists.infradead.org http://lists.infradead.org/mailman/listinfo/kexec ^ permalink raw reply [flat|nested] 33+ messages in thread
end of thread, other threads: [~2016-04-05 9:14 UTC | newest]
Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-03-09 0:27 [PATCH v4] Improve the performance of --num-threads -d 31 Zhou Wenjian
2016-03-09 0:35 ` "Zhou, Wenjian/周文剑"
2016-03-11 1:00 ` "Zhou, Wenjian/周文剑"
2016-03-11 3:03 ` Minoru Usui
2016-03-11 3:10 ` "Zhou, Wenjian/周文剑"
2016-03-11 4:55 ` Atsushi Kumagai
2016-03-11 5:33 ` Minfei Huang
2016-03-15 6:34 ` Minfei Huang
2016-03-15 7:12 ` "Zhou, Wenjian/周文剑"
2016-03-15 7:38 ` Minfei Huang
2016-03-15 9:33 ` Minfei Huang
2016-03-16 1:55 ` "Zhou, Wenjian/周文剑"
2016-03-16 8:04 ` Minfei Huang
2016-03-16 8:24 ` Minfei Huang
2016-03-16 8:26 ` "Zhou, Wenjian/周文剑"
[not found] ` <B049E864-7426-4817-96FA-8E3CCA59CA24@redhat.com>
2016-03-16 8:59 ` "Zhou, Wenjian/周文剑"
2016-03-16 9:30 ` Minfei Huang
2016-03-15 8:35 ` "Zhou, Wenjian/周文剑"
2016-03-18 2:46 ` "Zhou, Wenjian/周文剑"
2016-03-18 4:16 ` Minfei Huang
2016-03-18 5:48 ` "Zhou, Wenjian/周文剑"
2016-03-24 5:28 ` "Zhou, Wenjian/周文剑"
2016-03-24 5:39 ` Minfei Huang
2016-03-25 2:57 ` Atsushi Kumagai
2016-03-28 1:23 ` "Zhou, Wenjian/周文剑"
2016-03-28 5:43 ` Atsushi Kumagai
2016-03-31 8:38 ` Minfei Huang
2016-03-31 9:09 ` "Zhou, Wenjian/周文剑"
2016-04-01 6:27 ` Minfei Huang
2016-04-01 11:21 ` "Zhou, Wenjian/周文剑"
2016-04-01 13:15 ` Minfei Huang
2016-04-04 5:46 ` Atsushi Kumagai
2016-04-05 9:18 ` Minfei Huang
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox