All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] makedumpfile: Change num_threads to num_producers
@ 2016-08-01  3:26 Zhou Wenjian
  2016-08-04  0:50 ` Atsushi Kumagai
  0 siblings, 1 reply; 3+ messages in thread
From: Zhou Wenjian @ 2016-08-01  3:26 UTC (permalink / raw)
  To: kexec; +Cc: ats-kumagai

Currently, num_threads means the producer number. The main thread
is not included. However, num_threads is specified by the option
"--num-threads". So this may make user confused why it still has
performance degradation when the value by "--num-threads" equals
the usable cpu number.

I think it will be more nature to make "--num-threads" be producer
number + 1. In other words, "--num-threads" should include the
main thread.

In this patch, num_threads has been change to the prodecer_num.
And the num_producers is calculated by "--num-threads" - 1;

In addition, thread_num is changed to producer_num.

Signed-off-by: Zhou Wenjian <zhouwj-fnst@cn.fujitsu.com>
---
 makedumpfile.c | 108 ++++++++++++++++++++++++++++-----------------------------
 makedumpfile.h |   4 +--
 2 files changed, 56 insertions(+), 56 deletions(-)

diff --git a/makedumpfile.c b/makedumpfile.c
index 21784e8..19019a4 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -1407,13 +1407,13 @@ open_dump_bitmap(void)
 		}
 	}
 
-	if (info->num_threads) {
+	if (info->num_producers) {
 		/*
 		 * Reserve file descriptors of bitmap for creating dumpfiles
 		 * parallelly, because a bitmap file will be unlinked just after
 		 * this and it is not possible to open a bitmap file later.
 		 */
-		for (i = 0; i < info->num_threads; i++) {
+		for (i = 0; i < info->num_producers; i++) {
 			if ((fd = open(info->name_bitmap, O_RDONLY)) < 0) {
 				ERRMSG("Can't open the bitmap file(%s). %s\n",
 				    info->name_bitmap, strerror(errno));
@@ -3495,9 +3495,9 @@ initialize_bitmap_memory(void)
 }
 
 void
-initialize_bitmap_memory_parallel(struct dump_bitmap *bitmap, int thread_num)
+initialize_bitmap_memory_parallel(struct dump_bitmap *bitmap, int producer_num)
 {
-	bitmap->fd = FD_BITMAP_MEMORY_PARALLEL(thread_num);
+	bitmap->fd = FD_BITMAP_MEMORY_PARALLEL(producer_num);
 	bitmap->file_name = info->name_memory;
 	bitmap->no_block = -1;
 	memset(bitmap->buf, 0, BUFSIZE_BITMAP);
@@ -3529,24 +3529,24 @@ initial_for_parallel()
 	/*
 	 * allocate memory for threads
 	 */
-	if ((info->threads = malloc(sizeof(pthread_t *) * info->num_threads))
+	if ((info->threads = malloc(sizeof(pthread_t *) * info->num_producers))
 	    == NULL) {
 		MSG("Can't allocate memory for threads. %s\n",
 				strerror(errno));
 		return FALSE;
 	}
-	memset(info->threads, 0, sizeof(pthread_t *) * info->num_threads);
+	memset(info->threads, 0, sizeof(pthread_t *) * info->num_producers);
 
 	if ((info->kdump_thread_args =
-			malloc(sizeof(struct thread_args) * info->num_threads))
+			malloc(sizeof(struct thread_args) * info->num_producers))
 	    == NULL) {
 		MSG("Can't allocate memory for arguments of threads. %s\n",
 				strerror(errno));
 		return FALSE;
 	}
-	memset(info->kdump_thread_args, 0, sizeof(struct thread_args) * info->num_threads);
+	memset(info->kdump_thread_args, 0, sizeof(struct thread_args) * info->num_producers);
 
-	for (i = 0; i < info->num_threads; i++) {
+	for (i = 0; i < info->num_producers; i++) {
 		if ((info->threads[i] = malloc(sizeof(pthread_t))) == NULL) {
 			MSG("Can't allocate memory for thread %d. %s",
 					i, strerror(errno));
@@ -3592,7 +3592,7 @@ initial_for_parallel()
 #endif
 	}
 
-	info->num_buffers = PAGE_DATA_NUM * info->num_threads;
+	info->num_buffers = PAGE_DATA_NUM * info->num_producers;
 
 	/*
 	 * allocate memory for page_data
@@ -3615,17 +3615,17 @@ initial_for_parallel()
 	}
 
 	/*
-	 * initial page_flag for each thread
+	 * initial page_flag for each producer thread
 	 */
-	if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads))
+	if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_producers))
 	    == NULL) {
 		MSG("Can't allocate memory for page_flag_buf. %s\n",
 				strerror(errno));
 		return FALSE;
 	}
-	memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads);
+	memset(info->page_flag_buf, 0, sizeof(void *) * info->num_producers);
 
-	for (i = 0; i < info->num_threads; i++) {
+	for (i = 0; i < info->num_producers; i++) {
 		if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) {
 			MSG("Can't allocate memory for page_flag. %s\n",
 				strerror(errno));
@@ -3645,9 +3645,9 @@ initial_for_parallel()
 	}
 
 	/*
-	 * initial fd_memory for threads
+	 * initial fd_memory for producer threads
 	 */
-	for (i = 0; i < info->num_threads; i++) {
+	for (i = 0; i < info->num_producers; i++) {
 		if ((FD_MEMORY_PARALLEL(i) = open(info->name_memory, O_RDONLY))
 									< 0) {
 			ERRMSG("Can't open the dump memory(%s). %s\n",
@@ -3673,7 +3673,7 @@ free_for_parallel()
 	struct page_flag *current;
 
 	if (info->threads != NULL) {
-		for (i = 0; i < info->num_threads; i++) {
+		for (i = 0; i < info->num_producers; i++) {
 			if (info->threads[i] != NULL)
 				free(info->threads[i]);
 
@@ -3714,7 +3714,7 @@ free_for_parallel()
 	}
 
 	if (info->page_flag_buf != NULL) {
-		for (i = 0; i < info->num_threads; i++) {
+		for (i = 0; i < info->num_producers; i++) {
 			for (j = 0; j < PAGE_FLAG_NUM; j++) {
 				if (info->page_flag_buf[i] != NULL) {
 					current = info->page_flag_buf[i];
@@ -3729,7 +3729,7 @@ free_for_parallel()
 	if (info->parallel_info == NULL)
 		return;
 
-	for (i = 0; i < info->num_threads; i++) {
+	for (i = 0; i < info->num_producers; i++) {
 		if (FD_MEMORY_PARALLEL(i) > 0)
 			close(FD_MEMORY_PARALLEL(i));
 
@@ -3936,7 +3936,7 @@ out:
 		DEBUG_MSG("Buffer size for the cyclic mode: %ld\n", info->bufsize_cyclic);
 	}
 
-	if (info->num_threads) {
+	if (info->num_producers) {
 		if (is_xen_memory()) {
 			MSG("'--num-threads' option is disable,\n");
 			MSG("because %s is Xen's memory core image.\n",
@@ -4066,9 +4066,9 @@ initialize_2nd_bitmap(struct dump_bitmap *bitmap)
 }
 
 void
-initialize_2nd_bitmap_parallel(struct dump_bitmap *bitmap, int thread_num)
+initialize_2nd_bitmap_parallel(struct dump_bitmap *bitmap, int producer_num)
 {
-	bitmap->fd = FD_BITMAP_PARALLEL(thread_num);
+	bitmap->fd = FD_BITMAP_PARALLEL(producer_num);
 	bitmap->file_name = info->name_bitmap;
 	bitmap->no_block = -1;
 	memset(bitmap->buf, 0, BUFSIZE_BITMAP);
@@ -7558,7 +7558,7 @@ kdump_thread_function_cyclic(void *arg) {
 	volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf;
 	struct cycle *cycle = kdump_thread_args->cycle;
 	mdf_pfn_t pfn = cycle->start_pfn;
-	int index = kdump_thread_args->thread_num;
+	int index = kdump_thread_args->producer_num;
 	int buf_ready;
 	int dumpable;
 	int fd_memory = 0;
@@ -7566,21 +7566,21 @@ kdump_thread_function_cyclic(void *arg) {
 	struct dump_bitmap bitmap_memory_parallel = {0};
 	unsigned char *buf = NULL, *buf_out = NULL;
 	struct mmap_cache *mmap_cache =
-			MMAP_CACHE_PARALLEL(kdump_thread_args->thread_num);
+			MMAP_CACHE_PARALLEL(kdump_thread_args->producer_num);
 	unsigned long size_out;
-	z_stream *stream = &ZLIB_STREAM_PARALLEL(kdump_thread_args->thread_num);
+	z_stream *stream = &ZLIB_STREAM_PARALLEL(kdump_thread_args->producer_num);
 #ifdef USELZO
-	lzo_bytep wrkmem = WRKMEM_PARALLEL(kdump_thread_args->thread_num);
+	lzo_bytep wrkmem = WRKMEM_PARALLEL(kdump_thread_args->producer_num);
 #endif
 #ifdef USESNAPPY
 	unsigned long len_buf_out_snappy =
 				snappy_max_compressed_length(info->page_size);
 #endif
 
-	buf = BUF_PARALLEL(kdump_thread_args->thread_num);
-	buf_out = BUF_OUT_PARALLEL(kdump_thread_args->thread_num);
+	buf = BUF_PARALLEL(kdump_thread_args->producer_num);
+	buf_out = BUF_OUT_PARALLEL(kdump_thread_args->producer_num);
 
-	fd_memory = FD_MEMORY_PARALLEL(kdump_thread_args->thread_num);
+	fd_memory = FD_MEMORY_PARALLEL(kdump_thread_args->producer_num);
 
 	if (info->fd_bitmap) {
 		bitmap_parallel.buf = malloc(BUFSIZE_BITMAP);
@@ -7590,7 +7590,7 @@ kdump_thread_function_cyclic(void *arg) {
 			goto fail;
 		}
 		initialize_2nd_bitmap_parallel(&bitmap_parallel,
-					kdump_thread_args->thread_num);
+					kdump_thread_args->producer_num);
 	}
 
 	if (info->flag_refiltering) {
@@ -7601,7 +7601,7 @@ kdump_thread_function_cyclic(void *arg) {
 			goto fail;
 		}
 		initialize_bitmap_memory_parallel(&bitmap_memory_parallel,
-						kdump_thread_args->thread_num);
+						kdump_thread_args->producer_num);
 	}
 
 	/*
@@ -7802,8 +7802,8 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 	for (i = 0; i < page_buf_num; i++)
 		page_data_buf[i].used = FALSE;
 
-	for (i = 0; i < info->num_threads; i++) {
-		kdump_thread_args[i].thread_num = i;
+	for (i = 0; i < info->num_producers; i++) {
+		kdump_thread_args[i].producer_num = i;
 		kdump_thread_args[i].len_buf_out = len_buf_out;
 		kdump_thread_args[i].page_data_buf = page_data_buf;
 		kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i];
@@ -7843,13 +7843,13 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 			 * consuming is used for recording in which thread the pfn is the smallest.
 			 * current_pfn is used for recording the value of pfn when checking the pfn.
 			 */
-			for (i = 0; i < info->num_threads; i++) {
+			for (i = 0; i < info->num_producers; i++) {
 				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
 					continue;
 				temp_pfn = info->page_flag_buf[i]->pfn;
 
 				/*
-				 * count how many threads have reached the end.
+				 * count how many producer threads have reached the end.
 				 */
 				if (temp_pfn >= end_pfn) {
 					info->page_flag_buf[i]->ready = FLAG_UNUSED;
@@ -7865,9 +7865,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
 			}
 
 			/*
-			 * If all the threads have reached the end, we will finish writing.
+			 * If all producer threads have reached the end, we will finish writing.
 			 */
-			if (end_count >= info->num_threads)
+			if (end_count >= info->num_producers)
 				goto finish;
 
 			/*
@@ -7930,7 +7930,7 @@ finish:
 
 out:
 	if (threads != NULL) {
-		for (i = 0; i < info->num_threads; i++) {
+		for (i = 0; i < info->num_producers; i++) {
 			if (threads[i] != NULL) {
 				res = pthread_cancel(*threads[i]);
 				if (res != 0 && res != ESRCH)
@@ -7939,7 +7939,7 @@ out:
 			}
 		}
 
-		for (i = 0; i < info->num_threads; i++) {
+		for (i = 0; i < info->num_producers; i++) {
 			if (threads[i] != NULL) {
 				res = pthread_join(*threads[i], &thread_result);
 				if (res != 0)
@@ -8553,7 +8553,7 @@ write_kdump_pages_and_bitmap_cyclic(struct cache_data *cd_header, struct cache_d
 		if (!write_kdump_bitmap2(&cycle))
 			return FALSE;
 
-		if (info->num_threads) {
+		if (info->num_producers) {
 			if (!write_kdump_pages_parallel_cyclic(cd_header,
 							cd_page, &pd_zero,
 							&offset_data, &cycle))
@@ -10531,7 +10531,7 @@ check_param_for_creating_dumpfile(int argc, char *argv[])
 	if (info->flag_sadump_diskset && !sadump_is_supported_arch())
 		return FALSE;
 
-	if (info->num_threads) {
+	if (info->num_producers) {
 		if (info->flag_split) {
 			MSG("--num-threads cannot used with --split.\n");
 			return FALSE;
@@ -10607,16 +10607,16 @@ check_param_for_creating_dumpfile(int argc, char *argv[])
 	} else
 		return FALSE;
 
-	if (info->num_threads) {
+	if (info->num_producers) {
 		if ((info->parallel_info =
-		     malloc(sizeof(parallel_info_t) * info->num_threads))
+		     malloc(sizeof(parallel_info_t) * info->num_producers))
 		    == NULL) {
 			MSG("Can't allocate memory for parallel_info.\n");
 			return FALSE;
 		}
 
 		memset(info->parallel_info, 0, sizeof(parallel_info_t)
-							* info->num_threads);
+							* info->num_producers);
 	}
 
 	return TRUE;
@@ -10721,20 +10721,20 @@ calculate_cyclic_buffer_size(void) {
 	limit_size = get_free_memory_size() * 0.6;
 
 	/*
-	 * Recalculate the limit_size according to num_threads.
-	 * And reset num_threads if there is not enough memory.
+	 * Recalculate the limit_size according to num_producers.
+	 * And reset --num-threads if there is not enough memory.
 	 */
-	if (info->num_threads > 0) {
+	if (info->num_producers > 0) {
 		if (limit_size <= maximum_size) {
 			MSG("There isn't enough memory for multi-threads.\n");
-			info->num_threads = 0;
+			info->num_producers = 0;
 		}
-		else if ((limit_size - maximum_size) / info->num_threads < THREAD_REGION) {
-			MSG("There isn't enough memory for %d threads.\n", info->num_threads);
-			info->num_threads = (limit_size - maximum_size) / THREAD_REGION;
-			MSG("--num_threads is set to %d.\n", info->num_threads);
+		else if ((limit_size - maximum_size) / info->num_producers < THREAD_REGION) {
+			MSG("There isn't enough memory for %d threads.\n", info->num_producers + 1);
+			info->num_producers = (limit_size - maximum_size) / THREAD_REGION;
+			MSG("--num-threads is set to %d.\n", info->num_producers + 1);
 
-			limit_size = limit_size - THREAD_REGION * info->num_threads;
+			limit_size = limit_size - THREAD_REGION * info->num_producers;
 		}
 	}
 
@@ -11103,7 +11103,7 @@ main(int argc, char *argv[])
 			info->working_dir = optarg;
 			break;
 		case OPT_NUM_THREADS:
-			info->num_threads = MAX(atoi(optarg), 0);
+			info->num_producers = MAX(atoi(optarg), 1) - 1;
 			break;
 		case '?':
 			MSG("Commandline parameter is invalid.\n");
diff --git a/makedumpfile.h b/makedumpfile.h
index 533e5b8..6a0f51e 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -1004,7 +1004,7 @@ struct page_data
 };
 
 struct thread_args {
-	int thread_num;
+	int producer_num;
 	unsigned long len_buf_out;
 	struct cycle *cycle;
 	struct page_data *page_data_buf;
@@ -1294,7 +1294,7 @@ struct DumpInfo {
 	/*
 	 * for parallel process
 	 */
-	int num_threads;
+	int num_producers;
 	int num_buffers;
 	pthread_t **threads;
 	struct thread_args *kdump_thread_args;
-- 
1.8.3.1




_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* RE: [PATCH] makedumpfile: Change num_threads to num_producers
  2016-08-01  3:26 [PATCH] makedumpfile: Change num_threads to num_producers Zhou Wenjian
@ 2016-08-04  0:50 ` Atsushi Kumagai
  2016-08-04  1:28   ` "Zhou, Wenjian/周文?"
  0 siblings, 1 reply; 3+ messages in thread
From: Atsushi Kumagai @ 2016-08-04  0:50 UTC (permalink / raw)
  To: Zhou Wenjian; +Cc: kexec@lists.infradead.org

Hello Zhou,

>Currently, num_threads means the producer number. The main thread
>is not included. However, num_threads is specified by the option
>"--num-threads". So this may make user confused why it still has
>performance degradation when the value by "--num-threads" equals
>the usable cpu number.
>
>I think it will be more nature to make "--num-threads" be producer
>number + 1. In other words, "--num-threads" should include the
>main thread.

The most part of cpu time will be spent for compression while
the main thread(Consumer) just does I/O work.
If the usable cpu number is 2 and --num-thread is also 2, your
patch reserves a whole cpu for the main thread and the compression
will be done by just one cpu. Is that really the multi thread
processing which we wanted ?

If there are 4 usable cpus, I guess 4 producers with the main thread
is faster than 3 producers with the main thread.

However, according to my quick test, there were no significant
differences between the two cases.
(This was measured on a small VM(5GB) without your patch, the number
of trials is 3, so just for information.)

# makedumpfile -c --num-thread [3|4]

         |               |              time[s]
    CPUs |  -num-threads |    1st   |    2nd   |    3rd   
  -------+---------------+----------+----------+----------
     4   |       3       |   29.54      30.19     28.26
     4   |       4       |   29.80      31.75     28.19

Then, I noticed that the cpu usage of the main thread is almost 100%
while it doesn't need a lot of cpu resources in theory. Do you have
any idea why the main thread require a lot of cpu resources ?
If it's an unintended behavior, I believe the case of --num-thread=4
can be faster if the cpu usage of the main thread is reduced.


Thanks,
Atsushi Kumagai

>---
> makedumpfile.c | 108 ++++++++++++++++++++++++++++-----------------------------
> makedumpfile.h |   4 +--
> 2 files changed, 56 insertions(+), 56 deletions(-)
>
>diff --git a/makedumpfile.c b/makedumpfile.c
>index 21784e8..19019a4 100644
>--- a/makedumpfile.c
>+++ b/makedumpfile.c
>@@ -1407,13 +1407,13 @@ open_dump_bitmap(void)
> 		}
> 	}
>
>-	if (info->num_threads) {
>+	if (info->num_producers) {
> 		/*
> 		 * Reserve file descriptors of bitmap for creating dumpfiles
> 		 * parallelly, because a bitmap file will be unlinked just after
> 		 * this and it is not possible to open a bitmap file later.
> 		 */
>-		for (i = 0; i < info->num_threads; i++) {
>+		for (i = 0; i < info->num_producers; i++) {
> 			if ((fd = open(info->name_bitmap, O_RDONLY)) < 0) {
> 				ERRMSG("Can't open the bitmap file(%s). %s\n",
> 				    info->name_bitmap, strerror(errno));
>@@ -3495,9 +3495,9 @@ initialize_bitmap_memory(void)
> }
>
> void
>-initialize_bitmap_memory_parallel(struct dump_bitmap *bitmap, int thread_num)
>+initialize_bitmap_memory_parallel(struct dump_bitmap *bitmap, int producer_num)
> {
>-	bitmap->fd = FD_BITMAP_MEMORY_PARALLEL(thread_num);
>+	bitmap->fd = FD_BITMAP_MEMORY_PARALLEL(producer_num);
> 	bitmap->file_name = info->name_memory;
> 	bitmap->no_block = -1;
> 	memset(bitmap->buf, 0, BUFSIZE_BITMAP);
>@@ -3529,24 +3529,24 @@ initial_for_parallel()
> 	/*
> 	 * allocate memory for threads
> 	 */
>-	if ((info->threads = malloc(sizeof(pthread_t *) * info->num_threads))
>+	if ((info->threads = malloc(sizeof(pthread_t *) * info->num_producers))
> 	    == NULL) {
> 		MSG("Can't allocate memory for threads. %s\n",
> 				strerror(errno));
> 		return FALSE;
> 	}
>-	memset(info->threads, 0, sizeof(pthread_t *) * info->num_threads);
>+	memset(info->threads, 0, sizeof(pthread_t *) * info->num_producers);
>
> 	if ((info->kdump_thread_args =
>-			malloc(sizeof(struct thread_args) * info->num_threads))
>+			malloc(sizeof(struct thread_args) * info->num_producers))
> 	    == NULL) {
> 		MSG("Can't allocate memory for arguments of threads. %s\n",
> 				strerror(errno));
> 		return FALSE;
> 	}
>-	memset(info->kdump_thread_args, 0, sizeof(struct thread_args) * info->num_threads);
>+	memset(info->kdump_thread_args, 0, sizeof(struct thread_args) * info->num_producers);
>
>-	for (i = 0; i < info->num_threads; i++) {
>+	for (i = 0; i < info->num_producers; i++) {
> 		if ((info->threads[i] = malloc(sizeof(pthread_t))) == NULL) {
> 			MSG("Can't allocate memory for thread %d. %s",
> 					i, strerror(errno));
>@@ -3592,7 +3592,7 @@ initial_for_parallel()
> #endif
> 	}
>
>-	info->num_buffers = PAGE_DATA_NUM * info->num_threads;
>+	info->num_buffers = PAGE_DATA_NUM * info->num_producers;
>
> 	/*
> 	 * allocate memory for page_data
>@@ -3615,17 +3615,17 @@ initial_for_parallel()
> 	}
>
> 	/*
>-	 * initial page_flag for each thread
>+	 * initial page_flag for each producer thread
> 	 */
>-	if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads))
>+	if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_producers))
> 	    == NULL) {
> 		MSG("Can't allocate memory for page_flag_buf. %s\n",
> 				strerror(errno));
> 		return FALSE;
> 	}
>-	memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads);
>+	memset(info->page_flag_buf, 0, sizeof(void *) * info->num_producers);
>
>-	for (i = 0; i < info->num_threads; i++) {
>+	for (i = 0; i < info->num_producers; i++) {
> 		if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) {
> 			MSG("Can't allocate memory for page_flag. %s\n",
> 				strerror(errno));
>@@ -3645,9 +3645,9 @@ initial_for_parallel()
> 	}
>
> 	/*
>-	 * initial fd_memory for threads
>+	 * initial fd_memory for producer threads
> 	 */
>-	for (i = 0; i < info->num_threads; i++) {
>+	for (i = 0; i < info->num_producers; i++) {
> 		if ((FD_MEMORY_PARALLEL(i) = open(info->name_memory, O_RDONLY))
> 									< 0) {
> 			ERRMSG("Can't open the dump memory(%s). %s\n",
>@@ -3673,7 +3673,7 @@ free_for_parallel()
> 	struct page_flag *current;
>
> 	if (info->threads != NULL) {
>-		for (i = 0; i < info->num_threads; i++) {
>+		for (i = 0; i < info->num_producers; i++) {
> 			if (info->threads[i] != NULL)
> 				free(info->threads[i]);
>
>@@ -3714,7 +3714,7 @@ free_for_parallel()
> 	}
>
> 	if (info->page_flag_buf != NULL) {
>-		for (i = 0; i < info->num_threads; i++) {
>+		for (i = 0; i < info->num_producers; i++) {
> 			for (j = 0; j < PAGE_FLAG_NUM; j++) {
> 				if (info->page_flag_buf[i] != NULL) {
> 					current = info->page_flag_buf[i];
>@@ -3729,7 +3729,7 @@ free_for_parallel()
> 	if (info->parallel_info == NULL)
> 		return;
>
>-	for (i = 0; i < info->num_threads; i++) {
>+	for (i = 0; i < info->num_producers; i++) {
> 		if (FD_MEMORY_PARALLEL(i) > 0)
> 			close(FD_MEMORY_PARALLEL(i));
>
>@@ -3936,7 +3936,7 @@ out:
> 		DEBUG_MSG("Buffer size for the cyclic mode: %ld\n", info->bufsize_cyclic);
> 	}
>
>-	if (info->num_threads) {
>+	if (info->num_producers) {
> 		if (is_xen_memory()) {
> 			MSG("'--num-threads' option is disable,\n");
> 			MSG("because %s is Xen's memory core image.\n",
>@@ -4066,9 +4066,9 @@ initialize_2nd_bitmap(struct dump_bitmap *bitmap)
> }
>
> void
>-initialize_2nd_bitmap_parallel(struct dump_bitmap *bitmap, int thread_num)
>+initialize_2nd_bitmap_parallel(struct dump_bitmap *bitmap, int producer_num)
> {
>-	bitmap->fd = FD_BITMAP_PARALLEL(thread_num);
>+	bitmap->fd = FD_BITMAP_PARALLEL(producer_num);
> 	bitmap->file_name = info->name_bitmap;
> 	bitmap->no_block = -1;
> 	memset(bitmap->buf, 0, BUFSIZE_BITMAP);
>@@ -7558,7 +7558,7 @@ kdump_thread_function_cyclic(void *arg) {
> 	volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf;
> 	struct cycle *cycle = kdump_thread_args->cycle;
> 	mdf_pfn_t pfn = cycle->start_pfn;
>-	int index = kdump_thread_args->thread_num;
>+	int index = kdump_thread_args->producer_num;
> 	int buf_ready;
> 	int dumpable;
> 	int fd_memory = 0;
>@@ -7566,21 +7566,21 @@ kdump_thread_function_cyclic(void *arg) {
> 	struct dump_bitmap bitmap_memory_parallel = {0};
> 	unsigned char *buf = NULL, *buf_out = NULL;
> 	struct mmap_cache *mmap_cache =
>-			MMAP_CACHE_PARALLEL(kdump_thread_args->thread_num);
>+			MMAP_CACHE_PARALLEL(kdump_thread_args->producer_num);
> 	unsigned long size_out;
>-	z_stream *stream = &ZLIB_STREAM_PARALLEL(kdump_thread_args->thread_num);
>+	z_stream *stream = &ZLIB_STREAM_PARALLEL(kdump_thread_args->producer_num);
> #ifdef USELZO
>-	lzo_bytep wrkmem = WRKMEM_PARALLEL(kdump_thread_args->thread_num);
>+	lzo_bytep wrkmem = WRKMEM_PARALLEL(kdump_thread_args->producer_num);
> #endif
> #ifdef USESNAPPY
> 	unsigned long len_buf_out_snappy =
> 				snappy_max_compressed_length(info->page_size);
> #endif
>
>-	buf = BUF_PARALLEL(kdump_thread_args->thread_num);
>-	buf_out = BUF_OUT_PARALLEL(kdump_thread_args->thread_num);
>+	buf = BUF_PARALLEL(kdump_thread_args->producer_num);
>+	buf_out = BUF_OUT_PARALLEL(kdump_thread_args->producer_num);
>
>-	fd_memory = FD_MEMORY_PARALLEL(kdump_thread_args->thread_num);
>+	fd_memory = FD_MEMORY_PARALLEL(kdump_thread_args->producer_num);
>
> 	if (info->fd_bitmap) {
> 		bitmap_parallel.buf = malloc(BUFSIZE_BITMAP);
>@@ -7590,7 +7590,7 @@ kdump_thread_function_cyclic(void *arg) {
> 			goto fail;
> 		}
> 		initialize_2nd_bitmap_parallel(&bitmap_parallel,
>-					kdump_thread_args->thread_num);
>+					kdump_thread_args->producer_num);
> 	}
>
> 	if (info->flag_refiltering) {
>@@ -7601,7 +7601,7 @@ kdump_thread_function_cyclic(void *arg) {
> 			goto fail;
> 		}
> 		initialize_bitmap_memory_parallel(&bitmap_memory_parallel,
>-						kdump_thread_args->thread_num);
>+						kdump_thread_args->producer_num);
> 	}
>
> 	/*
>@@ -7802,8 +7802,8 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> 	for (i = 0; i < page_buf_num; i++)
> 		page_data_buf[i].used = FALSE;
>
>-	for (i = 0; i < info->num_threads; i++) {
>-		kdump_thread_args[i].thread_num = i;
>+	for (i = 0; i < info->num_producers; i++) {
>+		kdump_thread_args[i].producer_num = i;
> 		kdump_thread_args[i].len_buf_out = len_buf_out;
> 		kdump_thread_args[i].page_data_buf = page_data_buf;
> 		kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i];
>@@ -7843,13 +7843,13 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> 			 * consuming is used for recording in which thread the pfn is the smallest.
> 			 * current_pfn is used for recording the value of pfn when checking the pfn.
> 			 */
>-			for (i = 0; i < info->num_threads; i++) {
>+			for (i = 0; i < info->num_producers; i++) {
> 				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
> 					continue;
> 				temp_pfn = info->page_flag_buf[i]->pfn;
>
> 				/*
>-				 * count how many threads have reached the end.
>+				 * count how many producer threads have reached the end.
> 				 */
> 				if (temp_pfn >= end_pfn) {
> 					info->page_flag_buf[i]->ready = FLAG_UNUSED;
>@@ -7865,9 +7865,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
> 			}
>
> 			/*
>-			 * If all the threads have reached the end, we will finish writing.
>+			 * If all producer threads have reached the end, we will finish writing.
> 			 */
>-			if (end_count >= info->num_threads)
>+			if (end_count >= info->num_producers)
> 				goto finish;
>
> 			/*
>@@ -7930,7 +7930,7 @@ finish:
>
> out:
> 	if (threads != NULL) {
>-		for (i = 0; i < info->num_threads; i++) {
>+		for (i = 0; i < info->num_producers; i++) {
> 			if (threads[i] != NULL) {
> 				res = pthread_cancel(*threads[i]);
> 				if (res != 0 && res != ESRCH)
>@@ -7939,7 +7939,7 @@ out:
> 			}
> 		}
>
>-		for (i = 0; i < info->num_threads; i++) {
>+		for (i = 0; i < info->num_producers; i++) {
> 			if (threads[i] != NULL) {
> 				res = pthread_join(*threads[i], &thread_result);
> 				if (res != 0)
>@@ -8553,7 +8553,7 @@ write_kdump_pages_and_bitmap_cyclic(struct cache_data *cd_header, struct cache_d
> 		if (!write_kdump_bitmap2(&cycle))
> 			return FALSE;
>
>-		if (info->num_threads) {
>+		if (info->num_producers) {
> 			if (!write_kdump_pages_parallel_cyclic(cd_header,
> 							cd_page, &pd_zero,
> 							&offset_data, &cycle))
>@@ -10531,7 +10531,7 @@ check_param_for_creating_dumpfile(int argc, char *argv[])
> 	if (info->flag_sadump_diskset && !sadump_is_supported_arch())
> 		return FALSE;
>
>-	if (info->num_threads) {
>+	if (info->num_producers) {
> 		if (info->flag_split) {
> 			MSG("--num-threads cannot used with --split.\n");
> 			return FALSE;
>@@ -10607,16 +10607,16 @@ check_param_for_creating_dumpfile(int argc, char *argv[])
> 	} else
> 		return FALSE;
>
>-	if (info->num_threads) {
>+	if (info->num_producers) {
> 		if ((info->parallel_info =
>-		     malloc(sizeof(parallel_info_t) * info->num_threads))
>+		     malloc(sizeof(parallel_info_t) * info->num_producers))
> 		    == NULL) {
> 			MSG("Can't allocate memory for parallel_info.\n");
> 			return FALSE;
> 		}
>
> 		memset(info->parallel_info, 0, sizeof(parallel_info_t)
>-							* info->num_threads);
>+							* info->num_producers);
> 	}
>
> 	return TRUE;
>@@ -10721,20 +10721,20 @@ calculate_cyclic_buffer_size(void) {
> 	limit_size = get_free_memory_size() * 0.6;
>
> 	/*
>-	 * Recalculate the limit_size according to num_threads.
>-	 * And reset num_threads if there is not enough memory.
>+	 * Recalculate the limit_size according to num_producers.
>+	 * And reset --num-threads if there is not enough memory.
> 	 */
>-	if (info->num_threads > 0) {
>+	if (info->num_producers > 0) {
> 		if (limit_size <= maximum_size) {
> 			MSG("There isn't enough memory for multi-threads.\n");
>-			info->num_threads = 0;
>+			info->num_producers = 0;
> 		}
>-		else if ((limit_size - maximum_size) / info->num_threads < THREAD_REGION) {
>-			MSG("There isn't enough memory for %d threads.\n", info->num_threads);
>-			info->num_threads = (limit_size - maximum_size) / THREAD_REGION;
>-			MSG("--num_threads is set to %d.\n", info->num_threads);
>+		else if ((limit_size - maximum_size) / info->num_producers < THREAD_REGION) {
>+			MSG("There isn't enough memory for %d threads.\n", info->num_producers + 1);
>+			info->num_producers = (limit_size - maximum_size) / THREAD_REGION;
>+			MSG("--num-threads is set to %d.\n", info->num_producers + 1);
>
>-			limit_size = limit_size - THREAD_REGION * info->num_threads;
>+			limit_size = limit_size - THREAD_REGION * info->num_producers;
> 		}
> 	}
>
>@@ -11103,7 +11103,7 @@ main(int argc, char *argv[])
> 			info->working_dir = optarg;
> 			break;
> 		case OPT_NUM_THREADS:
>-			info->num_threads = MAX(atoi(optarg), 0);
>+			info->num_producers = MAX(atoi(optarg), 1) - 1;
> 			break;
> 		case '?':
> 			MSG("Commandline parameter is invalid.\n");
>diff --git a/makedumpfile.h b/makedumpfile.h
>index 533e5b8..6a0f51e 100644
>--- a/makedumpfile.h
>+++ b/makedumpfile.h
>@@ -1004,7 +1004,7 @@ struct page_data
> };
>
> struct thread_args {
>-	int thread_num;
>+	int producer_num;
> 	unsigned long len_buf_out;
> 	struct cycle *cycle;
> 	struct page_data *page_data_buf;
>@@ -1294,7 +1294,7 @@ struct DumpInfo {
> 	/*
> 	 * for parallel process
> 	 */
>-	int num_threads;
>+	int num_producers;
> 	int num_buffers;
> 	pthread_t **threads;
> 	struct thread_args *kdump_thread_args;
>--
>1.8.3.1
>
>


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] makedumpfile: Change num_threads to num_producers
  2016-08-04  0:50 ` Atsushi Kumagai
@ 2016-08-04  1:28   ` "Zhou, Wenjian/周文?"
  0 siblings, 0 replies; 3+ messages in thread
From: "Zhou, Wenjian/周文?" @ 2016-08-04  1:28 UTC (permalink / raw)
  To: Atsushi Kumagai; +Cc: kexec@lists.infradead.org

On 08/04/2016 08:50 AM, Atsushi Kumagai wrote:
> Hello Zhou,
> 
>> Currently, num_threads means the producer number. The main thread
>> is not included. However, num_threads is specified by the option
>> "--num-threads". So this may make user confused why it still has
>> performance degradation when the value by "--num-threads" equals
>> the usable cpu number.
>>
>> I think it will be more nature to make "--num-threads" be producer
>> number + 1. In other words, "--num-threads" should include the
>> main thread.
> 
> The most part of cpu time will be spent for compression while
> the main thread(Consumer) just does I/O work.
> If the usable cpu number is 2 and --num-thread is also 2, your
> patch reserves a whole cpu for the main thread and the compression
> will be done by just one cpu. Is that really the multi thread
> processing which we wanted ?
> 
> If there are 4 usable cpus, I guess 4 producers with the main thread
> is faster than 3 producers with the main thread.
> 
> However, according to my quick test, there were no significant
> differences between the two cases.
> (This was measured on a small VM(5GB) without your patch, the number
> of trials is 3, so just for information.)
> 
> # makedumpfile -c --num-thread [3|4]
> 
>           |               |              time[s]
>      CPUs |  -num-threads |    1st   |    2nd   |    3rd
>    -------+---------------+----------+----------+----------
>       4   |       3       |   29.54      30.19     28.26
>       4   |       4       |   29.80      31.75     28.19
> 
> Then, I noticed that the cpu usage of the main thread is almost 100%
> while it doesn't need a lot of cpu resources in theory. Do you have
> any idea why the main thread require a lot of cpu resources ?

Yes, it shouldn't take so much cpu time.
It really need to be improved.
I'll think about it.

-- 
Thanks
Zhou
> If it's an unintended behavior, I believe the case of --num-thread=4
> can be faster if the cpu usage of the main thread is reduced.
> 
> 
> Thanks,
> Atsushi Kumagai
> 
>> ---
>> makedumpfile.c | 108 ++++++++++++++++++++++++++++-----------------------------
>> makedumpfile.h |   4 +--
>> 2 files changed, 56 insertions(+), 56 deletions(-)
>>
>> diff --git a/makedumpfile.c b/makedumpfile.c
>> index 21784e8..19019a4 100644
>> --- a/makedumpfile.c
>> +++ b/makedumpfile.c
>> @@ -1407,13 +1407,13 @@ open_dump_bitmap(void)
>> 		}
>> 	}
>>
>> -	if (info->num_threads) {
>> +	if (info->num_producers) {
>> 		/*
>> 		 * Reserve file descriptors of bitmap for creating dumpfiles
>> 		 * parallelly, because a bitmap file will be unlinked just after
>> 		 * this and it is not possible to open a bitmap file later.
>> 		 */
>> -		for (i = 0; i < info->num_threads; i++) {
>> +		for (i = 0; i < info->num_producers; i++) {
>> 			if ((fd = open(info->name_bitmap, O_RDONLY)) < 0) {
>> 				ERRMSG("Can't open the bitmap file(%s). %s\n",
>> 				    info->name_bitmap, strerror(errno));
>> @@ -3495,9 +3495,9 @@ initialize_bitmap_memory(void)
>> }
>>
>> void
>> -initialize_bitmap_memory_parallel(struct dump_bitmap *bitmap, int thread_num)
>> +initialize_bitmap_memory_parallel(struct dump_bitmap *bitmap, int producer_num)
>> {
>> -	bitmap->fd = FD_BITMAP_MEMORY_PARALLEL(thread_num);
>> +	bitmap->fd = FD_BITMAP_MEMORY_PARALLEL(producer_num);
>> 	bitmap->file_name = info->name_memory;
>> 	bitmap->no_block = -1;
>> 	memset(bitmap->buf, 0, BUFSIZE_BITMAP);
>> @@ -3529,24 +3529,24 @@ initial_for_parallel()
>> 	/*
>> 	 * allocate memory for threads
>> 	 */
>> -	if ((info->threads = malloc(sizeof(pthread_t *) * info->num_threads))
>> +	if ((info->threads = malloc(sizeof(pthread_t *) * info->num_producers))
>> 	    == NULL) {
>> 		MSG("Can't allocate memory for threads. %s\n",
>> 				strerror(errno));
>> 		return FALSE;
>> 	}
>> -	memset(info->threads, 0, sizeof(pthread_t *) * info->num_threads);
>> +	memset(info->threads, 0, sizeof(pthread_t *) * info->num_producers);
>>
>> 	if ((info->kdump_thread_args =
>> -			malloc(sizeof(struct thread_args) * info->num_threads))
>> +			malloc(sizeof(struct thread_args) * info->num_producers))
>> 	    == NULL) {
>> 		MSG("Can't allocate memory for arguments of threads. %s\n",
>> 				strerror(errno));
>> 		return FALSE;
>> 	}
>> -	memset(info->kdump_thread_args, 0, sizeof(struct thread_args) * info->num_threads);
>> +	memset(info->kdump_thread_args, 0, sizeof(struct thread_args) * info->num_producers);
>>
>> -	for (i = 0; i < info->num_threads; i++) {
>> +	for (i = 0; i < info->num_producers; i++) {
>> 		if ((info->threads[i] = malloc(sizeof(pthread_t))) == NULL) {
>> 			MSG("Can't allocate memory for thread %d. %s",
>> 					i, strerror(errno));
>> @@ -3592,7 +3592,7 @@ initial_for_parallel()
>> #endif
>> 	}
>>
>> -	info->num_buffers = PAGE_DATA_NUM * info->num_threads;
>> +	info->num_buffers = PAGE_DATA_NUM * info->num_producers;
>>
>> 	/*
>> 	 * allocate memory for page_data
>> @@ -3615,17 +3615,17 @@ initial_for_parallel()
>> 	}
>>
>> 	/*
>> -	 * initial page_flag for each thread
>> +	 * initial page_flag for each producer thread
>> 	 */
>> -	if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_threads))
>> +	if ((info->page_flag_buf = malloc(sizeof(void *) * info->num_producers))
>> 	    == NULL) {
>> 		MSG("Can't allocate memory for page_flag_buf. %s\n",
>> 				strerror(errno));
>> 		return FALSE;
>> 	}
>> -	memset(info->page_flag_buf, 0, sizeof(void *) * info->num_threads);
>> +	memset(info->page_flag_buf, 0, sizeof(void *) * info->num_producers);
>>
>> -	for (i = 0; i < info->num_threads; i++) {
>> +	for (i = 0; i < info->num_producers; i++) {
>> 		if ((info->page_flag_buf[i] = calloc(1, sizeof(struct page_flag))) == NULL) {
>> 			MSG("Can't allocate memory for page_flag. %s\n",
>> 				strerror(errno));
>> @@ -3645,9 +3645,9 @@ initial_for_parallel()
>> 	}
>>
>> 	/*
>> -	 * initial fd_memory for threads
>> +	 * initial fd_memory for producer threads
>> 	 */
>> -	for (i = 0; i < info->num_threads; i++) {
>> +	for (i = 0; i < info->num_producers; i++) {
>> 		if ((FD_MEMORY_PARALLEL(i) = open(info->name_memory, O_RDONLY))
>> 									< 0) {
>> 			ERRMSG("Can't open the dump memory(%s). %s\n",
>> @@ -3673,7 +3673,7 @@ free_for_parallel()
>> 	struct page_flag *current;
>>
>> 	if (info->threads != NULL) {
>> -		for (i = 0; i < info->num_threads; i++) {
>> +		for (i = 0; i < info->num_producers; i++) {
>> 			if (info->threads[i] != NULL)
>> 				free(info->threads[i]);
>>
>> @@ -3714,7 +3714,7 @@ free_for_parallel()
>> 	}
>>
>> 	if (info->page_flag_buf != NULL) {
>> -		for (i = 0; i < info->num_threads; i++) {
>> +		for (i = 0; i < info->num_producers; i++) {
>> 			for (j = 0; j < PAGE_FLAG_NUM; j++) {
>> 				if (info->page_flag_buf[i] != NULL) {
>> 					current = info->page_flag_buf[i];
>> @@ -3729,7 +3729,7 @@ free_for_parallel()
>> 	if (info->parallel_info == NULL)
>> 		return;
>>
>> -	for (i = 0; i < info->num_threads; i++) {
>> +	for (i = 0; i < info->num_producers; i++) {
>> 		if (FD_MEMORY_PARALLEL(i) > 0)
>> 			close(FD_MEMORY_PARALLEL(i));
>>
>> @@ -3936,7 +3936,7 @@ out:
>> 		DEBUG_MSG("Buffer size for the cyclic mode: %ld\n", info->bufsize_cyclic);
>> 	}
>>
>> -	if (info->num_threads) {
>> +	if (info->num_producers) {
>> 		if (is_xen_memory()) {
>> 			MSG("'--num-threads' option is disable,\n");
>> 			MSG("because %s is Xen's memory core image.\n",
>> @@ -4066,9 +4066,9 @@ initialize_2nd_bitmap(struct dump_bitmap *bitmap)
>> }
>>
>> void
>> -initialize_2nd_bitmap_parallel(struct dump_bitmap *bitmap, int thread_num)
>> +initialize_2nd_bitmap_parallel(struct dump_bitmap *bitmap, int producer_num)
>> {
>> -	bitmap->fd = FD_BITMAP_PARALLEL(thread_num);
>> +	bitmap->fd = FD_BITMAP_PARALLEL(producer_num);
>> 	bitmap->file_name = info->name_bitmap;
>> 	bitmap->no_block = -1;
>> 	memset(bitmap->buf, 0, BUFSIZE_BITMAP);
>> @@ -7558,7 +7558,7 @@ kdump_thread_function_cyclic(void *arg) {
>> 	volatile struct page_flag *page_flag_buf = kdump_thread_args->page_flag_buf;
>> 	struct cycle *cycle = kdump_thread_args->cycle;
>> 	mdf_pfn_t pfn = cycle->start_pfn;
>> -	int index = kdump_thread_args->thread_num;
>> +	int index = kdump_thread_args->producer_num;
>> 	int buf_ready;
>> 	int dumpable;
>> 	int fd_memory = 0;
>> @@ -7566,21 +7566,21 @@ kdump_thread_function_cyclic(void *arg) {
>> 	struct dump_bitmap bitmap_memory_parallel = {0};
>> 	unsigned char *buf = NULL, *buf_out = NULL;
>> 	struct mmap_cache *mmap_cache =
>> -			MMAP_CACHE_PARALLEL(kdump_thread_args->thread_num);
>> +			MMAP_CACHE_PARALLEL(kdump_thread_args->producer_num);
>> 	unsigned long size_out;
>> -	z_stream *stream = &ZLIB_STREAM_PARALLEL(kdump_thread_args->thread_num);
>> +	z_stream *stream = &ZLIB_STREAM_PARALLEL(kdump_thread_args->producer_num);
>> #ifdef USELZO
>> -	lzo_bytep wrkmem = WRKMEM_PARALLEL(kdump_thread_args->thread_num);
>> +	lzo_bytep wrkmem = WRKMEM_PARALLEL(kdump_thread_args->producer_num);
>> #endif
>> #ifdef USESNAPPY
>> 	unsigned long len_buf_out_snappy =
>> 				snappy_max_compressed_length(info->page_size);
>> #endif
>>
>> -	buf = BUF_PARALLEL(kdump_thread_args->thread_num);
>> -	buf_out = BUF_OUT_PARALLEL(kdump_thread_args->thread_num);
>> +	buf = BUF_PARALLEL(kdump_thread_args->producer_num);
>> +	buf_out = BUF_OUT_PARALLEL(kdump_thread_args->producer_num);
>>
>> -	fd_memory = FD_MEMORY_PARALLEL(kdump_thread_args->thread_num);
>> +	fd_memory = FD_MEMORY_PARALLEL(kdump_thread_args->producer_num);
>>
>> 	if (info->fd_bitmap) {
>> 		bitmap_parallel.buf = malloc(BUFSIZE_BITMAP);
>> @@ -7590,7 +7590,7 @@ kdump_thread_function_cyclic(void *arg) {
>> 			goto fail;
>> 		}
>> 		initialize_2nd_bitmap_parallel(&bitmap_parallel,
>> -					kdump_thread_args->thread_num);
>> +					kdump_thread_args->producer_num);
>> 	}
>>
>> 	if (info->flag_refiltering) {
>> @@ -7601,7 +7601,7 @@ kdump_thread_function_cyclic(void *arg) {
>> 			goto fail;
>> 		}
>> 		initialize_bitmap_memory_parallel(&bitmap_memory_parallel,
>> -						kdump_thread_args->thread_num);
>> +						kdump_thread_args->producer_num);
>> 	}
>>
>> 	/*
>> @@ -7802,8 +7802,8 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>> 	for (i = 0; i < page_buf_num; i++)
>> 		page_data_buf[i].used = FALSE;
>>
>> -	for (i = 0; i < info->num_threads; i++) {
>> -		kdump_thread_args[i].thread_num = i;
>> +	for (i = 0; i < info->num_producers; i++) {
>> +		kdump_thread_args[i].producer_num = i;
>> 		kdump_thread_args[i].len_buf_out = len_buf_out;
>> 		kdump_thread_args[i].page_data_buf = page_data_buf;
>> 		kdump_thread_args[i].page_flag_buf = info->page_flag_buf[i];
>> @@ -7843,13 +7843,13 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>> 			 * consuming is used for recording in which thread the pfn is the smallest.
>> 			 * current_pfn is used for recording the value of pfn when checking the pfn.
>> 			 */
>> -			for (i = 0; i < info->num_threads; i++) {
>> +			for (i = 0; i < info->num_producers; i++) {
>> 				if (info->page_flag_buf[i]->ready == FLAG_UNUSED)
>> 					continue;
>> 				temp_pfn = info->page_flag_buf[i]->pfn;
>>
>> 				/*
>> -				 * count how many threads have reached the end.
>> +				 * count how many producer threads have reached the end.
>> 				 */
>> 				if (temp_pfn >= end_pfn) {
>> 					info->page_flag_buf[i]->ready = FLAG_UNUSED;
>> @@ -7865,9 +7865,9 @@ write_kdump_pages_parallel_cyclic(struct cache_data *cd_header,
>> 			}
>>
>> 			/*
>> -			 * If all the threads have reached the end, we will finish writing.
>> +			 * If all producer threads have reached the end, we will finish writing.
>> 			 */
>> -			if (end_count >= info->num_threads)
>> +			if (end_count >= info->num_producers)
>> 				goto finish;
>>
>> 			/*
>> @@ -7930,7 +7930,7 @@ finish:
>>
>> out:
>> 	if (threads != NULL) {
>> -		for (i = 0; i < info->num_threads; i++) {
>> +		for (i = 0; i < info->num_producers; i++) {
>> 			if (threads[i] != NULL) {
>> 				res = pthread_cancel(*threads[i]);
>> 				if (res != 0 && res != ESRCH)
>> @@ -7939,7 +7939,7 @@ out:
>> 			}
>> 		}
>>
>> -		for (i = 0; i < info->num_threads; i++) {
>> +		for (i = 0; i < info->num_producers; i++) {
>> 			if (threads[i] != NULL) {
>> 				res = pthread_join(*threads[i], &thread_result);
>> 				if (res != 0)
>> @@ -8553,7 +8553,7 @@ write_kdump_pages_and_bitmap_cyclic(struct cache_data *cd_header, struct cache_d
>> 		if (!write_kdump_bitmap2(&cycle))
>> 			return FALSE;
>>
>> -		if (info->num_threads) {
>> +		if (info->num_producers) {
>> 			if (!write_kdump_pages_parallel_cyclic(cd_header,
>> 							cd_page, &pd_zero,
>> 							&offset_data, &cycle))
>> @@ -10531,7 +10531,7 @@ check_param_for_creating_dumpfile(int argc, char *argv[])
>> 	if (info->flag_sadump_diskset && !sadump_is_supported_arch())
>> 		return FALSE;
>>
>> -	if (info->num_threads) {
>> +	if (info->num_producers) {
>> 		if (info->flag_split) {
>> 			MSG("--num-threads cannot used with --split.\n");
>> 			return FALSE;
>> @@ -10607,16 +10607,16 @@ check_param_for_creating_dumpfile(int argc, char *argv[])
>> 	} else
>> 		return FALSE;
>>
>> -	if (info->num_threads) {
>> +	if (info->num_producers) {
>> 		if ((info->parallel_info =
>> -		     malloc(sizeof(parallel_info_t) * info->num_threads))
>> +		     malloc(sizeof(parallel_info_t) * info->num_producers))
>> 		    == NULL) {
>> 			MSG("Can't allocate memory for parallel_info.\n");
>> 			return FALSE;
>> 		}
>>
>> 		memset(info->parallel_info, 0, sizeof(parallel_info_t)
>> -							* info->num_threads);
>> +							* info->num_producers);
>> 	}
>>
>> 	return TRUE;
>> @@ -10721,20 +10721,20 @@ calculate_cyclic_buffer_size(void) {
>> 	limit_size = get_free_memory_size() * 0.6;
>>
>> 	/*
>> -	 * Recalculate the limit_size according to num_threads.
>> -	 * And reset num_threads if there is not enough memory.
>> +	 * Recalculate the limit_size according to num_producers.
>> +	 * And reset --num-threads if there is not enough memory.
>> 	 */
>> -	if (info->num_threads > 0) {
>> +	if (info->num_producers > 0) {
>> 		if (limit_size <= maximum_size) {
>> 			MSG("There isn't enough memory for multi-threads.\n");
>> -			info->num_threads = 0;
>> +			info->num_producers = 0;
>> 		}
>> -		else if ((limit_size - maximum_size) / info->num_threads < THREAD_REGION) {
>> -			MSG("There isn't enough memory for %d threads.\n", info->num_threads);
>> -			info->num_threads = (limit_size - maximum_size) / THREAD_REGION;
>> -			MSG("--num_threads is set to %d.\n", info->num_threads);
>> +		else if ((limit_size - maximum_size) / info->num_producers < THREAD_REGION) {
>> +			MSG("There isn't enough memory for %d threads.\n", info->num_producers + 1);
>> +			info->num_producers = (limit_size - maximum_size) / THREAD_REGION;
>> +			MSG("--num-threads is set to %d.\n", info->num_producers + 1);
>>
>> -			limit_size = limit_size - THREAD_REGION * info->num_threads;
>> +			limit_size = limit_size - THREAD_REGION * info->num_producers;
>> 		}
>> 	}
>>
>> @@ -11103,7 +11103,7 @@ main(int argc, char *argv[])
>> 			info->working_dir = optarg;
>> 			break;
>> 		case OPT_NUM_THREADS:
>> -			info->num_threads = MAX(atoi(optarg), 0);
>> +			info->num_producers = MAX(atoi(optarg), 1) - 1;
>> 			break;
>> 		case '?':
>> 			MSG("Commandline parameter is invalid.\n");
>> diff --git a/makedumpfile.h b/makedumpfile.h
>> index 533e5b8..6a0f51e 100644
>> --- a/makedumpfile.h
>> +++ b/makedumpfile.h
>> @@ -1004,7 +1004,7 @@ struct page_data
>> };
>>
>> struct thread_args {
>> -	int thread_num;
>> +	int producer_num;
>> 	unsigned long len_buf_out;
>> 	struct cycle *cycle;
>> 	struct page_data *page_data_buf;
>> @@ -1294,7 +1294,7 @@ struct DumpInfo {
>> 	/*
>> 	 * for parallel process
>> 	 */
>> -	int num_threads;
>> +	int num_producers;
>> 	int num_buffers;
>> 	pthread_t **threads;
>> 	struct thread_args *kdump_thread_args;
>> --
>> 1.8.3.1
>>
>>




_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-08-04  1:29 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-08-01  3:26 [PATCH] makedumpfile: Change num_threads to num_producers Zhou Wenjian
2016-08-04  0:50 ` Atsushi Kumagai
2016-08-04  1:28   ` "Zhou, Wenjian/周文?"

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.