linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* Re: [Patch v2] kexec: increase max of kexec segments and use dynamic allocation
       [not found] <20100727082321.5813.84456.sendpatchset@localhost.localdomain>
@ 2010-07-27 10:00 ` Milton Miller
  2010-07-27 18:24   ` Eric W. Biederman
  2010-07-29  6:42   ` Cong Wang
  0 siblings, 2 replies; 4+ messages in thread
From: Milton Miller @ 2010-07-27 10:00 UTC (permalink / raw)
  To: WANG Cong
  Cc: Neil Horman, Neil Horman, huang ying, linux-kernel, kexec,
	Eric W. Biederman, linuxppc-dev

[ Added kexec at lists.infradead.org and linuxppc-dev@lists.ozlabs.org ]

> 
> Currently KEXEC_SEGMENT_MAX is only 16, which is too small for machines with
> many memory ranges.  When hibernating on a machine with disjoint memory we
> need one segment for each memory region. Increase this hard limit to 16K,
> which is reasonably large.
> 
> And change ->segment from a static array to a dynamically allocated memory.
> 
> Cc: Neil Horman <nhorman@redhat.com>
> Cc: huang ying <huang.ying.caritas@gmail.com>
> Cc: Eric W. Biederman <ebiederm@xmission.com>
> Signed-off-by: WANG Cong <amwang@redhat.com>
> 
> ---
> diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
> index ed31a29..f115585 100644
> --- a/arch/powerpc/kernel/machine_kexec_64.c
> +++ b/arch/powerpc/kernel/machine_kexec_64.c
> @@ -131,10 +131,7 @@ static void copy_segments(unsigned long ind)
>  void kexec_copy_flush(struct kimage *image)
>  {
>  	long i, nr_segments = image->nr_segments;
> -	struct  kexec_segment ranges[KEXEC_SEGMENT_MAX];
> -
> -	/* save the ranges on the stack to efficiently flush the icache */
> -	memcpy(ranges, image->segment, sizeof(ranges));
> +	struct  kexec_segment range;

I'm glad you found our copy on the stack and removed the stack overflow
that comes with this bump, but ...

>  
>  	/*
>  	 * After this call we may not use anything allocated in dynamic
> @@ -148,9 +145,11 @@ void kexec_copy_flush(struct kimage *image)
>  	 * we need to clear the icache for all dest pages sometime,
>  	 * including ones that were in place on the original copy
>  	 */
> -	for (i = 0; i < nr_segments; i++)
> -		flush_icache_range((unsigned long)__va(ranges[i].mem),
> -			(unsigned long)__va(ranges[i].mem + ranges[i].memsz));
> +	for (i = 0; i < nr_segments; i++) {
> +		memcpy(&range, &image->segment[i], sizeof(range));
> +		flush_icache_range((unsigned long)__va(range.mem),
> +			(unsigned long)__va(range.mem + range.memsz));
> +	}
>  }

This is executed after the copy, so as it says,
"we may not use anything allocated in dynamic memory".

We could allocate control pages to copy the segment list into.
Actually ppc64 doesn't use the existing control page, but that
is only 4kB today.

We need the list to icache flush all the pages in all the segments.
That is because the indirect list doesn't have pages that were allocated
at their destination.

Or maybe the icache flush should be done in the generic code
like it does for crash load segments?


>  
>  #ifdef CONFIG_SMP
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index 03e8e8d..26b70ff 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -57,7 +57,7 @@ typedef unsigned long kimage_entry_t;
>  #define IND_DONE         0x4
>  #define IND_SOURCE       0x8
>  
> -#define KEXEC_SEGMENT_MAX 16
> +#define KEXEC_SEGMENT_MAX (1024*16)
>  struct kexec_segment {
>  	void __user *buf;
>  	size_t bufsz;
> @@ -86,7 +86,7 @@ struct kimage {
>  	struct page *swap_page;
>  
>  	unsigned long nr_segments;
> -	struct kexec_segment segment[KEXEC_SEGMENT_MAX];
> +	struct kexec_segment *segment;
>  
>  	struct list_head control_pages;
>  	struct list_head dest_pages;
> diff --git a/kernel/kexec.c b/kernel/kexec.c
> index 131b170..3f97309 100644
> --- a/kernel/kexec.c
> +++ b/kernel/kexec.c
> @@ -131,6 +131,11 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
>  	if (!image)
>  		goto out;
>  
> +	image->segment = kzalloc(nr_segments * sizeof(struct kexec_segment),
> +				 GFP_KERNEL);
> +	if (!image->segment)
> +		goto out;
> +
>  	image->head = 0;
>  	image->entry = &image->head;
>  	image->last_entry = &image->head;
> @@ -216,8 +221,10 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
>  out:
>  	if (result == 0)
>  		*rimage = image;
> -	else
> +	else if (image) {
> +		kfree(image->segment);
>  		kfree(image);
> +	}
>  
>  	return result;
>  
> @@ -261,8 +268,10 @@ static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
>   out:
>  	if (result == 0)
>  		*rimage = image;
> -	else
> +	else if (image) {
> +		kfree(image->segment);
>  		kfree(image);
> +	}
>  
>  	return result;
>  }
> @@ -330,8 +339,10 @@ static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
>  out:
>  	if (result == 0)
>  		*rimage = image;
> -	else
> +	else if (image) {
> +		kfree(image->segment);
>  		kfree(image);
> +	}
>  
>  	return result;
>  }
> @@ -656,6 +667,7 @@ static void kimage_free(struct kimage *image)
>  
>  	/* Free the kexec control pages... */
>  	kimage_free_page_list(&image->control_pages);
> +	kfree(image->segment);
>  	kfree(image);
>  }
>  

milton

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Patch v2] kexec: increase max of kexec segments and use dynamic allocation
  2010-07-27 10:00 ` [Patch v2] kexec: increase max of kexec segments and use dynamic allocation Milton Miller
@ 2010-07-27 18:24   ` Eric W. Biederman
  2010-07-29  6:42   ` Cong Wang
  1 sibling, 0 replies; 4+ messages in thread
From: Eric W. Biederman @ 2010-07-27 18:24 UTC (permalink / raw)
  To: Milton Miller
  Cc: Neil Horman, WANG Cong, Neil Horman, huang ying, linux-kernel,
	kexec, linuxppc-dev

Milton Miller <miltonm@bga.com> writes:

> [ Added kexec at lists.infradead.org and linuxppc-dev@lists.ozlabs.org ]
>
>> 
>> Currently KEXEC_SEGMENT_MAX is only 16 which is too small for machine with
>> many memory ranges.  When hibernate on a machine with disjoint memory we do
>> need one segment for each memory region. Increase this hard limit to 16K
>> which is reasonably large.
>> 
>> And change ->segment from a static array to a dynamically allocated memory.
>> 
>> Cc: Neil Horman <nhorman@redhat.com>
>> Cc: huang ying <huang.ying.caritas@gmail.com>
>> Cc: Eric W. Biederman <ebiederm@xmission.com>
>> Signed-off-by: WANG Cong <amwang@redhat.com>
>> 
>> ---
>> diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
>> index ed31a29..f115585 100644
>> --- a/arch/powerpc/kernel/machine_kexec_64.c
>> +++ b/arch/powerpc/kernel/machine_kexec_64.c
>> @@ -131,10 +131,7 @@ static void copy_segments(unsigned long ind)
>>  void kexec_copy_flush(struct kimage *image)
>>  {
>>  	long i, nr_segments = image->nr_segments;
>> -	struct  kexec_segment ranges[KEXEC_SEGMENT_MAX];
>> -
>> -	/* save the ranges on the stack to efficiently flush the icache */
>> -	memcpy(ranges, image->segment, sizeof(ranges));
>> +	struct  kexec_segment range;
>
> I'm glad you found our copy on the stack and removed the stack overflow
> that comes with this bump, but ...
>
>>  
>>  	/*
>>  	 * After this call we may not use anything allocated in dynamic
>> @@ -148,9 +145,11 @@ void kexec_copy_flush(struct kimage *image)
>>  	 * we need to clear the icache for all dest pages sometime,
>>  	 * including ones that were in place on the original copy
>>  	 */
>> -	for (i = 0; i < nr_segments; i++)
>> -		flush_icache_range((unsigned long)__va(ranges[i].mem),
>> -			(unsigned long)__va(ranges[i].mem + ranges[i].memsz));
>> +	for (i = 0; i < nr_segments; i++) {
>> +		memcpy(&range, &image->segment[i], sizeof(range));
>> +		flush_icache_range((unsigned long)__va(range.mem),
>> +			(unsigned long)__va(range.mem + range.memsz));
>> +	}
>>  }
>
> This is executed after the copy, so as it says,
> "we may not use anything allocated in dynamic memory".
>
> We could allocate control pages to copy the segment list into.
> Actually ppc64 doesn't use the existing control page, but that
> is only 4kB today.
>
> We need the list to icache flush all the pages in all the segments.
> The as the indirect list doesn't have pages that were allocated at
> their destination.

An interesting point.

> Or maybe the icache flush should be done in the generic code
> like it does for crash load segments?

Please.  I don't quite understand the icache flush requirement.
But we really should not be looking at the segments in the
architecture specific code.

Ideally we would only keep the segment information around for
the duration of the kexec_load syscall and not have it when
it comes time to start the second kernel.

I am puzzled.  We should be completely replacing the page tables so
can't we just do a global flush?  Perhaps I am being naive about what
is required for a ppc flush.

Eric

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Patch v2] kexec: increase max of kexec segments and use dynamic allocation
  2010-07-27 10:00 ` [Patch v2] kexec: increase max of kexec segments and use dynamic allocation Milton Miller
  2010-07-27 18:24   ` Eric W. Biederman
@ 2010-07-29  6:42   ` Cong Wang
  2010-08-05  2:32     ` Cong Wang
  1 sibling, 1 reply; 4+ messages in thread
From: Cong Wang @ 2010-07-29  6:42 UTC (permalink / raw)
  To: Milton Miller
  Cc: Neil Horman, Neil Horman, huang ying, linux-kernel, kexec,
	Eric W. Biederman, linuxppc-dev

On 07/27/10 18:00, Milton Miller wrote:
> [ Added kexec at lists.infradead.org and linuxppc-dev@lists.ozlabs.org ]
>
>>
>> Currently KEXEC_SEGMENT_MAX is only 16 which is too small for machine with
>> many memory ranges.  When hibernate on a machine with disjoint memory we do
>> need one segment for each memory region. Increase this hard limit to 16K
>> which is reasonably large.
>>
>> And change ->segment from a static array to a dynamically allocated memory.
>>
>> Cc: Neil Horman<nhorman@redhat.com>
>> Cc: huang ying<huang.ying.caritas@gmail.com>
>> Cc: Eric W. Biederman<ebiederm@xmission.com>
>> Signed-off-by: WANG Cong<amwang@redhat.com>
>>
>> ---
>> diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
>> index ed31a29..f115585 100644
>> --- a/arch/powerpc/kernel/machine_kexec_64.c
>> +++ b/arch/powerpc/kernel/machine_kexec_64.c
>> @@ -131,10 +131,7 @@ static void copy_segments(unsigned long ind)
>>   void kexec_copy_flush(struct kimage *image)
>>   {
>>   	long i, nr_segments = image->nr_segments;
>> -	struct  kexec_segment ranges[KEXEC_SEGMENT_MAX];
>> -
>> -	/* save the ranges on the stack to efficiently flush the icache */
>> -	memcpy(ranges, image->segment, sizeof(ranges));
>> +	struct  kexec_segment range;
>
> I'm glad you found our copy on the stack and removed the stack overflow
> that comes with this bump, but ...
>
>>
>>   	/*
>>   	 * After this call we may not use anything allocated in dynamic
>> @@ -148,9 +145,11 @@ void kexec_copy_flush(struct kimage *image)
>>   	 * we need to clear the icache for all dest pages sometime,
>>   	 * including ones that were in place on the original copy
>>   	 */
>> -	for (i = 0; i<  nr_segments; i++)
>> -		flush_icache_range((unsigned long)__va(ranges[i].mem),
>> -			(unsigned long)__va(ranges[i].mem + ranges[i].memsz));
>> +	for (i = 0; i<  nr_segments; i++) {
>> +		memcpy(&range,&image->segment[i], sizeof(range));
>> +		flush_icache_range((unsigned long)__va(range.mem),
>> +			(unsigned long)__va(range.mem + range.memsz));
>> +	}
>>   }
>
> This is executed after the copy, so as it says,
> "we may not use anything allocated in dynamic memory".
>
> We could allocate control pages to copy the segment list into.
> Actually ppc64 doesn't use the existing control page, but that
> is only 4kB today.
>
> We need the list to icache flush all the pages in all the segments.
> The as the indirect list doesn't have pages that were allocated at
> their destination.
>
> Or maybe the icache flush should be done in the generic code
> like it does for crash load segments?
>

I don't get the point here; according to the comments,
it is copied onto the stack for efficiency.

-- 
The opposite of love is not hate, it's indifference.
  - Elie Wiesel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [Patch v2] kexec: increase max of kexec segments and use dynamic allocation
  2010-07-29  6:42   ` Cong Wang
@ 2010-08-05  2:32     ` Cong Wang
  0 siblings, 0 replies; 4+ messages in thread
From: Cong Wang @ 2010-08-05  2:32 UTC (permalink / raw)
  To: Milton Miller
  Cc: Neil Horman, Neil Horman, huang ying, linux-kernel, kexec,
	Eric W. Biederman, linuxppc-dev

(Ping Milton...)

On 07/29/10 14:42, Cong Wang wrote:
> On 07/27/10 18:00, Milton Miller wrote:
>> [ Added kexec at lists.infradead.org and linuxppc-dev@lists.ozlabs.org ]
>>
>>>
>>> Currently KEXEC_SEGMENT_MAX is only 16 which is too small for machine
>>> with
>>> many memory ranges. When hibernate on a machine with disjoint memory
>>> we do
>>> need one segment for each memory region. Increase this hard limit to 16K
>>> which is reasonably large.
>>>
>>> And change ->segment from a static array to a dynamically allocated
>>> memory.
>>>
>>> Cc: Neil Horman<nhorman@redhat.com>
>>> Cc: huang ying<huang.ying.caritas@gmail.com>
>>> Cc: Eric W. Biederman<ebiederm@xmission.com>
>>> Signed-off-by: WANG Cong<amwang@redhat.com>
>>>
>>> ---
>>> diff --git a/arch/powerpc/kernel/machine_kexec_64.c
>>> b/arch/powerpc/kernel/machine_kexec_64.c
>>> index ed31a29..f115585 100644
>>> --- a/arch/powerpc/kernel/machine_kexec_64.c
>>> +++ b/arch/powerpc/kernel/machine_kexec_64.c
>>> @@ -131,10 +131,7 @@ static void copy_segments(unsigned long ind)
>>> void kexec_copy_flush(struct kimage *image)
>>> {
>>> long i, nr_segments = image->nr_segments;
>>> - struct kexec_segment ranges[KEXEC_SEGMENT_MAX];
>>> -
>>> - /* save the ranges on the stack to efficiently flush the icache */
>>> - memcpy(ranges, image->segment, sizeof(ranges));
>>> + struct kexec_segment range;
>>
>> I'm glad you found our copy on the stack and removed the stack overflow
>> that comes with this bump, but ...
>>
>>>
>>> /*
>>> * After this call we may not use anything allocated in dynamic
>>> @@ -148,9 +145,11 @@ void kexec_copy_flush(struct kimage *image)
>>> * we need to clear the icache for all dest pages sometime,
>>> * including ones that were in place on the original copy
>>> */
>>> - for (i = 0; i< nr_segments; i++)
>>> - flush_icache_range((unsigned long)__va(ranges[i].mem),
>>> - (unsigned long)__va(ranges[i].mem + ranges[i].memsz));
>>> + for (i = 0; i< nr_segments; i++) {
>>> + memcpy(&range,&image->segment[i], sizeof(range));
>>> + flush_icache_range((unsigned long)__va(range.mem),
>>> + (unsigned long)__va(range.mem + range.memsz));
>>> + }
>>> }
>>
>> This is executed after the copy, so as it says,
>> "we may not use anything allocated in dynamic memory".
>>
>> We could allocate control pages to copy the segment list into.
>> Actually ppc64 doesn't use the existing control page, but that
>> is only 4kB today.
>>
>> We need the list to icache flush all the pages in all the segments.
>> The as the indirect list doesn't have pages that were allocated at
>> their destination.
>>
>> Or maybe the icache flush should be done in the generic code
>> like it does for crash load segments?
>>
>
> I don't get the point here, according to the comments,
> it is copied into stack because of efficiency.
>


-- 
The opposite of love is not hate, it's indifference.
  - Elie Wiesel

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2010-08-05  2:28 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <20100727082321.5813.84456.sendpatchset@localhost.localdomain>
2010-07-27 10:00 ` [Patch v2] kexec: increase max of kexec segments and use dynamic allocation Milton Miller
2010-07-27 18:24   ` Eric W. Biederman
2010-07-29  6:42   ` Cong Wang
2010-08-05  2:32     ` Cong Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).