[PATCH] create multiple banks for dom0 in 1:1 mapping if necessary

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH] create multiple banks for dom0 in 1:1 mapping if necessary
@ 2014-01-10  4:12 Karim Raslan
  2014-01-10 15:48 ` Ian Campbell
  0 siblings, 1 reply; 15+ messages in thread
From: Karim Raslan @ 2014-01-10  4:12 UTC (permalink / raw)
  To: xen-devel; +Cc: tim, julien.grall, stefano.stabellini, ian.campbell

Create multiple banks to hold dom0 memory in case of 1:1 mapping
if we failed to find 1 large contiguous memory to hold the whole
thing.

Signed-off-by: Karim Raslan <karim.allah.ahmed@gmail.com>
---
 xen/arch/arm/domain_build.c |   74 ++++++++++++++++++++++++++-----------
 xen/arch/arm/kernel.c       |   86 ++++++++++++++++++++++++++++++++++---------
 2 files changed, 121 insertions(+), 39 deletions(-)

diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index faff88e..bb44cdd 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -69,39 +69,71 @@ static void allocate_memory_11(struct domain *d, struct kernel_info *kinfo)
 {
     paddr_t start;
     paddr_t size;
+    unsigned int cur_order, cur_bank, nr_banks = 1, index = 0;
     struct page_info *pg = NULL;
-    unsigned int order = get_order_from_bytes(dom0_mem);
+    unsigned int order = get_order_from_bytes(kinfo->unassigned_mem);
     int res;
     paddr_t spfn;
     unsigned int bits;
 
-    for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
+#define MIN_BANK_ORDER 10
+
+    kinfo->mem.nr_banks = 0;
+
+    /*
+     * We always first try to allocate all dom0 memory in 1 bank.
+     * However, if we failed to allocate physically contiguous memory
+     * from the allocator, then we try to create more than one bank.
+     */
+    for ( cur_order = order; cur_order > MIN_BANK_ORDER; cur_order--)
     {
-        pg = alloc_domheap_pages(d, order, MEMF_bits(bits));
-        if ( pg != NULL )
-            break;
+        for( cur_bank = 1; cur_bank <= nr_banks; cur_bank++ )
+        {
+            for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
+			{
+				pg = alloc_domheap_pages(d, cur_order, MEMF_bits(bits));
+				if ( pg != NULL )
+					break;
+			}
+
+			if ( !pg )
+				break;
+
+			spfn = page_to_mfn(pg);
+			start = pfn_to_paddr(spfn);
+			size = pfn_to_paddr((1 << cur_order));
+
+		    kinfo->mem.bank[index].start = start;
+		    kinfo->mem.bank[index].size = size;
+		    index++;
+		    kinfo->mem.nr_banks++;
+    	}
+
+    	if( pg )
+    		break;
+
+    	nr_banks = (nr_banks - cur_bank + 1) << 1;
     }
 
-    if ( !pg )
-        panic("Failed to allocate contiguous memory for dom0");
+	if ( !pg )
+		panic("Failed to allocate contiguous memory for dom0");
 
-    spfn = page_to_mfn(pg);
-    start = pfn_to_paddr(spfn);
-    size = pfn_to_paddr((1 << order));
+	for ( index = 0; index < kinfo->mem.nr_banks; index++ )
+	{
+	    start = kinfo->mem.bank[index].start;
+	    size = kinfo->mem.bank[index].size;
+	    spfn = paddr_to_pfn(start);
+	    order = get_order_from_bytes(size);
 
-    // 1:1 mapping
-    printk("Populate P2M %#"PRIx64"->%#"PRIx64" (1:1 mapping for dom0)\n",
-           start, start + size);
-    res = guest_physmap_add_page(d, spfn, spfn, order);
-
-    if ( res )
-        panic("Unable to add pages in DOM0: %d", res);
+	    printk("Populate P2M %#"PRIx64"->%#"PRIx64" (1:1 mapping for dom0 - order : %i)\n",
+	            start, start + size, order);
+	    res = guest_physmap_add_page(d, spfn, spfn, order);
 
-    kinfo->mem.bank[0].start = start;
-    kinfo->mem.bank[0].size = size;
-    kinfo->mem.nr_banks = 1;
+	    if ( res )
+	        panic("Unable to add pages in DOM0: %d", res);
 
-    kinfo->unassigned_mem -= size;
+	    kinfo->unassigned_mem -= size;
+	}
 }
 
 static void allocate_memory(struct domain *d, struct kernel_info *kinfo)
diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c
index 6a5772b..658c3de 100644
--- a/xen/arch/arm/kernel.c
+++ b/xen/arch/arm/kernel.c
@@ -79,15 +79,43 @@ static void place_modules(struct kernel_info *info,
     const paddr_t total = initrd_len + dtb_len;
 
     /* Convenient */
-    const paddr_t mem_start = info->mem.bank[0].start;
-    const paddr_t mem_size = info->mem.bank[0].size;
-    const paddr_t mem_end = mem_start + mem_size;
-    const paddr_t kernel_size = kernel_end - kernel_start;
+    unsigned int i, min_i = -1;
+    bool_t same_bank = false;
+
+    paddr_t mem_start, mem_end, mem_size;
+    paddr_t kernel_size;
 
     paddr_t addr;
 
-    if ( total + kernel_size > mem_size )
-        panic("Not enough memory in the first bank for the dtb+initrd");
+    kernel_size = kernel_end - kernel_start;
+
+    for ( i = 0; i < info->mem.nr_banks; i++ )
+    {
+        mem_start = info->mem.bank[i].start;
+        mem_size = info->mem.bank[i].size;
+        mem_end = mem_start + mem_size - 1;
+
+        if ( (kernel_end > mem_start) && (kernel_end <= mem_end) )
+            same_bank = true;
+        else
+            same_bank = false;
+
+        if ( same_bank && ((total + kernel_size) < mem_size) )
+            min_i = i;
+        else if ( (!same_bank) && (total < mem_size) )
+            min_i = i;
+        else
+            continue;
+
+        break;
+    }
+
+    if ( min_i == -1 )
+        panic("Not enough memory for the dtb+initrd");
+
+    mem_start = info->mem.bank[min_i].start;
+    mem_size = info->mem.bank[min_i].size;
+    mem_end = mem_start + mem_size;
 
     /*
      * DTB must be loaded such that it does not conflict with the
@@ -104,17 +132,25 @@ static void place_modules(struct kernel_info *info,
      * just after the kernel, if there is room, otherwise just before.
      */
 
-    if ( kernel_end < MIN(mem_start + MB(128), mem_end - total) )
-        addr = MIN(mem_start + MB(128), mem_end - total);
-    else if ( mem_end - ROUNDUP(kernel_end, MB(2)) >= total )
-        addr = ROUNDUP(kernel_end, MB(2));
-    else if ( kernel_start - mem_start >= total )
-        addr = kernel_start - total;
-    else
+    if ( same_bank )
     {
-        panic("Unable to find suitable location for dtb+initrd");
-        return;
-    }
+        if ( kernel_end < MIN(mem_start + MB(128), mem_end - total) )
+            addr = MIN(mem_start + MB(128), mem_end - total);
+        if ( mem_end - ROUNDUP(kernel_end, MB(2)) >= total )
+            addr = ROUNDUP(kernel_end, MB(2));
+        else if ( kernel_start - mem_start >= total )
+            addr = kernel_start - total;
+        else
+        {
+            /*
+             * We should never hit this condition because we've already
+             * done the check while choosing the bank.
+             */
+            panic("Unable to find suitable location for dtb+initrd");
+            return;
+        }
+    } else
+        addr = ROUNDUP(mem_end - total, MB(2));
 
     info->dtb_paddr = addr;
     info->initrd_paddr = info->dtb_paddr + dtb_len;
@@ -264,10 +300,24 @@ static int kernel_try_zimage32_prepare(struct kernel_info *info,
      */
     if (start == 0)
     {
+        unsigned int i, min_i = 0, min_start = -1;
         paddr_t load_end;
 
-        load_end = info->mem.bank[0].start + info->mem.bank[0].size;
-        load_end = MIN(info->mem.bank[0].start + MB(128), load_end);
+        /*
+         * Load kernel at the lowest possible bank
+         * ( check commit 6c21cb36e263de2db8716b477157a5b6cd531e1e for reason behind that )
+         */
+        for ( i = 0; i < info->mem.nr_banks; i++ )
+        {
+            if( (unsigned int)info->mem.bank[i].start < min_start )
+            {
+                min_start = info->mem.bank[i].start;
+                min_i = i;
+            }
+        }
+
+        load_end = info->mem.bank[min_i].start + info->mem.bank[min_i].size;
+        load_end = MIN(info->mem.bank[min_i].start + MB(128), load_end);
 
         info->zimage.load_addr = load_end - end;
         /* Align to 2MB */
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] create multiple banks for dom0 in 1:1 mapping if necessary
  2014-01-10  4:12 [PATCH] create multiple banks for dom0 in 1:1 mapping if necessary Karim Raslan
@ 2014-01-10 15:48 ` Ian Campbell
  2014-01-11  1:58   ` karim.allah.ahmed
  0 siblings, 1 reply; 15+ messages in thread
From: Ian Campbell @ 2014-01-10 15:48 UTC (permalink / raw)
  To: Karim Raslan; +Cc: tim, julien.grall, stefano.stabellini, xen-devel

On Fri, 2014-01-10 at 04:12 +0000, Karim Raslan wrote:
> Create multiple banks to hold dom0 memory in case of 1:1 mapping
> if we failed to find 1 large contiguous memory to hold the whole
> thing.

Thanks. While with my ARM maintainer hat on I'd love for this to go in
for 4.4 with my acting release manager hat on I think if I have to be
honest this is too big a change for 4.4 at this stage, which is a
pity :-(


> 
> Signed-off-by: Karim Raslan <karim.allah.ahmed@gmail.com>
> ---
>  xen/arch/arm/domain_build.c |   74 ++++++++++++++++++++++++++-----------
>  xen/arch/arm/kernel.c       |   86 ++++++++++++++++++++++++++++++++++---------
>  2 files changed, 121 insertions(+), 39 deletions(-)
> 
> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> index faff88e..bb44cdd 100644
> --- a/xen/arch/arm/domain_build.c
> +++ b/xen/arch/arm/domain_build.c
> @@ -69,39 +69,71 @@ static void allocate_memory_11(struct domain *d, struct kernel_info *kinfo)
>  {
>      paddr_t start;
>      paddr_t size;
> +    unsigned int cur_order, cur_bank, nr_banks = 1, index = 0;
>      struct page_info *pg = NULL;
> -    unsigned int order = get_order_from_bytes(dom0_mem);
> +    unsigned int order = get_order_from_bytes(kinfo->unassigned_mem);
>      int res;
>      paddr_t spfn;
>      unsigned int bits;
>  
> -    for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
> +#define MIN_BANK_ORDER 10

2^10 is < PAGE_SIZE (PAGE_SHIFT is 12). 12 is the lowest allocation size
which can be made, but I think in practice the lowest useful bank size
is going to be somewhat larger than that.

NR_MEM_BANKS is 8 so we also need to consider that.

A 64M dom0 would mean 8M per bank, which seems like a reasonable minimum
bank size. That would be a MIN_BANK_ORDER of 23. Please include a
comment explaining where this number comes from.

The other way to look at this would be to calculate it dynamically as
get_order_from_bytes(dom0_mem / NR_MEM_BANKS).

> +
> +    kinfo->mem.nr_banks = 0;
> +
> +    /*
> +     * We always first try to allocate all dom0 memory in 1 bank.
> +     * However, if we failed to allocate physically contiguous memory
> +     * from the allocator, then we try to create more than one bank.
> +     */
> +    for ( cur_order = order; cur_order > MIN_BANK_ORDER; cur_order--)

I think this can be just 
	for( order = get_order_from_bytes(...) ; order > MIN_BANK_ORDER ; order-- )
(maybe order >= ?) or
	while (order > MIN_BANK_ORDER )
	{
		...
		order--;
	}
I think the first works better.

This does away with the need for cur_order vs order. I think order is
mostly unused after this patch, also not renaming cur_order would
hopefully reduce the diff and therefore the "scariness" of the patch wrt
4.4 (although that may not be sufficient).

>      {
> -        pg = alloc_domheap_pages(d, order, MEMF_bits(bits));
> -        if ( pg != NULL )
> -            break;
> +        for( cur_bank = 1; cur_bank <= nr_banks; cur_bank++ )

Is cur_bank redundant with index? Also kinfo->mem.nr_banks tells you how
many banks are filled in.

> +        {
> +            for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
> +			{

There something a bit odd going on with the whitespace here and in the
rest of this loop. Perhaps some hard tabs snuck in?

> +				pg = alloc_domheap_pages(d, cur_order, MEMF_bits(bits));
> +				if ( pg != NULL )
> +					break;
> +			}
> +
> +			if ( !pg )
> +				break;
> +
> +			spfn = page_to_mfn(pg);
> +			start = pfn_to_paddr(spfn);
> +			size = pfn_to_paddr((1 << cur_order));
> +
> +		    kinfo->mem.bank[index].start = start;
> +		    kinfo->mem.bank[index].size = size;
> +		    index++;
> +		    kinfo->mem.nr_banks++;
> +    	}
> +
> +    	if( pg )
> +    		break;
> +
> +    	nr_banks = (nr_banks - cur_bank + 1) << 1;

<<1 ? 

Is this not just kinfo->mem.nr_banks?

The basic structure here is:

for ( cur_order = order; cur_order > MIN_BANK_ORDER; cur_order--)
        for( cur_bank = 1; cur_bank <= nr_banks; cur_bank++ )
                for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
                
Shouldn't the iteration over bank be the outer one?

The banks might be different sizes, right?

Also with either approach then depending on where memory is available
this may result in the memory not being allocated in and/or that they
are not in increasing order (in fact, because Xen prefer to allocate
higher memory first it seems likely that it will be in reverse order).

I don't know if either of those things matter. What does ePAPR have to
say on the matter?

I'd expect that the ordering might matter from the point of view of
putting the kernel in the first bank -- since that may no longer be the
lowest address.

You don't seem to reference kinfo->unassigned_mem anywhere after the
initial order calculation -- I think you need to subtract memory from it
on each iteration, or else I'm not sure you will actually get the right
amount allocated in all cases.

>      }
>  
> -    if ( !pg )
> -        panic("Failed to allocate contiguous memory for dom0");
> +	if ( !pg )
> +		panic("Failed to allocate contiguous memory for dom0");
>  
> -    spfn = page_to_mfn(pg);
> -    start = pfn_to_paddr(spfn);
> -    size = pfn_to_paddr((1 << order));
> +	for ( index = 0; index < kinfo->mem.nr_banks; index++ )
> +	{
> +	    start = kinfo->mem.bank[index].start;
> +	    size = kinfo->mem.bank[index].size;
> +	    spfn = paddr_to_pfn(start);
> +	    order = get_order_from_bytes(size);
>  
> -    // 1:1 mapping
> -    printk("Populate P2M %#"PRIx64"->%#"PRIx64" (1:1 mapping for dom0)\n",
> -           start, start + size);
> -    res = guest_physmap_add_page(d, spfn, spfn, order);
> -
> -    if ( res )
> -        panic("Unable to add pages in DOM0: %d", res);
> +	    printk("Populate P2M %#"PRIx64"->%#"PRIx64" (1:1 mapping for dom0 - order : %i)\n",
> +	            start, start + size, order);
> +	    res = guest_physmap_add_page(d, spfn, spfn, order);

Can this not be done as it is allocated rather than in a second pass?

>  
> -    kinfo->mem.bank[0].start = start;
> -    kinfo->mem.bank[0].size = size;
> -    kinfo->mem.nr_banks = 1;
> +	    if ( res )
> +	        panic("Unable to add pages in DOM0: %d", res);
>  
> -    kinfo->unassigned_mem -= size;
> +	    kinfo->unassigned_mem -= size;
> +	}
>  }
>  
>  static void allocate_memory(struct domain *d, struct kernel_info *kinfo)
> diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c
> index 6a5772b..658c3de 100644
> --- a/xen/arch/arm/kernel.c
> +++ b/xen/arch/arm/kernel.c
> @@ -79,15 +79,43 @@ static void place_modules(struct kernel_info *info,
>      const paddr_t total = initrd_len + dtb_len;
>  
>      /* Convenient */

If you are going to remove all of the following convenience variables
then this comment is no longer correct.

(Convenient here means a shorter local name for something complex)

> -    const paddr_t mem_start = info->mem.bank[0].start;
> -    const paddr_t mem_size = info->mem.bank[0].size;
> -    const paddr_t mem_end = mem_start + mem_size;
> -    const paddr_t kernel_size = kernel_end - kernel_start;
> +    unsigned int i, min_i = -1;
> +    bool_t same_bank = false;
> +
> +    paddr_t mem_start, mem_end, mem_size;
> +    paddr_t kernel_size;
>  
>      paddr_t addr;
>  
> -    if ( total + kernel_size > mem_size )
> -        panic("Not enough memory in the first bank for the dtb+initrd");
> +    kernel_size = kernel_end - kernel_start;
> +
> +    for ( i = 0; i < info->mem.nr_banks; i++ )
> +    {
> +        mem_start = info->mem.bank[i].start;
> +        mem_size = info->mem.bank[i].size;
> +        mem_end = mem_start + mem_size - 1;
> +
> +        if ( (kernel_end > mem_start) && (kernel_end <= mem_end) )
> +            same_bank = true;
> +        else
> +            same_bank = false;
> +
> +        if ( same_bank && ((total + kernel_size) < mem_size) )
> +            min_i = i;
> +        else if ( (!same_bank) && (total < mem_size) )
> +            min_i = i;
> +        else
> +            continue;

What is all this doing?

> +
> +        break;
> +    }
> +
> +    if ( min_i == -1 )
> +        panic("Not enough memory for the dtb+initrd");
> +
> +    mem_start = info->mem.bank[min_i].start;
> +    mem_size = info->mem.bank[min_i].size;
> +    mem_end = mem_start + mem_size;
>  
>      /*
>       * DTB must be loaded such that it does not conflict with the
> @@ -104,17 +132,25 @@ static void place_modules(struct kernel_info *info,
>       * just after the kernel, if there is room, otherwise just before.
>       */
>  
> -    if ( kernel_end < MIN(mem_start + MB(128), mem_end - total) )
> -        addr = MIN(mem_start + MB(128), mem_end - total);
> -    else if ( mem_end - ROUNDUP(kernel_end, MB(2)) >= total )
> -        addr = ROUNDUP(kernel_end, MB(2));
> -    else if ( kernel_start - mem_start >= total )
> -        addr = kernel_start - total;
> -    else
> +    if ( same_bank )
>      {
> -        panic("Unable to find suitable location for dtb+initrd");
> -        return;
> -    }
> +        if ( kernel_end < MIN(mem_start + MB(128), mem_end - total) )
> +            addr = MIN(mem_start + MB(128), mem_end - total);
> +        if ( mem_end - ROUNDUP(kernel_end, MB(2)) >= total )
> +            addr = ROUNDUP(kernel_end, MB(2));
> +        else if ( kernel_start - mem_start >= total )
> +            addr = kernel_start - total;
> +        else
> +        {
> +            /*
> +             * We should never hit this condition because we've already
> +             * done the check while choosing the bank.
> +             */
> +            panic("Unable to find suitable location for dtb+initrd");
> +            return;
> +        }
> +    } else
> +        addr = ROUNDUP(mem_end - total, MB(2));
>  
>      info->dtb_paddr = addr;
>      info->initrd_paddr = info->dtb_paddr + dtb_len;
> @@ -264,10 +300,24 @@ static int kernel_try_zimage32_prepare(struct kernel_info *info,
>       */
>      if (start == 0)
>      {
> +        unsigned int i, min_i = 0, min_start = -1;
>          paddr_t load_end;
>  
> -        load_end = info->mem.bank[0].start + info->mem.bank[0].size;
> -        load_end = MIN(info->mem.bank[0].start + MB(128), load_end);
> +        /*
> +         * Load kernel at the lowest possible bank
> +         * ( check commit 6c21cb36e263de2db8716b477157a5b6cd531e1e for reason behind that )

That commit says nothing about loading in the lowest possible bank,
though. If there is some relevant factor which is worth commenting on
please do so directly.

Actually now that the kernel is fixed upstream we don't need the
behaviour of that commit at all. Although there are still restrictions
based on load address vs start of RAM (See booting.txt in the kernel
docs)

Ian.

> +         */
> +        for ( i = 0; i < info->mem.nr_banks; i++ )
> +        {
> +            if( (unsigned int)info->mem.bank[i].start < min_start )
> +            {
> +                min_start = info->mem.bank[i].start;
> +                min_i = i;
> +            }
> +        }
> +
> +        load_end = info->mem.bank[min_i].start + info->mem.bank[min_i].size;
> +        load_end = MIN(info->mem.bank[min_i].start + MB(128), load_end);
>  
>          info->zimage.load_addr = load_end - end;
>          /* Align to 2MB */

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] create multiple banks for dom0 in 1:1 mapping if necessary
  2014-01-10 15:48 ` Ian Campbell
@ 2014-01-11  1:58   ` karim.allah.ahmed
  2014-01-14 10:51     ` Ian Campbell
  0 siblings, 1 reply; 15+ messages in thread
From: karim.allah.ahmed @ 2014-01-11  1:58 UTC (permalink / raw)
  To: Ian Campbell; +Cc: tim, Julien Grall, stefano.stabellini, xen-devel

On Fri, Jan 10, 2014 at 3:48 PM, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> On Fri, 2014-01-10 at 04:12 +0000, Karim Raslan wrote:
>> Create multiple banks to hold dom0 memory in case of 1:1 mapping
>> if we failed to find 1 large contiguous memory to hold the whole
>> thing.
>
> Thanks. While with my ARM maintainer hat on I'd love for this to go in
> for 4.4 with my acting release manager hat on I think if I have to be
> honest this is too big a change for 4.4 at this stage, which is a
> pity :-(
>
>
>>
>> Signed-off-by: Karim Raslan <karim.allah.ahmed@gmail.com>
>> ---
>>  xen/arch/arm/domain_build.c |   74 ++++++++++++++++++++++++++-----------
>>  xen/arch/arm/kernel.c       |   86 ++++++++++++++++++++++++++++++++++---------
>>  2 files changed, 121 insertions(+), 39 deletions(-)
>>
>> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
>> index faff88e..bb44cdd 100644
>> --- a/xen/arch/arm/domain_build.c
>> +++ b/xen/arch/arm/domain_build.c
>> @@ -69,39 +69,71 @@ static void allocate_memory_11(struct domain *d, struct kernel_info *kinfo)
>>  {
>>      paddr_t start;
>>      paddr_t size;
>> +    unsigned int cur_order, cur_bank, nr_banks = 1, index = 0;
>>      struct page_info *pg = NULL;
>> -    unsigned int order = get_order_from_bytes(dom0_mem);
>> +    unsigned int order = get_order_from_bytes(kinfo->unassigned_mem);
>>      int res;
>>      paddr_t spfn;
>>      unsigned int bits;
>>
>> -    for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
>> +#define MIN_BANK_ORDER 10
>
> 2^10 is < PAGE_SIZE (PAGE_SHIFT is 12). 12 is the lowest allocation size
> which can be made, but I think in practice the lowest useful bank size
> is going to be somewhat larger than that.

MIN_BANK_ORDER is in pages, so it 2^10 pages not 2^10 bytes which
makes the minimum 4 Mbyte

>
> NR_MEM_BANKS is 8 so we also need to consider that.
>
> A 64M dom0 would mean 8M per bank, which seems like a reasonable minimum
> bank size. That would be a MIN_BANK_ORDER of 23. Please include a
> comment explaining where this number comes from.
>
> The other way to look at this would be to calculate it dynamically as
> get_order_from_bytes(dom0_mem / NR_MEM_BANKS).

Yup, you're right. That sounds more reasonable.

>
>> +
>> +    kinfo->mem.nr_banks = 0;
>> +
>> +    /*
>> +     * We always first try to allocate all dom0 memory in 1 bank.
>> +     * However, if we failed to allocate physically contiguous memory
>> +     * from the allocator, then we try to create more than one bank.
>> +     */
>> +    for ( cur_order = order; cur_order > MIN_BANK_ORDER; cur_order--)
>
> I think this can be just
>         for( order = get_order_from_bytes(...) ; order > MIN_BANK_ORDER ; order-- )
> (maybe order >= ?) or
>         while (order > MIN_BANK_ORDER )
>         {
>                 ...
>                 order--;
>         }
> I think the first works better.
>
> This does away with the need for cur_order vs order. I think order is
> mostly unused after this patch, also not renaming cur_order would
> hopefully reduce the diff and therefore the "scariness" of the patch wrt
> 4.4 (although that may not be sufficient).

Yes, that's correct, however I'm intentionally using a different
variable because I thought that this is going to make things more
obvious. If you think it's better to use the same variable, then I'll
update it.

>
>>      {
>> -        pg = alloc_domheap_pages(d, order, MEMF_bits(bits));
>> -        if ( pg != NULL )
>> -            break;
>> +        for( cur_bank = 1; cur_bank <= nr_banks; cur_bank++ )
>
> Is cur_bank redundant with index? Also kinfo->mem.nr_banks tells you how
> many banks are filled in.
>
>> +        {
>> +            for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
>> +                     {
>
> There something a bit odd going on with the whitespace here and in the
> rest of this loop. Perhaps some hard tabs snuck in?

Yes, I noticed that when the patch appeared in the mailing list :)

>
>> +                             pg = alloc_domheap_pages(d, cur_order, MEMF_bits(bits));
>> +                             if ( pg != NULL )
>> +                                     break;
>> +                     }
>> +
>> +                     if ( !pg )
>> +                             break;
>> +
>> +                     spfn = page_to_mfn(pg);
>> +                     start = pfn_to_paddr(spfn);
>> +                     size = pfn_to_paddr((1 << cur_order));
>> +
>> +                 kinfo->mem.bank[index].start = start;
>> +                 kinfo->mem.bank[index].size = size;
>> +                 index++;
>> +                 kinfo->mem.nr_banks++;
>> +     }
>> +
>> +     if( pg )
>> +             break;
>> +
>> +     nr_banks = (nr_banks - cur_bank + 1) << 1;
>
> <<1 ?

* 2 :)

>
> Is this not just kinfo->mem.nr_banks?

No, In this equation I'm calculating how much more memory will be
needed to satisfy the memory size of dom0.
So at the end of the iteration, I check how much memory has been
allocated (=cur_bank * cur_order) and how much memory was needed
(=nr_banks * cur_order), so nr_unallocated_banks = (nr_banks -
cur_bank + 1) * cur_order
So cur_order is decremented and nr_unallocated_banks is doubled ( <<1
) and we do another iteration to satisfy the rest of unallocated
memory.

>
> The basic structure here is:
>
> for ( cur_order = order; cur_order > MIN_BANK_ORDER; cur_order--)
>         for( cur_bank = 1; cur_bank <= nr_banks; cur_bank++ )
>                 for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
>
> Shouldn't the iteration over bank be the outer one?
>
> The banks might be different sizes, right?
>

The outer loop will define the order of the allocated bank[s] while
the inner one will define how many banks of that order will be needed.

So you try to allocate one big bank, if it succeeds you're done. If
not, you double the number of required banks and retry with smaller
order (order - 1).

The code can indeed allocate banks of different sizes. So, if we
failed to allocate 1 big bank, we will try to allocate two banks of
(order = order -1) in that case we might only allocate the first bank
and fail to allocate the second one. So, we try to allocate the
required memory for the second one in two banks.

So the logic is always: In the end of the outer loop calculate how
many banks we need to allocate for next iteration as well as the
required order. So all allocation that occur in the same iteration is
equal, while each iteration changes the nr banks and order depending
on how much memory we still need to allocate

> Also with either approach then depending on where memory is available
> this may result in the memory not being allocated in and/or that they
> are not in increasing order (in fact, because Xen prefer to allocate
> higher memory first it seems likely that it will be in reverse order).

Yes, there's no restriction what so ever on the order of the
addresses. However each allocation is trying to allocate the bank as
early as possible

for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
{
    pg = alloc_domheap_pages(d, cur_order, MEMF_bits(bits));
    if ( pg != NULL )
        break;
}

>
> I don't know if either of those things matter. What does ePAPR have to
> say on the matter?

There is no mention of address range ordering ( at least in section
3.4 "Memory Node" )

> I'd expect that the ordering might matter from the point of view of
> putting the kernel in the first bank -- since that may no longer be the
> lowest address.
>
In the patch, when I set the loadaddr of the image I search through
the banks for the lowest address bank not the first one anyway.


> You don't seem to reference kinfo->unassigned_mem anywhere after the
> initial order calculation -- I think you need to subtract memory from it
> on each iteration, or else I'm not sure you will actually get the right
> amount allocated in all cases.

It's being properly calculated in the external mapping loop.

>
>>      }
>>
>> -    if ( !pg )
>> -        panic("Failed to allocate contiguous memory for dom0");
>> +     if ( !pg )
>> +             panic("Failed to allocate contiguous memory for dom0");
>>
>> -    spfn = page_to_mfn(pg);
>> -    start = pfn_to_paddr(spfn);
>> -    size = pfn_to_paddr((1 << order));
>> +     for ( index = 0; index < kinfo->mem.nr_banks; index++ )
>> +     {
>> +         start = kinfo->mem.bank[index].start;
>> +         size = kinfo->mem.bank[index].size;
>> +         spfn = paddr_to_pfn(start);
>> +         order = get_order_from_bytes(size);
>>
>> -    // 1:1 mapping
>> -    printk("Populate P2M %#"PRIx64"->%#"PRIx64" (1:1 mapping for dom0)\n",
>> -           start, start + size);
>> -    res = guest_physmap_add_page(d, spfn, spfn, order);
>> -
>> -    if ( res )
>> -        panic("Unable to add pages in DOM0: %d", res);
>> +         printk("Populate P2M %#"PRIx64"->%#"PRIx64" (1:1 mapping for dom0 - order : %i)\n",
>> +                 start, start + size, order);
>> +         res = guest_physmap_add_page(d, spfn, spfn, order);
>
> Can this not be done as it is allocated rather than in a second pass?
>
Yes, that's possible.

>>
>> -    kinfo->mem.bank[0].start = start;
>> -    kinfo->mem.bank[0].size = size;
>> -    kinfo->mem.nr_banks = 1;
>> +         if ( res )
>> +             panic("Unable to add pages in DOM0: %d", res);
>>
>> -    kinfo->unassigned_mem -= size;
>> +         kinfo->unassigned_mem -= size;
>> +     }
>>  }
>>
>>  static void allocate_memory(struct domain *d, struct kernel_info *kinfo)
>> diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c
>> index 6a5772b..658c3de 100644
>> --- a/xen/arch/arm/kernel.c
>> +++ b/xen/arch/arm/kernel.c
>> @@ -79,15 +79,43 @@ static void place_modules(struct kernel_info *info,
>>      const paddr_t total = initrd_len + dtb_len;
>>
>>      /* Convenient */
>
> If you are going to remove all of the following convenience variables
> then this comment is no longer correct.
>
> (Convenient here means a shorter local name for something complex)

It still applies to these variables except probably "unsigned int i,
min_i = -1;", so I'll push the comment one line down.

>
>> -    const paddr_t mem_start = info->mem.bank[0].start;
>> -    const paddr_t mem_size = info->mem.bank[0].size;
>> -    const paddr_t mem_end = mem_start + mem_size;
>> -    const paddr_t kernel_size = kernel_end - kernel_start;
>> +    unsigned int i, min_i = -1;
>> +    bool_t same_bank = false;
>> +
>> +    paddr_t mem_start, mem_end, mem_size;
>> +    paddr_t kernel_size;
>>
>>      paddr_t addr;
>>
>> -    if ( total + kernel_size > mem_size )
>> -        panic("Not enough memory in the first bank for the dtb+initrd");
>> +    kernel_size = kernel_end - kernel_start;
>> +
>> +    for ( i = 0; i < info->mem.nr_banks; i++ )
>> +    {
>> +        mem_start = info->mem.bank[i].start;
>> +        mem_size = info->mem.bank[i].size;
>> +        mem_end = mem_start + mem_size - 1;
>> +
>> +        if ( (kernel_end > mem_start) && (kernel_end <= mem_end) )
>> +            same_bank = true;
>> +        else
>> +            same_bank = false;
>> +
>> +        if ( same_bank && ((total + kernel_size) < mem_size) )
>> +            min_i = i;
>> +        else if ( (!same_bank) && (total < mem_size) )
>> +            min_i = i;
>> +        else
>> +            continue;
>
> What is all this doing?

Search through the banks for the bank that fits the initrd and dtb.
Calculation is slightly different depending on whether I ended up
using the same bank as the one used by the kernel or not. ( as
mentioned previously, I don't just blindly choose the first bank for
the kernel. I search through all of the banks for the lowest banks -
address-wise - ). In the same_bank case, I just use the old
calculations that was already there in the code, however in the
!same_bank case I just start at the end of the bank.

>
>> +
>> +        break;
>> +    }
>> +
>> +    if ( min_i == -1 )
>> +        panic("Not enough memory for the dtb+initrd");
>> +
>> +    mem_start = info->mem.bank[min_i].start;
>> +    mem_size = info->mem.bank[min_i].size;
>> +    mem_end = mem_start + mem_size;
>>
>>      /*
>>       * DTB must be loaded such that it does not conflict with the
>> @@ -104,17 +132,25 @@ static void place_modules(struct kernel_info *info,
>>       * just after the kernel, if there is room, otherwise just before.
>>       */
>>
>> -    if ( kernel_end < MIN(mem_start + MB(128), mem_end - total) )
>> -        addr = MIN(mem_start + MB(128), mem_end - total);
>> -    else if ( mem_end - ROUNDUP(kernel_end, MB(2)) >= total )
>> -        addr = ROUNDUP(kernel_end, MB(2));
>> -    else if ( kernel_start - mem_start >= total )
>> -        addr = kernel_start - total;
>> -    else
>> +    if ( same_bank )
>>      {
>> -        panic("Unable to find suitable location for dtb+initrd");
>> -        return;
>> -    }
>> +        if ( kernel_end < MIN(mem_start + MB(128), mem_end - total) )
>> +            addr = MIN(mem_start + MB(128), mem_end - total);
>> +        if ( mem_end - ROUNDUP(kernel_end, MB(2)) >= total )
>> +            addr = ROUNDUP(kernel_end, MB(2));
>> +        else if ( kernel_start - mem_start >= total )
>> +            addr = kernel_start - total;
>> +        else
>> +        {
>> +            /*
>> +             * We should never hit this condition because we've already
>> +             * done the check while choosing the bank.
>> +             */
>> +            panic("Unable to find suitable location for dtb+initrd");
>> +            return;
>> +        }
>> +    } else
>> +        addr = ROUNDUP(mem_end - total, MB(2));
>>
>>      info->dtb_paddr = addr;
>>      info->initrd_paddr = info->dtb_paddr + dtb_len;
>> @@ -264,10 +300,24 @@ static int kernel_try_zimage32_prepare(struct kernel_info *info,
>>       */
>>      if (start == 0)
>>      {
>> +        unsigned int i, min_i = 0, min_start = -1;
>>          paddr_t load_end;
>>
>> -        load_end = info->mem.bank[0].start + info->mem.bank[0].size;
>> -        load_end = MIN(info->mem.bank[0].start + MB(128), load_end);
>> +        /*
>> +         * Load kernel at the lowest possible bank
>> +         * ( check commit 6c21cb36e263de2db8716b477157a5b6cd531e1e for reason behind that )
>
> That commit says nothing about loading in the lowest possible bank,
> though. If there is some relevant factor which is worth commenting on
> please do so directly.

Loading at the lowest bank is safer because of the:
"The current solution in Linux assuming that the delta physical
address - virtual address is always negative".
This delta is being calculated based on where the kernel was loaded (
using "adr" to find physical address ).
So loading it as early as possible is a good idea to avoid this problem.

However, I'm not quite sure why not just load it at the beginning of
the memory then! I think I'll look at booting.txt for that, maybe it's
a decompresser limitation or something!

>
> Actually now that the kernel is fixed upstream we don't need the
> behaviour of that commit at all. Although there are still restrictions
> based on load address vs start of RAM (See booting.txt in the kernel
> docs)
Ah, I see. I'm using an allwinner branch of the kernel (
https://github.com/bjzhang/linux-allwinner.git ). I'll take a look at
the latest thing.

>
> Ian.
>
>> +         */
>> +        for ( i = 0; i < info->mem.nr_banks; i++ )
>> +        {
>> +            if( (unsigned int)info->mem.bank[i].start < min_start )
>> +            {
>> +                min_start = info->mem.bank[i].start;
>> +                min_i = i;
>> +            }
>> +        }
>> +
>> +        load_end = info->mem.bank[min_i].start + info->mem.bank[min_i].size;
>> +        load_end = MIN(info->mem.bank[min_i].start + MB(128), load_end);
>>
>>          info->zimage.load_addr = load_end - end;
>>          /* Align to 2MB */
>
>
>



-- 
Karim Allah Ahmed.
LinkedIn

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] create multiple banks for dom0 in 1:1 mapping if necessary
  2014-01-11  1:58   ` karim.allah.ahmed
@ 2014-01-14 10:51     ` Ian Campbell
  2014-01-24 10:21       ` karim.allah.ahmed
  2014-01-24 17:11       ` Julien Grall
  0 siblings, 2 replies; 15+ messages in thread
From: Ian Campbell @ 2014-01-14 10:51 UTC (permalink / raw)
  To: karim.allah.ahmed@gmail.com
  Cc: tim, Julien Grall, stefano.stabellini, xen-devel

On Sat, 2014-01-11 at 01:58 +0000, karim.allah.ahmed@gmail.com wrote:
> On Fri, Jan 10, 2014 at 3:48 PM, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> > On Fri, 2014-01-10 at 04:12 +0000, Karim Raslan wrote:
> >> Create multiple banks to hold dom0 memory in case of 1:1 mapping
> >> if we failed to find 1 large contiguous memory to hold the whole
> >> thing.
> >
> > Thanks. While with my ARM maintainer hat on I'd love for this to go in
> > for 4.4 with my acting release manager hat on I think if I have to be
> > honest this is too big a change for 4.4 at this stage, which is a
> > pity :-(
> >
> >
> >>
> >> Signed-off-by: Karim Raslan <karim.allah.ahmed@gmail.com>
> >> ---
> >>  xen/arch/arm/domain_build.c |   74 ++++++++++++++++++++++++++-----------
> >>  xen/arch/arm/kernel.c       |   86 ++++++++++++++++++++++++++++++++++---------
> >>  2 files changed, 121 insertions(+), 39 deletions(-)
> >>
> >> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
> >> index faff88e..bb44cdd 100644
> >> --- a/xen/arch/arm/domain_build.c
> >> +++ b/xen/arch/arm/domain_build.c
> >> @@ -69,39 +69,71 @@ static void allocate_memory_11(struct domain *d, struct kernel_info *kinfo)
> >>  {
> >>      paddr_t start;
> >>      paddr_t size;
> >> +    unsigned int cur_order, cur_bank, nr_banks = 1, index = 0;
> >>      struct page_info *pg = NULL;
> >> -    unsigned int order = get_order_from_bytes(dom0_mem);
> >> +    unsigned int order = get_order_from_bytes(kinfo->unassigned_mem);
> >>      int res;
> >>      paddr_t spfn;
> >>      unsigned int bits;
> >>
> >> -    for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
> >> +#define MIN_BANK_ORDER 10
> >
> > 2^10 is < PAGE_SIZE (PAGE_SHIFT is 12). 12 is the lowest allocation size
> > which can be made, but I think in practice the lowest useful bank size
> > is going to be somewhat larger than that.
> 
> MIN_BANK_ORDER is in pages, so it 2^10 pages not 2^10 bytes which
> makes the minimum 4 Mbyte

My mistake. Please add a comment though.

> >> +
> >> +    kinfo->mem.nr_banks = 0;
> >> +
> >> +    /*
> >> +     * We always first try to allocate all dom0 memory in 1 bank.
> >> +     * However, if we failed to allocate physically contiguous memory
> >> +     * from the allocator, then we try to create more than one bank.
> >> +     */
> >> +    for ( cur_order = order; cur_order > MIN_BANK_ORDER; cur_order--)
> >
> > I think this can be just
> >         for( order = get_order_from_bytes(...) ; order > MIN_BANK_ORDER ; order-- )
> > (maybe order >= ?) or
> >         while (order > MIN_BANK_ORDER )
> >         {
> >                 ...
> >                 order--;
> >         }
> > I think the first works better.
> >
> > This does away with the need for cur_order vs order. I think order is
> > mostly unused after this patch, also not renaming cur_order would
> > hopefully reduce the diff and therefore the "scariness" of the patch wrt
> > 4.4 (although that may not be sufficient).
> 
> Yes, that's correct, however I'm intentionally using a different
> variable because I thought that this is going to make things more
> obvious. If you think it's better to use the same variable, then I'll
> update it.

Personally I found it confusing to read as it is. Although reading
further down I'm still confused and I'm not sure that my suggestion
would actually help.

> >
> >> +                             pg = alloc_domheap_pages(d, cur_order, MEMF_bits(bits));
> >> +                             if ( pg != NULL )
> >> +                                     break;
> >> +                     }
> >> +
> >> +                     if ( !pg )
> >> +                             break;
> >> +
> >> +                     spfn = page_to_mfn(pg);
> >> +                     start = pfn_to_paddr(spfn);
> >> +                     size = pfn_to_paddr((1 << cur_order));
> >> +
> >> +                 kinfo->mem.bank[index].start = start;
> >> +                 kinfo->mem.bank[index].size = size;
> >> +                 index++;
> >> +                 kinfo->mem.nr_banks++;
> >> +     }
> >> +
> >> +     if( pg )
> >> +             break;
> >> +
> >> +     nr_banks = (nr_banks - cur_bank + 1) << 1;
> >
> > <<1 ?
> 
> * 2 :)

I know that ;-) But why are you doubling the number of banks at all?

> > Is this not just kinfo->mem.nr_banks?
> 
> No, In this equation I'm calculating how much more memory will be
> needed to satisfy the memory size of dom0.
> So at the end of the iteration, I check how much memory has been
> allocated (=cur_bank * cur_order) and how much memory was needed
> (=nr_banks * cur_order), so nr_unallocated_banks = (nr_banks -
> cur_bank + 1) * cur_order
> So cur_order is decremented and nr_unallocated_banks is doubled ( <<1
> ) and we do another iteration to satisfy the rest of unallocated
> memory.

Hrm, I'm still a bit confused. I think perhaps choosing better names for
the variables (e.g. it seems like you are saying nr_banks is actually
nr_unallocated_banks?) and adding some comments might help.

But is this algorithm not more complex than it needs to be? Why not just
allocate memory, subtracting it from kinfo->unassigned_mem as you go and
adding a new bank each time? The result would the same wouldn't it?
e.g.:

	while ( kinfo->unassigned_mem )
	{
		order = get_order_of_bytes(kinfo->unsigned_mem)
		do {
			pg = alloc_domheap_pages(... order ...)
		} while(!pg && order>>=1 > MIN_BANK_ORDER)

		if (pg)
			urk!

		kinfo->mem.bank[kinfo->mem.nr_banks].{start,size} = (stuff based on pg, order etc)
		kinfo->mem.nr_banks++
		kinfo->unassigned_mem -= /*whatever it is*/

		/* maybe do guest_physmap_add_page here too */
	}

I think that will produce the same result, won't it?

In either algorithm there needs to be a check for NR_MEM_BANKS somewhere
or else it will run off the end of kinfo->mem.bank[].

> 
> >
> > The basic structure here is:
> >
> > for ( cur_order = order; cur_order > MIN_BANK_ORDER; cur_order--)
> >         for( cur_bank = 1; cur_bank <= nr_banks; cur_bank++ )
> >                 for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
> >
> > Shouldn't the iteration over bank be the outer one?
> >
> > The banks might be different sizes, right?
> >
> 
> The outer loop will define the order of the allocated bank[s] while
> the inner one will define how many banks of that order will be needed.
> 
> So you try to allocate one big bank, if it succeeds you're done. If
> not, you double the number of required banks and retry with smaller
> order (order - 1).
> 
> The code can indeed allocate banks of different sizes. So, if we
> failed to allocate 1 big bank, we will try to allocate two banks of
> (order = order -1) in that case we might only allocate the first bank
> and fail to allocate the second one. So, we try to allocate the
> required memory for the second one in two banks.
> 
> So the logic is always: In the end of the outer loop calculate how
> many banks we need to allocate for next iteration as well as the
> required order. So all allocation that occur in the same iteration is
> equal, while each iteration changes the nr banks and order depending
> on how much memory we still need to allocate

I think I get it now, but it still seems complicated. Am I missing
something with the simpler loop I suggested?

> > Also with either approach then depending on where memory is available
> > this may result in the memory not being allocated in and/or that they
> > are not in increasing order (in fact, because Xen prefer to allocate
> > higher memory first it seems likely that it will be in reverse order).
> 
> Yes, there's no restriction what so ever on the order of the
> addresses. However each allocation is trying to allocate the bank as
> early as possible
> 
> for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
> {
>     pg = alloc_domheap_pages(d, cur_order, MEMF_bits(bits));
>     if ( pg != NULL )
>         break;
> }
> 
> >
> > I don't know if either of those things matter. What does ePAPR have to
> > say on the matter?
> 
> There is no mention of address range ordering ( at least in section
> 3.4 "Memory Node" )

OK. It'll be interesting to see whether the implementations match up to
that!

Since max banks is necessarily small I do wonder if it might be easier
to just do a quick insertion sort as we go rather than risking it. I
suppose there will be plenty of time in the 4.5 cycle (which this work
is now almost certainly targeting) to hit any problem cases.

> > I'd expect that the ordering might matter from the point of view of
> > putting the kernel in the first bank -- since that may no longer be the
> > lowest address.
> >
> In the patch, when I set the loadaddr of the image I search through
> the banks for the lowest address bank not the first one anyway.

OK, I think that makes sense since the requirement is wrt position
relative to the start of RAM, not the first bank.

> > You don't seem to reference kinfo->unassigned_mem anywhere after the
> > initial order calculation -- I think you need to subtract memory from it
> > on each iteration, or else I'm not sure you will actually get the right
> > amount allocated in all cases.
> 
> It's being properly calculated in the external mapping loop.

Hrm yes with all that doubling etc.

> >>
> >> -    kinfo->mem.bank[0].start = start;
> >> -    kinfo->mem.bank[0].size = size;
> >> -    kinfo->mem.nr_banks = 1;
> >> +         if ( res )
> >> +             panic("Unable to add pages in DOM0: %d", res);
> >>
> >> -    kinfo->unassigned_mem -= size;
> >> +         kinfo->unassigned_mem -= size;
> >> +     }
> >>  }
> >>
> >>  static void allocate_memory(struct domain *d, struct kernel_info *kinfo)
> >> diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c
> >> index 6a5772b..658c3de 100644
> >> --- a/xen/arch/arm/kernel.c
> >> +++ b/xen/arch/arm/kernel.c
> >> @@ -79,15 +79,43 @@ static void place_modules(struct kernel_info *info,
> >>      const paddr_t total = initrd_len + dtb_len;
> >>
> >>      /* Convenient */
> >
> > If you are going to remove all of the following convenience variables
> > then this comment is no longer correct.
> >
> > (Convenient here means a shorter local name for something complex)
> 
> It still applies to these variables except probably "unsigned int i,
> min_i = -1;", so I'll push the comment one line down.

No it doesn't AFAICT. "Convenient" here means "these are shorthand for
something longer and inconvenient to type", if the variables aren't
const then they almost certainly are not a convenience in the intended
sense. same_bank, mem_* and kernel_size are all just regular variables I
think.

> >> -    const paddr_t mem_start = info->mem.bank[0].start;
> >> -    const paddr_t mem_size = info->mem.bank[0].size;
> >> -    const paddr_t mem_end = mem_start + mem_size;
> >> -    const paddr_t kernel_size = kernel_end - kernel_start;
> >> +    unsigned int i, min_i = -1;
> >> +    bool_t same_bank = false;
> >> +
> >> +    paddr_t mem_start, mem_end, mem_size;
> >> +    paddr_t kernel_size;
> >>
> >>      paddr_t addr;
> >>
> >> -    if ( total + kernel_size > mem_size )
> >> -        panic("Not enough memory in the first bank for the dtb+initrd");
> >> +    kernel_size = kernel_end - kernel_start;
> >> +
> >> +    for ( i = 0; i < info->mem.nr_banks; i++ )
> >> +    {
> >> +        mem_start = info->mem.bank[i].start;
> >> +        mem_size = info->mem.bank[i].size;
> >> +        mem_end = mem_start + mem_size - 1;
> >> +
> >> +        if ( (kernel_end > mem_start) && (kernel_end <= mem_end) )
> >> +            same_bank = true;
> >> +        else
> >> +            same_bank = false;
> >> +
> >> +        if ( same_bank && ((total + kernel_size) < mem_size) )
> >> +            min_i = i;
> >> +        else if ( (!same_bank) && (total < mem_size) )
> >> +            min_i = i;
> >> +        else
> >> +            continue;
> >
> > What is all this doing?
> 
> Search through the banks for the bank that fits the initrd and dtb.
> Calculation is slightly different depending on whether I ended up
> using the same bank as the one used by the kernel or not. ( as
> mentioned previously, I don't just blindly choose the first bank for
> the kernel. I search through all of the banks for the lowest banks -
> address-wise - ).

Where is the address comparison before assigning min_i?

>  In the same_bank case, I just use the old
> calculations that was already there in the code, however in the
> !same_bank case I just start at the end of the bank.

Would it be clearer to do
	if ( dtb+initrd fits in kernel's back )
		ok
	else
		search
?

There is also a requirement that the dtb and initrd are within certain
ranges of the start of RAM (see the booting.txt and Booting in
Documentation/arm*/), does this implement this? It doesn't look to be
considered during the search or when placing in the !same_bank case.

This would all be simpler if we sorted the banks too. Which is another
argument for doing so IMHO.

> >> -        load_end = info->mem.bank[0].start + info->mem.bank[0].size;
> >> -        load_end = MIN(info->mem.bank[0].start + MB(128), load_end);
> >> +        /*
> >> +         * Load kernel at the lowest possible bank
> >> +         * ( check commit 6c21cb36e263de2db8716b477157a5b6cd531e1e for reason behind that )
> >
> > That commit says nothing about loading in the lowest possible bank,
> > though. If there is some relevant factor which is worth commenting on
> > please do so directly.
> 
> Loading at the lowest bank is safer because of the:
> "The current solution in Linux assuming that the delta physical
> address - virtual address is always negative".
> This delta is being calculated based on where the kernel was loaded (
> using "adr" to find physical address ).
> So loading it as early as possible is a good idea to avoid this problem.

I think this problem is now fixed upstream, the intention was to
eventually revert the workaround (Julien was going to tell me when it
had gone into stable etc, but this is now a 4.5 era revert candidate).

> However, I'm not quite sure why not just load it at the beginning of
> the memory then! I think I'll look at booting.txt for that, maybe it's
> a decompresser limitation or something!

Yes, it's all a bit complicated.

> > Actually now that the kernel is fixed upstream we don't need the
> > behaviour of that commit at all. Although there are still restrictions
> > based on load address vs start of RAM (See booting.txt in the kernel
> > docs)
> Ah, I see. I'm using an allwinner branch of the kernel (
> https://github.com/bjzhang/linux-allwinner.git ). I'll take a look at
> the latest thing.

git://github.com/linux-sunxi/linux-sunxi.git either sunxi-next or
sunxi-devel branches are pretty good baselines nowadays. I've got an
outstanding TODO to update the wiki...

Ian.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] create multiple banks for dom0 in 1:1 mapping if necessary
  2014-01-14 10:51     ` Ian Campbell
@ 2014-01-24 10:21       ` karim.allah.ahmed
  2014-04-02 12:05         ` Ian Campbell
  2014-01-24 17:11       ` Julien Grall
  1 sibling, 1 reply; 15+ messages in thread
From: karim.allah.ahmed @ 2014-01-24 10:21 UTC (permalink / raw)
  To: Ian Campbell; +Cc: tim, Julien Grall, stefano.stabellini, xen-devel

On Tue, Jan 14, 2014 at 10:51 AM, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> On Sat, 2014-01-11 at 01:58 +0000, karim.allah.ahmed@gmail.com wrote:
>> On Fri, Jan 10, 2014 at 3:48 PM, Ian Campbell <Ian.Campbell@citrix.com> wrote:
>> > On Fri, 2014-01-10 at 04:12 +0000, Karim Raslan wrote:
>> >> Create multiple banks to hold dom0 memory in case of 1:1 mapping
>> >> if we failed to find 1 large contiguous memory to hold the whole
>> >> thing.
>> >
>> > Thanks. While with my ARM maintainer hat on I'd love for this to go in
>> > for 4.4 with my acting release manager hat on I think if I have to be
>> > honest this is too big a change for 4.4 at this stage, which is a
>> > pity :-(
>> >
>> >
>> >>
>> >> Signed-off-by: Karim Raslan <karim.allah.ahmed@gmail.com>
>> >> ---
>> >>  xen/arch/arm/domain_build.c |   74 ++++++++++++++++++++++++++-----------
>> >>  xen/arch/arm/kernel.c       |   86 ++++++++++++++++++++++++++++++++++---------
>> >>  2 files changed, 121 insertions(+), 39 deletions(-)
>> >>
>> >> diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
>> >> index faff88e..bb44cdd 100644
>> >> --- a/xen/arch/arm/domain_build.c
>> >> +++ b/xen/arch/arm/domain_build.c
>> >> @@ -69,39 +69,71 @@ static void allocate_memory_11(struct domain *d, struct kernel_info *kinfo)
>> >>  {
>> >>      paddr_t start;
>> >>      paddr_t size;
>> >> +    unsigned int cur_order, cur_bank, nr_banks = 1, index = 0;
>> >>      struct page_info *pg = NULL;
>> >> -    unsigned int order = get_order_from_bytes(dom0_mem);
>> >> +    unsigned int order = get_order_from_bytes(kinfo->unassigned_mem);
>> >>      int res;
>> >>      paddr_t spfn;
>> >>      unsigned int bits;
>> >>
>> >> -    for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
>> >> +#define MIN_BANK_ORDER 10
>> >
>> > 2^10 is < PAGE_SIZE (PAGE_SHIFT is 12). 12 is the lowest allocation size
>> > which can be made, but I think in practice the lowest useful bank size
>> > is going to be somewhat larger than that.
>>
>> MIN_BANK_ORDER is in pages, so it 2^10 pages not 2^10 bytes which
>> makes the minimum 4 Mbyte
>
> My mistake. Please add a comment though.
>
>> >> +
>> >> +    kinfo->mem.nr_banks = 0;
>> >> +
>> >> +    /*
>> >> +     * We always first try to allocate all dom0 memory in 1 bank.
>> >> +     * However, if we failed to allocate physically contiguous memory
>> >> +     * from the allocator, then we try to create more than one bank.
>> >> +     */
>> >> +    for ( cur_order = order; cur_order > MIN_BANK_ORDER; cur_order--)
>> >
>> > I think this can be just
>> >         for( order = get_order_from_bytes(...) ; order > MIN_BANK_ORDER ; order-- )
>> > (maybe order >= ?) or
>> >         while (order > MIN_BANK_ORDER )
>> >         {
>> >                 ...
>> >                 order--;
>> >         }
>> > I think the first works better.
>> >
>> > This does away with the need for cur_order vs order. I think order is
>> > mostly unused after this patch, also not renaming cur_order would
>> > hopefully reduce the diff and therefore the "scariness" of the patch wrt
>> > 4.4 (although that may not be sufficient).
>>
>> Yes, that's correct, however I'm intentionally using a different
>> variable because I thought that this is going to make things more
>> obvious. If you think it's better to use the same variable, then I'll
>> update it.
>
> Personally I found it confusing to read as it is. Although reading
> further down I'm still confused and I'm not sure that my suggestion
> would actually help.
>
>> >
>> >> +                             pg = alloc_domheap_pages(d, cur_order, MEMF_bits(bits));
>> >> +                             if ( pg != NULL )
>> >> +                                     break;
>> >> +                     }
>> >> +
>> >> +                     if ( !pg )
>> >> +                             break;
>> >> +
>> >> +                     spfn = page_to_mfn(pg);
>> >> +                     start = pfn_to_paddr(spfn);
>> >> +                     size = pfn_to_paddr((1 << cur_order));
>> >> +
>> >> +                 kinfo->mem.bank[index].start = start;
>> >> +                 kinfo->mem.bank[index].size = size;
>> >> +                 index++;
>> >> +                 kinfo->mem.nr_banks++;
>> >> +     }
>> >> +
>> >> +     if( pg )
>> >> +             break;
>> >> +
>> >> +     nr_banks = (nr_banks - cur_bank + 1) << 1;
>> >
>> > <<1 ?
>>
>> * 2 :)
>
> I know that ;-) But why are you doubling the number of banks at all?
>
>> > Is this not just kinfo->mem.nr_banks?
>>
>> No, In this equation I'm calculating how much more memory will be
>> needed to satisfy the memory size of dom0.
>> So at the end of the iteration, I check how much memory has been
>> allocated (=cur_bank * cur_order) and how much memory was needed
>> (=nr_banks * cur_order), so nr_unallocated_banks = (nr_banks -
>> cur_bank + 1) * cur_order
>> So cur_order is decremented and nr_unallocated_banks is doubled ( <<1
>> ) and we do another iteration to satisfy the rest of unallocated
>> memory.
>
> Hrm, I'm still a bit confused. I think perhaps choosing better names for
> the variables (e.g. it seems like you are saying nr_banks is actually
> nr_unallocated_banks?) and adding some comments might help.
>
> But is this algorithm not more complex than it needs to be? Why not just
> allocate memory, subtracting it from kinfo->unassigned_mem as you go and
> adding a new bank each time? The result would the same wouldn't it?
> e.g.:
>
>         while ( kinfo->unassigned_mem )
>         {
>                 order = get_order_of_bytes(kinfo->unsigned_mem)
>                 do {
>                         pg = alloc_domheap_pages(... order ...)
>                 } while(!pg && order>>=1 > MIN_BANK_ORDER)
>
>                 if (pg)
>                         urk!
>
>                 kinfo->mem.bank[kinfo->mem.nr_banks].{start,size} = (stuff based on pg, order etc)
>                 kinfo->mem.nr_banks++
>                 kinfo->unassigned_mem -= /*whatever it is*/
>
>                 /* maybe do guest_physmap_add_page here too */
>         }
>
> I think that will produce the same result, won't it?
>
> In either algorithm there needs to be a check for NR_MEM_BANKS somewhere
> or else it will run off the end of kinfo->mem.bank[].
>
>>
>> >
>> > The basic structure here is:
>> >
>> > for ( cur_order = order; cur_order > MIN_BANK_ORDER; cur_order--)
>> >         for( cur_bank = 1; cur_bank <= nr_banks; cur_bank++ )
>> >                 for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
>> >
>> > Shouldn't the iteration over bank be the outer one?
>> >
>> > The banks might be different sizes, right?
>> >
>>
>> The outer loop will define the order of the allocated bank[s] while
>> the inner one will define how many banks of that order will be needed.
>>
>> So you try to allocate one big bank, if it succeeds you're done. If
>> not, you double the number of required banks and retry with smaller
>> order (order - 1).
>>
>> The code can indeed allocate banks of different sizes. So, if we
>> failed to allocate 1 big bank, we will try to allocate two banks of
>> (order = order -1) in that case we might only allocate the first bank
>> and fail to allocate the second one. So, we try to allocate the
>> required memory for the second one in two banks.
>>
>> So the logic is always: In the end of the outer loop calculate how
>> many banks we need to allocate for next iteration as well as the
>> required order. So all allocation that occur in the same iteration is
>> equal, while each iteration changes the nr banks and order depending
>> on how much memory we still need to allocate
>
> I think I get it now, but it still seems complicated. Am I missing
> something with the simpler loop I suggested?
>
>> > Also with either approach then depending on where memory is available
>> > this may result in the memory not being allocated in and/or that they
>> > are not in increasing order (in fact, because Xen prefer to allocate
>> > higher memory first it seems likely that it will be in reverse order).
>>
>> Yes, there's no restriction what so ever on the order of the
>> addresses. However each allocation is trying to allocate the bank as
>> early as possible
>>
>> for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
>> {
>>     pg = alloc_domheap_pages(d, cur_order, MEMF_bits(bits));
>>     if ( pg != NULL )
>>         break;
>> }
>>
>> >
>> > I don't know if either of those things matter. What does ePAPR have to
>> > say on the matter?
>>
>> There is no mention of address range ordering ( at least in section
>> 3.4 "Memory Node" )
>
> OK. It'll be interesting to see whether the implementations match up to
> that!
>
> Since max banks is necessarily small I do wonder if it might be easier
> to just do a quick insertion sort as we go rather than risking it. I
> suppose there will be plenty of time in the 4.5 cycle (which this work
> is now almost certainly targeting) to hit any problem cases.
>
>> > I'd expect that the ordering might matter from the point of view of
>> > putting the kernel in the first bank -- since that may no longer be the
>> > lowest address.
>> >
>> In the patch, when I set the loadaddr of the image I search through
>> the banks for the lowest address bank not the first one anyway.
>
> OK, I think that makes sense since the requirement is wrt position
> relative to the start of RAM, not the first bank.
>
>> > You don't seem to reference kinfo->unassigned_mem anywhere after the
>> > initial order calculation -- I think you need to subtract memory from it
>> > on each iteration, or else I'm not sure you will actually get the right
>> > amount allocated in all cases.
>>
>> It's being properly calculated in the external mapping loop.
>
> Hrm yes with all that doubling etc.
>
>> >>
>> >> -    kinfo->mem.bank[0].start = start;
>> >> -    kinfo->mem.bank[0].size = size;
>> >> -    kinfo->mem.nr_banks = 1;
>> >> +         if ( res )
>> >> +             panic("Unable to add pages in DOM0: %d", res);
>> >>
>> >> -    kinfo->unassigned_mem -= size;
>> >> +         kinfo->unassigned_mem -= size;
>> >> +     }
>> >>  }
>> >>
>> >>  static void allocate_memory(struct domain *d, struct kernel_info *kinfo)
>> >> diff --git a/xen/arch/arm/kernel.c b/xen/arch/arm/kernel.c
>> >> index 6a5772b..658c3de 100644
>> >> --- a/xen/arch/arm/kernel.c
>> >> +++ b/xen/arch/arm/kernel.c
>> >> @@ -79,15 +79,43 @@ static void place_modules(struct kernel_info *info,
>> >>      const paddr_t total = initrd_len + dtb_len;
>> >>
>> >>      /* Convenient */
>> >
>> > If you are going to remove all of the following convenience variables
>> > then this comment is no longer correct.
>> >
>> > (Convenient here means a shorter local name for something complex)
>>
>> It still applies to these variables except probably "unsigned int i,
>> min_i = -1;", so I'll push the comment one line down.
>
> No it doesn't AFAICT. "Convenient" here means "these are shorthand for
> something longer and inconvenient to type", if the variables aren't
> const then they almost certainly are not a convenience in the intended
> sense. same_bank, mem_* and kernel_size are all just regular variables I
> think.
>
>> >> -    const paddr_t mem_start = info->mem.bank[0].start;
>> >> -    const paddr_t mem_size = info->mem.bank[0].size;
>> >> -    const paddr_t mem_end = mem_start + mem_size;
>> >> -    const paddr_t kernel_size = kernel_end - kernel_start;
>> >> +    unsigned int i, min_i = -1;
>> >> +    bool_t same_bank = false;
>> >> +
>> >> +    paddr_t mem_start, mem_end, mem_size;
>> >> +    paddr_t kernel_size;
>> >>
>> >>      paddr_t addr;
>> >>
>> >> -    if ( total + kernel_size > mem_size )
>> >> -        panic("Not enough memory in the first bank for the dtb+initrd");
>> >> +    kernel_size = kernel_end - kernel_start;
>> >> +
>> >> +    for ( i = 0; i < info->mem.nr_banks; i++ )
>> >> +    {
>> >> +        mem_start = info->mem.bank[i].start;
>> >> +        mem_size = info->mem.bank[i].size;
>> >> +        mem_end = mem_start + mem_size - 1;
>> >> +
>> >> +        if ( (kernel_end > mem_start) && (kernel_end <= mem_end) )
>> >> +            same_bank = true;
>> >> +        else
>> >> +            same_bank = false;
>> >> +
>> >> +        if ( same_bank && ((total + kernel_size) < mem_size) )
>> >> +            min_i = i;
>> >> +        else if ( (!same_bank) && (total < mem_size) )
>> >> +            min_i = i;
>> >> +        else
>> >> +            continue;
>> >
>> > What is all this doing?
>>
>> Search through the banks for the bank that fits the initrd and dtb.
>> Calculation is slightly different depending on whether I ended up
>> using the same bank as the one used by the kernel or not. ( as
>> mentioned previously, I don't just blindly choose the first bank for
>> the kernel. I search through all of the banks for the lowest banks -
>> address-wise - ).
>
> Where is the address comparison before assigning min_i?
>
>>  In the same_bank case, I just use the old
>> calculations that was already there in the code, however in the
>> !same_bank case I just start at the end of the bank.
>
> Would it be clearer to do
>         if ( dtb+initrd fits in kernel's back )
>                 ok
>         else
>                 search
> ?
>
> There is also a requirement that the dtb and initrd are within certain
> ranges of the start of RAM (see the booting.txt and Booting in
> Documentation/arm*/), does this implement this? It doesn't look to be
> considered during the search or when placing in the !same_bank case.
>
> This would all be simpler if we sorted the banks too. Which is another
> argument for doing so IMHO.
>
>> >> -        load_end = info->mem.bank[0].start + info->mem.bank[0].size;
>> >> -        load_end = MIN(info->mem.bank[0].start + MB(128), load_end);
>> >> +        /*
>> >> +         * Load kernel at the lowest possible bank
>> >> +         * ( check commit 6c21cb36e263de2db8716b477157a5b6cd531e1e for reason behind that )
>> >
>> > That commit says nothing about loading in the lowest possible bank,
>> > though. If there is some relevant factor which is worth commenting on
>> > please do so directly.
>>
>> Loading at the lowest bank is safer because of the:
>> "The current solution in Linux assuming that the delta physical
>> address - virtual address is always negative".
>> This delta is being calculated based on where the kernel was loaded (
>> using "adr" to find physical address ).
>> So loading it as early as possible is a good idea to avoid this problem.
>
> I think this problem is now fixed upstream, the intention was to
> eventually revert the workaround (Julien was going to tell me when it
> had gone into stable etc, but this is now a 4.5 era revert candidate).
>
>> However, I'm not quite sure why not just load it at the beginning of
>> the memory then! I think I'll look at booting.txt for that, maybe it's
>> a decompresser limitation or something!
>
> Yes, it's all a bit complicated.
>
>> > Actually now that the kernel is fixed upstream we don't need the
>> > behaviour of that commit at all. Although there are still restrictions
>> > based on load address vs start of RAM (See booting.txt in the kernel
>> > docs)
>> Ah, I see. I'm using an allwinner branch of the kernel (
>> https://github.com/bjzhang/linux-allwinner.git ). I'll take a look at
>> the latest thing.
>
> git://github.com/linux-sunxi/linux-sunxi.git either sunxi-next or
> sunxi-devel branches are pretty good baselines nowadays. I've got an
> outstanding TODO to update the wiki...
Sorry for the delay!

Okay, I'm going to update my system with this tree and refresh the
patch based on your comments and resend this weekend.

>
> Ian.
>



-- 
Karim Allah Ahmed.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] create multiple banks for dom0 in 1:1 mapping if necessary
  2014-01-14 10:51     ` Ian Campbell
  2014-01-24 10:21       ` karim.allah.ahmed
@ 2014-01-24 17:11       ` Julien Grall
  2014-01-27 14:06         ` Ian Campbell
  1 sibling, 1 reply; 15+ messages in thread
From: Julien Grall @ 2014-01-24 17:11 UTC (permalink / raw)
  To: Ian Campbell; +Cc: tim, stefano.stabellini, xen-devel

On 01/14/2014 10:51 AM, Ian Campbell wrote:
> I think this problem is now fixed upstream, the intention was to
> eventually revert the workaround (Julien was going to tell me when it
> had gone into stable etc, but this is now a 4.5 era revert candidate).

The patch was merged for 3.13.

-- 
Julien Grall

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] create multiple banks for dom0 in 1:1 mapping if necessary
  2014-01-24 17:11       ` Julien Grall
@ 2014-01-27 14:06         ` Ian Campbell
  2014-04-02 12:04           ` [PATCH] Revert "xen/arm: Allocate memory for dom0 from the bottom with the 1:1 Workaround" Ian Campbell
  0 siblings, 1 reply; 15+ messages in thread
From: Ian Campbell @ 2014-01-27 14:06 UTC (permalink / raw)
  To: Julien Grall; +Cc: tim, stefano.stabellini, xen-devel

On Fri, 2014-01-24 at 17:11 +0000, Julien Grall wrote:
> On 01/14/2014 10:51 AM, Ian Campbell wrote:
> > I think this problem is now fixed upstream, the intention was to
> > eventually revert the workaround (Julien was going to tell me when it
> > had gone into stable etc, but this is now a 4.5 era revert candidate).
> 
> The patch was merged for 3.13.

Thanks. I don't think we can revert for 4.4 now, but lets revisit for
4.5.

Ian.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* [PATCH] Revert "xen/arm: Allocate memory for dom0 from the bottom with the 1:1 Workaround"
  2014-01-27 14:06         ` Ian Campbell
@ 2014-04-02 12:04           ` Ian Campbell
  2014-04-02 12:07             ` Julien Grall
  2014-04-02 13:21             ` Julien Grall
  0 siblings, 2 replies; 15+ messages in thread
From: Ian Campbell @ 2014-04-02 12:04 UTC (permalink / raw)
  To: Julien Grall; +Cc: stefano.stabellini, tim, xen-devel

On Mon, 2014-01-27 at 14:06 +0000, Ian Campbell wrote:
> On Fri, 2014-01-24 at 17:11 +0000, Julien Grall wrote:
> > On 01/14/2014 10:51 AM, Ian Campbell wrote:
> > > I think this problem is now fixed upstream, the intention was to
> > > eventually revert the workaround (Julien was going to tell me when it
> > > had gone into stable etc, but this is now a 4.5 era revert candidate).
> > 
> > The patch was merged for 3.13.
> 
> Thanks. I don't think we can revert for 4.4 now, but lets revisit for
> 4.5.

So should we revert

        commit 6c21cb36e263de2db8716b477157a5b6cd531e1e
        Author: Julien Grall <julien.grall@linaro.org>
        Date:   Tue Oct 22 11:51:48 2013 +0100
        
            xen/arm: Allocate memory for dom0 from the bottom with the 1:1 Workaround

Now?

------8<----------------

>From 2e688b06954663f8ff7bc72e5bcf6a823090af9a Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Wed, 2 Apr 2014 13:03:36 +0100
Subject: [PATCH] Revert "xen/arm: Allocate memory for dom0 from the bottom
 with the 1:1 Workaround"

This reverts commit 6c21cb36e263de2db8716b477157a5b6cd531e1e.

The Linux = issue which this works around was fixed in v3.13.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
---
 xen/arch/arm/domain_build.c |   11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/xen/arch/arm/domain_build.c b/xen/arch/arm/domain_build.c
index 502db84..a0b73d2 100644
--- a/xen/arch/arm/domain_build.c
+++ b/xen/arch/arm/domain_build.c
@@ -69,19 +69,12 @@ static void allocate_memory_11(struct domain *d, struct kernel_info *kinfo)
 {
     paddr_t start;
     paddr_t size;
-    struct page_info *pg = NULL;
+    struct page_info *pg;
     unsigned int order = get_order_from_bytes(dom0_mem);
     int res;
     paddr_t spfn;
-    unsigned int bits;
-
-    for ( bits = PAGE_SHIFT + 1; bits < PADDR_BITS; bits++ )
-    {
-        pg = alloc_domheap_pages(d, order, MEMF_bits(bits));
-        if ( pg != NULL )
-            break;
-    }
 
+    pg = alloc_domheap_pages(d, order, 0);
     if ( !pg )
         panic("Failed to allocate contiguous memory for dom0");
 
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] create multiple banks for dom0 in 1:1 mapping if necessary
  2014-01-24 10:21       ` karim.allah.ahmed
@ 2014-04-02 12:05         ` Ian Campbell
  2014-04-04 15:04           ` karim.allah.ahmed
  0 siblings, 1 reply; 15+ messages in thread
From: Ian Campbell @ 2014-04-02 12:05 UTC (permalink / raw)
  To: karim.allah.ahmed@gmail.com
  Cc: tim, Julien Grall, stefano.stabellini, xen-devel

Hi Karim,

On Fri, 2014-01-24 at 10:21 +0000, karim.allah.ahmed@gmail.com wrote:
> Sorry for the delay!
> 
> Okay, I'm going to update my system with this tree and refresh the
> patch based on your comments and resend this weekend.

Do you still plan to work on this? 4.4 out so 4.5 development is in full
swing, it would be nice to get this issue resolved.

Ian.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Revert "xen/arm: Allocate memory for dom0 from the bottom with the 1:1 Workaround"
  2014-04-02 12:04           ` [PATCH] Revert "xen/arm: Allocate memory for dom0 from the bottom with the 1:1 Workaround" Ian Campbell
@ 2014-04-02 12:07             ` Julien Grall
  2014-04-02 13:21             ` Julien Grall
  1 sibling, 0 replies; 15+ messages in thread
From: Julien Grall @ 2014-04-02 12:07 UTC (permalink / raw)
  To: Ian Campbell; +Cc: stefano.stabellini, tim, xen-devel

On 04/02/2014 01:04 PM, Ian Campbell wrote:
> On Mon, 2014-01-27 at 14:06 +0000, Ian Campbell wrote:
>> On Fri, 2014-01-24 at 17:11 +0000, Julien Grall wrote:
>>> On 01/14/2014 10:51 AM, Ian Campbell wrote:
>>>> I think this problem is now fixed upstream, the intention was to
>>>> eventually revert the workaround (Julien was going to tell me when it
>>>> had gone into stable etc, but this is now a 4.5 era revert candidate).
>>>
>>> The patch was merged for 3.13.
>>
>> Thanks. I don't think we can revert for 4.4 now, but lets revisit for
>> 4.5.
> 
> So should we revert
> 
>         commit 6c21cb36e263de2db8716b477157a5b6cd531e1e
>         Author: Julien Grall <julien.grall@linaro.org>
>         Date:   Tue Oct 22 11:51:48 2013 +0100
>         
>             xen/arm: Allocate memory for dom0 from the bottom with the 1:1 Workaround
> 
> Now?

Sorry, I completely forgot this stuff. Let me try on the arndale with
this patch.

Regards,

-- 
Julien Grall

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Revert "xen/arm: Allocate memory for dom0 from the bottom with the 1:1 Workaround"
  2014-04-02 12:04           ` [PATCH] Revert "xen/arm: Allocate memory for dom0 from the bottom with the 1:1 Workaround" Ian Campbell
  2014-04-02 12:07             ` Julien Grall
@ 2014-04-02 13:21             ` Julien Grall
  2014-04-03 16:30               ` Ian Campbell
  1 sibling, 1 reply; 15+ messages in thread
From: Julien Grall @ 2014-04-02 13:21 UTC (permalink / raw)
  To: Ian Campbell; +Cc: stefano.stabellini, tim, xen-devel

On 04/02/2014 01:04 PM, Ian Campbell wrote:
> On Mon, 2014-01-27 at 14:06 +0000, Ian Campbell wrote:
>> On Fri, 2014-01-24 at 17:11 +0000, Julien Grall wrote:
>>> On 01/14/2014 10:51 AM, Ian Campbell wrote:
>>>> I think this problem is now fixed upstream, the intention was to
>>>> eventually revert the workaround (Julien was going to tell me when it
>>>> had gone into stable etc, but this is now a 4.5 era revert candidate).
>>>
>>> The patch was merged for 3.13.
>>
>> Thanks. I don't think we can revert for 4.4 now, but lets revisit for
>> 4.5.
> 
> So should we revert
> 
>         commit 6c21cb36e263de2db8716b477157a5b6cd531e1e
>         Author: Julien Grall <julien.grall@linaro.org>
>         Date:   Tue Oct 22 11:51:48 2013 +0100
>         
>             xen/arm: Allocate memory for dom0 from the bottom with the 1:1 Workaround
> 
> Now?

Reverting this commit doesn't change anything in the memory allocation.
I'm sure why...

Anyway it works on the arndale! I will have to update the wiki page (the
instructions are for Linux 3.10).

> ------8<----------------
> 
> From 2e688b06954663f8ff7bc72e5bcf6a823090af9a Mon Sep 17 00:00:00 2001
> From: Ian Campbell <ian.campbell@citrix.com>
> Date: Wed, 2 Apr 2014 13:03:36 +0100
> Subject: [PATCH] Revert "xen/arm: Allocate memory for dom0 from the bottom
>  with the 1:1 Workaround"
> 
> This reverts commit 6c21cb36e263de2db8716b477157a5b6cd531e1e.
> 
> The Linux = issue which this works around was fixed in v3.13.

Linux commit: f52bb722547f43caeaecbcc62db9f3c3b80ead9b "ARM: mm: Correct
virt_to_phys patching for 64 bit physical addresses"

With the commit mentioned:
Acked-by: Julien Grall <julien.grall@linaro.org>

Regards,

-- 
Julien Grall

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Revert "xen/arm: Allocate memory for dom0 from the bottom with the 1:1 Workaround"
  2014-04-02 13:21             ` Julien Grall
@ 2014-04-03 16:30               ` Ian Campbell
  0 siblings, 0 replies; 15+ messages in thread
From: Ian Campbell @ 2014-04-03 16:30 UTC (permalink / raw)
  To: Julien Grall; +Cc: stefano.stabellini, tim, xen-devel

On Wed, 2014-04-02 at 14:21 +0100, Julien Grall wrote:

> Linux commit: f52bb722547f43caeaecbcc62db9f3c3b80ead9b "ARM: mm: Correct
> virt_to_phys patching for 64 bit physical addresses"
> 
> With the commit mentioned:

Done and applied, thanks.

> Acked-by: Julien Grall <julien.grall@linaro.org>
> 
> Regards,
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] create multiple banks for dom0 in 1:1 mapping if necessary
  2014-04-02 12:05         ` Ian Campbell
@ 2014-04-04 15:04           ` karim.allah.ahmed
  2014-04-04 15:09             ` Ian Campbell
  2014-06-11 14:28             ` Ian Campbell
  0 siblings, 2 replies; 15+ messages in thread
From: karim.allah.ahmed @ 2014-04-04 15:04 UTC (permalink / raw)
  To: Ian Campbell; +Cc: tim, Julien Grall, stefano.stabellini, xen-devel

On Wed, Apr 2, 2014 at 1:05 PM, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> Hi Karim,
>
> On Fri, 2014-01-24 at 10:21 +0000, karim.allah.ahmed@gmail.com wrote:
>> Sorry for the delay!
>>
>> Okay, I'm going to update my system with this tree and refresh the
>> patch based on your comments and resend this weekend.
>
> Do you still plan to work on this? 4.4 out so 4.5 development is in full
> swing, it would be nice to get this issue resolved.
Ooops! I got swamped with work and completely forgot about this patch :)
Okay, I'll get my hands on the Cubieboard again and resubmit the patch.

>
> Ian.
>



-- 
Karim Allah Ahmed.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] create multiple banks for dom0 in 1:1 mapping if necessary
  2014-04-04 15:04           ` karim.allah.ahmed
@ 2014-04-04 15:09             ` Ian Campbell
  2014-06-11 14:28             ` Ian Campbell
  1 sibling, 0 replies; 15+ messages in thread
From: Ian Campbell @ 2014-04-04 15:09 UTC (permalink / raw)
  To: karim.allah.ahmed@gmail.com
  Cc: tim, Julien Grall, stefano.stabellini, xen-devel

On Fri, 2014-04-04 at 16:04 +0100, karim.allah.ahmed@gmail.com wrote:
> On Wed, Apr 2, 2014 at 1:05 PM, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> > Hi Karim,
> >
> > On Fri, 2014-01-24 at 10:21 +0000, karim.allah.ahmed@gmail.com wrote:
> >> Sorry for the delay!
> >>
> >> Okay, I'm going to update my system with this tree and refresh the
> >> patch based on your comments and resend this weekend.
> >
> > Do you still plan to work on this? 4.4 out so 4.5 development is in full
> > swing, it would be nice to get this issue resolved.
> Ooops! I got swamped with work and completely forgot about this patch :)
> Okay, I'll get my hands on the Cubieboard again and resubmit the patch.

Brilliant, thanks!

FYI http://thread.gmane.org/gmane.comp.emulators.xen.devel/194383
changes some bits in this area -- likely they will go into staging next
week.

Ian.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] create multiple banks for dom0 in 1:1 mapping if necessary
  2014-04-04 15:04           ` karim.allah.ahmed
  2014-04-04 15:09             ` Ian Campbell
@ 2014-06-11 14:28             ` Ian Campbell
  1 sibling, 0 replies; 15+ messages in thread
From: Ian Campbell @ 2014-06-11 14:28 UTC (permalink / raw)
  To: karim.allah.ahmed@gmail.com
  Cc: tim, Julien Grall, stefano.stabellini, xen-devel

On Fri, 2014-04-04 at 16:04 +0100, karim.allah.ahmed@gmail.com wrote:
> On Wed, Apr 2, 2014 at 1:05 PM, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> > Hi Karim,
> >
> > On Fri, 2014-01-24 at 10:21 +0000, karim.allah.ahmed@gmail.com wrote:
> >> Sorry for the delay!
> >>
> >> Okay, I'm going to update my system with this tree and refresh the
> >> patch based on your comments and resend this weekend.
> >
> > Do you still plan to work on this? 4.4 out so 4.5 development is in full
> > swing, it would be nice to get this issue resolved.
> Ooops! I got swamped with work and completely forgot about this patch :)
> Okay, I'll get my hands on the Cubieboard again and resubmit the patch.

FYI I've picked this up and am working on a patch.

Ian.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2014-06-11 14:28 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-01-10  4:12 [PATCH] create multiple banks for dom0 in 1:1 mapping if necessary Karim Raslan
2014-01-10 15:48 ` Ian Campbell
2014-01-11  1:58   ` karim.allah.ahmed
2014-01-14 10:51     ` Ian Campbell
2014-01-24 10:21       ` karim.allah.ahmed
2014-04-02 12:05         ` Ian Campbell
2014-04-04 15:04           ` karim.allah.ahmed
2014-04-04 15:09             ` Ian Campbell
2014-06-11 14:28             ` Ian Campbell
2014-01-24 17:11       ` Julien Grall
2014-01-27 14:06         ` Ian Campbell
2014-04-02 12:04           ` [PATCH] Revert "xen/arm: Allocate memory for dom0 from the bottom with the 1:1 Workaround" Ian Campbell
2014-04-02 12:07             ` Julien Grall
2014-04-02 13:21             ` Julien Grall
2014-04-03 16:30               ` Ian Campbell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.