* [PATCH v3 1/2] mm: memblock Add some new functions to address the mem limit issue
@ 2016-06-27 2:27 Dennis Chen
2016-06-27 2:27 ` [PATCH v3 2/2] arm64:acpi Fix the acpi alignment exeception when 'mem=' specified Dennis Chen
2016-06-27 14:28 ` [PATCH v3 1/2] mm: memblock Add some new functions to address the mem limit issue Mark Rutland
0 siblings, 2 replies; 4+ messages in thread
From: Dennis Chen @ 2016-06-27 2:27 UTC (permalink / raw)
To: linux-arm-kernel
Cc: nd, Dennis Chen, Catalin Marinas, Steve Capper, Ard Biesheuvel,
Will Deacon, Mark Rutland, Rafael J . Wysocki, Matt Fleming,
linux-mm, linux-acpi, linux-efi
In some cases, memblock is queried to determine whether a physical
address corresponds to memory present in a system even if unused by
the OS for the linear mapping, highmem, etc. For example, the ACPI
core needs this information to determine which attributes to use when
mapping ACPI regions. Use of incorrect memory types can result in
faults, data corruption, or other issues.
Removing memory with memblock_enforce_memory_limit throws away this
information, and so a kernel booted with 'mem=' may suffers from the
issues described above. To avoid this, we need to keep those NOMAP
regions instead of removing all above limit, which preserves the
information we need while preventing other use of the regions.
This patch adds new insfrastructure to retain all NOMAP memblock regions
while removing others, to cater for this.
At last, we add 'size' and 'flag' debug output in the memblock debugfs
for ease of the memblock debug.
The '/sys/kernel/debug/memblock/memory' output looks like before:
0: 0x0000008000000000..0x0000008001e7ffff
1: 0x0000008001e80000..0x00000083ff184fff
2: 0x00000083ff185000..0x00000083ff1c2fff
3: 0x00000083ff1c3000..0x00000083ff222fff
4: 0x00000083ff223000..0x00000083ffe42fff
5: 0x00000083ffe43000..0x00000083ffffffff
After applied:
0: 0x0000008000000000..0x0000008001e7ffff 0x0000000001e80000 0x4
1: 0x0000008001e80000..0x00000083ff184fff 0x00000003fd305000 0x0
2: 0x00000083ff185000..0x00000083ff1c2fff 0x000000000003e000 0x4
3: 0x00000083ff1c3000..0x00000083ff222fff 0x0000000000060000 0x0
4: 0x00000083ff223000..0x00000083ffe42fff 0x0000000000c20000 0x4
5: 0x00000083ffe43000..0x00000083ffffffff 0x00000000001bd000 0x0
Signed-off-by: Dennis Chen <dennis.chen@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: linux-mm@kvack.org
Cc: linux-acpi@vger.kernel.org
Cc: linux-efi@vger.kernel.org
---
include/linux/memblock.h | 1 +
mm/memblock.c | 55 +++++++++++++++++++++++++++++++++++++++++-------
2 files changed, 48 insertions(+), 8 deletions(-)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 6c14b61..2925da2 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -332,6 +332,7 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn);
phys_addr_t memblock_start_of_DRAM(void);
phys_addr_t memblock_end_of_DRAM(void);
void memblock_enforce_memory_limit(phys_addr_t memory_limit);
+void memblock_mem_limit_remove_map(phys_addr_t limit);
bool memblock_is_memory(phys_addr_t addr);
int memblock_is_map_memory(phys_addr_t addr);
int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
diff --git a/mm/memblock.c b/mm/memblock.c
index ca09915..8099f1a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1465,14 +1465,11 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
}
-void __init memblock_enforce_memory_limit(phys_addr_t limit)
+static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
{
phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
struct memblock_region *r;
- if (!limit)
- return;
-
/* find out max address */
for_each_memblock(memory, r) {
if (limit <= r->size) {
@@ -1482,6 +1479,20 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
limit -= r->size;
}
+ return max_addr;
+}
+
+void __init memblock_enforce_memory_limit(phys_addr_t limit)
+{
+ phys_addr_t max_addr;
+
+ if (!limit)
+ return;
+
+ max_addr = __find_max_addr(limit);
+ if (max_addr == (phys_addr_t)ULLONG_MAX)
+ return;
+
/* truncate both memory and reserved regions */
memblock_remove_range(&memblock.memory, max_addr,
(phys_addr_t)ULLONG_MAX);
@@ -1489,6 +1500,32 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
(phys_addr_t)ULLONG_MAX);
}
+void __init memblock_mem_limit_remove_map(phys_addr_t limit)
+{
+ struct memblock_type *type = &memblock.memory;
+ phys_addr_t max_addr;
+ int i, ret, start_rgn, end_rgn;
+
+ if (!limit)
+ return;
+
+ max_addr = __find_max_addr(limit);
+ if (max_addr == (phys_addr_t)ULLONG_MAX)
+ return;
+
+ ret = memblock_isolate_range(type, max_addr, (phys_addr_t)ULLONG_MAX,
+ &start_rgn, &end_rgn);
+ if (ret) {
+ WARN_ONCE(1, "Mem limit failed, will not be applied!\n");
+ return;
+ }
+
+ for (i = end_rgn - 1; i >= start_rgn; i--) {
+ if (!memblock_is_nomap(&type->regions[i]))
+ memblock_remove_region(type, i);
+ }
+}
+
static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
{
unsigned int left = 0, right = type->cnt;
@@ -1677,13 +1714,15 @@ static int memblock_debug_show(struct seq_file *m, void *private)
reg = &type->regions[i];
seq_printf(m, "%4d: ", i);
if (sizeof(phys_addr_t) == 4)
- seq_printf(m, "0x%08lx..0x%08lx\n",
+ seq_printf(m, "0x%08lx..0x%08lx 0x%08lx 0x%lx\n",
(unsigned long)reg->base,
- (unsigned long)(reg->base + reg->size - 1));
+ (unsigned long)(reg->base + reg->size - 1),
+ (unsigned long)reg->size, reg->flags);
else
- seq_printf(m, "0x%016llx..0x%016llx\n",
+ seq_printf(m, "0x%016llx..0x%016llx 0x%016llx 0x%lx\n",
(unsigned long long)reg->base,
- (unsigned long long)(reg->base + reg->size - 1));
+ (unsigned long long)(reg->base + reg->size - 1),
+ (unsigned long long)reg->size, reg->flags);
}
return 0;
--
1.8.3.1
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v3 2/2] arm64:acpi Fix the acpi alignment exeception when 'mem=' specified
2016-06-27 2:27 [PATCH v3 1/2] mm: memblock Add some new functions to address the mem limit issue Dennis Chen
@ 2016-06-27 2:27 ` Dennis Chen
2016-06-27 14:28 ` [PATCH v3 1/2] mm: memblock Add some new functions to address the mem limit issue Mark Rutland
1 sibling, 0 replies; 4+ messages in thread
From: Dennis Chen @ 2016-06-27 2:27 UTC (permalink / raw)
To: linux-arm-kernel
Cc: nd, Dennis Chen, Catalin Marinas, Steve Capper, Ard Biesheuvel,
Will Deacon, Mark Rutland, Rafael J . Wysocki, Matt Fleming,
linux-mm, linux-acpi, linux-efi
When booting an ACPI enabled kernel with 'mem=x', probably the ACPI data
regions loaded by firmware will beyond the limit of the memory, in this
case we need to keep those NOMAP regions above the limit while not removing
them from memblock, because once a region removed from memblock, the ACPI
will think that region is not normal memory and map it as device type
memory accordingly. Since the ACPI core will produce non-alignment access
when paring AML data stream, hence result in alignment fault upon the IO
mapped memory space.
For example, below is an alignment exception observed on ARM platform when
booting the kernel with 'acpi=on mem=8G':
...
[ 0.542475] Unable to handle kernel paging request at virtual address ffff0000080521e7
[ 0.550457] pgd = ffff000008aa0000
[ 0.553880] [ffff0000080521e7] *pgd=000000801fffe003, *pud=000000801fffd003, *pmd=000000801fffc003, *pte=00e80083ff1c1707
[ 0.564939] Internal error: Oops: 96000021 [#1] PREEMPT SMP
[ 0.570553] Modules linked in:
[ 0.573626] CPU: 1 PID: 1 Comm: swapper/0 Not tainted 4.7.0-rc3-next-20160616+ #172
[ 0.581344] Hardware name: AMD Overdrive/Supercharger/Default string, BIOS ROD1001A 02/09/2016
[ 0.590025] task: ffff800001ef0000 ti: ffff800001ef8000 task.ti: ffff800001ef8000
[ 0.597571] PC is at acpi_ns_lookup+0x520/0x734
[ 0.602134] LR is at acpi_ns_lookup+0x4a4/0x734
[ 0.606693] pc : [<ffff0000083b8b10>] lr : [<ffff0000083b8a94>] pstate: 60000045
[ 0.614145] sp : ffff800001efb8b0
[ 0.617478] x29: ffff800001efb8c0 x28: 000000000000001b
[ 0.622829] x27: 0000000000000001 x26: 0000000000000000
[ 0.628181] x25: ffff800001efb9e8 x24: ffff000008a10000
[ 0.633531] x23: 0000000000000001 x22: 0000000000000001
[ 0.638881] x21: ffff000008724000 x20: 000000000000001b
[ 0.644230] x19: ffff0000080521e7 x18: 000000000000000d
[ 0.649580] x17: 00000000000038ff x16: 0000000000000002
[ 0.654929] x15: 0000000000000007 x14: 0000000000007fff
[ 0.660278] x13: ffffff0000000000 x12: 0000000000000018
[ 0.665627] x11: 000000001fffd200 x10: 00000000ffffff76
[ 0.670978] x9 : 000000000000005f x8 : ffff000008725fa8
[ 0.676328] x7 : ffff000008a8df70 x6 : ffff000008a8df70
[ 0.681679] x5 : ffff000008a8d000 x4 : 0000000000000010
[ 0.687027] x3 : 0000000000000010 x2 : 000000000000000c
[ 0.692378] x1 : 0000000000000006 x0 : 0000000000000000
...
[ 1.262235] [<ffff0000083b8b10>] acpi_ns_lookup+0x520/0x734
[ 1.267845] [<ffff0000083a7160>] acpi_ds_load1_begin_op+0x174/0x4fc
[ 1.274156] [<ffff0000083c1f4c>] acpi_ps_build_named_op+0xf8/0x220
[ 1.280380] [<ffff0000083c227c>] acpi_ps_create_op+0x208/0x33c
[ 1.286254] [<ffff0000083c1820>] acpi_ps_parse_loop+0x204/0x838
[ 1.292215] [<ffff0000083c2fd4>] acpi_ps_parse_aml+0x1bc/0x42c
[ 1.298090] [<ffff0000083bc6e8>] acpi_ns_one_complete_parse+0x1e8/0x22c
[ 1.304753] [<ffff0000083bc7b8>] acpi_ns_parse_table+0x8c/0x128
[ 1.310716] [<ffff0000083bb8fc>] acpi_ns_load_table+0xc0/0x1e8
[ 1.316591] [<ffff0000083c9068>] acpi_tb_load_namespace+0xf8/0x2e8
[ 1.322818] [<ffff000008984128>] acpi_load_tables+0x7c/0x110
[ 1.328516] [<ffff000008982ea4>] acpi_init+0x90/0x2c0
[ 1.333603] [<ffff0000080819fc>] do_one_initcall+0x38/0x12c
[ 1.339215] [<ffff000008960cd4>] kernel_init_freeable+0x148/0x1ec
[ 1.345353] [<ffff0000086b7d30>] kernel_init+0x10/0xec
[ 1.350529] [<ffff000008084e10>] ret_from_fork+0x10/0x40
[ 1.355878] Code: b9009fbc 2a00037b 36380057 3219037b (b9400260)
[ 1.362035] ---[ end trace 03381e5eb0a24de4 ]---
[ 1.366691] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
With 'efi=debug', we can see those ACPI regions loaded by firmware on
that board as:
[ 0.000000] efi: 0x0083ff185000-0x0083ff1b4fff [Reserved | | | | | | | | |WB|WT|WC|UC]*
[ 0.000000] efi: 0x0083ff1b5000-0x0083ff1c2fff [ACPI Reclaim Memory| | | | | | | | |WB|WT|WC|UC]*
[ 0.000000] efi: 0x0083ff223000-0x0083ff224fff [ACPI Memory NVS | | | | | | | | |WB|WT|WC|UC]*
This patch is trying to address the above issue by only keeping those
NOMAP regions instead of removing all above limit from memory memblock.
Signed-off-by: Dennis Chen <dennis.chen@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: linux-mm@kvack.org
Cc: linux-acpi@vger.kernel.org
Cc: linux-efi@vger.kernel.org
---
arch/arm64/mm/init.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index d45f862..46edcef 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -226,7 +226,7 @@ void __init arm64_memblock_init(void)
* via the linear mapping.
*/
if (memory_limit != (phys_addr_t)ULLONG_MAX) {
- memblock_enforce_memory_limit(memory_limit);
+ memblock_mem_limit_remove_map(memory_limit);
memblock_add(__pa(_text), (u64)(_end - _text));
}
--
1.8.3.1
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v3 1/2] mm: memblock Add some new functions to address the mem limit issue
2016-06-27 2:27 [PATCH v3 1/2] mm: memblock Add some new functions to address the mem limit issue Dennis Chen
2016-06-27 2:27 ` [PATCH v3 2/2] arm64:acpi Fix the acpi alignment exeception when 'mem=' specified Dennis Chen
@ 2016-06-27 14:28 ` Mark Rutland
2016-06-28 2:55 ` Dennis Chen
1 sibling, 1 reply; 4+ messages in thread
From: Mark Rutland @ 2016-06-27 14:28 UTC (permalink / raw)
To: Dennis Chen
Cc: linux-arm-kernel, nd, Catalin Marinas, Steve Capper,
Ard Biesheuvel, Will Deacon, Rafael J . Wysocki, Matt Fleming,
linux-mm, linux-acpi, linux-efi
On Mon, Jun 27, 2016 at 10:27:10AM +0800, Dennis Chen wrote:
> In some cases, memblock is queried to determine whether a physical
> address corresponds to memory present in a system even if unused by
> the OS for the linear mapping, highmem, etc. For example, the ACPI
> core needs this information to determine which attributes to use when
> mapping ACPI regions. Use of incorrect memory types can result in
> faults, data corruption, or other issues.
>
> Removing memory with memblock_enforce_memory_limit throws away this
> information, and so a kernel booted with 'mem=' may suffers from the
> issues described above. To avoid this, we need to keep those NOMAP
> regions instead of removing all above limit, which preserves the
> information we need while preventing other use of the regions.
>
> This patch adds new insfrastructure to retain all NOMAP memblock regions
> while removing others, to cater for this.
>
> At last, we add 'size' and 'flag' debug output in the memblock debugfs
> for ease of the memblock debug.
> The '/sys/kernel/debug/memblock/memory' output looks like before:
> 0: 0x0000008000000000..0x0000008001e7ffff
> 1: 0x0000008001e80000..0x00000083ff184fff
> 2: 0x00000083ff185000..0x00000083ff1c2fff
> 3: 0x00000083ff1c3000..0x00000083ff222fff
> 4: 0x00000083ff223000..0x00000083ffe42fff
> 5: 0x00000083ffe43000..0x00000083ffffffff
>
> After applied:
> 0: 0x0000008000000000..0x0000008001e7ffff 0x0000000001e80000 0x4
> 1: 0x0000008001e80000..0x00000083ff184fff 0x00000003fd305000 0x0
> 2: 0x00000083ff185000..0x00000083ff1c2fff 0x000000000003e000 0x4
> 3: 0x00000083ff1c3000..0x00000083ff222fff 0x0000000000060000 0x0
> 4: 0x00000083ff223000..0x00000083ffe42fff 0x0000000000c20000 0x4
> 5: 0x00000083ffe43000..0x00000083ffffffff 0x00000000001bd000 0x0
The debugfs changes should be a separate patch. Even if they're useful
for debugging this patch, they're logically independent.
> Signed-off-by: Dennis Chen <dennis.chen@arm.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Steve Capper <steve.capper@arm.com>
> Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Mark Rutland <mark.rutland@arm.com>
> Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> Cc: Matt Fleming <matt@codeblueprint.co.uk>
> Cc: linux-mm@kvack.org
> Cc: linux-acpi@vger.kernel.org
> Cc: linux-efi@vger.kernel.org
> ---
> include/linux/memblock.h | 1 +
> mm/memblock.c | 55 +++++++++++++++++++++++++++++++++++++++++-------
> 2 files changed, 48 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> index 6c14b61..2925da2 100644
> --- a/include/linux/memblock.h
> +++ b/include/linux/memblock.h
> @@ -332,6 +332,7 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn);
> phys_addr_t memblock_start_of_DRAM(void);
> phys_addr_t memblock_end_of_DRAM(void);
> void memblock_enforce_memory_limit(phys_addr_t memory_limit);
> +void memblock_mem_limit_remove_map(phys_addr_t limit);
> bool memblock_is_memory(phys_addr_t addr);
> int memblock_is_map_memory(phys_addr_t addr);
> int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
> diff --git a/mm/memblock.c b/mm/memblock.c
> index ca09915..8099f1a 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -1465,14 +1465,11 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
> return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
> }
>
> -void __init memblock_enforce_memory_limit(phys_addr_t limit)
> +static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
> {
> phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
> struct memblock_region *r;
>
> - if (!limit)
> - return;
> -
> /* find out max address */
> for_each_memblock(memory, r) {
> if (limit <= r->size) {
> @@ -1482,6 +1479,20 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
> limit -= r->size;
> }
>
> + return max_addr;
> +}
> +
> +void __init memblock_enforce_memory_limit(phys_addr_t limit)
> +{
> + phys_addr_t max_addr;
> +
> + if (!limit)
> + return;
> +
> + max_addr = __find_max_addr(limit);
> + if (max_addr == (phys_addr_t)ULLONG_MAX)
> + return;
We didn't previously return early, so do we actually need this check?
> +
> /* truncate both memory and reserved regions */
> memblock_remove_range(&memblock.memory, max_addr,
> (phys_addr_t)ULLONG_MAX);
> @@ -1489,6 +1500,32 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
> (phys_addr_t)ULLONG_MAX);
> }
>
> +void __init memblock_mem_limit_remove_map(phys_addr_t limit)
> +{
> + struct memblock_type *type = &memblock.memory;
> + phys_addr_t max_addr;
> + int i, ret, start_rgn, end_rgn;
> +
> + if (!limit)
> + return;
> +
> + max_addr = __find_max_addr(limit);
> + if (max_addr == (phys_addr_t)ULLONG_MAX)
> + return;
Likewise?
> +
> + ret = memblock_isolate_range(type, max_addr, (phys_addr_t)ULLONG_MAX,
> + &start_rgn, &end_rgn);
> + if (ret) {
> + WARN_ONCE(1, "Mem limit failed, will not be applied!\n");
> + return;
> + }
We don't have a similar warning in memblock_enforce_memory_limit, where
memblock_remove_range() might return an error code from an internal call
to memblock_isolate_range.
The two should be consistent, either both with a message or both
without.
> +
> + for (i = end_rgn - 1; i >= start_rgn; i--) {
> + if (!memblock_is_nomap(&type->regions[i]))
> + memblock_remove_region(type, i);
> + }
> +}
This will preserve nomap regions, but it does mean that we may preserve
less memory that the user asked for, since __find_max_addr counted nomap
(and reserved) regions. Given we've always counted the latter, maybe
that's ok.
We should clarify what __find_max_addr is intended to determine, with a
comment, so as to avoid future ambiguity there.
> +
> static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
> {
> unsigned int left = 0, right = type->cnt;
> @@ -1677,13 +1714,15 @@ static int memblock_debug_show(struct seq_file *m, void *private)
> reg = &type->regions[i];
> seq_printf(m, "%4d: ", i);
> if (sizeof(phys_addr_t) == 4)
> - seq_printf(m, "0x%08lx..0x%08lx\n",
> + seq_printf(m, "0x%08lx..0x%08lx 0x%08lx 0x%lx\n",
> (unsigned long)reg->base,
> - (unsigned long)(reg->base + reg->size - 1));
> + (unsigned long)(reg->base + reg->size - 1),
> + (unsigned long)reg->size, reg->flags);
> else
> - seq_printf(m, "0x%016llx..0x%016llx\n",
> + seq_printf(m, "0x%016llx..0x%016llx 0x%016llx 0x%lx\n",
> (unsigned long long)reg->base,
> - (unsigned long long)(reg->base + reg->size - 1));
> + (unsigned long long)(reg->base + reg->size - 1),
> + (unsigned long long)reg->size, reg->flags);
As mentioned above, this should be a separate patch. I have no strong
feelings either way about the logic itself.
Thanks,
Mark.
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v3 1/2] mm: memblock Add some new functions to address the mem limit issue
2016-06-27 14:28 ` [PATCH v3 1/2] mm: memblock Add some new functions to address the mem limit issue Mark Rutland
@ 2016-06-28 2:55 ` Dennis Chen
0 siblings, 0 replies; 4+ messages in thread
From: Dennis Chen @ 2016-06-28 2:55 UTC (permalink / raw)
To: Mark Rutland
Cc: linux-arm-kernel, Catalin Marinas, Steve Capper, Ard Biesheuvel,
Will Deacon, Rafael J . Wysocki, Matt Fleming, linux-mm,
linux-acpi, linux-efi
On Mon, Jun 27, 2016 at 03:28:19PM +0100, Mark Rutland wrote:
> On Mon, Jun 27, 2016 at 10:27:10AM +0800, Dennis Chen wrote:
> > In some cases, memblock is queried to determine whether a physical
> > address corresponds to memory present in a system even if unused by
> > the OS for the linear mapping, highmem, etc. For example, the ACPI
> > core needs this information to determine which attributes to use when
> > mapping ACPI regions. Use of incorrect memory types can result in
> > faults, data corruption, or other issues.
> >
> > Removing memory with memblock_enforce_memory_limit throws away this
> > information, and so a kernel booted with 'mem=' may suffers from the
> > issues described above. To avoid this, we need to keep those NOMAP
> > regions instead of removing all above limit, which preserves the
> > information we need while preventing other use of the regions.
> >
> > This patch adds new insfrastructure to retain all NOMAP memblock regions
> > while removing others, to cater for this.
> >
> > At last, we add 'size' and 'flag' debug output in the memblock debugfs
> > for ease of the memblock debug.
> > The '/sys/kernel/debug/memblock/memory' output looks like before:
> > 0: 0x0000008000000000..0x0000008001e7ffff
> > 1: 0x0000008001e80000..0x00000083ff184fff
> > 2: 0x00000083ff185000..0x00000083ff1c2fff
> > 3: 0x00000083ff1c3000..0x00000083ff222fff
> > 4: 0x00000083ff223000..0x00000083ffe42fff
> > 5: 0x00000083ffe43000..0x00000083ffffffff
> >
> > After applied:
> > 0: 0x0000008000000000..0x0000008001e7ffff 0x0000000001e80000 0x4
> > 1: 0x0000008001e80000..0x00000083ff184fff 0x00000003fd305000 0x0
> > 2: 0x00000083ff185000..0x00000083ff1c2fff 0x000000000003e000 0x4
> > 3: 0x00000083ff1c3000..0x00000083ff222fff 0x0000000000060000 0x0
> > 4: 0x00000083ff223000..0x00000083ffe42fff 0x0000000000c20000 0x4
> > 5: 0x00000083ffe43000..0x00000083ffffffff 0x00000000001bd000 0x0
>
> The debugfs changes should be a separate patch. Even if they're useful
> for debugging this patch, they're logically independent.
>
Indeed. Will isolate it as an individual patch for this set.
>
> > Signed-off-by: Dennis Chen <dennis.chen@arm.com>
> > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > Cc: Steve Capper <steve.capper@arm.com>
> > Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> > Cc: Will Deacon <will.deacon@arm.com>
> > Cc: Mark Rutland <mark.rutland@arm.com>
> > Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > Cc: Matt Fleming <matt@codeblueprint.co.uk>
> > Cc: linux-mm@kvack.org
> > Cc: linux-acpi@vger.kernel.org
> > Cc: linux-efi@vger.kernel.org
> > ---
> > include/linux/memblock.h | 1 +
> > mm/memblock.c | 55 +++++++++++++++++++++++++++++++++++++++++-------
> > 2 files changed, 48 insertions(+), 8 deletions(-)
> >
> > diff --git a/include/linux/memblock.h b/include/linux/memblock.h
> > index 6c14b61..2925da2 100644
> > --- a/include/linux/memblock.h
> > +++ b/include/linux/memblock.h
> > @@ -332,6 +332,7 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn);
> > phys_addr_t memblock_start_of_DRAM(void);
> > phys_addr_t memblock_end_of_DRAM(void);
> > void memblock_enforce_memory_limit(phys_addr_t memory_limit);
> > +void memblock_mem_limit_remove_map(phys_addr_t limit);
> > bool memblock_is_memory(phys_addr_t addr);
> > int memblock_is_map_memory(phys_addr_t addr);
> > int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
> > diff --git a/mm/memblock.c b/mm/memblock.c
> > index ca09915..8099f1a 100644
> > --- a/mm/memblock.c
> > +++ b/mm/memblock.c
> > @@ -1465,14 +1465,11 @@ phys_addr_t __init_memblock memblock_end_of_DRAM(void)
> > return (memblock.memory.regions[idx].base + memblock.memory.regions[idx].size);
> > }
> >
> > -void __init memblock_enforce_memory_limit(phys_addr_t limit)
> > +static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
> > {
> > phys_addr_t max_addr = (phys_addr_t)ULLONG_MAX;
> > struct memblock_region *r;
> >
> > - if (!limit)
> > - return;
> > -
> > /* find out max address */
> > for_each_memblock(memory, r) {
> > if (limit <= r->size) {
> > @@ -1482,6 +1479,20 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
> > limit -= r->size;
> > }
> >
> > + return max_addr;
> > +}
> > +
> > +void __init memblock_enforce_memory_limit(phys_addr_t limit)
> > +{
> > + phys_addr_t max_addr;
> > +
> > + if (!limit)
> > + return;
> > +
> > + max_addr = __find_max_addr(limit);
> > + if (max_addr == (phys_addr_t)ULLONG_MAX)
> > + return;
>
> We didn't previously return early, so do we actually need this check?
>
If we assign a mem limit count exceeds the actual physical RAM size in the system,
then we will fall into this case, return directly here will avoid a function call
followed though it will step out quickly.
>
> > +
> > /* truncate both memory and reserved regions */
> > memblock_remove_range(&memblock.memory, max_addr,
> > (phys_addr_t)ULLONG_MAX);
> > @@ -1489,6 +1500,32 @@ void __init memblock_enforce_memory_limit(phys_addr_t limit)
> > (phys_addr_t)ULLONG_MAX);
> > }
> >
> > +void __init memblock_mem_limit_remove_map(phys_addr_t limit)
> > +{
> > + struct memblock_type *type = &memblock.memory;
> > + phys_addr_t max_addr;
> > + int i, ret, start_rgn, end_rgn;
> > +
> > + if (!limit)
> > + return;
> > +
> > + max_addr = __find_max_addr(limit);
> > + if (max_addr == (phys_addr_t)ULLONG_MAX)
> > + return;
>
> Likewise?
>
ditto
>
> > +
> > + ret = memblock_isolate_range(type, max_addr, (phys_addr_t)ULLONG_MAX,
> > + &start_rgn, &end_rgn);
> > + if (ret) {
> > + WARN_ONCE(1, "Mem limit failed, will not be applied!\n");
> > + return;
> > + }
>
> We don't have a similar warning in memblock_enforce_memory_limit, where
> memblock_remove_range() might return an error code from an internal call
> to memblock_isolate_range.
>
Somehow cosmetic here for the warning, given it's extremely rare case to return
an error value, it doesn't hurt significantly to ignore the return value. But
WARN_ONCE at least makes me comfortable though :)
>
> The two should be consistent, either both with a message or both
> without.
>
> > +
> > + for (i = end_rgn - 1; i >= start_rgn; i--) {
> > + if (!memblock_is_nomap(&type->regions[i]))
> > + memblock_remove_region(type, i);
> > + }
> > +}
>
> This will preserve nomap regions, but it does mean that we may preserve
> less memory that the user asked for, since __find_max_addr counted nomap
> (and reserved) regions. Given we've always counted the latter, maybe
> that's ok.
>
As far as the user here, I think we have two: the firmware and the kernel.
All the memory reserved by firmware will be NOMAP marked, so it doesn't matter here.
For kernel user, we need to avoid allocate memory(e.g, memblock_alloc) before
the limit is applied, otherwise probably we may discard the user's allocated
memory once limit is applied. Fortunately, I don't find that kind of use before
using the limit. Technically, as a hack method to fake the less memory available
to the kernel than it actually is, mem limit should be applied as early as possible.
But given some limitation there, for instance, the early parameter parsing and the
possible effort, maybe a little bit fussy to do that.
>
> We should clarify what __find_max_addr is intended to determine, with a
> comment, so as to avoid future ambiguity there.
>
__find_max_addr is used to translate the limit(size) into the final limited address
within one of the memblocks, for example, in our juno board, we have two separate
physical memory block: 2G- (0x8000_0000, ...) and 6G- (0x8_8000_0000, ...), mem=4G
will hit the limited memory address as 0x8_8000_0000 + 2G in the 2nd block.
I will add some comments to clarify.
>
> > +
> > static int __init_memblock memblock_search(struct memblock_type *type, phys_addr_t addr)
> > {
> > unsigned int left = 0, right = type->cnt;
> > @@ -1677,13 +1714,15 @@ static int memblock_debug_show(struct seq_file *m, void *private)
> > reg = &type->regions[i];
> > seq_printf(m, "%4d: ", i);
> > if (sizeof(phys_addr_t) == 4)
> > - seq_printf(m, "0x%08lx..0x%08lx\n",
> > + seq_printf(m, "0x%08lx..0x%08lx 0x%08lx 0x%lx\n",
> > (unsigned long)reg->base,
> > - (unsigned long)(reg->base + reg->size - 1));
> > + (unsigned long)(reg->base + reg->size - 1),
> > + (unsigned long)reg->size, reg->flags);
> > else
> > - seq_printf(m, "0x%016llx..0x%016llx\n",
> > + seq_printf(m, "0x%016llx..0x%016llx 0x%016llx 0x%lx\n",
> > (unsigned long long)reg->base,
> > - (unsigned long long)(reg->base + reg->size - 1));
> > + (unsigned long long)(reg->base + reg->size - 1),
> > + (unsigned long long)reg->size, reg->flags);
>
> As mentioned above, this should be a separate patch. I have no strong
> feelings either way about the logic itself.
>
Hi Mark, thanks for the review!I plan to post updated version later on...
Thanks,
Dennis
>
> Thanks,
> Mark.
>
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2016-06-28 2:55 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-06-27 2:27 [PATCH v3 1/2] mm: memblock Add some new functions to address the mem limit issue Dennis Chen
2016-06-27 2:27 ` [PATCH v3 2/2] arm64:acpi Fix the acpi alignment exeception when 'mem=' specified Dennis Chen
2016-06-27 14:28 ` [PATCH v3 1/2] mm: memblock Add some new functions to address the mem limit issue Mark Rutland
2016-06-28 2:55 ` Dennis Chen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).