All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] sparc64: valid physical address bitmap
@ 2014-09-16 16:52 Bob Picco
  2014-09-17 21:00 ` David Miller
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Bob Picco @ 2014-09-16 16:52 UTC (permalink / raw)
  To: sparclinux

From: bob picco <bpicco@meloft.net>

We need to constrain the size of sparc64_valid_addr_bitmap. Historically
it has been sized according to maximum physical address and 4Mb DIMM size.
This was sufficient with older sparc64 before larger physical address bits.

This patch limits the bitmap to 64Kb by a smaller value for a physical
address bits which cover the vast majority of sparc64.

The last_valid_pfn is used to limit the physical address limit within
the ktlb miss for identity address checking and increase the megabyte shift
granularity of the check for a valid pfn.

An LDOM guest might have an issue with this depending on how the PA to
RA ranges were assigned by the control domain. Though this issue already
seems to exist for a granularity less than 4Mb which is the current
bitmap shift and test.

Cc: sparclinux@vger.kernel.org
Signed-off-by: Bob Picco <bob.picco@oracle.com>
---
 arch/sparc/include/asm/page_64.h    |    9 ++++
 arch/sparc/include/asm/pgtable_64.h |   10 +++-
 arch/sparc/kernel/ktlb.S            |    6 +++
 arch/sparc/kernel/vmlinux.lds.S     |   10 ++++
 arch/sparc/mm/init_64.c             |   86 ++++++++++++++++++++++++++++++++++-
 arch/sparc/mm/init_64.h             |    5 +-
 6 files changed, 119 insertions(+), 7 deletions(-)

diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index bf10998..93f6508 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -129,6 +129,15 @@ extern unsigned long PAGE_OFFSET;
  */
 #define MAX_PHYS_ADDRESS_BITS	47
 
+/* This represents a subset of the physical address bits. It is used
+ * to determine how the powerof2 for sparc64_valid_addr_bitmap is changed.
+ * Basically when max_pfn exceeds this shift value we increase the powerof2
+ * for sparc64_valid_addr_bitmap. So increase the power of two one for
+ * each bit above 41. For example, 51 bits of physical address bits would
+ * be ILOG2_4MB+10 - 4Gb DIMMs.
+ */
+#define	MAX_PHYS_ADDRESS_LOBITS	41
+
 /* These two shift counts are used when indexing sparc64_valid_addr_bitmap
  * and kpte_linear_bitmap.
  */
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 3770bf5..f86c658 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -74,13 +74,19 @@
 #include <linux/sched.h>
 
 extern unsigned long sparc64_valid_addr_bitmap[];
+/* These two externs are used specifically for sparc64_valid_addr_bitmap.*/
+extern unsigned int sparc64_lomem_ilog2, sparc64_phys_address_bits_shift;
 
 /* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
 static inline bool __kern_addr_valid(unsigned long paddr)
 {
-	if ((paddr >> MAX_PHYS_ADDRESS_BITS) != 0UL)
+	int bit_set;
+
+	if ((paddr >> sparc64_phys_address_bits_shift) != 0UL)
 		return false;
-	return test_bit(paddr >> ILOG2_4MB, sparc64_valid_addr_bitmap);
+	bit_set = test_bit(paddr >> sparc64_lomem_ilog2,
+		sparc64_valid_addr_bitmap);
+	return bit_set;
 }
 
 static inline bool kern_addr_valid(unsigned long addr)
diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S
index 605d492..d613648 100644
--- a/arch/sparc/kernel/ktlb.S
+++ b/arch/sparc/kernel/ktlb.S
@@ -165,6 +165,9 @@
 	.section	.page_offset_shift_patch, "ax"
 	.word		661b
 	.previous
+	.section	.phys_max_ilog2_patch, "ax"
+	.word		661b
+	.previous
 
 	brnz,pn		%g2, kvmap_dtlb_longpath
 	 nop
@@ -187,6 +190,9 @@
 	.section	.page_offset_shift_patch, "ax"
 	.word		661b
 	.previous
+	.section	.phys_lomem_ilog2_patch, "ax"
+	.word		661b
+	.previous
 
 	srlx		%g2, 6, %g5
 	and		%g2, 63, %g2
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 2da4f39..422ead9 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -127,6 +127,16 @@ SECTIONS
 		*(.page_offset_shift_patch)
 		__page_offset_shift_patch_end = .;
 	}
+	.phys_lomem_ilog2_patch : {
+		__phys_lomem_ilog2_patch = .;
+		*(.phys_lomem_ilog2_patch)
+		__phys_lomem_ilog2_patch_end = .;
+	}
+	.phys_max_ilog2_patch : {
+		__phys_max_ilog2_patch = .;
+		*(.phys_max_ilog2_patch)
+		__phys_max_ilog2_patch_end = .;
+	}
 	.swapper_phys_low_1mb_patch : {
 		__swapper_phys_low_1mb_patch = .;
 		*(.swapper_phys_low_1mb_patch)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 1bf4643..4edda2c 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -165,6 +165,8 @@ static void __init read_obp_memory(const char *property,
 	     cmp_p64, NULL);
 }
 
+unsigned int sparc64_lomem_ilog2 = ILOG2_4MB;
+unsigned int sparc64_phys_address_bits_shift = MAX_PHYS_ADDRESS_BITS;
 unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES /
 					sizeof(unsigned long)];
 EXPORT_SYMBOL(sparc64_valid_addr_bitmap);
@@ -1659,6 +1661,7 @@ static void __init setup_page_offset(void)
 		PAGE_OFFSET, max_phys_bits);
 
 	page_offset_shift_patch(max_phys_bits);
+	sparc64_phys_address_bits_shift = max_phys_bits;
 }
 
 static void __init tsb_phys_patch(void)
@@ -1946,12 +1949,35 @@ static void __init reduce_memory(phys_addr_t limit_ram)
 	}
 }
 
+/* We need to capture the opcode-s of two patches before setup_page_offset()
+ * modifies them. They are patched again by compute_bitmap_parameters.
+ * compute_bitmap_parameters requires the rs1 register which might be
+ * clobbered by setup_page_offset() patching.
+ */
+#define PHYS_PATCH_CAPTURE_OPCODES	2
+static unsigned int
+	phys_patch_capture_opcodes[PHYS_PATCH_CAPTURE_OPCODES] __initdata;
+static void __init phys_patch_fetch_opcode(unsigned int *p, int index)
+{
+	unsigned int *insn = (unsigned int *)(unsigned long)*p;
+
+	phys_patch_capture_opcodes[index] = *insn;
+}
+static void __init phys_patch_capture(void)
+{
+	extern unsigned int __phys_lomem_ilog2_patch, __phys_max_ilog2_patch;
+
+	phys_patch_fetch_opcode(&__phys_lomem_ilog2_patch, 0);
+	phys_patch_fetch_opcode(&__phys_max_ilog2_patch, 1);
+}
+
 void __init paging_init(void)
 {
 	unsigned long end_pfn, shift, phys_base;
 	unsigned long real_end, i;
 	int node;
 
+	phys_patch_capture();
 	setup_page_offset();
 
 	/* These build time checkes make sure that the dcache_dirty_cpu()
@@ -2193,7 +2219,8 @@ static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap)
 
 				if (new_start <= old_start &&
 				    new_end >= (old_start + PAGE_SIZE)) {
-					set_bit(old_start >> ILOG2_4MB, bitmap);
+					set_bit(old_start >>
+						sparc64_lomem_ilog2, bitmap);
 					goto do_next_page;
 				}
 			}
@@ -2235,14 +2262,69 @@ static void __init register_page_bootmem_info(void)
 			register_page_bootmem_info_node(NODE_DATA(i));
 #endif
 }
+
+static void __init patch_phys_adjust(unsigned int *p, unsigned int opcode,
+				     unsigned int shift)
+{
+	unsigned int *insn = (unsigned int *)(unsigned long)*p;
+	unsigned int rs1 = opcode & (0x1fU << 14);
+	unsigned int rd = opcode & (0x1fU << 25);
+	unsigned int srlx = 0x81303000U;
+	unsigned int or = 0x80100000U;
+	unsigned int cnt, val;
+
+	/* This sets the shift to page_offset bits cleared from top
+	 * previously plus the new bitmap shift value. We aren't
+	 * modifying the concept of PAGE_OFFSET but preserving it.
+	 */
+	cnt = (64 - sparc64_phys_address_bits_shift) + shift;
+	if (cnt >= 64)
+		val = or | rd;
+	else
+		val = srlx | rd | rs1 | cnt;
+	*insn = val;
+
+	__asm__ __volatile__("flush	%0\n\t"
+		     : /* no outputs */
+		     : "r" (insn));
+}
+
+static void __init compute_bitmap_parameters(void)
+{
+	extern unsigned int __phys_lomem_ilog2_patch, __phys_max_ilog2_patch;
+	unsigned int last_valid_pfn_shift, max_phys_shift;
+
+	/* The maximum architecture physical limit is covered.*/
+	if (sparc64_phys_address_bits_shift <= MAX_PHYS_ADDRESS_LOBITS)
+		return;
+
+	last_valid_pfn_shift = __fls(last_valid_pfn);
+	if (!is_power_of_2(last_valid_pfn))
+		last_valid_pfn_shift++;
+	max_phys_shift = last_valid_pfn_shift + PAGE_SHIFT;
+
+	if (max_phys_shift > MAX_PHYS_ADDRESS_LOBITS)
+		sparc64_lomem_ilog2 = max_phys_shift -
+			MAX_PHYS_ADDRESS_LOBITS + sparc64_lomem_ilog2;
+
+	patch_phys_adjust(&__phys_lomem_ilog2_patch,
+		phys_patch_capture_opcodes[0], sparc64_lomem_ilog2);
+	patch_phys_adjust(&__phys_max_ilog2_patch,
+		phys_patch_capture_opcodes[1], max_phys_shift);
+	sparc64_phys_address_bits_shift = max_phys_shift;
+}
+
 void __init mem_init(void)
 {
 	unsigned long addr, last;
 
+	compute_bitmap_parameters();
+
 	addr = PAGE_OFFSET + kern_base;
 	last = PAGE_ALIGN(kern_size) + addr;
 	while (addr < last) {
-		set_bit(__pa(addr) >> ILOG2_4MB, sparc64_valid_addr_bitmap);
+		set_bit(__pa(addr) >> sparc64_lomem_ilog2,
+				sparc64_valid_addr_bitmap);
 		addr += PAGE_SIZE;
 	}
 
diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h
index 0668b36..5c86c14 100644
--- a/arch/sparc/mm/init_64.h
+++ b/arch/sparc/mm/init_64.h
@@ -8,12 +8,11 @@
  */
 
 #define MAX_PHYS_ADDRESS	(1UL << MAX_PHYS_ADDRESS_BITS)
-#define KPTE_BITMAP_CHUNK_SZ		(256UL * 1024UL * 1024UL)
+#define KPTE_BITMAP_CHUNK_SZ	(256UL * 1024UL * 1024UL)
 #define KPTE_BITMAP_BYTES	\
 	((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4)
-#define VALID_ADDR_BITMAP_CHUNK_SZ	(4UL * 1024UL * 1024UL)
 #define VALID_ADDR_BITMAP_BYTES	\
-	((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8)
+	(1UL << (MAX_PHYS_ADDRESS_LOBITS - ILOG2_4MB - 3))
 
 extern unsigned long kern_linear_pte_xor[4];
 extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] sparc64: valid physical address bitmap
  2014-09-16 16:52 [PATCH] sparc64: valid physical address bitmap Bob Picco
@ 2014-09-17 21:00 ` David Miller
  2014-09-18 10:16 ` Bob Picco
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: David Miller @ 2014-09-17 21:00 UTC (permalink / raw)
  To: sparclinux

From: Bob Picco <bpicco@meloft.net>
Date: Tue, 16 Sep 2014 12:52:17 -0400

> From: bob picco <bpicco@meloft.net>
> 
> We need to constrain the size of sparc64_valid_addr_bitmap. Historically
> it has been sized according to maximum physical address and 4Mb DIMM size.
> This was sufficient with older sparc64 before larger physical address bits.
> 
> This patch limits the bitmap to 64Kb by a smaller value for a physical
> address bits which cover the vast majority of sparc64.
> 
> The last_valid_pfn is used to limit the physical address limit within
> the ktlb miss for identity address checking and increase the megabyte shift
> granularity of the check for a valid pfn.
> 
> An LDOM guest might have an issue with this depending on how the PA to
> RA ranges were assigned by the control domain. Though this issue already
> seems to exist for a granularity less than 4Mb which is the current
> bitmap shift and test.
> 
> Cc: sparclinux@vger.kernel.org
> Signed-off-by: Bob Picco <bob.picco@oracle.com>

Let's stop fighting this thing.

Instead, let's just kill the bitmap off completely and scan the 'reg'
property of the 'memory' OF node.  It's well formed and very small,
and now it doesn't matter what granularity works or not for LDOM
guests as well.

Bonus is that the BSS section shrinks by 4MB after this change.

This is a really delicate change, the more testing the better.  I've
only done basic sanity tests on T4-2 so far.

==========
[PATCH] sparc64: Kill sparc64_valid_addr_bitmap.

It keeps growing as we try and increase the number of physical memory
bits we support.

We really don't need it, we can just loop over the 'memory' OF node's
'reg' property which represents all valid physical memory.  These
arrays tend to be well formed, and not very large at all.

Since we are now using this in the kernel TLB miss handler fast path
we perform some straightforward transformations on the array.  First,
we merge as many entries together as we can, there are several prtconf
dumps I've seen where this will trigger.

Second, we mark the end of the array with a zero entry, so that the
kernel TLB miss handler has to keep less state in registers during the
scan.

The BSS section is now 4MB smaller after this change.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/sparc/include/asm/page_64.h    |   3 -
 arch/sparc/include/asm/pgtable_64.h |  10 +--
 arch/sparc/kernel/ktlb.S            |  67 +++++----------
 arch/sparc/mm/init_64.c             | 159 +++++++++++++++++-------------------
 arch/sparc/mm/init_64.h             |   3 -
 5 files changed, 97 insertions(+), 145 deletions(-)

diff --git a/arch/sparc/include/asm/page_64.h b/arch/sparc/include/asm/page_64.h
index bf10998..4af4e69 100644
--- a/arch/sparc/include/asm/page_64.h
+++ b/arch/sparc/include/asm/page_64.h
@@ -129,9 +129,6 @@ extern unsigned long PAGE_OFFSET;
  */
 #define MAX_PHYS_ADDRESS_BITS	47
 
-/* These two shift counts are used when indexing sparc64_valid_addr_bitmap
- * and kpte_linear_bitmap.
- */
 #define ILOG2_4MB		22
 #define ILOG2_256MB		28
 
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index 3770bf5..895a9c3 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -73,15 +73,7 @@
 
 #include <linux/sched.h>
 
-extern unsigned long sparc64_valid_addr_bitmap[];
-
-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-static inline bool __kern_addr_valid(unsigned long paddr)
-{
-	if ((paddr >> MAX_PHYS_ADDRESS_BITS) != 0UL)
-		return false;
-	return test_bit(paddr >> ILOG2_4MB, sparc64_valid_addr_bitmap);
-}
+bool __kern_addr_valid(unsigned long paddr);
 
 static inline bool kern_addr_valid(unsigned long addr)
 {
diff --git a/arch/sparc/kernel/ktlb.S b/arch/sparc/kernel/ktlb.S
index 605d492..7036a65 100644
--- a/arch/sparc/kernel/ktlb.S
+++ b/arch/sparc/kernel/ktlb.S
@@ -150,68 +150,39 @@ kvmap_dtlb_4v:
 	 * Must preserve %g1 and %g6 (TAG).
 	 */
 kvmap_dtlb_tsb4m_miss:
-	/* Clear the PAGE_OFFSET top virtual bits, shift
-	 * down to get PFN, and make sure PFN is in range.
-	 */
+	/* Clear the PAGE_OFFSET top virtual bits. */
 661:	sllx		%g4, 0, %g5
 	.section	.page_offset_shift_patch, "ax"
 	.word		661b
 	.previous
 
-	/* Check to see if we know about valid memory at the 4MB
-	 * chunk this physical address will reside within.
-	 */
-661:	srlx		%g5, MAX_PHYS_ADDRESS_BITS, %g2
-	.section	.page_offset_shift_patch, "ax"
-	.word		661b
-	.previous
-
-	brnz,pn		%g2, kvmap_dtlb_longpath
-	 nop
-
-	/* This unconditional branch and delay-slot nop gets patched
-	 * by the sethi sequence once the bitmap is properly setup.
-	 */
-	.globl		valid_addr_bitmap_insn
-valid_addr_bitmap_insn:
-	ba,pt		%xcc, 2f
-	 nop
-	.subsection	2
-	.globl		valid_addr_bitmap_patch
-valid_addr_bitmap_patch:
-	sethi		%hi(sparc64_valid_addr_bitmap), %g7
-	or		%g7, %lo(sparc64_valid_addr_bitmap), %g7
-	.previous
+	sethi		%hi(pall), %g7
 
-661:	srlx		%g5, ILOG2_4MB, %g2
+	/* Physical address in %g5. */
+661:	srlx		%g5, 0, %g5
 	.section	.page_offset_shift_patch, "ax"
 	.word		661b
 	.previous
 
-	srlx		%g2, 6, %g5
-	and		%g2, 63, %g2
-	sllx		%g5, 3, %g5
-	ldx		[%g7 + %g5], %g5
-	mov		1, %g7
-	sllx		%g7, %g2, %g7
-	andcc		%g5, %g7, %g0
-	be,pn		%xcc, kvmap_dtlb_longpath
-
-2:	 sethi		%hi(kpte_linear_bitmap), %g2
+	or		%g7, %lo(pall), %g7
 
-	/* Get the 256MB physical address index. */
-661:	sllx		%g4, 0, %g5
-	.section	.page_offset_shift_patch, "ax"
-	.word		661b
-	.previous
+1:	ldx		[%g7 + 0x08], %g3		/* reg_size */
+	ldx		[%g7 + 0x00], %g2		/* phys_addr */
+	brz,pn		%g3, kvmap_dtlb_longpath	/* end of array? */
+	 cmp		%g5, %g2
+	blu,pn		%xcc, 8f
+	 add		%g2, %g3, %g2
+	cmp		%g5, %g2
+	blu,pt		%xcc, 2f			/* full match */
+	 nop
+8:	ba,pt		%xcc, 1b
+	 add		%g7, 0x10, %g7
 
+	/* Physical address is still in %g5. */
+2:	sethi		%hi(kpte_linear_bitmap), %g2
+	srlx		%g5, ILOG2_256MB, %g5
 	or		%g2, %lo(kpte_linear_bitmap), %g2
 
-661:	srlx		%g5, ILOG2_256MB, %g5
-	.section	.page_offset_shift_patch, "ax"
-	.word		661b
-	.previous
-
 	and		%g5, (32 - 1), %g7
 
 	/* Divide by 32 to get the offset into the bitmask.  */
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index c30a796..4acabb0 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -165,10 +165,6 @@ static void __init read_obp_memory(const char *property,
 	     cmp_p64, NULL);
 }
 
-unsigned long sparc64_valid_addr_bitmap[VALID_ADDR_BITMAP_BYTES /
-					sizeof(unsigned long)];
-EXPORT_SYMBOL(sparc64_valid_addr_bitmap);
-
 /* Kernel physical address base and size in bytes.  */
 unsigned long kern_base __read_mostly;
 unsigned long kern_size __read_mostly;
@@ -1366,8 +1362,82 @@ static unsigned long __init bootmem_init(unsigned long phys_base)
 	return end_pfn;
 }
 
-static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
-static int pall_ents __initdata;
+/* pall[] is exported for the sake of the kernel linear TLB miss handler */
+struct linux_prom64_registers pall[MAX_BANKS + 1];
+static int pall_ents;
+
+bool __kern_addr_valid(unsigned long paddr)
+{
+	int i;
+
+	for (i = 0; i < pall_ents; i++) {
+		struct linux_prom64_registers *p = &pall[i];
+
+		if (paddr < p->phys_addr)
+			continue;
+		if (paddr >= (p->phys_addr + p->reg_size))
+			continue;
+
+		return true;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL(__kern_addr_valid);
+
+/* Compact pall[] once "reg" has been read: drop empty entries, merge
+ * abutting ranges and terminate the array with a zero entry.
+ */
+static void __init optimize_pall(void)
+{
+	int i;
+
+	/* First pass, strip out all zero length entries.  This is important
+	 * because a zero reg_size marks the end of the array.
+	 */
+	for (i = 0; i < pall_ents; i++) {
+		struct linux_prom64_registers *p = &pall[i];
+
+		if (p->reg_size)
+			continue;
+
+		memmove(p, p + 1, (pall_ents - i) * sizeof(*p));
+		pall_ents--;
+		i--;
+	}
+
+	/* Second pass, merge all adjacent entries.  */
+	for (i = 0; i < pall_ents; i++) {
+		struct linux_prom64_registers *p = &pall[i];
+		unsigned long begin = p->phys_addr;
+		unsigned long end = begin + p->reg_size;
+		int j;
+
+		for (j = i + 1; j < pall_ents; j++) {
+			struct linux_prom64_registers *q = &pall[j];
+
+			/* Compare, don't assign: q must abut [begin, end). */
+			if (end == q->phys_addr) {
+				end += q->reg_size;
+			} else if (begin == q->phys_addr + q->reg_size) {
+				begin -= q->reg_size;
+			} else
+				continue;
+
+			/* q was absorbed: close the gap and rescan slot j. */
+			memmove(q, q + 1, (pall_ents - j) * sizeof(*q));
+			pall_ents--;
+			j--;
+		}
+
+		p->phys_addr = begin;
+		p->reg_size = end - begin;
+	}
+
+	/* Zero sentinel: the kernel TLB miss handler scans until reg_size 0. */
+	pall[pall_ents].phys_addr = 0UL;
+	pall[pall_ents].reg_size = 0UL;
+}
 
 #ifdef CONFIG_DEBUG_PAGEALLOC
 static unsigned long __ref kernel_map_range(unsigned long pstart,
@@ -2011,6 +2081,7 @@ void __init paging_init(void)
 	 */
 	read_obp_translations();
 	read_obp_memory("reg", &pall[0], &pall_ents);
+	optimize_pall();
 	read_obp_memory("available", &pavail[0], &pavail_ents);
 	read_obp_memory("available", &pavail[0], &pavail_ents);
 
@@ -2160,70 +2231,6 @@ int page_in_phys_avail(unsigned long paddr)
 	return 0;
 }
 
-static struct linux_prom64_registers pavail_rescan[MAX_BANKS] __initdata;
-static int pavail_rescan_ents __initdata;
-
-/* Certain OBP calls, such as fetching "available" properties, can
- * claim physical memory.  So, along with initializing the valid
- * address bitmap, what we do here is refetch the physical available
- * memory list again, and make sure it provides at least as much
- * memory as 'pavail' does.
- */
-static void __init setup_valid_addr_bitmap_from_pavail(unsigned long *bitmap)
-{
-	int i;
-
-	read_obp_memory("available", &pavail_rescan[0], &pavail_rescan_ents);
-
-	for (i = 0; i < pavail_ents; i++) {
-		unsigned long old_start, old_end;
-
-		old_start = pavail[i].phys_addr;
-		old_end = old_start + pavail[i].reg_size;
-		while (old_start < old_end) {
-			int n;
-
-			for (n = 0; n < pavail_rescan_ents; n++) {
-				unsigned long new_start, new_end;
-
-				new_start = pavail_rescan[n].phys_addr;
-				new_end = new_start +
-					pavail_rescan[n].reg_size;
-
-				if (new_start <= old_start &&
-				    new_end >= (old_start + PAGE_SIZE)) {
-					set_bit(old_start >> ILOG2_4MB, bitmap);
-					goto do_next_page;
-				}
-			}
-
-			prom_printf("mem_init: Lost memory in pavail\n");
-			prom_printf("mem_init: OLD start[%lx] size[%lx]\n",
-				    pavail[i].phys_addr,
-				    pavail[i].reg_size);
-			prom_printf("mem_init: NEW start[%lx] size[%lx]\n",
-				    pavail_rescan[i].phys_addr,
-				    pavail_rescan[i].reg_size);
-			prom_printf("mem_init: Cannot continue, aborting.\n");
-			prom_halt();
-
-		do_next_page:
-			old_start += PAGE_SIZE;
-		}
-	}
-}
-
-static void __init patch_tlb_miss_handler_bitmap(void)
-{
-	extern unsigned int valid_addr_bitmap_insn[];
-	extern unsigned int valid_addr_bitmap_patch[];
-
-	valid_addr_bitmap_insn[1] = valid_addr_bitmap_patch[1];
-	mb();
-	valid_addr_bitmap_insn[0] = valid_addr_bitmap_patch[0];
-	flushi(&valid_addr_bitmap_insn[0]);
-}
-
 static void __init register_page_bootmem_info(void)
 {
 #ifdef CONFIG_NEED_MULTIPLE_NODES
@@ -2236,18 +2243,6 @@ static void __init register_page_bootmem_info(void)
 }
 void __init mem_init(void)
 {
-	unsigned long addr, last;
-
-	addr = PAGE_OFFSET + kern_base;
-	last = PAGE_ALIGN(kern_size) + addr;
-	while (addr < last) {
-		set_bit(__pa(addr) >> ILOG2_4MB, sparc64_valid_addr_bitmap);
-		addr += PAGE_SIZE;
-	}
-
-	setup_valid_addr_bitmap_from_pavail(sparc64_valid_addr_bitmap);
-	patch_tlb_miss_handler_bitmap();
-
 	high_memory = __va(last_valid_pfn << PAGE_SHIFT);
 
 	register_page_bootmem_info();
diff --git a/arch/sparc/mm/init_64.h b/arch/sparc/mm/init_64.h
index 0668b36..bdef0a6 100644
--- a/arch/sparc/mm/init_64.h
+++ b/arch/sparc/mm/init_64.h
@@ -11,9 +11,6 @@
 #define KPTE_BITMAP_CHUNK_SZ		(256UL * 1024UL * 1024UL)
 #define KPTE_BITMAP_BYTES	\
 	((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 4)
-#define VALID_ADDR_BITMAP_CHUNK_SZ	(4UL * 1024UL * 1024UL)
-#define VALID_ADDR_BITMAP_BYTES	\
-	((MAX_PHYS_ADDRESS / VALID_ADDR_BITMAP_CHUNK_SZ) / 8)
 
 extern unsigned long kern_linear_pte_xor[4];
 extern unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
-- 
1.8.1.2


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] sparc64: valid physical address bitmap
  2014-09-16 16:52 [PATCH] sparc64: valid physical address bitmap Bob Picco
  2014-09-17 21:00 ` David Miller
@ 2014-09-18 10:16 ` Bob Picco
  2014-09-18 17:05 ` David Miller
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: Bob Picco @ 2014-09-18 10:16 UTC (permalink / raw)
  To: sparclinux

David Miller wrote:	[Wed Sep 17 2014, 05:00:48PM EDT]
> From: Bob Picco <bpicco@meloft.net>
> Date: Tue, 16 Sep 2014 12:52:17 -0400
> 
> > From: bob picco <bpicco@meloft.net>
> > 
> > We need to constrain the size of sparc64_valid_addr_bitmap. Historically
> > it has been sized according to maximum physical address and 4Mb DIMM size.
> > This was sufficient with older sparc64 before larger physical address bits.
> > 
> > This patch limits the bitmap to 64Kb by a smaller value for a physical
> > address bits which cover the vast majority of sparc64.
> > 
> > The last_valid_pfn is used to limit the physical address limit within
> > the ktlb miss for identity address checking and increase the megabyte shift
> > granularity of the check for a valid pfn.
> > 
> > An LDOM guest might have an issue with this depending on how the PA to
> > RA ranges were assigned by the control domain. Though this issue already
> > seems to exist for a granularity less than 4Mb which is the current
> > bitmap shift and test.
> > 
> > Cc: sparclinux@vger.kernel.org
> > Signed-off-by: Bob Picco <bob.picco@oracle.com>
> 
> Let's stop fighting this thing.
Oh do I understand :)
> 
> Instead, let's just kill the bitmap off completely and scan the 'reg'
> property of the 'memory' OF node.  It's well formed and very small,
> and now it doesn't matter what granularity works or not for LDOM
> guests as well.
I like the idea.

There might be one issue: 
The machine has more reg property entries than this kernel can support (32). 
Program terminated 
. It is a T4-4 LDOM guest configured with an absurd number of PA <-> RA
mappings. I haven't examined the issue further than this.

I've invested infinite zero time reviewing your code.
> 
> Bonus is that the BSS section shrinks by 4MB after this change.
> 
> This is a really delicate change, the more testing the better.  I've
> only done basic sanity tests on T4-2 so far.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] sparc64: valid physical address bitmap
  2014-09-16 16:52 [PATCH] sparc64: valid physical address bitmap Bob Picco
  2014-09-17 21:00 ` David Miller
  2014-09-18 10:16 ` Bob Picco
@ 2014-09-18 17:05 ` David Miller
  2014-09-18 17:13 ` David Miller
  2014-09-18 18:13 ` Bob Picco
  4 siblings, 0 replies; 6+ messages in thread
From: David Miller @ 2014-09-18 17:05 UTC (permalink / raw)
  To: sparclinux

From: Bob Picco <bpicco@meloft.net>
Date: Thu, 18 Sep 2014 06:16:06 -0400

> David Miller wrote:	[Wed Sep 17 2014, 05:00:48PM EDT]
>> Instead, let's just kill the bitmap off completely and scan the 'reg'
>> property of the 'memory' OF node.  It's well formed and very small,
>> and now it doesn't matter what granularity works or not for LDOM
>> guests as well.
> I like the idea.
> 
> There might be one issue: 
> The machine has more reg property entries than this kernel can support (32). 
> Program terminated 
> . It is a T4-4 LDOM guest configured with an absurd number of PA <-> RA
> mappings. I haven't examined the issue further than this.

Ok.  Given some of the other work I did last night, and after some more
consideration, I think I have a better idea of how to handle this.

Let's just use kernel page tables for everything.

We'll put huge PMDs into the kernel page tables for the linear mappings
and this will serve two purposes:

1) Physical address validation

2) Huge page size selection

And the kernel page tables deal naturally with sparseness.

We just have to set it up after we take over the trap table from OF.
(otherwise we can't actually access any early memory we allocate via
memblock or similar)  And we have the infrastructure to do that, with
instruction patching.

We simply accept all TLB misses to the linear area, strictly use only
4MB TTEs, before we setup the kernel page tables.

I'll see if I can throw something together today.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] sparc64: valid physical address bitmap
  2014-09-16 16:52 [PATCH] sparc64: valid physical address bitmap Bob Picco
                   ` (2 preceding siblings ...)
  2014-09-18 17:05 ` David Miller
@ 2014-09-18 17:13 ` David Miller
  2014-09-18 18:13 ` Bob Picco
  4 siblings, 0 replies; 6+ messages in thread
From: David Miller @ 2014-09-18 17:13 UTC (permalink / raw)
  To: sparclinux

From: Bob Picco <bpicco@meloft.net>
Date: Thu, 18 Sep 2014 06:16:06 -0400

> The machine has more reg property entries than this kernel can
> support (32).

Bob, is there some upper bound we can use?  I'm happy to enlarge it
to whatever reasonable size is necessary.

The 'memory' node properties are the one thing I don't think we can
reasonably dynamically allocate memory for, so they have to be
statically sized.

With the changes we are discussing here, we are getting several
megabytes of BSS space back in the kernel image, so there is lots
of room for expanding the value of MAX_BANKS :-)

Also, that debugging message from read_obp_memory() should also
print out "ents" so we know how much we might need to expand it
in the future.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] sparc64: valid physical address bitmap
  2014-09-16 16:52 [PATCH] sparc64: valid physical address bitmap Bob Picco
                   ` (3 preceding siblings ...)
  2014-09-18 17:13 ` David Miller
@ 2014-09-18 18:13 ` Bob Picco
  4 siblings, 0 replies; 6+ messages in thread
From: Bob Picco @ 2014-09-18 18:13 UTC (permalink / raw)
  To: sparclinux

David Miller wrote:	[Thu Sep 18 2014, 01:13:22PM EDT]
> From: Bob Picco <bpicco@meloft.net>
> Date: Thu, 18 Sep 2014 06:16:06 -0400
> 
> > The machine has more reg property entries than this kernel can
> > support (32).
> 
> Bob, is there some upper bound we can use?  I'm happy to enlarge it
> to whatever reasonable size is necessary.
I just sent an email because my knowledge isn't LDOM guest.
> 
> The 'memory' node properties are the one thing I don't think we can
> reasonably dynamically allocate memory for, so they have to be
> statically sized.
I agree.
> 
> With the changes we are discussing here, we are getting several
> megabytes of BSS space back in the kernel image, so there is lots
> of room for expanding the value of MAX_BANKS :-)
Indeed.
> 
> Also, that debugging message from read_obp_memory() should also
> print out "ents" so we know how much we might need to expand it
> in the future.
Yes.

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2014-09-18 18:13 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-09-16 16:52 [PATCH] sparc64: valid physical address bitmap Bob Picco
2014-09-17 21:00 ` David Miller
2014-09-18 10:16 ` Bob Picco
2014-09-18 17:05 ` David Miller
2014-09-18 17:13 ` David Miller
2014-09-18 18:13 ` Bob Picco

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.