public inbox for linux-arch@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] set_pte() part 2 arch usage
@ 2005-02-24  4:07 David S. Miller
  2005-02-24  5:18 ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 16+ messages in thread
From: David S. Miller @ 2005-02-24  4:07 UTC (permalink / raw)
  To: benh; +Cc: linux-arch

[-- Attachment #1: Type: text/plain, Size: 336 bytes --]


Ok, here is the promised second installment.  This changes
sparc64 (first attachment) and ppc{32,64} (second attachment)
over to take advantage of the new set_pte_at() mm+address
arguments.

I've build and run tested the sparc64 version.  I have only
build tested the ppc32 and ppc64 changes.  Ben, can you give
them a spin?

Thanks.


[-- Attachment #2: diff1 --]
[-- Type: application/octet-stream, Size: 8930 bytes --]

# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2005/02/23 17:46:43-08:00 davem@nuts.davemloft.net 
#   [SPARC64]: Pass mm/addr directly to tlb_batch_add()
#   
#   No longer need to store this information in the pte table
#   page struct.
#   
#   Signed-off-by: David S. Miller <davem@davemloft.net>
# 
# include/asm-sparc64/pgtable.h
#   2005/02/23 17:45:37-08:00 davem@nuts.davemloft.net +6 -5
#   [SPARC64]: Pass mm/addr directly to tlb_batch_add()
# 
# include/asm-sparc64/pgalloc.h
#   2005/02/23 17:45:37-08:00 davem@nuts.davemloft.net +5 -15
#   [SPARC64]: Pass mm/addr directly to tlb_batch_add()
# 
# arch/sparc64/mm/tlb.c
#   2005/02/23 17:45:37-08:00 davem@nuts.davemloft.net +7 -10
#   [SPARC64]: Pass mm/addr directly to tlb_batch_add()
# 
# arch/sparc64/mm/init.c
#   2005/02/23 17:45:36-08:00 davem@nuts.davemloft.net +2 -1
#   [SPARC64]: Pass mm/addr directly to tlb_batch_add()
# 
# arch/sparc64/mm/hugetlbpage.c
#   2005/02/23 17:45:36-08:00 davem@nuts.davemloft.net +6 -4
#   [SPARC64]: Pass mm/addr directly to tlb_batch_add()
# 
# arch/sparc64/mm/generic.c
#   2005/02/23 17:45:36-08:00 davem@nuts.davemloft.net +14 -11
#   [SPARC64]: Pass mm/addr directly to tlb_batch_add()
# 
diff -Nru a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c
--- a/arch/sparc64/mm/generic.c	2005-02-23 19:33:31 -08:00
+++ b/arch/sparc64/mm/generic.c	2005-02-23 19:33:31 -08:00
@@ -25,8 +25,11 @@
  * side-effect bit will be turned off.  This is used as a
  * performance improvement on FFB/AFB. -DaveM
  */
-static inline void io_remap_pte_range(pte_t * pte, unsigned long address, unsigned long size,
-	unsigned long offset, pgprot_t prot, int space)
+static inline void io_remap_pte_range(struct mm_struct *mm, pte_t * pte,
+				      unsigned long address,
+				      unsigned long size,
+				      unsigned long offset, pgprot_t prot,
+				      int space)
 {
 	unsigned long end;
 
@@ -67,14 +70,14 @@
 			pte_val(entry) &= ~(_PAGE_E);
 		do {
 			BUG_ON(!pte_none(*pte));
-			set_pte(pte, entry);
+			set_pte_at(mm, address, pte, entry);
 			address += PAGE_SIZE;
 			pte++;
 		} while (address < curend);
 	} while (address < end);
 }
 
-static inline int io_remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size,
+static inline int io_remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size,
 	unsigned long offset, pgprot_t prot, int space)
 {
 	unsigned long end;
@@ -85,10 +88,10 @@
 		end = PGDIR_SIZE;
 	offset -= address;
 	do {
-		pte_t * pte = pte_alloc_map(current->mm, pmd, address);
+		pte_t * pte = pte_alloc_map(mm, pmd, address);
 		if (!pte)
 			return -ENOMEM;
-		io_remap_pte_range(pte, address, end - address, address + offset, prot, space);
+		io_remap_pte_range(mm, pte, address, end - address, address + offset, prot, space);
 		pte_unmap(pte);
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
@@ -96,7 +99,7 @@
 	return 0;
 }
 
-static inline int io_remap_pud_range(pud_t * pud, unsigned long address, unsigned long size,
+static inline int io_remap_pud_range(struct mm_struct *mm, pud_t * pud, unsigned long address, unsigned long size,
 	unsigned long offset, pgprot_t prot, int space)
 {
 	unsigned long end;
@@ -107,10 +110,10 @@
 		end = PUD_SIZE;
 	offset -= address;
 	do {
-		pmd_t *pmd = pmd_alloc(current->mm, pud, address);
+		pmd_t *pmd = pmd_alloc(mm, pud, address);
 		if (!pud)
 			return -ENOMEM;
-		io_remap_pmd_range(pmd, address, end - address, address + offset, prot, space);
+		io_remap_pmd_range(mm, pmd, address, end - address, address + offset, prot, space);
 		address = (address + PUD_SIZE) & PUD_MASK;
 		pud++;
 	} while (address < end);
@@ -132,11 +135,11 @@
 
 	spin_lock(&mm->page_table_lock);
 	while (from < end) {
-		pud_t *pud = pud_alloc(current->mm, dir, from);
+		pud_t *pud = pud_alloc(mm, dir, from);
 		error = -ENOMEM;
 		if (!pud)
 			break;
-		error = io_remap_pud_range(pud, from, end - from, offset + from, prot, space);
+		error = io_remap_pud_range(mm, pud, from, end - from, offset + from, prot, space);
 		if (error)
 			break;
 		from = (from + PGDIR_SIZE) & PGDIR_MASK;
diff -Nru a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
--- a/arch/sparc64/mm/hugetlbpage.c	2005-02-23 19:33:31 -08:00
+++ b/arch/sparc64/mm/hugetlbpage.c	2005-02-23 19:33:31 -08:00
@@ -62,6 +62,7 @@
 #define mk_pte_huge(entry) do { pte_val(entry) |= _PAGE_SZHUGE; } while (0)
 
 static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma,
+			 unsigned long addr,
 			 struct page *page, pte_t * page_table, int write_access)
 {
 	unsigned long i;
@@ -78,8 +79,9 @@
 	mk_pte_huge(entry);
 
 	for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
-		set_pte(page_table, entry);
+		set_pte_at(mm, addr, page_table, entry);
 		page_table++;
+		addr += PAGE_SIZE;
 
 		pte_val(entry) += PAGE_SIZE;
 	}
@@ -116,12 +118,12 @@
 		ptepage = pte_page(entry);
 		get_page(ptepage);
 		for (i = 0; i < (1 << HUGETLB_PAGE_ORDER); i++) {
-			set_pte(dst_pte, entry);
+			set_pte_at(dst, addr, dst_pte, entry);
 			pte_val(entry) += PAGE_SIZE;
 			dst_pte++;
+			addr += PAGE_SIZE;
 		}
 		dst->rss += (HPAGE_SIZE / PAGE_SIZE);
-		addr += HPAGE_SIZE;
 	}
 	return 0;
 
@@ -261,7 +263,7 @@
 				goto out;
 			}
 		}
-		set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
+		set_huge_pte(mm, vma, addr, page, pte, vma->vm_flags & VM_WRITE);
 	}
 out:
 	spin_unlock(&mm->page_table_lock);
diff -Nru a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
--- a/arch/sparc64/mm/init.c	2005-02-23 19:33:31 -08:00
+++ b/arch/sparc64/mm/init.c	2005-02-23 19:33:31 -08:00
@@ -431,7 +431,8 @@
 				if (tlb_type == spitfire)
 					val &= ~0x0003fe0000000000UL;
 
-				set_pte (ptep, __pte(val | _PAGE_MODIFIED));
+				set_pte_at(&init_mm, vaddr,
+					   ptep, __pte(val | _PAGE_MODIFIED));
 				trans[i].data += BASE_PAGE_SIZE;
 			}
 		}
diff -Nru a/arch/sparc64/mm/tlb.c b/arch/sparc64/mm/tlb.c
--- a/arch/sparc64/mm/tlb.c	2005-02-23 19:33:31 -08:00
+++ b/arch/sparc64/mm/tlb.c	2005-02-23 19:33:31 -08:00
@@ -41,15 +41,11 @@
 	}
 }
 
-void tlb_batch_add(pte_t *ptep, pte_t orig)
+void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr,
+		   pte_t *ptep, pte_t orig)
 {
-	struct mmu_gather *mp = &__get_cpu_var(mmu_gathers);
-	struct page *ptepage;
-	struct mm_struct *mm;
-	unsigned long vaddr, nr;
-
-	ptepage = virt_to_page(ptep);
-	mm = (struct mm_struct *) ptepage->mapping;
+	struct mmu_gather *mp;
+	unsigned long nr;
 
 	/* It is more efficient to let flush_tlb_kernel_range()
 	 * handle these cases.
@@ -57,8 +53,9 @@
 	if (mm == &init_mm)
 		return;
 
-	vaddr = ptepage->index +
-		(((unsigned long)ptep & ~PAGE_MASK) * PTRS_PER_PTE);
+	mp = &__get_cpu_var(mmu_gathers);
+
+	vaddr &= PAGE_MASK;
 	if (pte_exec(orig))
 		vaddr |= 0x1UL;
 
diff -Nru a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h
--- a/include/asm-sparc64/pgalloc.h	2005-02-23 19:33:31 -08:00
+++ b/include/asm-sparc64/pgalloc.h	2005-02-23 19:33:31 -08:00
@@ -191,25 +191,17 @@
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte = __pte_alloc_one_kernel(mm, address);
-	if (pte) {
-		struct page *page = virt_to_page(pte);
-		page->mapping = (void *) mm;
-		page->index = address & PMD_MASK;
-	}
-	return pte;
+	return __pte_alloc_one_kernel(mm, address);
 }
 
 static inline struct page *
 pte_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
 	pte_t *pte = __pte_alloc_one_kernel(mm, addr);
-	if (pte) {
-		struct page *page = virt_to_page(pte);
-		page->mapping = (void *) mm;
-		page->index = addr & PMD_MASK;
-		return page;
-	}
+
+	if (pte)
+		return virt_to_page(pte);
+
 	return NULL;
 }
 
@@ -246,13 +238,11 @@
 
 static inline void pte_free_kernel(pte_t *pte)
 {
-	virt_to_page(pte)->mapping = NULL;
 	free_pte_fast(pte);
 }
 
 static inline void pte_free(struct page *ptepage)
 {
-	ptepage->mapping = NULL;
 	free_pte_fast(page_address(ptepage));
 }
 
diff -Nru a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h
--- a/include/asm-sparc64/pgtable.h	2005-02-23 19:33:31 -08:00
+++ b/include/asm-sparc64/pgtable.h	2005-02-23 19:33:31 -08:00
@@ -333,17 +333,18 @@
 #define pte_unmap_nested(pte)		do { } while (0)
 
 /* Actual page table PTE updates.  */
-extern void tlb_batch_add(pte_t *ptep, pte_t orig);
+extern void tlb_batch_add(struct mm_struct *mm, unsigned long addr,
+			  pte_t *ptep, pte_t orig);
 
-static inline void set_pte(pte_t *ptep, pte_t pte)
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t pte)
 {
 	pte_t orig = *ptep;
 
 	*ptep = pte;
-	if (pte_present(orig))
-		tlb_batch_add(ptep, orig);
+	if (pte_val(orig) & _PAGE_VALID)
+		tlb_batch_add(mm, addr, ptep, orig);
 }
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
 
 #define pte_clear(mm,addr,ptep)		\
 	set_pte_at((mm), (addr), (ptep), __pte(0UL))

[-- Attachment #3: diff2 --]
[-- Type: application/octet-stream, Size: 16739 bytes --]

# This is a BitKeeper generated diff -Nru style patch.
#
# ChangeSet
#   2005/02/23 19:27:50-08:00 davem@nuts.davemloft.net 
#   [PPC]: Use new set_pte_at() w/mm+address args.
#   
#   Based almost entirely upon an earlier patch by
#   Benjamin Herrenschmidt.
#   
#   Signed-off-by: David S. Miller <davem@davemloft.net>
# 
# include/asm-ppc64/pgtable.h
#   2005/02/23 19:26:53-08:00 davem@nuts.davemloft.net +32 -20
#   [PPC]: Use new set_pte_at() w/mm+address args.
# 
# include/asm-ppc64/pgalloc.h
#   2005/02/23 19:26:53-08:00 davem@nuts.davemloft.net +6 -22
#   [PPC]: Use new set_pte_at() w/mm+address args.
# 
# include/asm-ppc/pgtable.h
#   2005/02/23 19:26:53-08:00 davem@nuts.davemloft.net +33 -28
#   [PPC]: Use new set_pte_at() w/mm+address args.
# 
# include/asm-ppc/highmem.h
#   2005/02/23 19:26:53-08:00 davem@nuts.davemloft.net +1 -1
#   [PPC]: Use new set_pte_at() w/mm+address args.
# 
# arch/ppc64/mm/tlb.c
#   2005/02/23 19:26:53-08:00 davem@nuts.davemloft.net +2 -9
#   [PPC]: Use new set_pte_at() w/mm+address args.
# 
# arch/ppc/mm/tlb.c
#   2005/02/23 19:26:53-08:00 davem@nuts.davemloft.net +0 -20
#   [PPC]: Use new set_pte_at() w/mm+address args.
# 
# arch/ppc/mm/pgtable.c
#   2005/02/23 19:26:53-08:00 davem@nuts.davemloft.net +2 -12
#   [PPC]: Use new set_pte_at() w/mm+address args.
# 
# arch/ppc/mm/init.c
#   2005/02/23 19:26:53-08:00 davem@nuts.davemloft.net +0 -12
#   [PPC]: Use new set_pte_at() w/mm+address args.
# 
# arch/ppc/kernel/dma-mapping.c
#   2005/02/23 19:26:53-08:00 davem@nuts.davemloft.net +5 -2
#   [PPC]: Use new set_pte_at() w/mm+address args.
# 
diff -Nru a/arch/ppc/kernel/dma-mapping.c b/arch/ppc/kernel/dma-mapping.c
--- a/arch/ppc/kernel/dma-mapping.c	2005-02-23 19:33:46 -08:00
+++ b/arch/ppc/kernel/dma-mapping.c	2005-02-23 19:33:46 -08:00
@@ -219,7 +219,8 @@
 	c = vm_region_alloc(&consistent_head, size,
 			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
 	if (c) {
-		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
+		unsigned long vaddr = c->vm_start;
+		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
 		struct page *end = page + (1 << order);
 
 		/*
@@ -232,9 +233,11 @@
 
 			set_page_count(page, 1);
 			SetPageReserved(page);
-			set_pte(pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL)));
+			set_pte_at(&init_mm, vaddr,
+				   pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL)));
 			page++;
 			pte++;
+			vaddr += PAGE_SIZE;
 		} while (size -= PAGE_SIZE);
 
 		/*
diff -Nru a/arch/ppc/mm/init.c b/arch/ppc/mm/init.c
--- a/arch/ppc/mm/init.c	2005-02-23 19:33:46 -08:00
+++ b/arch/ppc/mm/init.c	2005-02-23 19:33:46 -08:00
@@ -490,18 +490,6 @@
 		printk(KERN_INFO "AGP special page: 0x%08lx\n", agp_special_page);
 #endif
 
-	/* Make sure all our pagetable pages have page->mapping
-	   and page->index set correctly. */
-	for (addr = KERNELBASE; addr != 0; addr += PGDIR_SIZE) {
-		struct page *pg;
-		pmd_t *pmd = pmd_offset(pgd_offset_k(addr), addr);
-		if (pmd_present(*pmd)) {
-			pg = pmd_page(*pmd);
-			pg->mapping = (void *) &init_mm;
-			pg->index = addr;
-		}
-	}
-
 	mem_init_done = 1;
 }
 
diff -Nru a/arch/ppc/mm/pgtable.c b/arch/ppc/mm/pgtable.c
--- a/arch/ppc/mm/pgtable.c	2005-02-23 19:33:46 -08:00
+++ b/arch/ppc/mm/pgtable.c	2005-02-23 19:33:46 -08:00
@@ -102,11 +102,6 @@
 
 	if (mem_init_done) {
 		pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-		if (pte) {
-			struct page *ptepage = virt_to_page(pte);
-			ptepage->mapping = (void *) mm;
-			ptepage->index = address & PMD_MASK;
-		}
 	} else {
 		pte = (pte_t *)early_get_page();
 		if (pte)
@@ -126,11 +121,8 @@
 #endif
 
 	ptepage = alloc_pages(flags, 0);
-	if (ptepage) {
-		ptepage->mapping = (void *) mm;
-		ptepage->index = address & PMD_MASK;
+	if (ptepage)
 		clear_highpage(ptepage);
-	}
 	return ptepage;
 }
 
@@ -139,7 +131,6 @@
 #ifdef CONFIG_SMP
 	hash_page_sync();
 #endif
-	virt_to_page(pte)->mapping = NULL;
 	free_page((unsigned long)pte);
 }
 
@@ -148,7 +139,6 @@
 #ifdef CONFIG_SMP
 	hash_page_sync();
 #endif
-	ptepage->mapping = NULL;
 	__free_page(ptepage);
 }
 
@@ -298,7 +288,7 @@
 	pg = pte_alloc_kernel(&init_mm, pd, va);
 	if (pg != 0) {
 		err = 0;
-		set_pte(pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
+		set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
 		if (mem_init_done)
 			flush_HPTE(0, va, pmd_val(*pd));
 	}
diff -Nru a/arch/ppc/mm/tlb.c b/arch/ppc/mm/tlb.c
--- a/arch/ppc/mm/tlb.c	2005-02-23 19:33:46 -08:00
+++ b/arch/ppc/mm/tlb.c	2005-02-23 19:33:46 -08:00
@@ -47,26 +47,6 @@
 }
 
 /*
- * Called by ptep_test_and_clear_young()
- */
-void flush_hash_one_pte(pte_t *ptep)
-{
-	struct page *ptepage;
-	struct mm_struct *mm;
-	unsigned long ptephys;
-	unsigned long addr;
-
-	if (Hash == 0)
-		return;
-	
-	ptepage = virt_to_page(ptep);
-	mm = (struct mm_struct *) ptepage->mapping;
-	ptephys = __pa(ptep) & PAGE_MASK;
-	addr = ptepage->index + (((unsigned long)ptep & ~PAGE_MASK) << 10);
-	flush_hash_pages(mm->context, addr, ptephys, 1);
-}
-
-/*
  * Called by ptep_set_access_flags, must flush on CPUs for which the
  * DSI handler can't just "fixup" the TLB on a write fault
  */
diff -Nru a/arch/ppc64/mm/tlb.c b/arch/ppc64/mm/tlb.c
--- a/arch/ppc64/mm/tlb.c	2005-02-23 19:33:46 -08:00
+++ b/arch/ppc64/mm/tlb.c	2005-02-23 19:33:46 -08:00
@@ -74,19 +74,12 @@
  * change the existing HPTE to read-only rather than removing it
  * (if we remove it we should clear the _PTE_HPTEFLAGS bits).
  */
-void hpte_update(pte_t *ptep, unsigned long pte, int wrprot)
+void hpte_update(struct mm_struct *mm, unsigned long addr,
+		 unsigned long pte, int wrprot)
 {
-	struct page *ptepage;
-	struct mm_struct *mm;
-	unsigned long addr;
 	int i;
 	unsigned long context = 0;
 	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
-
-	ptepage = virt_to_page(ptep);
-	mm = (struct mm_struct *) ptepage->mapping;
-	addr = ptepage->index +
-		(((unsigned long)ptep & ~PAGE_MASK) * PTRS_PER_PTE);
 
 	if (REGION_ID(addr) == USER_REGION_ID)
 		context = mm->context.id;
diff -Nru a/include/asm-ppc/highmem.h b/include/asm-ppc/highmem.h
--- a/include/asm-ppc/highmem.h	2005-02-23 19:33:46 -08:00
+++ b/include/asm-ppc/highmem.h	2005-02-23 19:33:46 -08:00
@@ -90,7 +90,7 @@
 #ifdef HIGHMEM_DEBUG
 	BUG_ON(!pte_none(*(kmap_pte+idx)));
 #endif
-	set_pte(kmap_pte+idx, mk_pte(page, kmap_prot));
+	set_pte_at(&init_mm, vaddr, kmap_pte+idx, mk_pte(page, kmap_prot));
 	flush_tlb_page(NULL, vaddr);
 
 	return (void*) vaddr;
diff -Nru a/include/asm-ppc/pgtable.h b/include/asm-ppc/pgtable.h
--- a/include/asm-ppc/pgtable.h	2005-02-23 19:33:46 -08:00
+++ b/include/asm-ppc/pgtable.h	2005-02-23 19:33:46 -08:00
@@ -512,6 +512,17 @@
 }
 
 /*
+ * When flushing the tlb entry for a page, we also need to flush the hash
+ * table entry.  flush_hash_pages is assembler (for speed) in hashtable.S.
+ */
+extern int flush_hash_pages(unsigned context, unsigned long va,
+			    unsigned long pmdval, int count);
+
+/* Add an HPTE to the hash table */
+extern void add_hash_page(unsigned context, unsigned long va,
+			  unsigned long pmdval);
+
+/*
  * Atomic PTE updates.
  *
  * pte_update clears and sets bit atomically, and returns
@@ -542,7 +553,8 @@
  * On machines which use an MMU hash table we avoid changing the
  * _PAGE_HASHPTE bit.
  */
-static inline void set_pte(pte_t *ptep, pte_t pte)
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t pte)
 {
 #if _PAGE_HASHPTE != 0
 	pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte) & ~_PAGE_HASHPTE);
@@ -550,36 +562,44 @@
 	*ptep = pte;
 #endif
 }
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
-
-extern void flush_hash_one_pte(pte_t *ptep);
 
 /*
  * 2.6 calles this without flushing the TLB entry, this is wrong
  * for our hash-based implementation, we fix that up here
  */
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep)
 {
 	unsigned long old;
 	old = pte_update(ptep, _PAGE_ACCESSED, 0);
 #if _PAGE_HASHPTE != 0
-	if (old & _PAGE_HASHPTE)
-		flush_hash_one_pte(ptep);
+	if (old & _PAGE_HASHPTE) {
+		unsigned long ptephys = __pa(ptep) & PAGE_MASK;
+		flush_hash_pages(context, addr, ptephys, 1);
+	}
 #endif
 	return (old & _PAGE_ACCESSED) != 0;
 }
+#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
+	__ptep_test_and_clear_young((__vma)->vm_mm->context, __addr, __ptep)
 
-static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma,
+					    unsigned long addr, pte_t *ptep)
 {
 	return (pte_update(ptep, (_PAGE_DIRTY | _PAGE_HWWRITE), 0) & _PAGE_DIRTY) != 0;
 }
 
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+				       pte_t *ptep)
 {
 	return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0));
 }
 
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pte_t *ptep)
 {
 	pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), 0);
 }
@@ -603,6 +623,7 @@
  */
 #define pgprot_noncached(prot)	(__pgprot(pgprot_val(prot) | _PAGE_NO_CACHE | _PAGE_GUARDED))
 
+#define __HAVE_ARCH_PTE_SAME
 #define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)
 
 /*
@@ -655,17 +676,6 @@
 extern void paging_init(void);
 
 /*
- * When flushing the tlb entry for a page, we also need to flush the hash
- * table entry.  flush_hash_pages is assembler (for speed) in hashtable.S.
- */
-extern int flush_hash_pages(unsigned context, unsigned long va,
-			    unsigned long pmdval, int count);
-
-/* Add an HPTE to the hash table */
-extern void add_hash_page(unsigned context, unsigned long va,
-			  unsigned long pmdval);
-
-/*
  * Encode and decode a swap entry.
  * Note that the bits we use in a PTE for representing a swap entry
  * must not include the _PAGE_PRESENT bit, the _PAGE_FILE bit, or the
@@ -737,14 +747,9 @@
 
 extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep);
 
-#endif /* !__ASSEMBLY__ */
-
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTE_SAME
 #include <asm-generic/pgtable.h>
+
+#endif /* !__ASSEMBLY__ */
 
 #endif /* _PPC_PGTABLE_H */
 #endif /* __KERNEL__ */
diff -Nru a/include/asm-ppc64/pgalloc.h b/include/asm-ppc64/pgalloc.h
--- a/include/asm-ppc64/pgalloc.h	2005-02-23 19:33:46 -08:00
+++ b/include/asm-ppc64/pgalloc.h	2005-02-23 19:33:46 -08:00
@@ -48,42 +48,26 @@
 #define pmd_populate(mm, pmd, pte_page) \
 	pmd_populate_kernel(mm, pmd, page_address(pte_page))
 
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte;
-	pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
-	if (pte) {
-		struct page *ptepage = virt_to_page(pte);
-		ptepage->mapping = (void *) mm;
-		ptepage->index = address & PMD_MASK;
-	}
-	return pte;
+	return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
 }
 
-static inline struct page *
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte;
-	pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
-	if (pte) {
-		struct page *ptepage = virt_to_page(pte);
-		ptepage->mapping = (void *) mm;
-		ptepage->index = address & PMD_MASK;
-		return ptepage;
-	}
+	pte_t *pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
+	if (pte)
+		return virt_to_page(pte);
 	return NULL;
 }
 		
 static inline void pte_free_kernel(pte_t *pte)
 {
-	virt_to_page(pte)->mapping = NULL;
 	kmem_cache_free(zero_cache, pte);
 }
 
 static inline void pte_free(struct page *ptepage)
 {
-	ptepage->mapping = NULL;
 	kmem_cache_free(zero_cache, page_address(ptepage));
 }
 
diff -Nru a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h
--- a/include/asm-ppc64/pgtable.h	2005-02-23 19:33:46 -08:00
+++ b/include/asm-ppc64/pgtable.h	2005-02-23 19:33:46 -08:00
@@ -315,9 +315,10 @@
  * batch, doesn't actually triggers the hash flush immediately,
  * you need to call flush_tlb_pending() to do that.
  */
-extern void hpte_update(pte_t *ptep, unsigned long pte, int wrprot);
+extern void hpte_update(struct mm_struct *mm, unsigned long addr, unsigned long pte,
+			int wrprot);
 
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	unsigned long old;
 
@@ -325,18 +326,25 @@
 		return 0;
 	old = pte_update(ptep, _PAGE_ACCESSED);
 	if (old & _PAGE_HASHPTE) {
-		hpte_update(ptep, old, 0);
+		hpte_update(mm, addr, old, 0);
 		flush_tlb_pending();
 	}
 	return (old & _PAGE_ACCESSED) != 0;
 }
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young(__vma, __addr, __ptep)		   \
+({									   \
+	int __r;							   \
+	__r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
+	__r;								   \
+})
 
 /*
  * On RW/DIRTY bit transitions we can avoid flushing the hpte. For the
  * moment we always flush but we need to fix hpte_update and test if the
  * optimisation is worth it.
  */
-static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	unsigned long old;
 
@@ -344,10 +352,18 @@
 		return 0;
 	old = pte_update(ptep, _PAGE_DIRTY);
 	if (old & _PAGE_HASHPTE)
-		hpte_update(ptep, old, 0);
+		hpte_update(mm, addr, old, 0);
 	return (old & _PAGE_DIRTY) != 0;
 }
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+#define ptep_test_and_clear_dirty(__vma, __addr, __ptep)		   \
+({									   \
+	int __r;							   \
+	__r = __ptep_test_and_clear_dirty((__vma)->vm_mm, __addr, __ptep); \
+	__r;								   \
+})
 
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	unsigned long old;
@@ -356,7 +372,7 @@
        		return;
 	old = pte_update(ptep, _PAGE_RW);
 	if (old & _PAGE_HASHPTE)
-		hpte_update(ptep, old, 0);
+		hpte_update(mm, addr, old, 0);
 }
 
 /*
@@ -370,26 +386,27 @@
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 #define ptep_clear_flush_young(__vma, __address, __ptep)		\
 ({									\
-	int __young;							\
-	__young = ptep_test_and_clear_young(__vma, __address, __ptep);	\
+	int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \
+						  __ptep);		\
 	__young;							\
 })
 
 #define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
 #define ptep_clear_flush_dirty(__vma, __address, __ptep)		\
 ({									\
-	int __dirty;							\
-	__dirty = ptep_test_and_clear_dirty(__vma, __address, __ptep);	\
+	int __dirty = __ptep_test_and_clear_dirty((__vma)->vm_mm, __address, \
+						  __ptep); 		\
 	flush_tlb_page(__vma, __address);				\
 	__dirty;							\
 })
 
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	unsigned long old = pte_update(ptep, ~0UL);
 
 	if (old & _PAGE_HASHPTE)
-		hpte_update(ptep, old, 0);
+		hpte_update(mm, addr, old, 0);
 	return __pte(old);
 }
 
@@ -398,7 +415,7 @@
 	unsigned long old = pte_update(ptep, ~0UL);
 
 	if (old & _PAGE_HASHPTE)
-		hpte_update(ptep, old, 0);
+		hpte_update(mm, addr, old, 0);
 }
 
 /*
@@ -446,6 +463,7 @@
  */
 #define pgprot_noncached(prot)	(__pgprot(pgprot_val(prot) | _PAGE_NO_CACHE | _PAGE_GUARDED))
 
+#define __HAVE_ARCH_PTE_SAME
 #define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
 
 extern unsigned long ioremap_bot, ioremap_base;
@@ -553,14 +571,8 @@
 	return pt;
 }
 
-#endif /* __ASSEMBLY__ */
-
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTEP_MKDIRTY
-#define __HAVE_ARCH_PTE_SAME
 #include <asm-generic/pgtable.h>
+
+#endif /* __ASSEMBLY__ */
 
 #endif /* _PPC64_PGTABLE_H */

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] set_pte() part 2 arch usage
  2005-02-24  4:07 [PATCH] set_pte() part 2 arch usage David S. Miller
@ 2005-02-24  5:18 ` Benjamin Herrenschmidt
  2005-02-24 22:36   ` David S. Miller
  0 siblings, 1 reply; 16+ messages in thread
From: Benjamin Herrenschmidt @ 2005-02-24  5:18 UTC (permalink / raw)
  To: David S. Miller; +Cc: Linux Arch list

On Wed, 2005-02-23 at 20:07 -0800, David S. Miller wrote:
> Ok, here is the promised second installment.  This changes
> sparc64 (first attachment) and ppc{32,64} (second attachment)
> over to take advantage of the new set_pte_at() mm+address
> arguments.
> 
> I've build and run tested the sparc64 version.  I have only
> build tested the ppc32 and ppc64 changes.  Ben, can you give
> them a spin?

Seems to be happy on ppc32, I'll try ppc64 asap.

Ben.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] set_pte() part 2 arch usage
  2005-02-24  5:18 ` Benjamin Herrenschmidt
@ 2005-02-24 22:36   ` David S. Miller
  2005-02-25  5:32     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 16+ messages in thread
From: David S. Miller @ 2005-02-24 22:36 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linux-arch

On Thu, 24 Feb 2005 16:18:48 +1100
Benjamin Herrenschmidt <benh@au1.ibm.com> wrote:

> Seems to be happy on ppc32, I'll try ppc64 asap.

Thanks for testing, let me know how ppc64 goes.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] set_pte() part 2 arch usage
  2005-02-24 22:36   ` David S. Miller
@ 2005-02-25  5:32     ` Benjamin Herrenschmidt
  2005-02-25  7:20       ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 16+ messages in thread
From: Benjamin Herrenschmidt @ 2005-02-25  5:32 UTC (permalink / raw)
  To: David S. Miller; +Cc: Linux Arch list

On Thu, 2005-02-24 at 14:36 -0800, David S. Miller wrote:
> On Thu, 24 Feb 2005 16:18:48 +1100
> Benjamin Herrenschmidt <benh@au1.ibm.com> wrote:
> 
> > Seems to be happy on ppc32, I'll try ppc64 asap.
> 
> Thanks for testing, let me know how ppc64 goes.

Looks fine. Didn't stress much yet tho, but certainly good enough for
early 2.6.12, we can always push further fixes in if necessary.

Ben.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] set_pte() part 2 arch usage
  2005-02-25  5:32     ` Benjamin Herrenschmidt
@ 2005-02-25  7:20       ` Benjamin Herrenschmidt
  2005-02-25  7:57         ` David S. Miller
  2005-02-25 18:37         ` David S. Miller
  0 siblings, 2 replies; 16+ messages in thread
From: Benjamin Herrenschmidt @ 2005-02-25  7:20 UTC (permalink / raw)
  To: David S. Miller; +Cc: Linux Arch list

On Fri, 2005-02-25 at 16:32 +1100, Benjamin Herrenschmidt wrote:
> On Thu, 2005-02-24 at 14:36 -0800, David S. Miller wrote:
> > On Thu, 24 Feb 2005 16:18:48 +1100
> > Benjamin Herrenschmidt <benh@au1.ibm.com> wrote:
> > 
> > > Seems to be happy on ppc32, I'll try ppc64 asap.
> > 
> > Thanks for testing, let me know how ppc64 goes.
> 
> Looks fine. Didn't stress much yet tho, but certainly good enough for
> early 2.6.12, we can always push further fixes in if necessary.

Ok, LTP dies on POWER5, investigation in progress ;)

Ben.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] set_pte() part 2 arch usage
  2005-02-25  7:20       ` Benjamin Herrenschmidt
@ 2005-02-25  7:57         ` David S. Miller
  2005-02-25 18:37         ` David S. Miller
  1 sibling, 0 replies; 16+ messages in thread
From: David S. Miller @ 2005-02-25  7:57 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linux-arch

On Fri, 25 Feb 2005 18:20:55 +1100
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:

> Ok, LTP dies on POWER5, investigation in progress ;)

OK, FWIW I did a full LTP run on sparc64 with the changes
installed.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] set_pte() part 2 arch usage
  2005-02-25  7:20       ` Benjamin Herrenschmidt
  2005-02-25  7:57         ` David S. Miller
@ 2005-02-25 18:37         ` David S. Miller
  2005-02-26  0:56           ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 16+ messages in thread
From: David S. Miller @ 2005-02-25 18:37 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linux-arch

On Fri, 25 Feb 2005 18:20:55 +1100
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:

> Ok, LTP dies on POWER5, investigation in progress ;)

I bet the address arg is incorrect in some case.  It is easy
to add debugging for this, for example in your set_pte_at()
implementation you can do something like:

static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
			      pte_t *ptep, pte_t pte)
{
#ifdef DEBUG_SET_PTE_AT
	{
		pgd_t *pgd_check = pgd_offset(mm, addr);
		pud_t *pud_check = pud_offset(pgd_check, addr);
		pmd_t *pmd_check = pmd_offset(pud_check, addr);
		pte_t *pte_check = pte_offset(pmd_check, addr);

		BUG_ON(pte_check != ptep);
	}
#endif
}

BUG_ON() may hinder debugging if you hit a range of such
incorrect calculations, so maybe a single printk which
just prints __builtin_return_address(0) or current_text_addr()
as well.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] set_pte() part 2 arch usage
  2005-02-25 18:37         ` David S. Miller
@ 2005-02-26  0:56           ` Benjamin Herrenschmidt
  2005-02-27  3:09             ` David S. Miller
  0 siblings, 1 reply; 16+ messages in thread
From: Benjamin Herrenschmidt @ 2005-02-26  0:56 UTC (permalink / raw)
  To: David S. Miller; +Cc: Linux Arch list

On Fri, 2005-02-25 at 10:37 -0800, David S. Miller wrote:
> On Fri, 25 Feb 2005 18:20:55 +1100
> Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:
> 
> > Ok, LTP dies on POWER5, investigation in progress ;)
> 
> I bet the address arg is incorrect in some case.

Yes, probably. That's the same symptoms we had when zeromap_pud_range
had the bug getting the address wrong, which means we fail to properly
flush the hash & TLB for this PTE. I'm not at work (it's sat. already
here :) but I'll have a look asap.

> It is easy
> to add debugging for this, for example in your set_pte_at()
> implementation you can do something like:
> 
> static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> 			      pte_t *ptep, pte_t pte)
> {
> #ifdef DEBUG_SET_PTE_AT
> 	{
> 		pgd_t *pgd_check = pgd_offset(mm, addr);
> 		pud_t *pud_check = pud_offset(pgd_check, addr);
> 		pmd_t *pmd_check = pmd_offset(pud_check, addr);
> 		pte_t *pte_check = pte_offset(pmd_check, addr);
> 
> 		BUG_ON(pte_check != ptep);
> 	}
> #endif
> }

Good idea.

> BUG_ON() may hinder debugging if you hit a range of such
> incorrect calculations, so maybe a single printk which
> just prints __builtin_return_address(0) or current_text_addr()
> as well.

Nah, just xmon(NULL); :=)

Ben.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] set_pte() part 2 arch usage
  2005-02-26  0:56           ` Benjamin Herrenschmidt
@ 2005-02-27  3:09             ` David S. Miller
  2005-02-27  5:19               ` David S. Miller
  2005-02-27  9:35               ` Benjamin Herrenschmidt
  0 siblings, 2 replies; 16+ messages in thread
From: David S. Miller @ 2005-02-27  3:09 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linux-arch

On Sat, 26 Feb 2005 11:56:09 +1100
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:

> On Fri, 2005-02-25 at 10:37 -0800, David S. Miller wrote:
> > I bet the address arg is incorrect in some case.
> 
> Yes, probably. That's the same symptoms we had when zeromap_pud_range
> had the bug getting the address wrong, which means we fail to properly
> flush the hash & TLB for this PTE. I'm not at work (it's sat. already
> here :) but I'll have a look asap.

I added the debugging I suggested and did some runs on sparc64.

There are many pte level looping constructs of the form:

	pte = ...(pmd, address);
	address &= ~PMD_MASK;
	end = address + size;
	if (end > PMD_SIZE)
		end = PMD_SIZE;

	some_loop() {
		...
		set_pte_at(mm, address, pte);
		address += PAGE_SIZE;
		pte++;
		...
	}

This "address" mask screws everything up.

I know of at least three such cases so far, vmalloc.c:unmap_area_pte(),
vmalloc.c:map_area_pte(), and mprotect.c:change_pte_range()

The latter could definitely explain the behavior you are seeing on
ppc64.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] set_pte() part 2 arch usage
  2005-02-27  3:09             ` David S. Miller
@ 2005-02-27  5:19               ` David S. Miller
  2005-03-01  1:45                 ` Benjamin Herrenschmidt
  2005-03-01  2:22                 ` Benjamin Herrenschmidt
  2005-02-27  9:35               ` Benjamin Herrenschmidt
  1 sibling, 2 replies; 16+ messages in thread
From: David S. Miller @ 2005-02-27  5:19 UTC (permalink / raw)
  To: David S. Miller; +Cc: benh, linux-arch

On Sat, 26 Feb 2005 19:09:43 -0800
"David S. Miller" <davem@davemloft.net> wrote:

> I know of at least three such cases so far, vmalloc.c:unmap_area_pte(),
> vmalloc.c:map_area_pte(), and mprotect.c:change_pte_range()
> 
> The latter could definitely explain the behavior you are seeing on
> ppc64.

Ok, here's a fix, let me know if it makes the PPC64 problems
go away Ben.

Thanks.

===== mm/memory.c 1.224 vs edited =====
--- 1.224/mm/memory.c	2005-02-23 15:40:53 -08:00
+++ edited/mm/memory.c	2005-02-26 20:12:21 -08:00
@@ -992,16 +992,17 @@
 			      unsigned long address,
 			      unsigned long size, pgprot_t prot)
 {
-	unsigned long end;
+	unsigned long base, end;
 
+	base = address & PMD_MASK;
 	address &= ~PMD_MASK;
 	end = address + size;
 	if (end > PMD_SIZE)
 		end = PMD_SIZE;
 	do {
-		pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
+		pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(base+address), prot));
 		BUG_ON(!pte_none(*pte));
-		set_pte_at(mm, address, pte, zero_pte);
+		set_pte_at(mm, base+address, pte, zero_pte);
 		address += PAGE_SIZE;
 		pte++;
 	} while (address && (address < end));
@@ -1106,8 +1107,9 @@
 		unsigned long address, unsigned long size,
 		unsigned long pfn, pgprot_t prot)
 {
-	unsigned long end;
+	unsigned long base, end;
 
+	base = address & PMD_MASK;
 	address &= ~PMD_MASK;
 	end = address + size;
 	if (end > PMD_SIZE)
@@ -1115,7 +1117,7 @@
 	do {
 		BUG_ON(!pte_none(*pte));
 		if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
-			set_pte_at(mm, address, pte, pfn_pte(pfn, prot));
+			set_pte_at(mm, base+address, pte, pfn_pte(pfn, prot));
 		address += PAGE_SIZE;
 		pfn++;
 		pte++;
===== mm/mprotect.c 1.41 vs edited =====
--- 1.41/mm/mprotect.c	2005-02-23 15:40:53 -08:00
+++ edited/mm/mprotect.c	2005-02-26 20:40:27 -08:00
@@ -30,7 +30,7 @@
 		unsigned long size, pgprot_t newprot)
 {
 	pte_t * pte;
-	unsigned long end;
+	unsigned long base, end;
 
 	if (pmd_none(*pmd))
 		return;
@@ -40,6 +40,7 @@
 		return;
 	}
 	pte = pte_offset_map(pmd, address);
+	base = address & PMD_MASK;
 	address &= ~PMD_MASK;
 	end = address + size;
 	if (end > PMD_SIZE)
@@ -52,8 +53,8 @@
 			 * bits by wiping the pte and then setting the new pte
 			 * into place.
 			 */
-			entry = ptep_get_and_clear(mm, address, pte);
-			set_pte_at(mm, address, pte, pte_modify(entry, newprot));
+			entry = ptep_get_and_clear(mm, base + address, pte);
+			set_pte_at(mm, base + address, pte, pte_modify(entry, newprot));
 		}
 		address += PAGE_SIZE;
 		pte++;
@@ -66,7 +67,7 @@
 		 unsigned long size, pgprot_t newprot)
 {
 	pmd_t * pmd;
-	unsigned long end;
+	unsigned long base, end;
 
 	if (pud_none(*pud))
 		return;
@@ -76,12 +77,13 @@
 		return;
 	}
 	pmd = pmd_offset(pud, address);
+	base = address & PUD_MASK;
 	address &= ~PUD_MASK;
 	end = address + size;
 	if (end > PUD_SIZE)
 		end = PUD_SIZE;
 	do {
-		change_pte_range(mm, pmd, address, end - address, newprot);
+		change_pte_range(mm, pmd, base + address, end - address, newprot);
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address && (address < end));
@@ -92,7 +94,7 @@
 		 unsigned long size, pgprot_t newprot)
 {
 	pud_t * pud;
-	unsigned long end;
+	unsigned long base, end;
 
 	if (pgd_none(*pgd))
 		return;
@@ -102,12 +104,13 @@
 		return;
 	}
 	pud = pud_offset(pgd, address);
+	base = address & PGDIR_MASK;
 	address &= ~PGDIR_MASK;
 	end = address + size;
 	if (end > PGDIR_SIZE)
 		end = PGDIR_SIZE;
 	do {
-		change_pmd_range(mm, pud, address, end - address, newprot);
+		change_pmd_range(mm, pud, base + address, end - address, newprot);
 		address = (address + PUD_SIZE) & PUD_MASK;
 		pud++;
 	} while (address && (address < end));
===== mm/vmalloc.c 1.41 vs edited =====
--- 1.41/mm/vmalloc.c	2005-02-23 15:40:53 -08:00
+++ edited/mm/vmalloc.c	2005-02-26 20:18:44 -08:00
@@ -26,7 +26,7 @@
 static void unmap_area_pte(pmd_t *pmd, unsigned long address,
 				  unsigned long size)
 {
-	unsigned long end;
+	unsigned long base, end;
 	pte_t *pte;
 
 	if (pmd_none(*pmd))
@@ -38,6 +38,7 @@
 	}
 
 	pte = pte_offset_kernel(pmd, address);
+	base = address & PMD_MASK;
 	address &= ~PMD_MASK;
 	end = address + size;
 	if (end > PMD_SIZE)
@@ -45,7 +46,7 @@
 
 	do {
 		pte_t page;
-		page = ptep_get_and_clear(&init_mm, address, pte);
+		page = ptep_get_and_clear(&init_mm, base + address, pte);
 		address += PAGE_SIZE;
 		pte++;
 		if (pte_none(page))
@@ -59,7 +60,7 @@
 static void unmap_area_pmd(pud_t *pud, unsigned long address,
 				  unsigned long size)
 {
-	unsigned long end;
+	unsigned long base, end;
 	pmd_t *pmd;
 
 	if (pud_none(*pud))
@@ -71,13 +72,14 @@
 	}
 
 	pmd = pmd_offset(pud, address);
+	base = address & PUD_MASK;
 	address &= ~PUD_MASK;
 	end = address + size;
 	if (end > PUD_SIZE)
 		end = PUD_SIZE;
 
 	do {
-		unmap_area_pte(pmd, address, end - address);
+		unmap_area_pte(pmd, base + address, end - address);
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;
 	} while (address < end);
@@ -87,7 +89,7 @@
 			   unsigned long size)
 {
 	pud_t *pud;
-	unsigned long end;
+	unsigned long base, end;
 
 	if (pgd_none(*pgd))
 		return;
@@ -98,13 +100,14 @@
 	}
 
 	pud = pud_offset(pgd, address);
+	base = address & PGDIR_MASK;
 	address &= ~PGDIR_MASK;
 	end = address + size;
 	if (end > PGDIR_SIZE)
 		end = PGDIR_SIZE;
 
 	do {
-		unmap_area_pmd(pud, address, end - address);
+		unmap_area_pmd(pud, base + address, end - address);
 		address = (address + PUD_SIZE) & PUD_MASK;
 		pud++;
 	} while (address && (address < end));
@@ -114,8 +117,9 @@
 			       unsigned long size, pgprot_t prot,
 			       struct page ***pages)
 {
-	unsigned long end;
+	unsigned long base, end;
 
+	base = address & PMD_MASK;
 	address &= ~PMD_MASK;
 	end = address + size;
 	if (end > PMD_SIZE)
@@ -127,7 +131,7 @@
 		if (!page)
 			return -ENOMEM;
 
-		set_pte_at(&init_mm, address, pte, mk_pte(page, prot));
+		set_pte_at(&init_mm, base + address, pte, mk_pte(page, prot));
 		address += PAGE_SIZE;
 		pte++;
 		(*pages)++;
@@ -151,7 +155,7 @@
 		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, base + address);
 		if (!pte)
 			return -ENOMEM;
-		if (map_area_pte(pte, address, end - address, prot, pages))
+		if (map_area_pte(pte, base + address, end - address, prot, pages))
 			return -ENOMEM;
 		address = (address + PMD_SIZE) & PMD_MASK;
 		pmd++;

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] set_pte() part 2 arch usage
  2005-02-27  3:09             ` David S. Miller
  2005-02-27  5:19               ` David S. Miller
@ 2005-02-27  9:35               ` Benjamin Herrenschmidt
  2005-02-28  4:14                 ` David S. Miller
  1 sibling, 1 reply; 16+ messages in thread
From: Benjamin Herrenschmidt @ 2005-02-27  9:35 UTC (permalink / raw)
  To: David S. Miller; +Cc: Linux Arch list

On Sat, 2005-02-26 at 19:09 -0800, David S. Miller wrote:
> .../...
> There are many pte level looping constructs of the form:
> 
> 	pte = ...(pmd, address);
> 	address &= ~PMD_MASK;
> 	end = address + size;
> 	if (end > PMD_SIZE)
> 		end = PMD_SIZE;
> 
> 	some_loop() {
> 		...
> 		set_pte_at(mm, address, pte);
> 		address += PAGE_SIZE;
> 		pte++;
> 		...
> 	}
> 
> This "address" mask screws everything up.
> 
> I know of at least three such cases so far, vmalloc.c:unmap_area_pte(),
> vmalloc.c:map_area_pte(), and mprotect.c:change_pte_range()
> 
> The latter could definitely explain the behavior you are seeing on
> ppc64.

Indeed. Sorry, didn't have a chance to test myself yet here, but that's
definitely a bad one. Note that some of those functions use an address +
base (which would work, I suspect that was a later fix to the ones that
do pte_alloc etc...) and some use yet another scheme where the proper
address is passed along (the ones precalculating "next"). I prefer the
later version.

That's just another argument for having all these sharing common code,
even if it's a bit bulky macro at first, the current state of affairs is
just too prone to fail in weird ways.

Ben.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] set_pte() part 2 arch usage
  2005-02-27  9:35               ` Benjamin Herrenschmidt
@ 2005-02-28  4:14                 ` David S. Miller
  0 siblings, 0 replies; 16+ messages in thread
From: David S. Miller @ 2005-02-28  4:14 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linux-arch

On Sun, 27 Feb 2005 20:35:57 +1100
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:

> That's just another argument for having all these sharing common code,
> even if it's a bit bulky macro at first, the current state of affairs is
> just too prone to fail in weird ways.

Yep, the current situation is a joke.  That's why any arguments
against consolidation I hear will fall on deaf ears.  I'm really
tired of fixing bugs like these 5 or 6 times over instead of
once.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] set_pte() part 2 arch usage
  2005-02-27  5:19               ` David S. Miller
@ 2005-03-01  1:45                 ` Benjamin Herrenschmidt
  2005-03-01 23:02                   ` David S. Miller
  2005-03-01  2:22                 ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 16+ messages in thread
From: Benjamin Herrenschmidt @ 2005-03-01  1:45 UTC (permalink / raw)
  To: David S. Miller; +Cc: Linux Arch list

On Sat, 2005-02-26 at 21:19 -0800, David S. Miller wrote:
> On Sat, 26 Feb 2005 19:09:43 -0800
> "David S. Miller" <davem@davemloft.net> wrote:
> 
> > I know of at least three such cases so far, vmalloc.c:unmap_area_pte(),
> > vmalloc.c:map_area_pte(), and mprotect.c:change_pte_range()
> > 
> > The latter could definitely explain the behavior you are seeing on
> > ppc64.
> 
> Ok, here's a fix, let me know if it makes the PPC64 problems
> go away Ben.

Test machine has been LTP'ing happily over night with that patch.

Ben.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] set_pte() part 2 arch usage
  2005-02-27  5:19               ` David S. Miller
  2005-03-01  1:45                 ` Benjamin Herrenschmidt
@ 2005-03-01  2:22                 ` Benjamin Herrenschmidt
  2005-03-01 23:32                   ` David S. Miller
  1 sibling, 1 reply; 16+ messages in thread
From: Benjamin Herrenschmidt @ 2005-03-01  2:22 UTC (permalink / raw)
  To: David S. Miller; +Cc: Linux Arch list

And here's an updated version of your 3rd patch to make it apply on top
of current bk (a small fix to hpte_update broke it).

Index: linux-work/arch/ppc/kernel/dma-mapping.c
===================================================================
--- linux-work.orig/arch/ppc/kernel/dma-mapping.c	2005-03-01 12:50:50.000000000 +1100
+++ linux-work/arch/ppc/kernel/dma-mapping.c	2005-03-01 12:50:55.000000000 +1100
@@ -219,7 +219,8 @@
 	c = vm_region_alloc(&consistent_head, size,
 			    gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
 	if (c) {
-		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
+		unsigned long vaddr = c->vm_start;
+		pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
 		struct page *end = page + (1 << order);
 
 		/*
@@ -232,9 +233,11 @@
 
 			set_page_count(page, 1);
 			SetPageReserved(page);
-			set_pte(pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL)));
+			set_pte_at(&init_mm, vaddr,
+				   pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL)));
 			page++;
 			pte++;
+			vaddr += PAGE_SIZE;
 		} while (size -= PAGE_SIZE);
 
 		/*
Index: linux-work/arch/ppc/mm/init.c
===================================================================
--- linux-work.orig/arch/ppc/mm/init.c	2005-03-01 12:50:50.000000000 +1100
+++ linux-work/arch/ppc/mm/init.c	2005-03-01 12:50:55.000000000 +1100
@@ -490,18 +490,6 @@
 		printk(KERN_INFO "AGP special page: 0x%08lx\n", agp_special_page);
 #endif
 
-	/* Make sure all our pagetable pages have page->mapping
-	   and page->index set correctly. */
-	for (addr = KERNELBASE; addr != 0; addr += PGDIR_SIZE) {
-		struct page *pg;
-		pmd_t *pmd = pmd_offset(pgd_offset_k(addr), addr);
-		if (pmd_present(*pmd)) {
-			pg = pmd_page(*pmd);
-			pg->mapping = (void *) &init_mm;
-			pg->index = addr;
-		}
-	}
-
 	mem_init_done = 1;
 }
 
Index: linux-work/arch/ppc/mm/pgtable.c
===================================================================
--- linux-work.orig/arch/ppc/mm/pgtable.c	2005-03-01 12:50:50.000000000 +1100
+++ linux-work/arch/ppc/mm/pgtable.c	2005-03-01 12:50:55.000000000 +1100
@@ -102,11 +102,6 @@
 
 	if (mem_init_done) {
 		pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-		if (pte) {
-			struct page *ptepage = virt_to_page(pte);
-			ptepage->mapping = (void *) mm;
-			ptepage->index = address & PMD_MASK;
-		}
 	} else {
 		pte = (pte_t *)early_get_page();
 		if (pte)
@@ -126,11 +121,8 @@
 #endif
 
 	ptepage = alloc_pages(flags, 0);
-	if (ptepage) {
-		ptepage->mapping = (void *) mm;
-		ptepage->index = address & PMD_MASK;
+	if (ptepage)
 		clear_highpage(ptepage);
-	}
 	return ptepage;
 }
 
@@ -139,7 +131,6 @@
 #ifdef CONFIG_SMP
 	hash_page_sync();
 #endif
-	virt_to_page(pte)->mapping = NULL;
 	free_page((unsigned long)pte);
 }
 
@@ -148,7 +139,6 @@
 #ifdef CONFIG_SMP
 	hash_page_sync();
 #endif
-	ptepage->mapping = NULL;
 	__free_page(ptepage);
 }
 
@@ -298,7 +288,7 @@
 	pg = pte_alloc_kernel(&init_mm, pd, va);
 	if (pg != 0) {
 		err = 0;
-		set_pte(pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
+		set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags)));
 		if (mem_init_done)
 			flush_HPTE(0, va, pmd_val(*pd));
 	}
Index: linux-work/arch/ppc/mm/tlb.c
===================================================================
--- linux-work.orig/arch/ppc/mm/tlb.c	2005-03-01 12:50:50.000000000 +1100
+++ linux-work/arch/ppc/mm/tlb.c	2005-03-01 12:50:55.000000000 +1100
@@ -47,26 +47,6 @@
 }
 
 /*
- * Called by ptep_test_and_clear_young()
- */
-void flush_hash_one_pte(pte_t *ptep)
-{
-	struct page *ptepage;
-	struct mm_struct *mm;
-	unsigned long ptephys;
-	unsigned long addr;
-
-	if (Hash == 0)
-		return;
-	
-	ptepage = virt_to_page(ptep);
-	mm = (struct mm_struct *) ptepage->mapping;
-	ptephys = __pa(ptep) & PAGE_MASK;
-	addr = ptepage->index + (((unsigned long)ptep & ~PAGE_MASK) << 10);
-	flush_hash_pages(mm->context, addr, ptephys, 1);
-}
-
-/*
  * Called by ptep_set_access_flags, must flush on CPUs for which the
  * DSI handler can't just "fixup" the TLB on a write fault
  */
Index: linux-work/arch/ppc64/mm/tlb.c
===================================================================
--- linux-work.orig/arch/ppc64/mm/tlb.c	2005-03-01 12:50:50.000000000 +1100
+++ linux-work/arch/ppc64/mm/tlb.c	2005-03-01 12:50:55.000000000 +1100
@@ -74,24 +74,13 @@
  * change the existing HPTE to read-only rather than removing it
  * (if we remove it we should clear the _PTE_HPTEFLAGS bits).
  */
-void hpte_update(pte_t *ptep, unsigned long pte, int wrprot)
+ void hpte_update(struct mm_struct *mm, unsigned long addr,
+		  unsigned long pte, int wrprot)
 {
-	struct page *ptepage;
-	struct mm_struct *mm;
-	unsigned long addr;
 	int i;
 	unsigned long context = 0;
 	struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
 
-	ptepage = virt_to_page(ptep);
-	mm = (struct mm_struct *) ptepage->mapping;
-	addr = ptepage->index;
-	if (pte_huge(pte))
-		addr +=  ((unsigned long)ptep & ~PAGE_MASK)
-			/ sizeof(*ptep) * HPAGE_SIZE;
-	else
-		addr += ((unsigned long)ptep & ~PAGE_MASK) * PTRS_PER_PTE;
-
 	if (REGION_ID(addr) == USER_REGION_ID)
 		context = mm->context.id;
 	i = batch->index;
Index: linux-work/include/asm-ppc/highmem.h
===================================================================
--- linux-work.orig/include/asm-ppc/highmem.h	2005-03-01 12:50:50.000000000 +1100
+++ linux-work/include/asm-ppc/highmem.h	2005-03-01 12:50:55.000000000 +1100
@@ -90,7 +90,7 @@
 #ifdef HIGHMEM_DEBUG
 	BUG_ON(!pte_none(*(kmap_pte+idx)));
 #endif
-	set_pte(kmap_pte+idx, mk_pte(page, kmap_prot));
+	set_pte_at(&init_mm, vaddr, kmap_pte+idx, mk_pte(page, kmap_prot));
 	flush_tlb_page(NULL, vaddr);
 
 	return (void*) vaddr;
Index: linux-work/include/asm-ppc/pgtable.h
===================================================================
--- linux-work.orig/include/asm-ppc/pgtable.h	2005-03-01 12:50:50.000000000 +1100
+++ linux-work/include/asm-ppc/pgtable.h	2005-03-01 12:50:55.000000000 +1100
@@ -512,6 +512,17 @@
 }
 
 /*
+ * When flushing the tlb entry for a page, we also need to flush the hash
+ * table entry.  flush_hash_pages is assembler (for speed) in hashtable.S.
+ */
+extern int flush_hash_pages(unsigned context, unsigned long va,
+			    unsigned long pmdval, int count);
+
+/* Add an HPTE to the hash table */
+extern void add_hash_page(unsigned context, unsigned long va,
+			  unsigned long pmdval);
+
+/*
  * Atomic PTE updates.
  *
  * pte_update clears and sets bit atomically, and returns
@@ -542,7 +553,8 @@
  * On machines which use an MMU hash table we avoid changing the
  * _PAGE_HASHPTE bit.
  */
-static inline void set_pte(pte_t *ptep, pte_t pte)
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep, pte_t pte)
 {
 #if _PAGE_HASHPTE != 0
 	pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte) & ~_PAGE_HASHPTE);
@@ -550,36 +562,44 @@
 	*ptep = pte;
 #endif
 }
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
-
-extern void flush_hash_one_pte(pte_t *ptep);
 
 /*
  * 2.6 calles this without flushing the TLB entry, this is wrong
  * for our hash-based implementation, we fix that up here
  */
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep)
 {
 	unsigned long old;
 	old = pte_update(ptep, _PAGE_ACCESSED, 0);
 #if _PAGE_HASHPTE != 0
-	if (old & _PAGE_HASHPTE)
-		flush_hash_one_pte(ptep);
+	if (old & _PAGE_HASHPTE) {
+		unsigned long ptephys = __pa(ptep) & PAGE_MASK;
+		flush_hash_pages(context, addr, ptephys, 1);
+	}
 #endif
 	return (old & _PAGE_ACCESSED) != 0;
 }
+#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
+	__ptep_test_and_clear_young((__vma)->vm_mm->context, __addr, __ptep)
 
-static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma,
+					    unsigned long addr, pte_t *ptep)
 {
 	return (pte_update(ptep, (_PAGE_DIRTY | _PAGE_HWWRITE), 0) & _PAGE_DIRTY) != 0;
 }
 
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+				       pte_t *ptep)
 {
 	return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0));
 }
 
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pte_t *ptep)
 {
 	pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), 0);
 }
@@ -603,6 +623,7 @@
  */
 #define pgprot_noncached(prot)	(__pgprot(pgprot_val(prot) | _PAGE_NO_CACHE | _PAGE_GUARDED))
 
+#define __HAVE_ARCH_PTE_SAME
 #define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)
 
 /*
@@ -655,17 +676,6 @@
 extern void paging_init(void);
 
 /*
- * When flushing the tlb entry for a page, we also need to flush the hash
- * table entry.  flush_hash_pages is assembler (for speed) in hashtable.S.
- */
-extern int flush_hash_pages(unsigned context, unsigned long va,
-			    unsigned long pmdval, int count);
-
-/* Add an HPTE to the hash table */
-extern void add_hash_page(unsigned context, unsigned long va,
-			  unsigned long pmdval);
-
-/*
  * Encode and decode a swap entry.
  * Note that the bits we use in a PTE for representing a swap entry
  * must not include the _PAGE_PRESENT bit, the _PAGE_FILE bit, or the
@@ -737,14 +747,9 @@
 
 extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep);
 
-#endif /* !__ASSEMBLY__ */
-
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTE_SAME
 #include <asm-generic/pgtable.h>
 
+#endif /* !__ASSEMBLY__ */
+
 #endif /* _PPC_PGTABLE_H */
 #endif /* __KERNEL__ */
Index: linux-work/include/asm-ppc64/pgalloc.h
===================================================================
--- linux-work.orig/include/asm-ppc64/pgalloc.h	2005-03-01 12:50:50.000000000 +1100
+++ linux-work/include/asm-ppc64/pgalloc.h	2005-03-01 12:50:55.000000000 +1100
@@ -48,42 +48,26 @@
 #define pmd_populate(mm, pmd, pte_page) \
 	pmd_populate_kernel(mm, pmd, page_address(pte_page))
 
-static inline pte_t *
-pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte;
-	pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
-	if (pte) {
-		struct page *ptepage = virt_to_page(pte);
-		ptepage->mapping = (void *) mm;
-		ptepage->index = address & PMD_MASK;
-	}
-	return pte;
+	return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
 }
 
-static inline struct page *
-pte_alloc_one(struct mm_struct *mm, unsigned long address)
+static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
 {
-	pte_t *pte;
-	pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
-	if (pte) {
-		struct page *ptepage = virt_to_page(pte);
-		ptepage->mapping = (void *) mm;
-		ptepage->index = address & PMD_MASK;
-		return ptepage;
-	}
+	pte_t *pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT);
+	if (pte)
+		return virt_to_page(pte);
 	return NULL;
 }
 		
 static inline void pte_free_kernel(pte_t *pte)
 {
-	virt_to_page(pte)->mapping = NULL;
 	kmem_cache_free(zero_cache, pte);
 }
 
 static inline void pte_free(struct page *ptepage)
 {
-	ptepage->mapping = NULL;
 	kmem_cache_free(zero_cache, page_address(ptepage));
 }
 
Index: linux-work/include/asm-ppc64/pgtable.h
===================================================================
--- linux-work.orig/include/asm-ppc64/pgtable.h	2005-03-01 12:50:50.000000000 +1100
+++ linux-work/include/asm-ppc64/pgtable.h	2005-03-01 12:50:55.000000000 +1100
@@ -315,9 +315,10 @@
  * batch, doesn't actually triggers the hash flush immediately,
  * you need to call flush_tlb_pending() to do that.
  */
-extern void hpte_update(pte_t *ptep, unsigned long pte, int wrprot);
+extern void hpte_update(struct mm_struct *mm, unsigned long addr, unsigned long pte,
+			int wrprot);
 
-static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	unsigned long old;
 
@@ -325,18 +326,25 @@
 		return 0;
 	old = pte_update(ptep, _PAGE_ACCESSED);
 	if (old & _PAGE_HASHPTE) {
-		hpte_update(ptep, old, 0);
+		hpte_update(mm, addr, old, 0);
 		flush_tlb_pending();
 	}
 	return (old & _PAGE_ACCESSED) != 0;
 }
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young(__vma, __addr, __ptep)		   \
+({									   \
+	int __r;							   \
+	__r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
+	__r;								   \
+})
 
 /*
  * On RW/DIRTY bit transitions we can avoid flushing the hpte. For the
  * moment we always flush but we need to fix hpte_update and test if the
  * optimisation is worth it.
  */
-static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
+static inline int __ptep_test_and_clear_dirty(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	unsigned long old;
 
@@ -344,10 +352,18 @@
 		return 0;
 	old = pte_update(ptep, _PAGE_DIRTY);
 	if (old & _PAGE_HASHPTE)
-		hpte_update(ptep, old, 0);
+		hpte_update(mm, addr, old, 0);
 	return (old & _PAGE_DIRTY) != 0;
 }
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
+#define ptep_test_and_clear_dirty(__vma, __addr, __ptep)		   \
+({									   \
+	int __r;							   \
+	__r = __ptep_test_and_clear_dirty((__vma)->vm_mm, __addr, __ptep); \
+	__r;								   \
+})
 
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	unsigned long old;
@@ -356,7 +372,7 @@
        		return;
 	old = pte_update(ptep, _PAGE_RW);
 	if (old & _PAGE_HASHPTE)
-		hpte_update(ptep, old, 0);
+		hpte_update(mm, addr, old, 0);
 }
 
 /*
@@ -370,26 +386,27 @@
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 #define ptep_clear_flush_young(__vma, __address, __ptep)		\
 ({									\
-	int __young;							\
-	__young = ptep_test_and_clear_young(__vma, __address, __ptep);	\
+	int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \
+						  __ptep);		\
 	__young;							\
 })
 
 #define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
 #define ptep_clear_flush_dirty(__vma, __address, __ptep)		\
 ({									\
-	int __dirty;							\
-	__dirty = ptep_test_and_clear_dirty(__vma, __address, __ptep);	\
+	int __dirty = __ptep_test_and_clear_dirty((__vma)->vm_mm, __address, \
+						  __ptep); 		\
 	flush_tlb_page(__vma, __address);				\
 	__dirty;							\
 })
 
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	unsigned long old = pte_update(ptep, ~0UL);
 
 	if (old & _PAGE_HASHPTE)
-		hpte_update(ptep, old, 0);
+		hpte_update(mm, addr, old, 0);
 	return __pte(old);
 }
 
@@ -398,7 +415,7 @@
 	unsigned long old = pte_update(ptep, ~0UL);
 
 	if (old & _PAGE_HASHPTE)
-		hpte_update(ptep, old, 0);
+		hpte_update(mm, addr, old, 0);
 }
 
 /*
@@ -446,6 +463,7 @@
  */
 #define pgprot_noncached(prot)	(__pgprot(pgprot_val(prot) | _PAGE_NO_CACHE | _PAGE_GUARDED))
 
+#define __HAVE_ARCH_PTE_SAME
 #define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
 
 extern unsigned long ioremap_bot, ioremap_base;
@@ -553,14 +571,8 @@
 	return pt;
 }
 
-#endif /* __ASSEMBLY__ */
-
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-#define __HAVE_ARCH_PTEP_MKDIRTY
-#define __HAVE_ARCH_PTE_SAME
 #include <asm-generic/pgtable.h>
 
+#endif /* __ASSEMBLY__ */
+
 #endif /* _PPC64_PGTABLE_H */

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] set_pte() part 2 arch usage
  2005-03-01  1:45                 ` Benjamin Herrenschmidt
@ 2005-03-01 23:02                   ` David S. Miller
  0 siblings, 0 replies; 16+ messages in thread
From: David S. Miller @ 2005-03-01 23:02 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linux-arch

On Tue, 01 Mar 2005 12:45:28 +1100
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:

> Test machine has been LTP'ing happily over night with that patch.

Great, along with the s390 build fix the tree should be
ready to go once 2.6.12 opens up.  I'll be sure to push it
along.

Thanks everyone.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] set_pte() part 2 arch usage
  2005-03-01  2:22                 ` Benjamin Herrenschmidt
@ 2005-03-01 23:32                   ` David S. Miller
  0 siblings, 0 replies; 16+ messages in thread
From: David S. Miller @ 2005-03-01 23:32 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linux-arch

On Tue, 01 Mar 2005 13:22:18 +1100
Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:

> And here's an updated version of your 3rd patch to make it apply on top
> of current bk (a small fix to hpte_update broke it).

I should be able to resolve the conflict when I
re-root the tree for submission to Linus.  If for
some reason I can't, sure I'll use your patch
thanks :-)

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2005-03-01 23:32 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-02-24  4:07 [PATCH] set_pte() part 2 arch usage David S. Miller
2005-02-24  5:18 ` Benjamin Herrenschmidt
2005-02-24 22:36   ` David S. Miller
2005-02-25  5:32     ` Benjamin Herrenschmidt
2005-02-25  7:20       ` Benjamin Herrenschmidt
2005-02-25  7:57         ` David S. Miller
2005-02-25 18:37         ` David S. Miller
2005-02-26  0:56           ` Benjamin Herrenschmidt
2005-02-27  3:09             ` David S. Miller
2005-02-27  5:19               ` David S. Miller
2005-03-01  1:45                 ` Benjamin Herrenschmidt
2005-03-01 23:02                   ` David S. Miller
2005-03-01  2:22                 ` Benjamin Herrenschmidt
2005-03-01 23:32                   ` David S. Miller
2005-02-27  9:35               ` Benjamin Herrenschmidt
2005-02-28  4:14                 ` David S. Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox