From mboxrd@z Thu Jan 1 00:00:00 1970 Message-ID: <48E52912.7030200@domain.hid> Date: Thu, 02 Oct 2008 22:03:30 +0200 From: Gilles Chanteperdrix MIME-Version: 1.0 References: <899865CA54E4444DAF2E3639C04C5F48E4DB5C@trillian.at.omicron.at> In-Reply-To: <899865CA54E4444DAF2E3639C04C5F48E4DB5C@trillian.at.omicron.at> Content-Type: multipart/mixed; boundary="------------010105000804010908020907" Subject: Re: [Adeos-main] FW: [PATCH] repost: ARM FCSE List-Id: General discussion about Adeos List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Richard Cochran Cc: adeos-main@gna.org, Sebastian Smolorz This is a multi-part message in MIME format. --------------010105000804010908020907 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Richard Cochran wrote: > In any case, I found one huge error in the patch...you mangled my name > ;) Here is a better one, with this error fixed, as well as a fix in switch_mm so that we now really do not flush the cache. The previous version flushed the cache at each switch due to an error in the register we checked. I have tested this version with Xenomai, and we get lower latencies (we gained 100us). -- Gilles. 
--------------010105000804010908020907 Content-Type: text/x-diff; name="fcse.3.diff" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="fcse.3.diff" diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index ad455ff..4481a30 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -10,6 +10,7 @@ obj-y := compat.o entry-armv.o entry-common.o irq.o \ process.o ptrace.o setup.o signal.o \ sys_arm.o stacktrace.o time.o traps.o +obj-$(CONFIG_ARM_FCSE) += fcse.o obj-$(CONFIG_ISA_DMA_API) += dma.o obj-$(CONFIG_ARCH_ACORN) += ecard.o obj-$(CONFIG_FIQ) += fiq.o diff --git a/arch/arm/kernel/fcse.c b/arch/arm/kernel/fcse.c new file mode 100644 index 0000000..1900a79 --- /dev/null +++ b/arch/arm/kernel/fcse.c @@ -0,0 +1,37 @@ +#include +#include +#include +#include +#include + +#define MAX_PID (MODULE_START / FCSE_PID_TASK_SIZE) +#define PIDS_LONGS ((MAX_PID + 8 * sizeof(long) - 1) / (8 * sizeof(long))) + +static spinlock_t fcse_lock = SPIN_LOCK_UNLOCKED; +static unsigned long fcse_pids_bits[PIDS_LONGS]; + +int fcse_pid_alloc(void) +{ + unsigned long flags; + unsigned bit; + + spin_lock_irqsave(&fcse_lock, flags); + bit = find_first_zero_bit(fcse_pids_bits, MAX_PID); + if (bit == MAX_PID) { + spin_unlock_irqrestore(&fcse_lock, flags); /* must also restore the IRQ state saved by spin_lock_irqsave() */ + return -1; /* every bit below MAX_PID is already set: no free FCSE PID */ + } + set_bit(bit, fcse_pids_bits); + spin_unlock_irqrestore(&fcse_lock, flags); + + return bit; +} + +void fcse_pid_free(unsigned pid) +{ + unsigned long flags; + + spin_lock_irqsave(&fcse_lock, flags); + pid = test_and_clear_bit(pid, fcse_pids_bits); + spin_unlock_irqrestore(&fcse_lock, flags); +} diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index eefae1d..b13d8a5 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -32,6 +32,7 @@ #include #include #include +#include /* * bitmask of present and online CPUs. 
@@ -736,14 +737,14 @@ void flush_tlb_all(void) void flush_tlb_mm(struct mm_struct *mm) { - cpumask_t mask = mm->cpu_vm_mask; + cpumask_t mask = fcse_tlb_mask(mm); on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, 1, mask); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { - cpumask_t mask = vma->vm_mm->cpu_vm_mask; + cpumask_t mask = fcse_tlb_mask(vma->vm_mm); struct tlb_args ta; ta.ta_vma = vma; @@ -764,7 +765,7 @@ void flush_tlb_kernel_page(unsigned long kaddr) void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - cpumask_t mask = vma->vm_mm->cpu_vm_mask; + cpumask_t mask = fcse_tlb_mask(vma->vm_mm); struct tlb_args ta; ta.ta_vma = vma; diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 0128687..b8d1605 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -61,7 +61,18 @@ inline long do_mmap2( if (file) fput(file); +#ifdef CONFIG_ARM_FCSE + /* FIXME, this really sucks, and we should really recheck in mremap, + mprotect, and munmap */ + if (likely((unsigned) error < (unsigned)(-4096)) + && (flags & MAP_SHARED) && (prot & PROT_WRITE)) { + struct vm_area_struct *vma = find_vma(current->mm, error); + if (vma->vm_page_prot & (L_PTE_CACHEABLE | L_PTE_BUFFERABLE)) + ++current->mm->context.mappings_needing_flush; + } +#endif out: + return error; } diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 33ed048..6c40ac0 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -716,3 +716,10 @@ config CACHE_L2X0 select OUTER_CACHE help This option enables the L2x0 PrimeCell. + +config ARM_FCSE + bool "Fast Context Switch Extension (EXPERIMENTAL)" + depends on EXPERIMENTAL + default n + help + Say Y here to enable the ARM FCSE. If unsure, say N. 
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 28ad7ab..b23d3c9 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -13,11 +13,13 @@ #include #include #include +#include #include #include #include #include +#include #include "fault.h" @@ -55,6 +57,10 @@ void show_pte(struct mm_struct *mm, unsigned long addr) if (!mm) mm = &init_mm; +#ifdef CONFIG_ARM_FCSE + printk(KERN_ALERT "fcse pid: %ld, 0x%08lx\n", + mm->context.pid >> FCSE_PID_SHIFT, mm->context.pid); +#endif /* CONFIG_ARM_FCSE */ printk(KERN_ALERT "pgd = %p\n", mm->pgd); pgd = pgd_offset(mm, addr); printk(KERN_ALERT "[%08lx] *pgd=%08lx", addr, pgd_val(*pgd)); @@ -466,6 +472,8 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6); struct siginfo info; + addr = fcse_mva_to_va(addr); + if (!inf->fn(addr, fsr, regs)) return; @@ -484,4 +492,3 @@ do_PrefetchAbort(unsigned long addr, struct pt_regs *regs) { do_translation_fault(addr, 0, regs); } - diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 9df507d..412a10a 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "mm.h" @@ -58,9 +59,11 @@ void flush_cache_mm(struct mm_struct *mm) void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { if (cache_is_vivt()) { - if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) - __cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end), - vma->vm_flags); + if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) { + start = fcse_va_to_mva(vma->vm_mm, start) & PAGE_MASK; + end = PAGE_ALIGN(fcse_va_to_mva(vma->vm_mm, end)); + __cpuc_flush_user_range(start, end, vma->vm_flags); + } return; } @@ -78,7 +81,8 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsig { if (cache_is_vivt()) { if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) { - unsigned 
long addr = user_addr & PAGE_MASK; + unsigned long addr; + addr = fcse_va_to_mva(vma->vm_mm, user_addr) & PAGE_MASK; __cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags); } return; diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index e0f19ab..4da5e30 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -16,7 +16,11 @@ #include "mm.h" +#ifndef CONFIG_ARM_FCSE #define FIRST_KERNEL_PGD_NR (FIRST_USER_PGD_NR + USER_PTRS_PER_PGD) +#else /* CONFIG_ARM_FCSE */ +#define FIRST_KERNEL_PGD_NR (MODULE_START / PGDIR_SIZE) +#endif /* CONFIG_ARM_FCSE */ /* * need to get a 16k page for level 1 @@ -26,6 +30,15 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) pgd_t *new_pgd, *init_pgd; pmd_t *new_pmd, *init_pmd; pte_t *new_pte, *init_pte; +#ifdef CONFIG_ARM_FCSE + int pid; + + pid = fcse_pid_alloc(); + if (pid == -1) + goto no_pgd; + + mm->context.pid = pid << FCSE_PID_SHIFT; +#endif /* CONFIG_ARM_FCSE */ new_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL, 2); if (!new_pgd) @@ -43,11 +56,15 @@ pgd_t *get_pgd_slow(struct mm_struct *mm) clean_dcache_area(new_pgd, PTRS_PER_PGD * sizeof(pgd_t)); if (!vectors_high()) { + /* We can not use pgd_offset here since mm->pgd is not yet + initialized. */ + pgd_t *pgd = new_pgd + pgd_index(fcse_va_to_mva(mm, 0)); + /* * On ARM, first page must always be allocated since it * contains the machine vectors. 
*/ - new_pmd = pmd_alloc(mm, new_pgd, 0); + new_pmd = pmd_alloc(mm, pgd, 0); if (!new_pmd) goto no_pmd; @@ -96,4 +113,7 @@ void free_pgd_slow(struct mm_struct *mm, pgd_t *pgd) pmd_free(mm, pmd); free: free_pages((unsigned long) pgd, 2); +#ifdef CONFIG_ARM_FCSE + fcse_pid_free(mm->context.pid >> FCSE_PID_SHIFT); +#endif /* CONFIG_ARM_FCSE */ } diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 28cdb06..2454d0b 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -321,6 +321,10 @@ ENTRY(cpu_arm920_dcache_clean_area) ENTRY(cpu_arm920_switch_mm) #ifdef CONFIG_MMU mov ip, #0 +#ifdef CONFIG_ARM_FCSE + cmp r2, #0 + beq .LCnoflush +#endif #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache #else @@ -338,6 +342,9 @@ ENTRY(cpu_arm920_switch_mm) #endif mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_ARM_FCSE +.LCnoflush: +#endif mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs #endif diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index 4cd3316..ca7f4ee 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -337,6 +337,10 @@ ENTRY(cpu_arm926_dcache_clean_area) ENTRY(cpu_arm926_switch_mm) #ifdef CONFIG_MMU mov ip, #0 +#ifdef CONFIG_ARM_FCSE + cmp r2, #0 + beq .LCnoflush +#endif #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH mcr p15, 0, ip, c7, c6, 0 @ invalidate D cache #else @@ -346,6 +350,9 @@ ENTRY(cpu_arm926_switch_mm) #endif mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache mcr p15, 0, ip, c7, c10, 4 @ drain WB +#ifdef CONFIG_ARM_FCSE +.LCnoflush: +#endif mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs #endif diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index 2dd8527..c101e5d 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -417,9 +417,16 @@ 
ENTRY(cpu_xscale_dcache_clean_area) */ .align 5 ENTRY(cpu_xscale_switch_mm) +#ifdef CONFIG_ARM_FCSE + cmp r2, #0 + beq .LCnoflush +#endif clean_d_cache r1, r2 mcr p15, 0, ip, c7, c5, 0 @ Invalidate I cache & BTB mcr p15, 0, ip, c7, c10, 4 @ Drain Write (& Fill) Buffer +#ifdef CONFIG_ARM_FCSE +.LCnoflush: +#endif mcr p15, 0, r0, c2, c0, 0 @ load page table pointer mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs cpwait_ret lr, ip diff --git a/include/asm-arm/cacheflush.h b/include/asm-arm/cacheflush.h index 759a97b..b4b3b08 100644 --- a/include/asm-arm/cacheflush.h +++ b/include/asm-arm/cacheflush.h @@ -15,6 +15,7 @@ #include #include +#include #define CACHE_COLOUR(vaddr) ((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT) @@ -339,16 +340,20 @@ static inline void flush_cache_mm(struct mm_struct *mm) static inline void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) + if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) { + start = fcse_va_to_mva(vma->vm_mm,start); + end = fcse_va_to_mva(vma->vm_mm,end); __cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end), vma->vm_flags); + } } static inline void flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn) { if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) { - unsigned long addr = user_addr & PAGE_MASK; + unsigned long addr; + addr = fcse_va_to_mva(vma->vm_mm,user_addr) & PAGE_MASK; __cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags); } } @@ -379,8 +384,14 @@ extern void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, * Harvard caches are synchronised for the user space address range. * This is used for the ARM private sys_cacheflush system call. 
*/ -#define flush_cache_user_range(vma,start,end) \ - __cpuc_coherent_user_range((start) & PAGE_MASK, PAGE_ALIGN(end)) +#define flush_cache_user_range(vma,start,end) \ + ({ \ + struct mm_struct *_mm = (vma)->vm_mm; \ + unsigned long _start, _end; \ + _start = fcse_va_to_mva(_mm,start) & PAGE_MASK; \ + _end = PAGE_ALIGN(fcse_va_to_mva(_mm,end)); \ + __cpuc_coherent_user_range(_start, _end); \ + }) /* * Perform necessary cache operations to ensure that data previously @@ -417,7 +428,7 @@ static inline void flush_anon_page(struct vm_area_struct *vma, extern void __flush_anon_page(struct vm_area_struct *vma, struct page *, unsigned long); if (PageAnon(page)) - __flush_anon_page(vma, page, vmaddr); + __flush_anon_page(vma, page, fcse_va_to_mva(vma->vm_mm,vmaddr)); } #define flush_dcache_mmap_lock(mapping) \ diff --git a/include/asm-arm/cpu-multi32.h b/include/asm-arm/cpu-multi32.h index 3479de9..627daf3 100644 --- a/include/asm-arm/cpu-multi32.h +++ b/include/asm-arm/cpu-multi32.h @@ -52,7 +52,7 @@ extern struct processor { /* * Set the page table */ - void (*switch_mm)(unsigned long pgd_phys, struct mm_struct *mm); + void (*switch_mm)(unsigned long pgd_phys, struct mm_struct *mm, unsigned cacheflush); /* * Set a possibly extended PTE. Non-extended PTEs should * ignore 'ext'. 
@@ -66,4 +66,4 @@ extern struct processor { #define cpu_do_idle() processor._do_idle() #define cpu_dcache_clean_area(addr,sz) processor.dcache_clean_area(addr,sz) #define cpu_set_pte_ext(ptep,pte,ext) processor.set_pte_ext(ptep,pte,ext) -#define cpu_do_switch_mm(pgd,mm) processor.switch_mm(pgd,mm) +#define cpu_do_switch_mm(pgd,mm,flush) processor.switch_mm(pgd,mm,flush) diff --git a/include/asm-arm/cpu-single.h b/include/asm-arm/cpu-single.h index 0b120ee..e3a59f7 100644 --- a/include/asm-arm/cpu-single.h +++ b/include/asm-arm/cpu-single.h @@ -39,6 +39,6 @@ extern void cpu_proc_init(void); extern void cpu_proc_fin(void); extern int cpu_do_idle(void); extern void cpu_dcache_clean_area(void *, int); -extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm); +extern void cpu_do_switch_mm(unsigned long pgd_phys, struct mm_struct *mm, unsigned cacheflush); extern void cpu_set_pte_ext(pte_t *ptep, pte_t pte, unsigned int ext); extern void cpu_reset(unsigned long addr) __attribute__((noreturn)); diff --git a/include/asm-arm/fcse.h b/include/asm-arm/fcse.h new file mode 100644 index 0000000..c9ee051 --- /dev/null +++ b/include/asm-arm/fcse.h @@ -0,0 +1,78 @@ +/* + * Filename: include/asm-arm/fcse.h + * Description: ARM Process ID (PID) includes for Fast Address Space Switching + * (FASS) in ARM Linux. + * Created: 14/10/2001 + * Changes: 19/02/2002 - Macros added. + * 03/08/2007 - Adapted to kernel 2.6.21 (ssm) + * Feb 2008 - Simplified a bit (rco) + * + * Copyright: (C) 2001, 2002 Adam Wiggins + * (C) 2007 Sebastian Smolorz + * (C) 2008 Richard Cochran + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_ARM_FCSE_H +#define __ASM_ARM_FCSE_H + +#ifdef CONFIG_ARM_FCSE + +#define FCSE_PID_SHIFT 25 + +/* Size of PID relocation area */ +#define FCSE_PID_TASK_SIZE (1UL << FCSE_PID_SHIFT) + +/* Mask to get rid of PID from relocated address */ +#define FCSE_PID_MASK (FCSE_PID_TASK_SIZE - 1) + +#define fcse_tlb_mask(mm) ((mm)->context.cpu_tlb_mask) +#define fcse_cpu_set_vm_mask(cpu, mm) cpu_set(cpu, (mm)->cpu_vm_mask) +#define fcse_needs_flush(mm) ((mm)->context.mappings_needing_flush) + +/* Sets the CPU's PID Register */ +static inline void fcse_pid_set(unsigned long pid) +{ + __asm__ __volatile__("mcr p15, 0, %0, c13, c0, 0": /* */: "r" (pid)); +} + +/* Returns the state of the CPU's PID Register */ +static inline unsigned long fcse_pid_get(void) +{ + unsigned long pid; + __asm__ __volatile__("mrc p15, 0, %0, c13, c0, 0" : "=&r" (pid)); + return (pid & (~FCSE_PID_MASK)); +} + +static inline unsigned long fcse_mva_to_va(unsigned long mva) +{ + unsigned long pid = fcse_pid_get(); + if (pid && (pid == (mva & ~FCSE_PID_MASK))) { + return mva & FCSE_PID_MASK; + } + return mva; +} + +static inline unsigned long fcse_va_to_mva(struct mm_struct *mm, unsigned long va) +{ + if (va < FCSE_PID_TASK_SIZE) { + return mm->context.pid | va; + } + return va; +} + +int fcse_pid_alloc(void); +void fcse_pid_free(unsigned pid); + +#else /* CONFIG_ARM_FCSE */ +#define fcse_pid_set(pid) do { } while(0) +#define fcse_mva_to_va(x) (x) +#define fcse_va_to_mva(vma,x) (x) +#define fcse_tlb_mask(mm) ((mm)->cpu_vm_mask) +#define fcse_cpu_set_vm_mask(cpu, mm) do { } while(0) +#define fcse_needs_flush(mm) (1) +#endif + +#endif /* __ASM_ARM_FCSE_H */ diff --git a/include/asm-arm/memory.h b/include/asm-arm/memory.h index 9ba4d71..bda4e74 100644 --- a/include/asm-arm/memory.h +++ b/include/asm-arm/memory.h @@ -34,14 +34,23 @@ * TASK_SIZE - the maximum size of a user space task. 
* TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area */ +#ifdef CONFIG_ARM_FCSE +#define TASK_SIZE UL(0x02000000) +#define TASK_UNMAPPED_BASE UL(0x01000000) +#else #define TASK_SIZE UL(0xbf000000) #define TASK_UNMAPPED_BASE UL(0x40000000) #endif +#endif /* * The maximum size of a 26-bit user space task. */ +#ifdef CONFIG_ARM_FCSE +#define TASK_SIZE_26 UL(0x02000000) +#else #define TASK_SIZE_26 UL(0x04000000) +#endif /* * Page offset: 3GB diff --git a/include/asm-arm/mmu.h b/include/asm-arm/mmu.h index 53099d4..b74d736 100644 --- a/include/asm-arm/mmu.h +++ b/include/asm-arm/mmu.h @@ -7,6 +7,11 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID unsigned int id; #endif +#ifdef CONFIG_ARM_FCSE + unsigned long pid; + unsigned mappings_needing_flush; + cpumask_t cpu_tlb_mask; +#endif unsigned int kvm_seq; } mm_context_t; diff --git a/include/asm-arm/mmu_context.h b/include/asm-arm/mmu_context.h index 6913d02..47339c2 100644 --- a/include/asm-arm/mmu_context.h +++ b/include/asm-arm/mmu_context.h @@ -17,6 +17,7 @@ #include #include #include +#include void __check_kvm_seq(struct mm_struct *mm); @@ -64,7 +65,15 @@ static inline void check_context(struct mm_struct *mm) __check_kvm_seq(mm); } -#define init_new_context(tsk,mm) 0 + +static inline int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ +#ifdef CONFIG_ARM_FCSE + cpus_clear(mm->context.cpu_tlb_mask); + mm->context.mappings_needing_flush = 0; +#endif /* CONFIG_ARM_FCSE */ + return 0; +} #endif @@ -97,11 +106,13 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next, #ifdef CONFIG_MMU unsigned int cpu = smp_processor_id(); - if (!cpu_test_and_set(cpu, next->cpu_vm_mask) || prev != next) { + if (!cpu_test_and_set(cpu, fcse_tlb_mask(next)) || prev != next) { + fcse_cpu_set_vm_mask(cpu, next); check_context(next); - cpu_switch_mm(next->pgd, next); + fcse_pid_set(next->context.pid); + cpu_switch_mm(next->pgd, next, fcse_needs_flush(next)); if (cache_is_vivt()) - cpu_clear(cpu, 
prev->cpu_vm_mask); + cpu_clear(cpu, fcse_tlb_mask(prev)); } #endif } diff --git a/include/asm-arm/pgtable.h b/include/asm-arm/pgtable.h index 5571c13..701f458 100644 --- a/include/asm-arm/pgtable.h +++ b/include/asm-arm/pgtable.h @@ -344,10 +344,14 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) /* to find an entry in a page-table-directory */ #define pgd_index(addr) ((addr) >> PGDIR_SHIFT) -#define pgd_offset(mm, addr) ((mm)->pgd+pgd_index(addr)) +#define pgd_offset(mm, addr) \ + ({ \ + struct mm_struct *_mm = (mm); \ + (_mm->pgd + pgd_index(fcse_va_to_mva(_mm,(addr)))); \ + }) /* to find an entry in a kernel page-table-directory */ -#define pgd_offset_k(addr) pgd_offset(&init_mm, addr) +#define pgd_offset_k(addr) (init_mm.pgd+pgd_index(addr)) /* Find an entry in the second-level page table.. */ #define pmd_offset(dir, addr) ((pmd_t *)(dir)) diff --git a/include/asm-arm/proc-fns.h b/include/asm-arm/proc-fns.h index 75ec760..37ba564 100644 --- a/include/asm-arm/proc-fns.h +++ b/include/asm-arm/proc-fns.h @@ -223,7 +223,8 @@ #ifdef CONFIG_MMU -#define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm) +#define cpu_switch_mm(pgd,mm,cacheflush) \ + cpu_do_switch_mm(virt_to_phys(pgd),mm,(cacheflush)) #define cpu_get_pgd() \ ({ \ diff --git a/include/asm-arm/tlbflush.h b/include/asm-arm/tlbflush.h index 8c6bc1b..98cd28f 100644 --- a/include/asm-arm/tlbflush.h +++ b/include/asm-arm/tlbflush.h @@ -158,6 +158,7 @@ #ifndef __ASSEMBLY__ #include +#include struct cpu_tlb_fns { void (*flush_user_range)(unsigned long, unsigned long, struct vm_area_struct *); @@ -292,7 +293,7 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm) if (tlb_flag(TLB_WB)) dsb(); - if (cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) { + if (cpu_isset(smp_processor_id(), fcse_tlb_mask(mm))) { if (tlb_flag(TLB_V3_FULL)) asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (zero) : "cc"); if (tlb_flag(TLB_V4_U_FULL)) @@ -325,12 +326,13 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned 
long uaddr) const int zero = 0; const unsigned int __tlb_flag = __cpu_tlb_flags; - uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm); + uaddr = (fcse_va_to_mva(vma->vm_mm,uaddr) & PAGE_MASK) + | ASID(vma->vm_mm); if (tlb_flag(TLB_WB)) dsb(); - if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) { + if (cpu_isset(smp_processor_id(), fcse_tlb_mask(vma->vm_mm))) { if (tlb_flag(TLB_V3_PAGE)) asm("mcr p15, 0, %0, c6, c0, 0" : : "r" (uaddr) : "cc"); if (tlb_flag(TLB_V4_U_PAGE)) @@ -437,7 +439,15 @@ static inline void clean_pmd_entry(pmd_t *pmd) /* * Convert calls to our calling convention. */ -#define local_flush_tlb_range(vma,start,end) __cpu_flush_user_tlb_range(start,end,vma) +#define local_flush_tlb_range(vma,start,end) \ + ({ \ + struct mm_struct *_mm = (vma)->vm_mm; \ + unsigned long _start, _end; \ + _start = fcse_va_to_mva(_mm, start); \ + _end = fcse_va_to_mva(_mm, end); \ + __cpu_flush_user_tlb_range(_start, _end, vma); \ + }) + #define local_flush_tlb_kernel_range(s,e) __cpu_flush_kern_tlb_range(s,e) #ifndef CONFIG_SMP --------------010105000804010908020907--