All of lore.kernel.org
 help / color / mirror / Atom feed
* [Adeos-main] FW: [PATCH] repost: ARM FCSE
@ 2008-03-07 16:19 Richard Cochran
  2008-03-07 16:31 ` Gilles Chanteperdrix
  2008-09-21 15:52 ` Gilles Chanteperdrix
  0 siblings, 2 replies; 17+ messages in thread
From: Richard Cochran @ 2008-03-07 16:19 UTC (permalink / raw)
  To: adeos-main

[-- Attachment #1: Type: text/plain, Size: 2246 bytes --]

I posted this patch today on linux-arm-kernel, but I repeat it
here because there does not seem to be too much interest on that
list for the ARM FCSE.

I also tried to combine this patch with ipipe for kernel 2.6.20
running on the Intel IXDP465, but after booting I soon get a BUG.

Anyhow, perhaps the ARM people might take a look at combining
ipipe with FCSE...

-----Original Message-----
From: linux-arm-kernel-bounces@domain.hid
[mailto:linux-arm-kernel-bounces@domain.hid] On Behalf Of
Richard Cochran
Sent: Friday, March 07, 2008 4:52 PM
To: linux-arm-kernel@domain.hid
Subject: [PATCH] repost: ARM FCSE


It looks like my mailer just wrapped the line endings,
and I don't know how to make it stop! (How does one
escape from Outlook + Exchange?)

Here is the patch once again, as an attachment.

Richard

---

This patch implements the ARM FCSE for Linux in a minimally intrusive
way. The patch is against kernel 2.6.24, but it will also work with
other recent kernels. I have tested the patch on the following
machine/kernel combinations.

Linksys  NSLU2    2.6.21
Omicron  DEVXIP   2.6.23  (this board is similar to the IXDP425)
Intel    IXDP465  2.6.24

Using the patch, I measured an improvement in the task switching time
of about 100 us on all three platforms. The test program does
something like this:

1. blocking read on network socket
2. process packet
3. send reply packet

I externally measured the time from when the input packet appears on
the line until the reply packet appears. I attribute the improvement in
response time to the faster scheduling, due to the FCSE.

KNOWN ISSUES:

1. The avoidance of flushing the D/I caches is only implemented for
   Xscale, in arch/arm/mm/proc-xscale.S. If you want to try other
   types, you should make similar changes for that type, otherwise
   enabling FCSE only limits your machine with no performance benefit.

2. The patch limits the total number of PIDs to 96, including each
   kernel and user thread. This is a bit wasteful, and it could be
   improved.

3. On the NSLU2, the 'ldconfig' program hangs after calling MMAP2(2)
   many times, but I do not know why.

Looking forward to your comments,

Richard



[-- Attachment #2: fcse.diff --]
[-- Type: application/octet-stream, Size: 11303 bytes --]

diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 12161ae..088e8b9 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -651,3 +651,10 @@ config OUTER_CACHE
 config CACHE_L2X0
 	bool
 	select OUTER_CACHE
+
+config ARM_FCSE
+	bool "Fast Context Switch Extension (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	default n
+	help
+	  Say Y here to enable the ARM FCSE. If unsure, say N.
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 846cce4..60346a9 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -17,6 +17,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/uaccess.h>
+#include <asm/pid.h>
 
 #include "fault.h"
 
@@ -439,6 +440,8 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	const struct fsr_info *inf = fsr_info + (fsr & 15) + ((fsr & (1 << 10)) >> 6);
 	struct siginfo info;
 
+	addr = mva_to_va(addr);
+
 	if (!inf->fn(addr, fsr, regs))
 		return;
 
diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S
index c156dda..35c4106 100644
--- a/arch/arm/mm/proc-xscale.S
+++ b/arch/arm/mm/proc-xscale.S
@@ -413,8 +413,10 @@ ENTRY(cpu_xscale_dcache_clean_area)
  */
 	.align	5
 ENTRY(cpu_xscale_switch_mm)
+#ifndef CONFIG_ARM_FCSE
 	clean_d_cache r1, r2
 	mcr	p15, 0, ip, c7, c5, 0		@ Invalidate I cache & BTB
+#endif
 	mcr	p15, 0, ip, c7, c10, 4		@ Drain Write (& Fill) Buffer
 	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
 	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I & D TLBs
diff --git a/include/asm-arm/cacheflush.h b/include/asm-arm/cacheflush.h
index 6c1c968..019a85f 100644
--- a/include/asm-arm/cacheflush.h
+++ b/include/asm-arm/cacheflush.h
@@ -15,6 +15,7 @@
 
 #include <asm/glue.h>
 #include <asm/shmparam.h>
+#include <asm/pid.h>
 
 #define CACHE_COLOUR(vaddr)	((vaddr & (SHMLBA - 1)) >> PAGE_SHIFT)
 
@@ -331,16 +332,19 @@ static inline void flush_cache_mm(struct mm_struct *mm)
 static inline void
 flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
-	if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask))
+	if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
+		start = va_to_mva(vma,start);
+		end = va_to_mva(vma,end);
 		__cpuc_flush_user_range(start & PAGE_MASK, PAGE_ALIGN(end),
 					vma->vm_flags);
+	}
 }
 
 static inline void
 flush_cache_page(struct vm_area_struct *vma, unsigned long user_addr, unsigned long pfn)
 {
 	if (cpu_isset(smp_processor_id(), vma->vm_mm->cpu_vm_mask)) {
-		unsigned long addr = user_addr & PAGE_MASK;
+		unsigned long addr = va_to_mva(vma,user_addr) & PAGE_MASK;
 		__cpuc_flush_user_range(addr, addr + PAGE_SIZE, vma->vm_flags);
 	}
 }
@@ -372,7 +376,7 @@ extern void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
  * This is used for the ARM private sys_cacheflush system call.
  */
 #define flush_cache_user_range(vma,start,end) \
-	__cpuc_coherent_user_range((start) & PAGE_MASK, PAGE_ALIGN(end))
+	__cpuc_coherent_user_range((va_to_mva(vma,start)) & PAGE_MASK, PAGE_ALIGN(va_to_mva(vma,end)))
 
 /*
  * Perform necessary cache operations to ensure that data previously
@@ -384,7 +388,7 @@ extern void flush_ptrace_access(struct vm_area_struct *vma, struct page *page,
  * Perform necessary cache operations to ensure that the TLB will
  * see data written in the specified area.
  */
-#define clean_dcache_area(start,size)	cpu_dcache_clean_area(start, size)
+#define clean_dcache_area(start,size) cpu_dcache_clean_area(va_to_mva_p(start), size)
 
 /*
  * flush_dcache_page is used when the kernel has written to the page
@@ -409,7 +413,7 @@ static inline void flush_anon_page(struct vm_area_struct *vma,
 	extern void __flush_anon_page(struct vm_area_struct *vma,
 				struct page *, unsigned long);
 	if (PageAnon(page))
-		__flush_anon_page(vma, page, vmaddr);
+		__flush_anon_page(vma, page, va_to_mva(vma,vmaddr));
 }
 
 #define flush_dcache_mmap_lock(mapping) \
diff --git a/include/asm-arm/memory.h b/include/asm-arm/memory.h
index d9bfb39..a959a3b 100644
--- a/include/asm-arm/memory.h
+++ b/include/asm-arm/memory.h
@@ -34,14 +34,23 @@
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area
  */
+#ifdef CONFIG_ARM_FCSE
+#define TASK_SIZE		UL(0x02000000)
+#define TASK_UNMAPPED_BASE	UL(0x01000000)
+#else
 #define TASK_SIZE		UL(0xbf000000)
 #define TASK_UNMAPPED_BASE	UL(0x40000000)
 #endif
+#endif
 
 /*
  * The maximum size of a 26-bit user space task.
  */
+#ifdef CONFIG_ARM_FCSE
+#define TASK_SIZE_26		UL(0x02000000)
+#else
 #define TASK_SIZE_26		UL(0x04000000)
+#endif
 
 /*
  * Page offset: 3GB
diff --git a/include/asm-arm/mmu.h b/include/asm-arm/mmu.h
index 53099d4..6f69eab 100644
--- a/include/asm-arm/mmu.h
+++ b/include/asm-arm/mmu.h
@@ -7,6 +7,9 @@ typedef struct {
 #ifdef CONFIG_CPU_HAS_ASID
 	unsigned int id;
 #endif
+#ifdef CONFIG_ARM_FCSE
+	unsigned int pid;
+#endif
 	unsigned int kvm_seq;
 } mm_context_t;
 
diff --git a/include/asm-arm/mmu_context.h b/include/asm-arm/mmu_context.h
index 6913d02..97ed8da 100644
--- a/include/asm-arm/mmu_context.h
+++ b/include/asm-arm/mmu_context.h
@@ -17,6 +17,7 @@
 #include <asm/cacheflush.h>
 #include <asm/proc-fns.h>
 #include <asm-generic/mm_hooks.h>
+#include <asm/pid.h>
 
 void __check_kvm_seq(struct mm_struct *mm);
 
@@ -64,7 +65,28 @@ static inline void check_context(struct mm_struct *mm)
 		__check_kvm_seq(mm);
 }
 
+#ifdef CONFIG_ARM_FCSE
+
+static inline int init_new_context(struct task_struct *tsk,
+				   struct mm_struct *mm)
+{
+	mm->context.pid = (tsk->pid - 1) << ARMPID_SHIFT;
+	return 0;
+}
+
+static void set_pid_register(struct mm_struct *mm)
+{
+	u32 pid = 0;
+	if (mm) {
+		pid = mm->context.pid;
+	}
+	set_armpid(pid);
+}
+
+#else
 #define init_new_context(tsk,mm)	0
+#define set_pid_register(mm)		do { } while(0)
+#endif
 
 #endif
 
@@ -99,6 +121,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 
 	if (!cpu_test_and_set(cpu, next->cpu_vm_mask) || prev != next) {
 		check_context(next);
+		set_pid_register(next);
 		cpu_switch_mm(next->pgd, next);
 		if (cache_is_vivt())
 			cpu_clear(cpu, prev->cpu_vm_mask);
diff --git a/include/asm-arm/pgalloc.h b/include/asm-arm/pgalloc.h
index 4d43945..95b4acd 100644
--- a/include/asm-arm/pgalloc.h
+++ b/include/asm-arm/pgalloc.h
@@ -126,6 +126,12 @@ static inline void
 pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep)
 {
 	__pmd_populate(pmdp, page_to_pfn(ptep) << PAGE_SHIFT | _PAGE_USER_TABLE);
+#ifdef CONFIG_ARM_FCSE
+	pmdp += mm->context.pid >> 20;
+	if (pmd_none(*pmdp)) {
+		__pmd_populate(pmdp, page_to_pfn(ptep) << PAGE_SHIFT | _PAGE_USER_TABLE);
+	}
+#endif
 }
 
 #endif /* CONFIG_MMU */
diff --git a/include/asm-arm/pid.h b/include/asm-arm/pid.h
new file mode 100644
index 0000000..10c9393
--- /dev/null
+++ b/include/asm-arm/pid.h
@@ -0,0 +1,95 @@
+/*
+ * Filename:    include/asm-arm/pid.h                                 
+ * Description: ARM Process ID (PID) includes for Fast Address Space Switching
+ *              (FASS) in ARM Linux.
+ * Created:     14/10/2001
+ * Changes:     19/02/2002 - Macros added.
+ *              03/08/2007 - Adapted to kernel 2.6.21 (ssm)
+ *              Feb 2008   - Simplified a bit (rco)
+ *
+ * Copyright:   (C) 2001, 2002 Adam Wiggins <awiggins@cse.unsw.edu.au>
+ *              (C) 2007 Sebastian Smolorz <ssm@emlix.com>
+ *              (C) 2008 Richard Cochran
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __ASM_PROC_PID_H
+#define __ASM_PROC_PID_H
+
+#ifdef CONFIG_ARM_FCSE
+
+#define ARMPID_SHIFT 25
+
+/* Size of PID relocation area */
+#define ARMPID_TASK_SIZE (1UL << ARMPID_SHIFT)
+
+/* Mask to get rid of PID from relocated address */
+#define ARMPID_MASK (ARMPID_TASK_SIZE - 1)
+
+/* Convert PID number to address space location */
+#define PIDNUM_TO_PID(pid_num) ((pid_num) << ARMPID_SHIFT)
+
+/* And back again */
+#define PID_TO_PIDNUM(pid) ((pid) >> ARMPID_SHIFT)
+
+/* Gets the ARM PID register value from a Modified Virtual Address (MVA) */
+#define MVA_TO_PID(mva) ((mva) & ~ARMPID_MASK)
+#define MVA_TO_PIDNUM(mva) (PID_TO_PIDNUM(MVA_TO_PID(mva)))
+#define MVA_TO_VA(mva) ((mva) & ARMPID_MASK)	/* strip the PID bits from an MVA */
+#define VA_TO_MVA(va) ((va) | get_armpid())	/* OR in the current PID register bits */
+
+/* Find out the CPD index offset due to ARM PID relocation */
+#define ARMPID_CPD_OFFSET(pid) ((pid) >> PGDIR_SHIFT)
+
+/* Sets the CPU's PID Register */
+static inline void set_armpid(u32 pid)
+{
+	__asm__ __volatile__("mcr	p15, 0, %0, c13, c0, 0"
+			     : : "r" (pid));
+}
+
+/* Returns the state of the CPU's PID Register */
+static inline u32 get_armpid(void)
+{
+	u32 pid;
+	__asm__ __volatile__("mrc	p15, 0, %0, c13, c0, 0	@ Get ARM PID"
+			     : "=&r" (pid) : );
+	return (pid & (~ARMPID_MASK));
+}
+
+static inline unsigned long mva_to_va(unsigned long mva)
+{
+	unsigned long pid = get_armpid();
+	if (pid && (pid == MVA_TO_PID(mva))) {
+		return MVA_TO_VA(mva);
+	}
+	return mva;
+}
+
+static inline unsigned long va_to_mva(struct vm_area_struct *vma,
+				      unsigned long va)
+{
+	if (va < ARMPID_TASK_SIZE) {
+		return VA_TO_MVA(va);
+	}
+	return va;
+}
+
+static inline void* va_to_mva_p(void *va)
+{
+	u32 tmp = (u32) va;
+	if (tmp < ARMPID_TASK_SIZE) {
+		va = (void*) VA_TO_MVA(tmp);
+	}
+	return va;
+}
+
+#else /* CONFIG_ARM_FCSE */
+#define mva_to_va(x) (x)
+#define va_to_mva(vma,x) (x)
+#define va_to_mva_p(x) (x)
+#endif
+
+#endif
diff --git a/include/asm-arm/tlbflush.h b/include/asm-arm/tlbflush.h
index 71be4fd..81d963d 100644
--- a/include/asm-arm/tlbflush.h
+++ b/include/asm-arm/tlbflush.h
@@ -158,6 +158,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/sched.h>
+#include <asm/pid.h>
 
 struct cpu_tlb_fns {
 	void (*flush_user_range)(unsigned long, unsigned long, struct vm_area_struct *);
@@ -325,7 +326,7 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
 	const int zero = 0;
 	const unsigned int __tlb_flag = __cpu_tlb_flags;
 
-	uaddr = (uaddr & PAGE_MASK) | ASID(vma->vm_mm);
+	uaddr = (va_to_mva(vma,uaddr) & PAGE_MASK) | ASID(vma->vm_mm);
 
 	if (tlb_flag(TLB_WB))
 		dsb();
diff --git a/include/linux/threads.h b/include/linux/threads.h
index 38d1a5d..ed0d5a2 100644
--- a/include/linux/threads.h
+++ b/include/linux/threads.h
@@ -24,13 +24,21 @@
 /*
  * This controls the default maximum pid allocated to a process
  */
+#ifdef CONFIG_ARM_FCSE
+#define PID_MAX_DEFAULT 96
+#else
 #define PID_MAX_DEFAULT (CONFIG_BASE_SMALL ? 0x1000 : 0x8000)
+#endif
 
 /*
  * A maximum of 4 million PIDs should be enough for a while.
  * [NOTE: PID/TIDs are limited to 2^29 ~= 500+ million, see futex.h.]
  */
+#ifdef CONFIG_ARM_FCSE
+#define PID_MAX_LIMIT 96
+#else
 #define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \
 	(sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT))
+#endif
 
 #endif
diff --git a/kernel/pid.c b/kernel/pid.c
index c6e3f9f..11c505e 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -37,7 +37,11 @@ struct pid init_struct_pid = INIT_STRUCT_PID;
 
 int pid_max = PID_MAX_DEFAULT;
 
+#ifdef CONFIG_ARM_FCSE
+#define RESERVED_PIDS		20
+#else
 #define RESERVED_PIDS		300
+#endif
 
 int pid_max_min = RESERVED_PIDS + 1;
 int pid_max_max = PID_MAX_LIMIT;

[-- Attachment #3: ATT18375870.txt --]
[-- Type: text/plain, Size: 274 bytes --]

-------------------------------------------------------------------
List admin: http://lists.arm.linux.org.uk/mailman/listinfo/linux-arm-kernel
FAQ:        http://www.arm.linux.org.uk/mailinglists/faq.php
Etiquette:  http://www.arm.linux.org.uk/mailinglists/etiquette.php

^ permalink raw reply related	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2008-10-03  7:20 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2008-03-07 16:19 [Adeos-main] FW: [PATCH] repost: ARM FCSE Richard Cochran
2008-03-07 16:31 ` Gilles Chanteperdrix
2008-09-21 15:52 ` Gilles Chanteperdrix
2008-09-21 16:30   ` Gilles Chanteperdrix
2008-09-23 18:32   ` Gilles Chanteperdrix
2008-09-24  5:47     ` Richard Cochran
2008-09-24  7:38       ` Gilles Chanteperdrix
2008-09-27 17:53     ` Gilles Chanteperdrix
2008-10-02  8:44       ` Gilles Chanteperdrix
2008-10-02 14:54         ` Richard Cochran
2008-10-02 15:30           ` Gilles Chanteperdrix
2008-10-02 17:36             ` Gilles Chanteperdrix
2008-10-02 20:03           ` Gilles Chanteperdrix
2008-10-03  7:20             ` Richard Cochran
2008-10-02 22:04         ` Bosko Radivojevic
2008-10-02 22:34           ` Gilles Chanteperdrix
2008-10-03  1:57             ` Philippe Gerum

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.