public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] to support hookable flush_tlb* functions
@ 2003-01-24 14:24 Thomas Schlichter
  2003-01-24 21:21 ` Andrew Morton
  0 siblings, 1 reply; 4+ messages in thread
From: Thomas Schlichter @ 2003-01-24 14:24 UTC (permalink / raw)
  To: linux-kernel, torvalds; +Cc: Patrick R. Schulz

[-- Attachment #1: Type: text/plain, Size: 1717 bytes --]

Hello,

with this mail I send a patch that allows kernel modules to hook into the
different flush_tlb* functions defined in <asm/tlbflush.h> or <asm/pgtable.h> in
order to synchronize devices' TLBs.

This is necessary for devices that provide their own TLB and cannot participate
in the CPU buses' TLB shootdown protocol. With this patch it is possible to
ensure TLB consistency.

Currently this extension could possibly be used by high performance
interconnects like QsNet from Quadrics (www.quadrics.com), and in the near
future by even more high performance, low latency NICs that will implement
direct user space DMA transfers to non-pinned user pages. Device TLBs are a
mandatory requirement there.

Currently I am writing my diploma thesis about the development of such a device,
for which I need this patch. As it looks good to me, I want to provide it to the
public community so it can be reviewed and tested more widely. (The i386 parts
are tested and work fine for me.)

The patch consists of two parts: one generic part and, for each supported
architecture, another part that depends on the generic one.

Attached to this mail are only the generic part and the architecture-dependent
part for i386 compatible machines, just so as not to waste everyone's bandwidth...
But if requested I can send you patches for the other architectures, too.

The i386 patch also includes some cleanups by renaming __flush_tlb_* to
local_flush_tlb_*.

I hope that some time these patches will make it into the kernel sources.
(perhaps even into 2.6.x ?)

Sincerely yours

  Thomas Schlichter

P.S.: To test this patch I've also written a module that counts the different
flush_tlb* calls and shows the counts in /proc/tlbstat. If requested I could
send you this, too.

[-- Attachment #2: tlbhook_generic.patch --]
[-- Type: application/octet-stream, Size: 5106 bytes --]

diff -urP linux-2.5.59/include/linux/tlbhook.h linux-2.5.59_patched/include/linux/tlbhook.h
--- linux-2.5.59/include/linux/tlbhook.h	Thu Jan  1 01:00:00 1970
+++ linux-2.5.59_patched/include/linux/tlbhook.h	Wed Jan 22 22:29:30 2003
@@ -0,0 +1,122 @@
+#ifndef _LINUX_TLBHOOK_H
+#define _LINUX_TLBHOOK_H
+
+#include <linux/mm.h>
+
+typedef struct tlb_hook_struct {
+	void (*flush_tlb)( void );
+	void (*flush_tlb_all)( void );
+	void (*flush_tlb_mm)( struct mm_struct *mm );
+	void (*flush_tlb_page)( struct vm_area_struct *vma,
+				unsigned long addr );
+	void (*flush_tlb_range)( struct vm_area_struct *vma,
+				unsigned long start, unsigned long end );
+	void (*flush_tlb_kernel_range)( unsigned long start,
+				unsigned long end );
+	void (*flush_tlb_pgtables)( struct mm_struct *mm,
+				unsigned long start, unsigned long end );
+
+	struct tlb_hook_struct *next;
+	struct tlb_hook_struct *last;
+} tlb_hook_t;
+
+extern tlb_hook_t *tlb_hook_root;
+
+extern int register_tlb_hook( tlb_hook_t *hook );
+extern int unregister_tlb_hook( tlb_hook_t *hook );
+
+static inline void flush_tlb_hook( void )
+{
+	tlb_hook_t *hook = tlb_hook_root;
+
+	while( hook )
+	{
+		if( hook->flush_tlb )
+			hook->flush_tlb( );
+		hook = hook->next;
+	}
+}
+
+
+static inline void flush_tlb_all_hook( void )
+{
+	tlb_hook_t *hook = tlb_hook_root;
+
+	while( hook )
+	{
+		if( hook->flush_tlb_all )
+			hook->flush_tlb_all( );
+		hook = hook->next;
+	}
+}
+
+
+static inline void flush_tlb_mm_hook( struct mm_struct *mm )
+{
+	tlb_hook_t *hook = tlb_hook_root;
+
+	while( hook )
+	{
+		if( hook->flush_tlb_mm )
+			hook->flush_tlb_mm( mm );
+		hook = hook->next;
+	}
+}
+
+
+static inline void flush_tlb_page_hook( struct vm_area_struct *vma,
+				unsigned long addr )
+{
+	tlb_hook_t *hook = tlb_hook_root;
+
+	while( hook )
+	{
+		if( hook->flush_tlb_page )
+			hook->flush_tlb_page( vma, addr );
+		hook = hook->next;
+	}
+}
+
+
+static inline void flush_tlb_range_hook( struct vm_area_struct *vma,
+				unsigned long start, unsigned long end )
+{
+	tlb_hook_t *hook = tlb_hook_root;
+
+	while( hook )
+	{
+		if( hook->flush_tlb_range )
+			hook->flush_tlb_range( vma, start, end );
+		hook = hook->next;
+	}
+}
+
+
+static inline void flush_tlb_kernel_range_hook( unsigned long start,
+				unsigned long end )
+{
+	tlb_hook_t *hook = tlb_hook_root;
+
+	while( hook)
+	{
+		if( hook->flush_tlb_kernel_range )
+			hook->flush_tlb_kernel_range( start, end );
+		hook = hook->next;
+	}
+}
+
+
+static inline void flush_tlb_pgtables_hook( struct mm_struct *mm,
+				unsigned long start, unsigned long end )
+{
+	tlb_hook_t *hook = tlb_hook_root;
+
+	while( hook )
+	{
+		if( hook->flush_tlb_pgtables )
+			hook->flush_tlb_pgtables( mm, start, end );
+		hook = hook->next;
+	}
+}
+
+#endif /* _LINUX_TLBHOOK_H */
diff -urP linux-2.5.59/mm/Makefile linux-2.5.59_patched/mm/Makefile
--- linux-2.5.59/mm/Makefile	Fri Jan 17 03:22:20 2003
+++ linux-2.5.59_patched/mm/Makefile	Wed Jan 22 22:34:06 2003
@@ -2,7 +2,8 @@
 # Makefile for the linux memory manager.
 #
 
-export-objs := shmem.o filemap.o mempool.o page_alloc.o page-writeback.o
+export-objs 		:= shmem.o filemap.o mempool.o page_alloc.o page-writeback.o \
+			   tlbhook.o
 
 mmu-y			:= nommu.o
 mmu-$(CONFIG_MMU)	:= fremap.o highmem.o madvise.o memory.o mincore.o \
@@ -11,6 +12,6 @@
 
 obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o \
 			   page_alloc.o page-writeback.o pdflush.o readahead.o \
-			   slab.o swap.o truncate.o vcache.o vmscan.o $(mmu-y)
+			   slab.o swap.o tlbhook.o truncate.o vcache.o vmscan.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o
diff -urP linux-2.5.59/mm/tlbhook.c linux-2.5.59_patched/mm/tlbhook.c
--- linux-2.5.59/mm/tlbhook.c	Thu Jan  1 01:00:00 1970
+++ linux-2.5.59_patched/mm/tlbhook.c	Wed Jan 22 22:38:42 2003
@@ -0,0 +1,70 @@
+#include <linux/module.h>
+#include <linux/tlbhook.h>
+
+static spinlock_t tlb_hook_lock = SPIN_LOCK_UNLOCKED;
+
+tlb_hook_t *tlb_hook_root = NULL;
+
+
+/* register hooks for the flush_tlb* functions */
+int register_tlb_hook( tlb_hook_t *hook )
+{
+	tlb_hook_t *last;
+
+	if( !hook )
+		return -EINVAL;
+
+	hook->next = NULL;
+
+	// lock the tlb_hook_struct to avoid race conditions
+	spin_lock( &tlb_hook_lock );
+
+	if( tlb_hook_root )
+	{
+		last = tlb_hook_root->last;
+		tlb_hook_root->last = hook;
+
+		hook->last = last;
+		last->next = hook;
+	} else {
+		hook->last = hook;
+		tlb_hook_root = hook;
+	}
+
+	spin_unlock( &tlb_hook_lock );
+
+	return 0;
+}
+
+
+/* unregister hooks for the flush_tlb* functions */
+int unregister_tlb_hook( tlb_hook_t *hook )
+{
+	if( !hook )
+		return -EINVAL;
+
+	// lock the tlb_hook_struct to avoid race conditions
+	spin_lock( &tlb_hook_lock );
+
+	if( hook == tlb_hook_root )
+	{
+		tlb_hook_root = hook->next;
+	} else {
+		hook->last->next = hook->next;
+	}
+
+	if( hook->next )
+	{
+		hook->next->last = hook->last;
+	} else if( tlb_hook_root ) {
+		tlb_hook_root->last = hook->last;
+	}
+
+	spin_unlock( &tlb_hook_lock );
+
+	return 0;
+}
+
+
+EXPORT_SYMBOL( register_tlb_hook );
+EXPORT_SYMBOL( unregister_tlb_hook );

[-- Attachment #3: tlbhook_i386.patch --]
[-- Type: application/octet-stream, Size: 9481 bytes --]

diff -urP linux-2.5.59/arch/i386/kernel/smp.c linux-2.5.59_patched/arch/i386/kernel/smp.c
--- linux-2.5.59/arch/i386/kernel/smp.c	Fri Jan 17 03:21:38 2003
+++ linux-2.5.59_patched/arch/i386/kernel/smp.c	Wed Jan 22 23:19:45 2003
@@ -327,7 +327,7 @@
 			if (flush_va == FLUSH_ALL)
 				local_flush_tlb();
 			else
-				__flush_tlb_one(flush_va);
+				local_flush_tlb_one(flush_va);
 		} else
 			leave_mm(cpu);
 	}
@@ -382,7 +382,7 @@
 	spin_unlock(&tlbstate_lock);
 }
 	
-void flush_tlb_current_task(void)
+void smp_flush_tlb(void)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long cpu_mask;
@@ -396,7 +396,7 @@
 	preempt_enable();
 }
 
-void flush_tlb_mm (struct mm_struct * mm)
+void smp_flush_tlb_mm (struct mm_struct * mm)
 {
 	unsigned long cpu_mask;
 
@@ -415,7 +415,7 @@
 	preempt_enable();
 }
 
-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+void smp_flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long cpu_mask;
@@ -425,7 +425,7 @@
 
 	if (current->active_mm == mm) {
 		if(current->mm)
-			__flush_tlb_one(va);
+			local_flush_tlb_one(va);
 		 else
 		 	leave_mm(smp_processor_id());
 	}
@@ -440,7 +440,7 @@
 {
 	unsigned long cpu = smp_processor_id();
 
-	__flush_tlb_all();
+	local_flush_tlb_all();
 	if (cpu_tlbstate[cpu].state == TLBSTATE_LAZY)
 		leave_mm(cpu);
 }
@@ -450,7 +450,7 @@
 	do_flush_tlb_all_local();
 }
 
-void flush_tlb_all(void)
+void smp_flush_tlb_all(void)
 {
 	smp_call_function (flush_tlb_all_ipi,0,1,1);
 
diff -urP linux-2.5.59/arch/i386/mach-voyager/voyager_smp.c linux-2.5.59_patched/arch/i386/mach-voyager/voyager_smp.c
--- linux-2.5.59/arch/i386/mach-voyager/voyager_smp.c	Fri Jan 17 03:22:02 2003
+++ linux-2.5.59_patched/arch/i386/mach-voyager/voyager_smp.c	Wed Jan 22 23:18:03 2003
@@ -892,7 +892,7 @@
 			if (flush_va == FLUSH_ALL)
 				local_flush_tlb();
 			else
-				__flush_tlb_one(flush_va);
+				local_flush_tlb_one(flush_va);
 		} else
 			leave_mm(cpu);
 	}
@@ -948,7 +948,7 @@
 }
 
 void
-flush_tlb_current_task(void)
+smp_flush_tlb(void)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long cpu_mask;
@@ -965,7 +965,7 @@
 
 
 void
-flush_tlb_mm (struct mm_struct * mm)
+smp_flush_tlb_mm (struct mm_struct * mm)
 {
 	unsigned long cpu_mask;
 
@@ -985,7 +985,7 @@
 	preempt_enable();
 }
 
-void flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
+void smp_flush_tlb_page(struct vm_area_struct * vma, unsigned long va)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long cpu_mask;
@@ -995,7 +995,7 @@
 	cpu_mask = mm->cpu_vm_mask & ~(1 << smp_processor_id());
 	if (current->active_mm == mm) {
 		if(current->mm)
-			__flush_tlb_one(va);
+			local_flush_tlb_one(va);
 		 else
 		 	leave_mm(smp_processor_id());
 	}
@@ -1217,7 +1217,7 @@
 {
 	unsigned long cpu = smp_processor_id();
 
-	__flush_tlb_all();
+	local_flush_tlb_all();
 	if (cpu_tlbstate[cpu].state == TLBSTATE_LAZY)
 		leave_mm(cpu);
 }
@@ -1231,7 +1231,7 @@
 
 /* flush the TLB of every active CPU in the system */
 void
-flush_tlb_all(void)
+smp_flush_tlb_all(void)
 {
 	smp_call_function (flush_tlb_all_function, 0, 1, 1);
 
diff -urP linux-2.5.59/arch/i386/mm/highmem.c linux-2.5.59_patched/arch/i386/mm/highmem.c
--- linux-2.5.59/arch/i386/mm/highmem.c	Fri Jan 17 03:22:14 2003
+++ linux-2.5.59_patched/arch/i386/mm/highmem.c	Wed Jan 22 23:14:33 2003
@@ -42,7 +42,7 @@
 		BUG();
 #endif
 	set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
-	__flush_tlb_one(vaddr);
+	local_flush_tlb_one(vaddr);
 
 	return (void*) vaddr;
 }
@@ -66,7 +66,7 @@
 	 * this pte without first remap it
 	 */
 	pte_clear(kmap_pte-idx);
-	__flush_tlb_one(vaddr);
+	local_flush_tlb_one(vaddr);
 #endif
 
 	dec_preempt_count();
diff -urP linux-2.5.59/arch/i386/mm/init.c linux-2.5.59_patched/arch/i386/mm/init.c
--- linux-2.5.59/arch/i386/mm/init.c	Fri Jan 17 03:22:27 2003
+++ linux-2.5.59_patched/arch/i386/mm/init.c	Wed Jan 22 23:15:05 2003
@@ -369,7 +369,7 @@
 	if (cpu_has_pae)
 		set_in_cr4(X86_CR4_PAE);
 #endif
-	__flush_tlb_all();
+	local_flush_tlb_all();
 
 	kmap_init();
 	zone_sizes_init();
diff -urP linux-2.5.59/arch/i386/mm/pageattr.c linux-2.5.59_patched/arch/i386/mm/pageattr.c
--- linux-2.5.59/arch/i386/mm/pageattr.c	Fri Jan 17 03:22:04 2003
+++ linux-2.5.59_patched/arch/i386/mm/pageattr.c	Wed Jan 22 23:14:01 2003
@@ -53,7 +53,7 @@
 	/* Flush all to work around Errata in early athlons regarding 
 	 * large page flushing. 
 	 */
-	__flush_tlb_all(); 	
+	local_flush_tlb_all(); 	
 }
 
 static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) 
diff -urP linux-2.5.59/arch/i386/mm/pgtable.c linux-2.5.59_patched/arch/i386/mm/pgtable.c
--- linux-2.5.59/arch/i386/mm/pgtable.c	Fri Jan 17 03:23:01 2003
+++ linux-2.5.59_patched/arch/i386/mm/pgtable.c	Wed Jan 22 23:15:32 2003
@@ -81,7 +81,7 @@
 	 * It's enough to flush this one mapping.
 	 * (PGE mappings get flushed as well)
 	 */
-	__flush_tlb_one(vaddr);
+	local_flush_tlb_one(vaddr);
 }
 
 /*
@@ -114,7 +114,7 @@
 	 * It's enough to flush this one mapping.
 	 * (PGE mappings get flushed as well)
 	 */
-	__flush_tlb_one(vaddr);
+	local_flush_tlb_one(vaddr);
 }

 void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags)
diff -urP linux-2.5.59/include/asm-i386/tlbflush.h linux-2.5.59_patched/include/asm-i386/tlbflush.h
--- linux-2.5.59/include/asm-i386/tlbflush.h	Fri Jan 17 03:22:49 2003
+++ linux-2.5.59_patched/include/asm-i386/tlbflush.h	Wed Jan 22 23:12:10 2003
@@ -2,10 +2,10 @@
 #define _I386_TLBFLUSH_H

 #include <linux/config.h>
-#include <linux/mm.h>
+#include <linux/tlbhook.h>
 #include <asm/processor.h>

-#define __flush_tlb()							\
+#define local_flush_tlb()						\
 	do {								\
 		unsigned int tmpreg;					\
 									\
@@ -37,12 +37,12 @@

 extern unsigned long pgkern_mask;

-# define __flush_tlb_all()						\
+# define local_flush_tlb_all()						\
 	do {								\
 		if (cpu_has_pge)					\
 			__flush_tlb_global();				\
 		else							\
-			__flush_tlb();					\
+			local_flush_tlb();				\
 	} while (0)
 
 #define cpu_has_invlpg	(boot_cpu_data.x86 > 3)
@@ -51,14 +51,14 @@
 	__asm__ __volatile__("invlpg %0": :"m" (*(char *) addr))
 
 #ifdef CONFIG_X86_INVLPG
-# define __flush_tlb_one(addr) __flush_tlb_single(addr)
+# define local_flush_tlb_one(addr) __flush_tlb_single(addr)
 #else
-# define __flush_tlb_one(addr)						\
+# define local_flush_tlb_one(addr)					\
 	do {								\
 		if (cpu_has_invlpg)					\
 			__flush_tlb_single(addr);			\
 		else							\
-			__flush_tlb();					\
+			local_flush_tlb();				\
 	} while (0)
 #endif
 
@@ -79,47 +79,94 @@
 
 #ifndef CONFIG_SMP
 
-#define flush_tlb() __flush_tlb()
-#define flush_tlb_all() __flush_tlb_all()
-#define local_flush_tlb() __flush_tlb()
+static inline void flush_tlb(void)
+{
+	flush_tlb_hook();
+	local_flush_tlb();
+}
+
+static inline void flush_tlb_all(void)
+{
+	flush_tlb_all_hook();
+	local_flush_tlb_all();
+}

 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
+	flush_tlb_mm_hook(mm);
 	if (mm == current->active_mm)
-		__flush_tlb();
+		local_flush_tlb();
 }
 
 static inline void flush_tlb_page(struct vm_area_struct *vma,
 	unsigned long addr)
 {
+	flush_tlb_page_hook(vma, addr);
 	if (vma->vm_mm == current->active_mm)
-		__flush_tlb_one(addr);
+		local_flush_tlb_one(addr);
 }
 
 static inline void flush_tlb_range(struct vm_area_struct *vma,
 	unsigned long start, unsigned long end)
 {
+	flush_tlb_range_hook(vma, start, end);
 	if (vma->vm_mm == current->active_mm)
-		__flush_tlb();
+		local_flush_tlb();
+}
+
+static inline void flush_tlb_kernel_range(unsigned long start,
+	unsigned long end)
+{
+	flush_tlb_kernel_range_hook(start, end);
+	local_flush_tlb_all();
 }
 
 #else
 
 #include <asm/smp.h>
 
-#define local_flush_tlb() \
-	__flush_tlb()
+extern void smp_flush_tlb(void);
+extern void smp_flush_tlb_all(void);
+extern void smp_flush_tlb_mm(struct mm_struct *);
+extern void smp_flush_tlb_page(struct vm_area_struct *, unsigned long);
+
+static inline void flush_tlb(void)
+{
+	flush_tlb_hook();
+	smp_flush_tlb();
+}
+
+static inline void flush_tlb_all(void)
+{
+	flush_tlb_all_hook();
+	smp_flush_tlb_all();
+}
+
+static inline void flush_tlb_mm(struct mm_struct *mm)
+{
+	flush_tlb_mm_hook(mm);
+	smp_flush_tlb_mm(mm);
+}
 
-extern void flush_tlb_all(void);
-extern void flush_tlb_current_task(void);
-extern void flush_tlb_mm(struct mm_struct *);
-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+	unsigned long addr)
+{
+	flush_tlb_page_hook(vma, addr);
+	smp_flush_tlb_page(vma, addr);
+}
 
-#define flush_tlb()	flush_tlb_current_task()
+static inline void flush_tlb_range(struct vm_area_struct * vma,
+	unsigned long start, unsigned long end)
+{
+	flush_tlb_range_hook(vma, start, end);
+	smp_flush_tlb_mm(vma->vm_mm);
+}
 
-static inline void flush_tlb_range(struct vm_area_struct * vma, unsigned long start, unsigned long end)
+static inline void flush_tlb_kernel_range(unsigned long start,
+	unsigned long end)
 {
-	flush_tlb_mm(vma->vm_mm);
+	flush_tlb_kernel_range_hook(start, end);
+	smp_flush_tlb_all();
 }

 #define TLBSTATE_OK	1
@@ -136,11 +183,10 @@

 #endif

-#define flush_tlb_kernel_range(start, end) flush_tlb_all()
-
 static inline void flush_tlb_pgtables(struct mm_struct *mm,
 				      unsigned long start, unsigned long end)
 {
+	flush_tlb_pgtables_hook(mm, start, end);
 	/* i386 does not keep any page table caches in TLB */
 }


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2003-01-26 23:21 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2003-01-24 14:24 [PATCH] to support hookable flush_tlb* functions Thomas Schlichter
2003-01-24 21:21 ` Andrew Morton
2003-01-24 22:39   ` David S. Miller
2003-01-26 23:30   ` Thomas Schlichter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox