From mboxrd@z Thu Jan 1 00:00:00 1970 From: Wink Saville Subject: [PATCH 2/4] ACE implementation, conifguration and makefile Date: Sat, 05 May 2007 18:54:44 -0700 Message-ID: <463D3564.9060307@saville.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org Return-path: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: kvm-devel-bounces-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org Errors-To: kvm-devel-bounces-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org List-Id: kvm.vger.kernel.org Signed-off-by: Wink Saville --- arch/x86_64/kernel/Makefile | 2 arch/x86_64/kernel/ace.S | 204 ++++++++++++++++++++++++++++++++++++ arch/x86_64/kernel/entry.S | 45 ++++++++ drivers/Makefile | 1 drivers/ace/Kconfig | 15 ++ drivers/ace/Makefile | 5 drivers/ace/ace_device.c | 246 ++++++++++++++++++++++++++++++++++++++++++++ include/asm-x86_64/ace.h | 124 ++++++++++++++++++++++ mm/Kconfig | 2 9 files changed, 644 insertions(+) create mode 100644 arch/x86_64/kernel/ace.S create mode 100644 drivers/ace/Kconfig create mode 100644 drivers/ace/Makefile create mode 100644 drivers/ace/ace_device.c create mode 100644 include/asm-x86_64/ace.h Index: linux-2.6/arch/x86_64/kernel/Makefile =================================================================== --- linux-2.6.orig/arch/x86_64/kernel/Makefile +++ linux-2.6/arch/x86_64/kernel/Makefile @@ -41,6 +41,8 @@ obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_PCI) += early-quirks.o +obj-$(CONFIG_ACE_DEVICE) += ace.o + obj-y += topology.o obj-y += intel_cacheinfo.o obj-y += pcspeaker.o Index: linux-2.6/arch/x86_64/kernel/ace.S =================================================================== --- /dev/null +++ linux-2.6/arch/x86_64/kernel/ace.S @@ -0,0 +1,204 @@ +/** + * ace.S + * + * This must be position independent code + * and must not use any stack. 
+ * + * Copyright (C) 2006 Saville Software, Inc. + * + * This code may be used for any purpose whatsoever, but + * no warranty of any kind is provided. + * + * Register usage: + * r11 = return address + * rdi = parm1 + * rsi = parm2 + * rdx = parm3 + * rcx = parm4 + * r8 = parm5 + * r9 = parm6 + * + * Preserved registers: + * rbx, rsp, rbp, r12, r13, r14, r15 + * + * Stack usage: + * THE STACK MAY NOT BE USED! + */ + +#define __ASSEMBLY__ 1 +#include + +#define ACE_SPIN_LOCK_UNLOCKED 1 +#define ACE_CONFIG_SMP 1 +#define MP_LIST_STRUCT_SIZE 16 + +##################################### +# Globals we're exporting +##################################### + + .global ace_code_beg, ace_code_end, ace_code_vtable, ace_code_vtable_end + + +##################################### +# Enter/leave an ace routine. +# ENTER_ROUTINE emits a thunk that +# pops the return address into r11 +# and jumps through the vtable entry +# for the routine; LEAVE_ROUTINE +# returns by jumping through r11. +##################################### + + .macro ENTER_ROUTINE tag, name, sep="_" + .global \tag\sep\name + .section ace_thunk,"ax" +\tag\sep\name: + popq %r11 + jmpq *vtable_\name(%rip) + .previous + + .section ace_vtable,"ax" +vtable_\name: + .quad ACE_CODE_ADDR + \name - ace_code_beg + .previous + +\name: + .endm + + .macro LEAVE_ROUTINE + jmpq *%r11 + .endm + +##################################### +# Begin an ace routine. +# Generate the preamble for an +# ace routine, which always begins +# with acquiring the ace_spin_lock +##################################### + + .macro BEG_ROUTINE tag, name, sep="_" + ENTER_ROUTINE \tag, \name, \sep + +#if ACE_CONFIG_SMP +test_spin_lock_\name: + lock; decq ace_spinlock(%rip) + js wait_spin_lock_\name +#endif + .endm + + .macro RET_ROUTINE +#if ACE_CONFIG_SMP + movq $ACE_SPIN_LOCK_UNLOCKED, ace_spinlock(%rip) +#endif + LEAVE_ROUTINE + .endm + +##################################### +# End an ace routine. +# Generate the postamble for an +# ace routine, which always ends +# releasing the ace_spin_lock. 
+# +# With ACE_CONFIG_SMP this macro +# also generates the wait_spin_lock +# loop that BEG_ROUTINE branches to +# while the lock is held. The +# ace_thunk that lets C call the +# routine and the vtable entry are +# generated by ENTER_ROUTINE. +##################################### + + .macro END_ROUTINE tag, name, sep="_" + RET_ROUTINE + +#if ACE_CONFIG_SMP +wait_spin_lock_\name: + pause + cmpq $0, ace_spinlock(%rip) + jle wait_spin_lock_\name + jmp test_spin_lock_\name +#endif + + .endm + + +##################################### +# Begin the ace_code on a page boundary +##################################### + + .text + .code64 + .align ACE_CODE_SIZE +ace_code_beg: + +##################################### +# Reserve space for pAce_data +##################################### + + .rept ACE_DATA_MAX_SIZE + .byte 0 + .endr + + +##################################### +# Other ace data +##################################### + + .align 64 +ace_spinlock: + .quad ACE_SPIN_LOCK_UNLOCKED /* Spin lock */ + + .align 64 +counters: + .rept ACE_TEST_NUM_COUNTERS + .quad 0 + .endr + +##################################### +# Define the beginning of the vtable +##################################### + + .section ace_vtable, "ax" +ace_code_vtable: + .previous + + .align 64 + +##################################### +# Increment two of the test counters +# rdi is index of the first and +# rsi is the index of the second +##################################### + +BEG_ROUTINE ace, inc_two_counters + lea counters(%rip), %rax + movq (%rax, %rdi, 8), %rdx + incq %rdx + movq %rdx, (%rax, %rdi, 8) + movq (%rax, %rsi, 8), %rdx + incq %rdx + movq %rdx, (%rax, %rsi, 8) +END_ROUTINE ace, inc_two_counters + +##################################### +# Copy a snapshot of the counters to +# the buffer whose address is in rdi +##################################### + +BEG_ROUTINE ace, get_counters_snapshot + lea counters(%rip), %rsi + movq $ACE_TEST_NUM_COUNTERS, %rcx + rep movsq +END_ROUTINE ace, get_counters_snapshot + +##################################### +# Define the end of the ace code 
+##################################### + +ace_code_end: + .byte 0 + + +##################################### +# Define the end of the vtable +##################################### + + .section ace_vtable,"ax" +ace_code_vtable_end: + .previous + + + .end + Index: linux-2.6/arch/x86_64/kernel/entry.S =================================================================== --- linux-2.6.orig/arch/x86_64/kernel/entry.S +++ linux-2.6/arch/x86_64/kernel/entry.S @@ -464,6 +464,50 @@ ENTRY(stub_rt_sigreturn) CFI_ENDPROC END(stub_rt_sigreturn) +#ifdef CONFIG_ACE_DEVICE +/* + * Atomic Code Execution handling + */ +#include + +ace_common: + pushq %r9 # Save r9 + xchgq 4*8(%rsp), %r11 # Exchange the return-rip for what's in r11. When this + # interrupt completes it will continue at the + # address that was in r11. + pushfq # Get the current flags + popq %r10 # to r10 + andq $~0xCD5, %r10 # Zero the app level bits (OF,DF,SF,ZF,AF,PF,CF) + movq 6*8(%rsp), %r9 # Get return-rflags + andq $0xCD5, %r9 # Isolate app level flags (OF,DF,SF,ZF,AF,PF,CF) + orq %r9, %r10 # Use the flags when we continue + pushq %r10 + popfq # Restore flags + popq %r9 # Restore r9 + popq %r10 # Restore r10 + pushq %r11 # Push return-rip which is where we'll continue the ace code + movq $ace_return, %r11 # ace code will return to ace_return + ret # Complete ace code +ace_return: + ret # Return to invoker + + .align 8 +ace_code_addr: + .quad ACE_CODE_ADDR + + .macro HANDLE_ACE + pushq $1f # Push the return address + pushq %r10 # r10 will be used as a temporary register + movq 3*8(%rsp), %r10 # Get return-rip + andq $ACE_CODE_ADDR_MASK, %r10 # Align return-rip to the page boundary + cmp ace_code_addr(%rip), %r10 # Is this in the ace_page + je ace_common # Jump if it was + popq %r10 # Restore r10 + ret # Return to 1f aka: 1: below +1: + .endm +#endif + /* * initial frame state for interrupts and exceptions */ @@ -494,6 +538,7 @@ END(stub_rt_sigreturn) /* 0(%rsp): interrupt number */ .macro interrupt func + 
HANDLE_ACE cld SAVE_ARGS leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler Index: linux-2.6/drivers/Makefile =================================================================== --- linux-2.6.orig/drivers/Makefile +++ linux-2.6/drivers/Makefile @@ -80,3 +80,4 @@ obj-$(CONFIG_GENERIC_TIME) += clocksourc obj-$(CONFIG_DMA_ENGINE) += dma/ obj-$(CONFIG_HID) += hid/ obj-$(CONFIG_PPC_PS3) += ps3/ +obj-$(CONFIG_ACE_DEVICE) += ace/ Index: linux-2.6/drivers/ace/Kconfig =================================================================== --- /dev/null +++ linux-2.6/drivers/ace/Kconfig @@ -0,0 +1,15 @@ +# +# ACE configuration +# + +menu "Atomic Code Execution (ACE)" + +config ACE_DEVICE + bool "ACE support" + ---help--- + ACE allows code to be atomically executed either from kernel + space or user space as if it was surrounded by spin_lock_irqsave + and spin_unlock_irqrestore. + +endmenu + Index: linux-2.6/drivers/ace/Makefile =================================================================== --- /dev/null +++ linux-2.6/drivers/ace/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for ACE. +# + +obj-$(CONFIG_ACE_DEVICE) += ace_device.o Index: linux-2.6/drivers/ace/ace_device.c =================================================================== --- /dev/null +++ linux-2.6/drivers/ace/ace_device.c @@ -0,0 +1,246 @@ +/* + * Copyright (C) 2006 Saville Software, Inc. + * + * This code may be used for any purpose whatsoever, but + * no warranty of any kind is provided. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#define ACE_DEBUG +#ifdef ACE_DEBUG +#define DPK(fmt, args...) printk(KERN_ERR "ace " fmt, ## args) +#else +#define DPK(fmt, args...) 
+#endif + +struct ace_dev_struct +{ + struct cdev cdev; /* Character device structure */ + struct page * ace_code_page; /* The ace code page struct */ + unsigned long ace_code_kvaddr; /* The ace code page as kernel virtual address */ + unsigned long ace_code_addr; /* Address of the ace code page, 0 until ace_init() has run */ + unsigned long ace_code_size; /* Size of ace_code_addr */ + struct timer_list timer; /* Test timer, re-armed by ace_timer() */ + unsigned long timer_delay; /* Period of the test timer in jiffies */ +}; + +MODULE_AUTHOR("Wink Saville"); +MODULE_LICENSE("Dual BSD/GPL"); + +int ace_open(struct inode *inode, struct file *pFile); +int ace_release(struct inode *inode, struct file *pFile); +int ace_ioctl(struct inode *pInode, struct file *pFile, unsigned int cmd, unsigned long arg); + +/* + * Module parameters + */ +static int major = 240; /* 240 is a "local/experimental" device number for the moment, 0 asks for a dynamic major */ +static int minor = 0; + +module_param(major, int, S_IRUGO); +module_param(minor, int, S_IRUGO); + +/* + * Globals + */ +struct ace_dev_struct ace_dev; +EXPORT_SYMBOL(ace_dev); + +/* + * File operations + */ +struct file_operations ace_f_ops = { + .owner = THIS_MODULE, + .open = ace_open, + .ioctl = ace_ioctl, + .release = ace_release, +}; + +/* + * Initialize the ace page. + * + * NO-ONE may be using the ACE_CODE at the time this is called. 
+ */
+void ace_init(void)
+{
+	unsigned long ace_code_len = &ace_code_end - &ace_code_beg;
+
+	DPK("ace_init: E\n");
+
+	/*
+	 * Be sure there is enough space for the ACE_DATA and that the
+	 * code image from ace.S fits in the area allocated below.
+	 */
+	BUG_ON(sizeof(struct ace_data_struct) > ACE_DATA_MAX_SIZE);
+	BUG_ON(ace_code_len > ACE_CODE_SIZE);
+
+	/*
+	 * Allocate the ace page at its fixed address
+	 */
+	ace_dev.ace_code_size = ACE_CODE_SIZE;
+	ace_dev.ace_code_addr = (unsigned long)kshmem_alloc_at(ACE_CODE_ADDR,
+				ace_dev.ace_code_size, PAGE_SHARED_EXEC);
+
+	/*
+	 * Validate the mapping BEFORE anything is written through it, so a
+	 * failed or misplaced allocation is caught here rather than by a
+	 * stray memcpy().
+	 */
+	BUG_ON(ace_dev.ace_code_addr != ACE_CODE_ADDR);
+
+	ace_dev.ace_code_page = vmalloc_to_page((unsigned char *)ace_dev.ace_code_addr);
+	ace_dev.ace_code_kvaddr = kshmem_addr_to_kvaddr(ace_dev.ace_code_addr);
+	BUG_ON(ace_dev.ace_code_page != virt_to_page(ace_dev.ace_code_kvaddr));
+
+	/*
+	 * Copy the code to the ace page
+	 */
+	memcpy((void *)ace_dev.ace_code_addr, &ace_code_beg, ace_code_len);
+
+	DPK("ace_init: X\n");
+}
+EXPORT_SYMBOL(ace_init);
+
+/*
+ * Test timer: bump counters 0 and 1 through the ace page, then re-arm
+ * the timer one timer_delay later.
+ *
+ * arg is the struct ace_dev_struct the timer belongs to.
+ */
+static void ace_timer(unsigned long arg)
+{
+	struct ace_dev_struct *pDev = (struct ace_dev_struct *)arg;
+
+	ace_inc_two_counters(0, 1);
+
+	pDev->timer.expires += pDev->timer_delay;
+	add_timer(&pDev->timer);
+}
+
+/*
+ * Open: remember the device in the file's private_data and enable the
+ * kshmem user mapping. Always returns 0.
+ */
+int ace_open(struct inode *inode, struct file *pFile)
+{
+	int result = 0;
+	struct ace_dev_struct *pDev;
+
+	DPK("ace_open: E\n");
+
+	pDev = container_of(inode->i_cdev, struct ace_dev_struct, cdev);
+	pFile->private_data = (void *)pDev;
+	kshmem_user_enable();
+
+	DPK("ace_open: X result=%d\n", result);
+	return result;
+}
+
+/*
+ * Release/Close: disable the kshmem user mapping enabled by ace_open().
+ * Always returns 0.
+ */
+int ace_release(struct inode *inode, struct file *pFile)
+{
+	int result = 0;
+
+	DPK("ace_release: E\n");
+
+	kshmem_user_disable();
+
+	DPK("ace_release: X result=%d\n", result);
+	return result;
+}
+
+/*
+ * Ioctl
+ *
+ * No commands are implemented yet, so every request is rejected with
+ * -ENOTTY, the conventional "unsupported ioctl" error. -EFAULT, which
+ * this stub used to return, means a bad user-space address.
+ */
+int ace_ioctl(struct inode *pInode, struct file *pFile, unsigned int cmd, unsigned long arg)
+{
+	int result = -ENOTTY;
+
+	DPK("ace_ioctl: E\n");
+
+	DPK("ace_ioctl: X result=%d\n", result);
+	return result;
+}
+
+/*
+ * IS_ERR()/PTR_ERR() for the class checks in ace_device_init(). This
+ * belongs with the other includes at the top of the file; move it there
+ * when the include block is next touched.
+ */
+#include <linux/err.h>
+
+/*
+ * Device class, kept at file scope so ace_device_exit() can tear down
+ * what ace_device_init() created.
+ */
+static struct class *ace_class;
+
+/*
+ * Init routine for the ace device
+ *
+ * Reserves the device number (static when "major" is non-zero, dynamic
+ * otherwise), initializes the ace page if that has not happened yet,
+ * registers the character device, creates the "ace" class and device,
+ * and starts the test timer.
+ *
+ * Returns 0 on success or a negative errno. On failure everything that
+ * was registered up to that point is released again.
+ */
+static int ace_device_init(void)
+{
+	struct class_device *class_dev;
+	dev_t dev_number = 0;
+	int result;
+
+	DPK("ace_device_init: E\n");
+
+	if (major) {
+		dev_number = MKDEV(major, minor);
+		result = register_chrdev_region(dev_number, 1, "ace");
+		DPK("ace_device_init: static major result=%d\n", result);
+	} else {
+		result = alloc_chrdev_region(&dev_number, minor, 1, "ace");
+		major = MAJOR(dev_number);
+		DPK("ace_device_init: dynamic major result=%d\n", result);
+	}
+
+	if (result < 0) {
+		printk(KERN_WARNING "ace: can't get major %d\n", major);
+		goto done;
+	}
+
+	if (ace_dev.ace_code_addr == 0)
+		ace_init();
+
+	/* cdev_init() already stores &ace_f_ops in cdev.ops */
+	cdev_init(&ace_dev.cdev, &ace_f_ops);
+	ace_dev.cdev.owner = THIS_MODULE;
+
+	result = cdev_add(&ace_dev.cdev, dev_number, 1);
+	if (result) {
+		DPK("ace_device_init: cdev_add failed\n");
+		goto err_region;
+	}
+
+	/*
+	 * Make an ace class and create the device
+	 */
+	ace_class = class_create(THIS_MODULE, "ace");
+	if (IS_ERR(ace_class)) {
+		result = PTR_ERR(ace_class);
+		DPK("ace_device_init: class_create failed\n");
+		goto err_cdev;
+	}
+
+	class_dev = class_device_create(ace_class, NULL, dev_number, NULL, "ace");
+	if (IS_ERR(class_dev)) {
+		result = PTR_ERR(class_dev);
+		DPK("ace_device_init: class_device_create failed\n");
+		goto err_class;
+	}
+
+	/*
+	 * Start timer; init_timer() comes first so it cannot disturb the
+	 * fields filled in afterwards.
+	 */
+	init_timer(&ace_dev.timer);
+	ace_dev.timer_delay = msecs_to_jiffies(1);
+	ace_dev.timer.expires = jiffies + ace_dev.timer_delay;
+	ace_dev.timer.data = (unsigned long)&ace_dev;
+	ace_dev.timer.function = ace_timer;
+	add_timer(&ace_dev.timer);
+
+	goto done;
+
+err_class:
+	class_destroy(ace_class);
+err_cdev:
+	cdev_del(&ace_dev.cdev);
+err_region:
+	unregister_chrdev_region(dev_number, 1);
+done:
+	DPK("ace_device_init: X result=%d major=%d minor=%d\n", result, major, minor);
+	return result;
+}
+
+/*
+ * Exit routine for ace device
+ *
+ * Undoes ace_device_init() in reverse order: stop the timer, remove the
+ * class device and class, remove the cdev, release the device number.
+ */
+static void ace_device_exit(void)
+{
+	dev_t dev_number = MKDEV(major, minor);
+
+	DPK("ace_device_exit: E\n");
+
+	del_timer_sync(&ace_dev.timer);
+
+	class_device_destroy(ace_class, dev_number);
+	class_destroy(ace_class);
+	cdev_del(&ace_dev.cdev);
+	unregister_chrdev_region(dev_number, 1);
+
+	DPK("ace_device_exit: X\n");
+}
+
+module_init(ace_device_init);
+module_exit(ace_device_exit);
+
Index: linux-2.6/include/asm-x86_64/ace.h
===================================================================
--- /dev/null
+++ linux-2.6/include/asm-x86_64/ace.h
@@ -0,0 +1,124 @@
+/** * Copyright (C) 2006 Saville Software, Inc. 
+ * + * This code may be used for any purpose whatsoever, but + * no warranty of any kind is provided. + */ + +#ifndef _ACE_H +#define _ACE_H + +#define ACE_CODE_ADDR 0x6ffffffff000 +#define ACE_CODE_SIZE 4096 +#define ACE_CODE_ADDR_MASK (~(ACE_CODE_SIZE-1)) +#define ACE_DATA_MAX_SIZE 256 +#define ACE_TEST_NUM_COUNTERS 16 + +#ifndef __ASSEMBLY__ + +/* + * This data is located at ACE_CODE_ADDR and + * must not exceed ACE_DATA_MAX_SIZE. + */ +struct ace_data_struct { + struct mp_struct * pMprocs; /* array of mprocs, pMprocs[0] is kernel's */ + unsigned long mprocs_count; /* Number of elements in pMprocs */ + unsigned int kernel_pool; /* Id of kernel pool, must be 0 */ + struct mp_mem_pool_struct * pMem_pools; /* Array of mem pools */ + unsigned long mem_pools_count;/* Number of elements in pMem_pools */ + struct mp_msg_list_struct * pMsg_lists; /* Array of msg lists */ + unsigned long msg_lists_count;/* Number of elements in pMsg_lists */ +}; +#define pAce_data ((struct ace_data_struct *)ACE_CODE_ADDR) + +/* + * Beginning and end of the ace code in ace.S + */ +extern char ace_code_beg; +extern char ace_code_end; + +/* + * Initialization routine, called from init/main.c + */ +extern void ace_init(void); + +/* + * Ace routines for testing + * + * NOTE(review): the ace.S implementation of ace_inc_two_counters() + * only ever loads the address of the counters array into rax, so its + * return value is presumably not meaningful -- confirm before using it. + */ +extern uint64_t ace_inc_two_counters(uint64_t cnt0, uint64_t cnt1); +extern void ace_get_counters_snapshot(uint64_t snapshot[ACE_TEST_NUM_COUNTERS]); + +/** + * Atomic operations. For x86_64 these are + * inherently atomic so they do not need + * to be executed in the ace page. For other + * architectures this may need to be defined + * in the ace page. + */ + +/* + * Atomic increment + */ +static __inline__ void ace_inc(volatile int *pVal) +{ + __asm__ __volatile__( + "lock incl %0" + :"=m" (*pVal) + :"m" (*pVal)); +} + +/* + * Atomic decrement + */ +static __inline__ void ace_dec(volatile int *pVal) +{ + __asm__ __volatile__( + "lock decl %0" + :"=m" (*pVal) + :"m" (*pVal)); +} + +/* + * Atomic decrement. 
+ * + * Return true if the value becomes zero + * else return false. + */ +static __inline__ int ace_dec_and_test(volatile int *pVal) +{ + unsigned char c; + + __asm__ __volatile__( + "lock decl %0; sete %1" + :"=m" (*pVal), "=qm" (c) + :"m" (*pVal) : "memory"); + return c != 0; +} + +/* + * Atomic compare and exchange, atomically + * execute the following algorithm: + * + * if the current value equals old_val + * then change it to new_val but + * always return the value that was + * in *pVal before the operation + * + * If (*pVal == old_val) { + * *pVal = new_val; + * return old_val; + * } else { + * return *pVal; + * } + */ +static __inline__ int ace_cmpxchg(volatile int *pVal, int old_val, int new_val) +{ + int prev_val; + + __asm__ __volatile__("lock cmpxchg %1,%2" + : "=a"(prev_val) + : "r"(new_val), "m"(*pVal), "0"(old_val) + : "memory"); + return prev_val; +} +#endif /* !__ASSEMBLY__ */ + +#endif /* _ACE_H */ Index: linux-2.6/mm/Kconfig =================================================================== --- linux-2.6.orig/mm/Kconfig +++ linux-2.6/mm/Kconfig @@ -171,3 +171,5 @@ config KSHMEM user space programs. For instance interrupt service routines and user space programs may share the same memory. +source "drivers/ace/Kconfig" + ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/