* [PATCH 2/4] ACE implementation, configuration and makefile
@ 2007-05-06 1:54 Wink Saville
0 siblings, 0 replies; only message in thread
From: Wink Saville @ 2007-05-06 1:54 UTC (permalink / raw)
To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
Signed-off-by: Wink Saville <wink-hKg/bvL8yClBDgjK7y7TUQ@public.gmane.org>
---
arch/x86_64/kernel/Makefile | 2
arch/x86_64/kernel/ace.S | 204 ++++++++++++++++++++++++++++++++++++
arch/x86_64/kernel/entry.S | 45 ++++++++
drivers/Makefile | 1
drivers/ace/Kconfig | 15 ++
drivers/ace/Makefile | 5
drivers/ace/ace_device.c | 246 ++++++++++++++++++++++++++++++++++++++++++++
include/asm-x86_64/ace.h | 124 ++++++++++++++++++++++
mm/Kconfig | 2
9 files changed, 644 insertions(+)
create mode 100644 arch/x86_64/kernel/ace.S
create mode 100644 drivers/ace/Kconfig
create mode 100644 drivers/ace/Makefile
create mode 100644 drivers/ace/ace_device.c
create mode 100644 include/asm-x86_64/ace.h
Index: linux-2.6/arch/x86_64/kernel/Makefile
===================================================================
--- linux-2.6.orig/arch/x86_64/kernel/Makefile
+++ linux-2.6/arch/x86_64/kernel/Makefile
@@ -41,6 +41,8 @@ obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_PCI) += early-quirks.o
+obj-$(CONFIG_ACE_DEVICE) += ace.o
+
obj-y += topology.o
obj-y += intel_cacheinfo.o
obj-y += pcspeaker.o
Index: linux-2.6/arch/x86_64/kernel/ace.S
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86_64/kernel/ace.S
@@ -0,0 +1,204 @@
+/**
+ * ace.S
+ *
+ * This must be position independent code
+ * and must not use any stack.
+ *
+ * Copyright (C) 2006 Saville Software, Inc.
+ *
+ * This code may be used for any purpose whatsoever, but
+ * no warranty of any kind is provided.
+ *
+ * Register usage:
+ * r11 = return address
+ * rdi = parm1
+ * rsi = parm2
+ * rdx = parm3
+ * rcx = parm4
+ * r8 = parm5
+ * r9 = parm6
+ *
+ * Preserved registers:
+ * rbx, rsp, rbp, r12, r13, r14, r15
+ *
+ * Stack usage:
+ * THE STACK MAY NOT BE USED!
+ */
+
+#define __ASSEMBLY__ 1
+#include <asm/ace.h>
+
+#define ACE_SPIN_LOCK_UNLOCKED 1
+#define ACE_CONFIG_SMP 1
+#define MP_LIST_STRUCT_SIZE 16
+
+#####################################
+# Globals we're exporting
+#####################################
+
+ .global ace_code_beg, ace_code_end, ace_code_vtable, ace_code_vtable_end
+
+
+#####################################
+# Enter/leave an ace routine.
+#####################################
+
+ .macro ENTER_ROUTINE tag, name, sep="_" /* emits thunk, vtable slot and body label */
+ .global \tag\sep\name
+ .section ace_thunk,"ax"
+\tag\sep\name: /* C-visible entry point, placed in section ace_thunk */
+ popq %r11 /* take the return address off the stack; the body may not use the stack */
+ jmpq *vtable_\name(%rip) /* indirect jump through the vtable slot of this routine */
+ .previous
+
+ .section ace_vtable,"ax"
+vtable_\name:
+ .quad ACE_CODE_ADDR + \name - ace_code_beg /* body address inside the copy at ACE_CODE_ADDR */
+ .previous
+
+\name: /* start of the routine body, in the section that was current at the macro call */
+ .endm
+
+ .macro LEAVE_ROUTINE
+ jmpq *%r11 /* return to the address ENTER_ROUTINE popped into r11 */
+ .endm
+
+#####################################
+# Begin an ace routine.
+# Generate the preamble for an
+# ace routine, which always begins
+# with acquiring the ace_spin_lock
+#####################################
+
+ .macro BEG_ROUTINE tag, name, sep="_"
+ ENTER_ROUTINE \tag, \name, \sep
+
+#if ACE_CONFIG_SMP
+test_spin_lock_\name:
+ lock; decq ace_spinlock(%rip) /* try to take the lock: 1 (unlocked) becomes 0 */
+ js wait_spin_lock_\name /* went negative: already held, spin in the loop END_ROUTINE emits */
+#endif
+ .endm
+
+ .macro RET_ROUTINE
+#if ACE_CONFIG_SMP
+ movq $ACE_SPIN_LOCK_UNLOCKED, ace_spinlock(%rip) /* release: store the unlocked value back */
+#endif
+ LEAVE_ROUTINE
+ .endm
+
+#####################################
+# End an ace routine.
+# Generate the postamble for an
+# ace routine, which always ends
+# releasing the ace_spin_lock.
+#
+# This macro also emits the spin-wait
+# loop that BEG_ROUTINE branches to
+# when the lock is already held. The
+# C-callable thunk and vtable entry
+# are generated by ENTER_ROUTINE.
+#####################################
+
+ .macro END_ROUTINE tag, name, sep="_"
+ RET_ROUTINE
+
+#if ACE_CONFIG_SMP
+wait_spin_lock_\name:
+ pause
+ cmpq $0, ace_spinlock(%rip)
+ jle wait_spin_lock_\name /* keep spinning while the lock value is <= 0 (held) */
+ jmp test_spin_lock_\name /* looks free: retry the locked decrement in BEG_ROUTINE */
+#endif
+
+ .endm
+
+
+#####################################
+# Begin the ace_code on a page boundary
+#####################################
+
+ .text
+ .code64
+ .align ACE_CODE_SIZE /* ace_code_beg is aligned to the size of the ace page */
+ace_code_beg:
+
+#####################################
+# Reserve space for pAce_data
+#####################################
+
+ .rept ACE_DATA_MAX_SIZE /* zero-filled slot at offset 0, where pAce_data points */
+ .byte 0
+ .endr
+
+
+#####################################
+# Other ace data
+#####################################
+
+ .align 64
+ace_spinlock:
+ .quad ACE_SPIN_LOCK_UNLOCKED /* Spin lock taken by BEG_ROUTINE, released by RET_ROUTINE */
+
+ .align 64
+counters:
+ .rept ACE_TEST_NUM_COUNTERS /* test counters, one quad each */
+ .quad 0
+ .endr
+
+#####################################
+# Define the beginning of the vtable
+#####################################
+
+ .section ace_vtable, "ax"
+ace_code_vtable: /* marks where this file starts contributing to section ace_vtable */
+ .previous
+
+ .align 64
+
+#####################################
+# Increment two of the test counters
+# rdi is index of the first and
+# rsi is the index of the second
+#####################################
+
+BEG_ROUTINE ace, inc_two_counters
+ lea counters(%rip), %rax /* rax = base of the counters array */
+ movq (%rax, %rdi, 8), %rdx /* load counters[rdi] ... */
+ incq %rdx
+ movq %rdx, (%rax, %rdi, 8) /* ... and store it back incremented */
+ movq (%rax, %rsi, 8), %rdx /* same read-modify-write for counters[rsi] */
+ incq %rdx
+ movq %rdx, (%rax, %rsi, 8) /* NOTE(review): the indices are not range-checked here */
+END_ROUTINE ace, inc_two_counters
+
+#####################################
+# Copy a snapshot of the counters
+# to the buffer passed in rdi
+#####################################
+
+BEG_ROUTINE ace, get_counters_snapshot
+ lea counters(%rip), %rsi /* source: the live counters */
+ movq $ACE_TEST_NUM_COUNTERS, %rcx /* number of quads to copy */
+ rep movsq /* copy rcx quads from (rsi) to (rdi); direction follows the DF flag */
+END_ROUTINE ace, get_counters_snapshot
+
+#####################################
+# Define the end of the ace code
+#####################################
+
+ace_code_end: /* ace_init copies from ace_code_beg up to, not including, this label */
+ .byte 0
+
+
+#####################################
+# Define the end of the vtable
+#####################################
+
+ .section ace_vtable,"ax"
+ace_code_vtable_end: /* marks the end of what this file contributes to section ace_vtable */
+ .previous
+
+
+ .end
+
Index: linux-2.6/arch/x86_64/kernel/entry.S
===================================================================
--- linux-2.6.orig/arch/x86_64/kernel/entry.S
+++ linux-2.6/arch/x86_64/kernel/entry.S
@@ -464,6 +464,50 @@ ENTRY(stub_rt_sigreturn)
CFI_ENDPROC
END(stub_rt_sigreturn)
+#ifdef CONFIG_ACE_DEVICE
+/*
+ * Atomic Code Execution handling
+ */
+#include <asm/ace.h>
+
+ace_common: # Entered from HANDLE_ACE with r10 and the 1f return address already pushed
+ pushq %r9 # Save r9
+ xchgq 4*8(%rsp), %r11 # Exchange the return-rip for the value in r11. When this
+ # interrupt completes it will continue at the
+ # address that was in r11.
+ pushfq # Get the current flags
+ popq %r10 # to r10
+ andq $~0xCD5, %r10 # Zero the app level bits (OF,DF,SF,ZF,AF,PF,CF)
+ movq 6*8(%rsp), %r9 # Get return-rflags
+ andq $0xCD5, %r9 # Isolate app level flags (OF,DF,SF,ZF,AF,PF,CF)
+ orq %r9, %r10 # Merge them into the current flags
+ pushq %r10 # Load the merged flags
+ popfq # into rflags
+ popq %r9 # Restore r9
+ popq %r10 # Restore r10 (pushed by HANDLE_ACE)
+ pushq %r11 # Push the old return-rip, where the ace code resumes
+ movq $ace_return, %r11 # ace code will return to ace_return
+ ret # Resume the interrupted ace code so it runs to completion
+ace_return:
+ ret # Return to invoker (the 1: label pushed by HANDLE_ACE)
+
+ .align 8
+ace_code_addr:
+ .quad ACE_CODE_ADDR # 64-bit copy of the ace page address, compared against in HANDLE_ACE
+
+ .macro HANDLE_ACE
+ pushq $1f # Push the return address
+ pushq %r10 # r10 will be used as a temporary register
+ movq 3*8(%rsp), %r10 # Get return-rip (above r10, the 1f address and the interrupt number)
+ andq $ACE_CODE_ADDR_MASK, %r10 # Align return-rip to the page boundary
+ cmp ace_code_addr(%rip), %r10 # Is this in the ace_page
+ je ace_common # Jump if it was; NOTE(review): only the rip is examined here
+ popq %r10 # Restore r10
+ ret # Return to 1f aka: 1: below
+1:
+ .endm
+#endif
+
/*
* initial frame state for interrupts and exceptions
*/
@@ -494,6 +538,7 @@ END(stub_rt_sigreturn)
/* 0(%rsp): interrupt number */
.macro interrupt func
+ HANDLE_ACE
cld
SAVE_ARGS
leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
Index: linux-2.6/drivers/Makefile
===================================================================
--- linux-2.6.orig/drivers/Makefile
+++ linux-2.6/drivers/Makefile
@@ -80,3 +80,4 @@ obj-$(CONFIG_GENERIC_TIME) += clocksourc
obj-$(CONFIG_DMA_ENGINE) += dma/
obj-$(CONFIG_HID) += hid/
obj-$(CONFIG_PPC_PS3) += ps3/
+obj-$(CONFIG_ACE_DEVICE) += ace/
Index: linux-2.6/drivers/ace/Kconfig
===================================================================
--- /dev/null
+++ linux-2.6/drivers/ace/Kconfig
@@ -0,0 +1,15 @@
+#
+# ACE configuration
+#
+
+menu "Atomic Code Execution (ACE)"
+
+config ACE_DEVICE
+ bool "ACE support"
+ ---help---
+ ACE allows code to be atomically executed either from kernel
+ space or user space as if it was surrounded by spin_lock_irqsave
+ and spin_unlock_irqrestore.
+
+endmenu
+
Index: linux-2.6/drivers/ace/Makefile
===================================================================
--- /dev/null
+++ linux-2.6/drivers/ace/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for ACE.
+#
+
+obj-$(CONFIG_ACE_DEVICE) += ace_device.o
Index: linux-2.6/drivers/ace/ace_device.c
===================================================================
--- /dev/null
+++ linux-2.6/drivers/ace/ace_device.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2006 Saville Software, Inc.
+ *
+ * This code may be used for any purpose whatsoever, but
+ * no warranty of any kind is provided.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/cdev.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/device.h>
+#include <linux/kshmem.h>
+
+#include <asm/ace.h>
+
+#define ACE_DEBUG /* tracing is always compiled in; remove this line to silence DPK() */
+#ifdef ACE_DEBUG
+#define DPK(fmt, args...) printk(KERN_ERR "ace " fmt, ## args) /* logs at KERN_ERR with an "ace " prefix */
+#else
+#define DPK(fmt, args...) /* expands to nothing */
+#endif
+
+struct ace_dev_struct /* State of the single ace device (see the ace_dev global) */
+{
+ struct cdev cdev; /* Character device registered by ace_device_init */
+ struct page * ace_code_page; /* struct page of the ace code page (from vmalloc_to_page) */
+ unsigned long ace_code_kvaddr; /* kshmem_addr_to_kvaddr() of ace_code_addr */
+ unsigned long ace_code_addr; /* Address returned by kshmem_alloc_at; must equal ACE_CODE_ADDR */
+ unsigned long ace_code_size; /* Size in bytes of the ace code area (ACE_CODE_SIZE) */
+ struct timer_list timer; /* Self re-arming test timer, see ace_timer */
+ unsigned long timer_delay; /* Timer period in jiffies */
+};
+
+MODULE_AUTHOR("Wink Saville");
+MODULE_LICENSE("Dual BSD/GPL");
+
+int ace_open(struct inode *inode, struct file *pFile);
+int ace_release(struct inode *inode, struct file *pFile);
+int ace_ioctl(struct inode *pInode, struct file *pFile, unsigned int cmd, unsigned long arg);
+
+/*
+ * Module parameters (read-only in sysfs: S_IRUGO)
+ */
+static int major = 240; /* 240 is a "local/experimental" device number for the moment; 0 requests a dynamic major */
+static int minor = 0; /* first (and only) minor number used */
+
+module_param(major, int, S_IRUGO);
+module_param(minor, int, S_IRUGO);
+
+/*
+ * Globals
+ */
+struct ace_dev_struct ace_dev;
+EXPORT_SYMBOL(ace_dev);
+
+/*
+ * File operations of the ace character device, handed to cdev_init()
+ */
+struct file_operations ace_f_ops = {
+ .owner = THIS_MODULE,
+ .open = ace_open,
+ .ioctl = ace_ioctl, /* currently a stub, see ace_ioctl */
+ .release = ace_release,
+};
+
+/*
+ * Initialize the ace page: allocate it at ACE_CODE_ADDR via kshmem, record its
+ * page and kernel virtual address in ace_dev, then copy the ace code into it.
+ * NO-ONE may be using the ACE_CODE at the time this is called.
+ */
+void ace_init(void)
+{
+	unsigned long ace_code_len = &ace_code_end - &ace_code_beg;
+
+	DPK("ace_init: E\n");
+
+	/*
+	 * Be sure the ACE_DATA fits its slot and the code fits the ace page
+	 */
+	BUG_ON(sizeof(struct ace_data_struct) > ACE_DATA_MAX_SIZE);
+	BUG_ON(ace_code_len > ACE_CODE_SIZE);
+
+	/*
+	 * Allocate an ace page
+	 */
+	ace_dev.ace_code_size = ACE_CODE_SIZE;
+	ace_dev.ace_code_addr = (unsigned long)kshmem_alloc_at(ACE_CODE_ADDR, ace_dev.ace_code_size, PAGE_SHARED_EXEC);
+	/*
+	 * Validate before use: a failed or misplaced allocation traps here, not in memcpy
+	 */
+	BUG_ON(ace_dev.ace_code_addr != ACE_CODE_ADDR);
+	ace_dev.ace_code_page = vmalloc_to_page((unsigned char *)ace_dev.ace_code_addr);
+	ace_dev.ace_code_kvaddr = kshmem_addr_to_kvaddr(ace_dev.ace_code_addr);
+	BUG_ON(ace_dev.ace_code_page != virt_to_page(ace_dev.ace_code_kvaddr));
+
+	memcpy((void *)ace_dev.ace_code_addr, &ace_code_beg, ace_code_len);
+
+	DPK("ace_init: X\n");
+}
+EXPORT_SYMBOL(ace_init);
+
+/*
+ * Test timer callback: bump test counters 0 and 1, then re-arm one period later
+ */
+static void ace_timer(unsigned long arg)
+{
+	struct ace_dev_struct *dev = (struct ace_dev_struct *)arg;
+	struct timer_list *t = &dev->timer;
+
+	ace_inc_two_counters(0, 1);
+	t->expires += dev->timer_delay;
+	add_timer(t);
+}
+
+/*
+ * Open: remember the ace device owning this inode's cdev in the file and
+ * call kshmem_user_enable(). Always returns 0.
+ */
+int ace_open(struct inode *inode, struct file *pFile)
+{
+	struct ace_dev_struct *owner = container_of(inode->i_cdev, struct ace_dev_struct, cdev);
+	const int status = 0;
+
+	DPK("ace_open: E\n");
+
+	pFile->private_data = owner;
+	kshmem_user_enable();
+
+	DPK("ace_open: X result=%d\n", status);
+	return status;
+}
+
+/*
+ * Release/Close: call kshmem_user_disable(), pairing with ace_open. Returns 0.
+ */
+int ace_release(struct inode *inode, struct file *pFile)
+{
+	const int status = 0;
+
+	DPK("ace_release: E\n");
+
+	kshmem_user_disable();
+
+	DPK("ace_release: X result=%d\n", status);
+	return status;
+}
+
+/*
+ * Ioctl: no commands are implemented yet, so every request is rejected
+ * with -ENOTTY, the conventional error for an unsupported ioctl command.
+ */
+int ace_ioctl(struct inode *pInode, struct file *pFile, unsigned int cmd, unsigned long arg)
+{
+	int result = -ENOTTY;
+
+	DPK("ace_ioctl: E\n");
+	DPK("ace_ioctl: X result=%d\n", result);
+	return result;
+}
+
+/*
+ * Init routine for the ace device: reserve the device number, set up the ace
+ * page, add the cdev, create the class device and start the test timer.
+ */
+static int ace_device_init(void)
+{
+	int result;
+	dev_t dev_number = 0;
+	static struct class *ace_class;
+
+	DPK("ace_device_init: E\n");
+
+	if (major) {
+		dev_number = MKDEV(major, minor);
+		result = register_chrdev_region(dev_number, 1, "ace");
+		DPK("ace_device_init: static major result=%d\n", result);
+	} else {
+		result = alloc_chrdev_region(&dev_number, minor, 1, "ace");
+		major = MAJOR(dev_number);
+		DPK("ace_device_init: dynamic major result=%d\n", result);
+	}
+
+	if (result < 0) {
+		printk(KERN_WARNING "ace: can't get major %d\n", major);
+		goto done;
+	}
+
+	if (ace_dev.ace_code_addr == 0)
+		ace_init();
+
+	cdev_init(&ace_dev.cdev, &ace_f_ops);	/* also stores &ace_f_ops in cdev.ops */
+	ace_dev.cdev.owner = THIS_MODULE;
+
+	result = cdev_add(&ace_dev.cdev, dev_number, 1);
+	if (result) {
+		DPK("ace_device_init: cdev_add failed\n");
+		unregister_chrdev_region(dev_number, 1);	/* do not leak the reserved number */
+		goto done;
+	}
+
+	/*
+	 * Make an ace class and create the device. NOTE(review): results are unchecked.
+	 */
+	ace_class = class_create(THIS_MODULE, "ace");
+	class_device_create(ace_class, NULL, dev_number, NULL, "ace");
+
+	/*
+	 * Start timer: initialise it first, then fill in its fields and arm it
+	 */
+	init_timer(&ace_dev.timer);
+	ace_dev.timer_delay = msecs_to_jiffies(1);
+	ace_dev.timer.expires = jiffies + ace_dev.timer_delay;
+	ace_dev.timer.data = (unsigned long)&ace_dev;
+	ace_dev.timer.function = ace_timer;
+	add_timer(&ace_dev.timer);
+
+done:
+	DPK("ace_device_init: X result=%d major=%d minor=%d\n", result, major, minor);
+	return result;
+}
+
+/*
+ * Exit routine for ace device: stop the timer, remove the cdev, then release the
+ * device number. NOTE(review): the class created at init is not torn down here.
+ */
+static void ace_device_exit(void)
+{
+	dev_t dev_number = MKDEV(major, minor);
+
+	DPK("ace_device_exit: E\n");
+	del_timer_sync(&ace_dev.timer);
+	cdev_del(&ace_dev.cdev);	/* undo cdev_add() before giving the number back */
+	unregister_chrdev_region(dev_number, 1);
+
+	DPK("ace_device_exit: X\n");
+}
+
+module_init(ace_device_init);
+module_exit(ace_device_exit);
+
Index: linux-2.6/include/asm-x86_64/ace.h
===================================================================
--- /dev/null
+++ linux-2.6/include/asm-x86_64/ace.h
@@ -0,0 +1,124 @@
+/** * Copyright (C) 2006 Saville Software, Inc.
+ *
+ * This code may be used for any purpose whatsoever, but
+ * no warranty of any kind is provided.
+ */
+
+#ifndef _ACE_H
+#define _ACE_H
+
+#define ACE_CODE_ADDR 0x6ffffffff000 /* fixed address the ace page is allocated at */
+#define ACE_CODE_SIZE 4096 /* size in bytes of the ace code area */
+#define ACE_CODE_ADDR_MASK (~(ACE_CODE_SIZE-1)) /* masks an address down to an ACE_CODE_SIZE boundary */
+#define ACE_DATA_MAX_SIZE 256 /* bytes reserved at the start of the area for struct ace_data_struct */
+#define ACE_TEST_NUM_COUNTERS 16 /* number of 64-bit test counters */
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Bookkeeping block located at the very start of the ace page (ACE_CODE_ADDR);
+ * its size must not exceed ACE_DATA_MAX_SIZE.
+ */
+struct ace_data_struct {
+ struct mp_struct * pMprocs; /* array of mprocs, pMprocs[0] is kernel's */
+ unsigned long mprocs_count; /* Number of elements in pMprocs */
+ unsigned int kernel_pool; /* Id of kernel pool, must be 0 */
+ struct mp_mem_pool_struct * pMem_pools; /* Array of mem pools */
+ unsigned long mem_pools_count;/* Number of elements in pMem_pools */
+ struct mp_msg_list_struct * pMsg_lists; /* Array of msg lists */
+ unsigned long msg_lists_count;/* Number of elements in pMsg_lists */
+};
+#define pAce_data ((struct ace_data_struct *)ACE_CODE_ADDR) /* the block, viewed at its fixed address */
+
+/*
+ * Beginning and end of the ace code in ace.S
+ */
+extern char ace_code_beg;
+extern char ace_code_end;
+
+/*
+ * Initialization routine, called from init/main.c
+ */
+extern void ace_init(void);
+
+/*
+ * Ace routines for testing
+ */
+extern uint64_t ace_inc_two_counters(uint64_t cnt0, uint64_t cnt1);
+extern void ace_get_counters_snapshot(uint64_t snapshot[ACE_TEST_NUM_COUNTERS]);
+
+/**
+ * Atomic operations. For x86_64 these are
+ * inherently atomic so they do not need
+ * to be executed in the ace page. For other
+ * architectures this may need to be defined
+ * in the ace page.
+ */
+
+/*
+ * Atomic increment: add one to *pVal with a lock-prefixed incl; returns nothing
+ */
+static __inline__ void ace_inc(volatile int *pVal)
+{
+ __asm__ __volatile__(
+ "lock incl %0"
+ :"=m" (*pVal) /* *pVal is listed as output and as input: it is read and written */
+ :"m" (*pVal));
+}
+
+/*
+ * Atomic decrement: subtract one from *pVal with a lock-prefixed decl; returns nothing
+ */
+static __inline__ void ace_dec(volatile int *pVal)
+{
+ __asm__ __volatile__(
+ "lock decl %0"
+ :"=m" (*pVal) /* *pVal is listed as output and as input: it is read and written */
+ :"m" (*pVal));
+}
+
+/*
+ * Atomic decrement and test.
+ *
+ * Return true (1) if the value becomes zero
+ * else return false (0).
+ */
+static __inline__ int ace_dec_and_test(volatile int *pVal)
+{
+ unsigned char c; /* receives the zero flag left by the decrement */
+
+ __asm__ __volatile__(
+ "lock decl %0; sete %1"
+ :"=m" (*pVal), "=qm" (c)
+ :"m" (*pVal) : "memory");
+ return c != 0;
+}
+
+/*
+ * Atomic compare and exchange, atomically
+ * execute the following algorithm:
+ *
+ * if the current value equals old_val
+ * then change it to new_val, and
+ * always return the previous value
+ *
+ * If (*pVal == old_val) {
+ * *pVal = new;
+ * return old_val;
+ * } else {
+ * return *pVal;
+ * }
+ */
+static __inline__ int ace_cmpxchg(volatile int *pVal, int old_val, int new_val)
+{
+ int prev_val; /* value found in *pVal, delivered in eax by cmpxchg */
+
+ __asm__ __volatile__("lock cmpxchg %1,%2"
+ : "=a"(prev_val)
+ : "r"(new_val), "m"(*pVal), "0"(old_val) /* "0" places old_val in eax, the comparand */
+ : "memory");
+ return prev_val;
+}
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ACE_H */
Index: linux-2.6/mm/Kconfig
===================================================================
--- linux-2.6.orig/mm/Kconfig
+++ linux-2.6/mm/Kconfig
@@ -171,3 +171,5 @@ config KSHMEM
user space programs. For instance interrupt service routines
and user space programs may share the same memory.
+source "drivers/ace/Kconfig"
+
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2007-05-06 1:54 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-05-06 1:54 [PATCH 2/4] ACE implementation, configuration and makefile Wink Saville
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox