From: Wink Saville <wink-hKg/bvL8yClBDgjK7y7TUQ@public.gmane.org>
To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
Subject: [PATCH 2/4] ACE implementation, conifguration and makefile
Date: Sat, 05 May 2007 18:54:44 -0700 [thread overview]
Message-ID: <463D3564.9060307@saville.com> (raw)
Signed-off-by: Wink Saville <wink-hKg/bvL8yClBDgjK7y7TUQ@public.gmane.org>
---
arch/x86_64/kernel/Makefile | 2
arch/x86_64/kernel/ace.S | 204 ++++++++++++++++++++++++++++++++++++
arch/x86_64/kernel/entry.S | 45 ++++++++
drivers/Makefile | 1
drivers/ace/Kconfig | 15 ++
drivers/ace/Makefile | 5
drivers/ace/ace_device.c | 246 ++++++++++++++++++++++++++++++++++++++++++++
include/asm-x86_64/ace.h | 124 ++++++++++++++++++++++
mm/Kconfig | 2
9 files changed, 644 insertions(+)
create mode 100644 arch/x86_64/kernel/ace.S
create mode 100644 drivers/ace/Kconfig
create mode 100644 drivers/ace/Makefile
create mode 100644 drivers/ace/ace_device.c
create mode 100644 include/asm-x86_64/ace.h
Index: linux-2.6/arch/x86_64/kernel/Makefile
===================================================================
--- linux-2.6.orig/arch/x86_64/kernel/Makefile
+++ linux-2.6/arch/x86_64/kernel/Makefile
@@ -41,6 +41,8 @@ obj-$(CONFIG_AUDIT) += audit.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_PCI) += early-quirks.o
+obj-$(CONFIG_ACE_DEVICE) += ace.o
+
obj-y += topology.o
obj-y += intel_cacheinfo.o
obj-y += pcspeaker.o
Index: linux-2.6/arch/x86_64/kernel/ace.S
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86_64/kernel/ace.S
@@ -0,0 +1,204 @@
+/**
+ * ace.S
+ *
+ * This must be position independent code
+ * and must not use any stack.
+ *
+ * Copyright (C) 2006 Saville Software, Inc.
+ *
+ * This code may be used for any purpose whatsoever, but
+ * no warranty of any kind is provided.
+ *
+ * Register usage:
+ * r11 = return address
+ * rdi = parm1
+ * rsi = parm2
+ * rdx = parm3
+ * rcx = parm4
+ * r8 = parm5
+ * r9 = parm6
+ *
+ * Preserved registers:
+ * rbx, rsp, rbp, r12, r13, r14, r15
+ *
+ * Stack usage:
+ * THE STACK MAY NOT BE USED!
+ */
+
+/* Select only the assembler-safe part of <asm/ace.h>; its C declarations are guarded by !__ASSEMBLY__ */
+#define __ASSEMBLY__ 1
+#include <asm/ace.h>
+
+/* Value held by ace_spinlock while the lock is free */
+#define ACE_SPIN_LOCK_UNLOCKED 1
+/* Non-zero: BEG_ROUTINE, RET_ROUTINE and END_ROUTINE emit the spin lock code */
+#define ACE_CONFIG_SMP 1
+/* Not referenced anywhere else in this file */
+#define MP_LIST_STRUCT_SIZE 16
+
+#####################################
+# Globals we're exporting
+#####################################
+
+ .global ace_code_beg, ace_code_end, ace_code_vtable, ace_code_vtable_end
+
+
+#####################################
+# Enter/leave an ace routine.
+#
+# ENTER_ROUTINE tag, name, sep emits:
+#   Thunk   (section ace_thunk): the global symbol
+#           tag+sep+name, which pops the return
+#           address into r11 and jumps through
+#           the vtable slot of the routine.
+#   Vtable  (section ace_vtable): the slot
+#           vtable_name, holding the address of
+#           the routine inside the ace page, i.e.
+#           ACE_CODE_ADDR + (name - ace_code_beg).
+#   Label   name, in the current section, where
+#           the routine body starts.
+#
+# Nothing is pushed, so the routine body
+# runs without a return address on the stack.
+#####################################
+
+ .macro ENTER_ROUTINE tag, name, sep="_"
+ .global \tag\sep\name
+ .section ace_thunk,"ax"
+\tag\sep\name:
+ popq %r11 /* r11 = return address of the caller */
+ jmpq *vtable_\name(%rip) /* Continue in the copy of the routine at ACE_CODE_ADDR */
+ .previous
+
+ .section ace_vtable,"ax"
+vtable_\name:
+ .quad ACE_CODE_ADDR + \name - ace_code_beg /* Offset of the routine from ace_code_beg, rebased to ACE_CODE_ADDR */
+ .previous
+
+\name:
+ .endm
+
+ /* Leave an ace routine: return through r11, which ENTER_ROUTINE loaded with the return address */
+ .macro LEAVE_ROUTINE
+ jmpq *%r11
+ .endm
+
+#####################################
+# Begin an ace routine.
+# Generate the preamble for an
+# ace routine, which always begins
+# with acquiring the ace_spin_lock
+#
+# The label wait_spin_lock_name that
+# the preamble jumps to is emitted by
+# END_ROUTINE, so every BEG_ROUTINE
+# needs a matching END_ROUTINE.
+#####################################
+
+ .macro BEG_ROUTINE tag, name, sep="_"
+ ENTER_ROUTINE \tag, \name, \sep
+
+#if ACE_CONFIG_SMP
+test_spin_lock_\name:
+ lock; decq ace_spinlock(%rip) /* Atomically decrement the lock word; 1 -> 0 means we now own it */
+ js wait_spin_lock_\name /* Result went negative: the lock was already held, go spin */
+#endif
+ .endm
+
+ /* Return from an ace routine: release the spin lock (when ACE_CONFIG_SMP) and jump back through r11 */
+ .macro RET_ROUTINE
+#if ACE_CONFIG_SMP
+ movq $ACE_SPIN_LOCK_UNLOCKED, ace_spinlock(%rip) /* Store the unlocked value to release the lock */
+#endif
+ LEAVE_ROUTINE
+ .endm
+
+#####################################
+# End an ace routine.
+# Generate the postamble for an
+# ace routine, which always ends
+# releasing the ace_spin_lock.
+#
+# This macro also generates the
+# out-of-line spin loop that the
+# preamble from BEG_ROUTINE jumps to
+# when the lock is already held.
+#
+# The C-callable thunk and the vtable
+# entry are generated by ENTER_ROUTINE
+# (invoked from BEG_ROUTINE), not here.
+# The tag and sep arguments are accepted
+# but not used by this macro.
+#####################################
+
+ .macro END_ROUTINE tag, name, sep="_"
+ RET_ROUTINE
+
+#if ACE_CONFIG_SMP
+wait_spin_lock_\name:
+ pause /* Spin-wait hint to the CPU */
+ cmpq $0, ace_spinlock(%rip) /* Plain read of the lock word, no locked write while waiting */
+ jle wait_spin_lock_\name /* Still zero or negative: lock is held, keep spinning */
+ jmp test_spin_lock_\name /* Looks free: retry the atomic decrement */
+#endif
+
+ .endm
+
+
+#####################################
+# Begin the ace_code on a page boundary
+#
+# Everything from ace_code_beg up to
+# ace_code_end is copied by ace_init()
+# to ACE_CODE_ADDR, so data and code
+# here may only reference each other
+# rip-relative.
+#####################################
+
+ .text
+ .code64
+ .align ACE_CODE_SIZE
+ace_code_beg:
+
+#####################################
+# Reserve space for pAce_data
+#
+# pAce_data is defined in <asm/ace.h>
+# as a pointer to ACE_CODE_ADDR, so
+# these zero bytes are the storage for
+# struct ace_data_struct.
+#####################################
+
+ .rept ACE_DATA_MAX_SIZE
+ .byte 0
+ .endr
+
+
+#####################################
+# Other ace data
+#####################################
+
+ .align 64
+ace_spinlock:
+ .quad ACE_SPIN_LOCK_UNLOCKED /* Spin lock */
+
+ .align 64
+counters:
+ .rept ACE_TEST_NUM_COUNTERS /* One 64-bit counter per iteration */
+ .quad 0
+ .endr
+
+#####################################
+# Define the beginning of the vtable
+#
+# Slots are appended to section
+# ace_vtable by each ENTER_ROUTINE
+# that follows this label.
+#####################################
+
+ .section ace_vtable, "ax"
+ace_code_vtable:
+ .previous
+
+ .align 64
+
+#####################################
+# Increment two of the test counters
+# rdi is index of the first and
+# rsi is the index of the second
+#
+# C name: ace_inc_two_counters
+# The indexes are not range checked
+# against ACE_TEST_NUM_COUNTERS.
+# Uses rax and rdx as scratch; rax
+# is left holding the address of the
+# counters array, so no meaningful
+# value is returned.
+#####################################
+
+BEG_ROUTINE ace, inc_two_counters
+ lea counters(%rip), %rax /* rax = &counters[0] */
+ movq (%rax, %rdi, 8), %rdx /* rdx = counters[rdi] */
+ incq %rdx
+ movq %rdx, (%rax, %rdi, 8) /* counters[rdi] = rdx */
+ movq (%rax, %rsi, 8), %rdx /* rdx = counters[rsi] */
+ incq %rdx
+ movq %rdx, (%rax, %rsi, 8) /* counters[rsi] = rdx */
+END_ROUTINE ace, inc_two_counters
+
+#####################################
+# Copy a snapshot of the counters
+# into the buffer whose address is
+# passed in rdi (parm1).
+#
+# C name: ace_get_counters_snapshot
+# Copies ACE_TEST_NUM_COUNTERS
+# quadwords. Overwrites rsi and rcx,
+# and rdi is advanced past the end
+# of the buffer.
+# NOTE(review): rep movsq copies
+# forward only when the direction
+# flag is clear - confirm callers
+# guarantee that.
+#####################################
+
+BEG_ROUTINE ace, get_counters_snapshot
+ lea counters(%rip), %rsi /* Source: the counters array */
+ movq $ACE_TEST_NUM_COUNTERS, %rcx /* Count in quadwords */
+ rep movsq /* Copy rcx quadwords from (rsi) to (rdi) */
+END_ROUTINE ace, get_counters_snapshot
+
+#####################################
+# Define the end of the ace code
+#
+# ace_init() copies the bytes between
+# ace_code_beg and ace_code_end; the
+# byte emitted after the label is
+# outside that range.
+#####################################
+
+ace_code_end:
+ .byte 0
+
+
+#####################################
+# Define the end of the vtable
+#
+# Placed after the last routine so it
+# follows every slot added to section
+# ace_vtable in this file.
+#####################################
+
+ .section ace_vtable,"ax"
+ace_code_vtable_end:
+ .previous
+
+
+ .end
+
Index: linux-2.6/arch/x86_64/kernel/entry.S
===================================================================
--- linux-2.6.orig/arch/x86_64/kernel/entry.S
+++ linux-2.6/arch/x86_64/kernel/entry.S
@@ -464,6 +464,50 @@ ENTRY(stub_rt_sigreturn)
CFI_ENDPROC
END(stub_rt_sigreturn)
+#ifdef CONFIG_ACE_DEVICE
+/*
+ * Atomic Code Execution handling
+ *
+ * HANDLE_ACE runs at the very top of the interrupt macro, while
+ * 0(%rsp) is still the interrupt number and the saved return-rip,
+ * cs and rflags sit directly above it.  If the interrupted rip lies
+ * in the ace page, ace_common lets the interrupted ace routine run
+ * to completion before the interrupt is handled:
+ *
+ *   - the saved return-rip is exchanged with r11, so the interrupt
+ *     later returns to the address the ace routine would have
+ *     returned to;
+ *   - the application level flags of the interrupted code are merged
+ *     into the current flags;
+ *   - the ace routine is resumed with r11 = ace_return, so its final
+ *     jump through r11 lands on ace_return, which returns to the
+ *     "1:" label at the end of HANDLE_ACE.
+ *
+ * Stack as seen by ace_common on entry (8 byte slots from %rsp):
+ *   0: saved r10   1: address of 1f   2: interrupt number
+ *   3: return-rip  4: return-cs       5: return-rflags
+ */
+#include <asm/ace.h>
+
+ace_common:
+	pushq	%r9			# Save r9 (every slot above moves up by one)
+	xchgq	4*8(%rsp), %r11		# Exchange the return-rip for the value in r11. When this
+					# interrupt completes it will continue at the
+					# address that was in r11.
+	pushfq				# Get the current flags
+	popq	%r10			# to r10
+	andq	$~0xCD5, %r10		# Zero the app level bits (OF,DF,SF,ZF,AF,PF,CF)
+	movq	6*8(%rsp), %r9		# Get return-rflags
+	andq	$0xCD5, %r9		# Isolate app level flags (OF,DF,SF,ZF,AF,PF,CF)
+	orq	%r9, %r10		# Use the flags when we continue
+	pushq	%r10
+	popfq				# Restore flags
+	popq	%r9			# Restore r9
+	popq	%r10			# Restore r10
+	pushq	%r11			# Push return-rip which is where the ace code continues
+	movq	$ace_return, %r11	# ace code will return to ace_return
+	ret				# Complete ace code
+ace_return:
+	ret				# Return to invoker (the 1f pushed by HANDLE_ACE)
+
+	.align 8
+ace_code_addr:
+	.quad	ACE_CODE_ADDR
+
+	.macro HANDLE_ACE
+	pushq	$1f			# Push the return address
+	pushq	%r10			# r10 will be used as a temporary register
+	movq	3*8(%rsp), %r10		# Get return-rip
+	andq	$ACE_CODE_ADDR_MASK, %r10 # Align return-rip to the page boundary
+	cmpq	ace_code_addr(%rip), %r10 # Is this in the ace_page
+	je	ace_common		# Jump if it was
+	popq	%r10			# Restore r10
+	ret				# Return to 1f aka: 1: below
+1:
+	.endm
+#else
+/*
+ * ACE is not configured: the interrupt macro still invokes HANDLE_ACE
+ * unconditionally, so provide an empty definition to keep it assembling.
+ */
+	.macro HANDLE_ACE
+	.endm
+#endif
+
/*
* initial frame state for interrupts and exceptions
*/
@@ -494,6 +538,7 @@ END(stub_rt_sigreturn)
/* 0(%rsp): interrupt number */
.macro interrupt func
+ HANDLE_ACE
cld
SAVE_ARGS
leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler
Index: linux-2.6/drivers/Makefile
===================================================================
--- linux-2.6.orig/drivers/Makefile
+++ linux-2.6/drivers/Makefile
@@ -80,3 +80,4 @@ obj-$(CONFIG_GENERIC_TIME) += clocksourc
obj-$(CONFIG_DMA_ENGINE) += dma/
obj-$(CONFIG_HID) += hid/
obj-$(CONFIG_PPC_PS3) += ps3/
+obj-$(CONFIG_ACE_DEVICE) += ace/
Index: linux-2.6/drivers/ace/Kconfig
===================================================================
--- /dev/null
+++ linux-2.6/drivers/ace/Kconfig
@@ -0,0 +1,15 @@
+#
+# ACE configuration
+#
+
+menu "Atomic Code Execution (ACE)"
+
+config ACE_DEVICE
+ bool "ACE support"
+ ---help---
+ ACE allows code to be atomically executed either from kernel
+ space or user space as if it was surrounded by spin_lock_irqsave
+ and spin_unlock_irqrestore.
+
+endmenu
+
Index: linux-2.6/drivers/ace/Makefile
===================================================================
--- /dev/null
+++ linux-2.6/drivers/ace/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for ACE.
+#
+
+obj-$(CONFIG_ACE_DEVICE) += ace_device.o
Index: linux-2.6/drivers/ace/ace_device.c
===================================================================
--- /dev/null
+++ linux-2.6/drivers/ace/ace_device.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2006 Saville Software, Inc.
+ *
+ * This code may be used for any purpose whatsoever, but
+ * no warranty of any kind is provided.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/err.h>
+#include <linux/cdev.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/device.h>
+#include <linux/kshmem.h>
+
+#include <asm/ace.h>
+
+/*
+ * Debug tracing.  ACE_DEBUG is defined unconditionally here, so DPK()
+ * always expands to a printk at KERN_ERR level with an "ace " prefix.
+ * Remove the define below to compile every DPK() call out.
+ */
+#define ACE_DEBUG
+#ifdef ACE_DEBUG
+#define DPK(fmt, args...) printk(KERN_ERR "ace " fmt, ## args)
+#else
+#define DPK(fmt, args...)
+#endif
+
+/*
+ * State of the single ace device (see the global ace_dev).
+ */
+struct ace_dev_struct
+{
+ struct cdev cdev; /* Character device structure */
+ struct page * ace_code_page; /* struct page of the ace code page, from vmalloc_to_page() */
+ unsigned long ace_code_kvaddr; /* The ace code page as kernel virtual address, from kshmem_addr_to_kvaddr() */
+ unsigned long ace_code_addr; /* Address returned by kshmem_alloc_at(); ace_init() requires it to be ACE_CODE_ADDR */
+ unsigned long ace_code_size; /* Size in bytes of the area at ace_code_addr (ACE_CODE_SIZE) */
+ struct timer_list timer; /* Test timer, runs ace_timer() */
+ unsigned long timer_delay; /* Period of the test timer in jiffies */
+};
+
+MODULE_AUTHOR("Wink Saville");
+MODULE_LICENSE("Dual BSD/GPL");
+
+/* File operation handlers, defined further down and referenced by ace_f_ops */
+int ace_open(struct inode *inode, struct file *pFile);
+int ace_release(struct inode *inode, struct file *pFile);
+int ace_ioctl(struct inode *pInode, struct file *pFile, unsigned int cmd, unsigned long arg);
+
+/*
+ * Module parameters
+ *
+ * major: device major number; when 0, ace_device_init() asks for a
+ *        dynamically allocated major instead.
+ * minor: device minor number.
+ */
+static int major = 240; /* 240 is a "local/experimental" device number, used for the moment */
+static int minor = 0;
+
+module_param(major, int, S_IRUGO);
+module_param(minor, int, S_IRUGO);
+
+/*
+ * Globals
+ *
+ * The one and only ace device.  ace_code_addr stays 0 until ace_init()
+ * has run, which ace_device_init() uses to decide whether to call it.
+ */
+struct ace_dev_struct ace_dev;
+EXPORT_SYMBOL(ace_dev);
+
+/*
+ * File operations
+ */
+struct file_operations ace_f_ops = {
+ .owner = THIS_MODULE,
+ .open = ace_open,
+ .ioctl = ace_ioctl,
+ .release = ace_release,
+};
+
+/*
+ * Initialize the ace page.
+ *
+ * Allocates the ace page at ACE_CODE_ADDR and copies the code and data
+ * between ace_code_beg and ace_code_end (see ace.S) into it.  The
+ * results are recorded in ace_dev.
+ *
+ * Every sanity check is made before the value it protects is used:
+ * the image must fit in the page, and the allocation must have landed
+ * at ACE_CODE_ADDR before it is translated or written to.
+ *
+ * NO-ONE may be using the ACE_CODE at the time this is called.
+ */
+void ace_init(void)
+{
+	unsigned long ace_code_len = &ace_code_end - &ace_code_beg;
+
+	DPK("ace_init: E\n");
+
+	/*
+	 * Be sure there is enough space for the ACE_DATA
+	 */
+	BUG_ON(sizeof(struct ace_data_struct) > ACE_DATA_MAX_SIZE);
+
+	/*
+	 * Be sure the image being copied fits in the ace page, otherwise
+	 * the memcpy below would run past the end of the allocation.
+	 */
+	BUG_ON(ace_code_len > ACE_CODE_SIZE);
+
+	/*
+	 * Allocate an ace page
+	 */
+	ace_dev.ace_code_size = ACE_CODE_SIZE;
+	ace_dev.ace_code_addr = (unsigned long)kshmem_alloc_at(ACE_CODE_ADDR, ace_dev.ace_code_size, PAGE_SHARED_EXEC);
+
+	/*
+	 * Validate the allocation before touching it; this also catches a
+	 * failed allocation.
+	 */
+	BUG_ON(ace_dev.ace_code_addr != ACE_CODE_ADDR);
+
+	ace_dev.ace_code_page = vmalloc_to_page((unsigned char *)ace_dev.ace_code_addr);
+	ace_dev.ace_code_kvaddr = kshmem_addr_to_kvaddr(ace_dev.ace_code_addr);
+	BUG_ON(ace_dev.ace_code_page != virt_to_page(ace_dev.ace_code_kvaddr));
+
+	/*
+	 * Copy the code to the ace page
+	 */
+	memcpy((void *)ace_dev.ace_code_addr, &ace_code_beg, ace_code_len);
+
+	DPK("ace_init: X\n");
+}
+EXPORT_SYMBOL(ace_init);
+
+/*
+ * Test timer callback.
+ *
+ * arg is the struct ace_dev_struct that owns the timer.  Each expiry
+ * bumps test counters 0 and 1 through the ace page and then re-arms
+ * the timer one timer_delay after the previous expiry time.
+ */
+static void ace_timer(unsigned long arg)
+{
+	struct ace_dev_struct *dev = (struct ace_dev_struct *)arg;
+	struct timer_list *tmr = &dev->timer;
+
+	ace_inc_two_counters(0, 1);
+
+	tmr->expires = tmr->expires + dev->timer_delay;
+	add_timer(tmr);
+}
+
+/*
+ * Open
+ *
+ * Stores the ace_dev_struct that embeds the inode's cdev in the file's
+ * private_data and calls kshmem_user_enable().  Always returns 0.
+ */
+int ace_open(struct inode *inode, struct file *pFile)
+{
+	const int result = 0;
+
+	DPK("ace_open: E\n");
+
+	pFile->private_data = container_of(inode->i_cdev, struct ace_dev_struct, cdev);
+	kshmem_user_enable();
+
+	DPK("ace_open: X result=%d\n", result);
+	return result;
+}
+
+/*
+ * Release/Close
+ *
+ * Counterpart of ace_open(): calls kshmem_user_disable().
+ * Always returns 0.
+ */
+int ace_release(struct inode *inode, struct file *pFile)
+{
+	const int result = 0;
+
+	DPK("ace_release: E\n");
+	kshmem_user_disable();
+	DPK("ace_release: X result=%d\n", result);
+
+	return result;
+}
+
+/*
+ * Ioctl
+ *
+ * No ioctl commands are implemented yet, so every request is rejected
+ * with -ENOTTY, the conventional error for a command the device does
+ * not understand.  (-EFAULT would wrongly tell user space that it had
+ * passed a bad address.)
+ */
+int ace_ioctl(struct inode *pInode, struct file *pFile, unsigned int cmd, unsigned long arg)
+{
+	int result = -ENOTTY;
+
+	DPK("ace_ioctl: E\n");
+
+	DPK("ace_ioctl: X result=%d\n", result);
+	return result;
+}
+
+/*
+ * Device class created by ace_device_init() and torn down again by
+ * ace_device_exit().
+ */
+static struct class *ace_class;
+
+/*
+ * Init routine for the ace device
+ *
+ * Reserves the device number (static when the "major" parameter is
+ * non-zero, dynamic otherwise), sets up the ace page if that has not
+ * been done yet, registers the character device, creates the "ace"
+ * class and device node and finally starts the test timer.
+ *
+ * Returns 0 on success or a negative errno.  On failure everything
+ * registered so far is released again.
+ * NOTE(review): the ace page set up by ace_init() is left in place on
+ * failure; no call for releasing it is visible here - confirm.
+ */
+static int ace_device_init(void)
+{
+	int result;
+	dev_t dev_number = 0;
+	struct class_device *class_dev;
+
+	DPK("ace_device_init: E\n");
+
+	if (major) {
+		dev_number = MKDEV(major, minor);
+		result = register_chrdev_region(dev_number, 1, "ace");
+		DPK("ace_device_init: static major result=%d\n", result);
+	} else {
+		result = alloc_chrdev_region(&dev_number, minor, 1, "ace");
+		major = MAJOR(dev_number);
+		DPK("ace_device_init: dynamic major result=%d\n", result);
+	}
+
+	if (result < 0) {
+		printk(KERN_WARNING "ace: can't get major %d\n", major);
+		goto done;
+	}
+
+	if (ace_dev.ace_code_addr == 0)
+		ace_init();
+
+	cdev_init(&ace_dev.cdev, &ace_f_ops);
+	ace_dev.cdev.owner = THIS_MODULE;
+	ace_dev.cdev.ops = &ace_f_ops;
+
+	result = cdev_add(&ace_dev.cdev, dev_number, 1);
+	if (result) {
+		DPK("ace_device_init: cdev_add failed\n");
+		goto err_region;
+	}
+
+	/*
+	 * Make an ace class and create the device
+	 */
+	ace_class = class_create(THIS_MODULE, "ace");
+	if (IS_ERR(ace_class)) {
+		result = PTR_ERR(ace_class);
+		ace_class = NULL;
+		DPK("ace_device_init: class_create failed\n");
+		goto err_cdev;
+	}
+
+	class_dev = class_device_create(ace_class, NULL, dev_number, NULL, "ace");
+	if (IS_ERR(class_dev)) {
+		result = PTR_ERR(class_dev);
+		DPK("ace_device_init: class_device_create failed\n");
+		goto err_class;
+	}
+
+	/*
+	 * Start timer.  The timer is initialized first so that nothing
+	 * init_timer() does can disturb the fields set afterwards.
+	 */
+	init_timer(&ace_dev.timer);
+	ace_dev.timer_delay = msecs_to_jiffies(1);
+	ace_dev.timer.expires = jiffies + ace_dev.timer_delay;
+	ace_dev.timer.data = (unsigned long)&ace_dev;
+	ace_dev.timer.function = ace_timer;
+	add_timer(&ace_dev.timer);
+
+	goto done;
+
+err_class:
+	class_destroy(ace_class);
+	ace_class = NULL;
+err_cdev:
+	cdev_del(&ace_dev.cdev);
+err_region:
+	unregister_chrdev_region(dev_number, 1);
+done:
+	DPK("ace_device_init: X result=%d major=%d minor=%d\n", result, major, minor);
+	return result;
+}
+
+/*
+ * Exit routine for ace device
+ *
+ * Undoes ace_device_init() in reverse order: stops the test timer,
+ * removes the device node and class, deletes the character device and
+ * gives the device number back.
+ */
+static void ace_device_exit(void)
+{
+	dev_t dev_number = MKDEV(major, minor);
+
+	DPK("ace_device_exit: E\n");
+
+	del_timer_sync(&ace_dev.timer);
+
+	if (ace_class) {
+		class_device_destroy(ace_class, dev_number);
+		class_destroy(ace_class);
+		ace_class = NULL;
+	}
+
+	cdev_del(&ace_dev.cdev);
+	unregister_chrdev_region(dev_number, 1);
+
+	DPK("ace_device_exit: X\n");
+}
+
+module_init(ace_device_init);
+module_exit(ace_device_exit);
+
Index: linux-2.6/include/asm-x86_64/ace.h
===================================================================
--- /dev/null
+++ linux-2.6/include/asm-x86_64/ace.h
@@ -0,0 +1,124 @@
+/** * Copyright (C) 2006 Saville Software, Inc.
+ *
+ * This code may be used for any purpose whatsoever, but
+ * no warranty of any kind is provided.
+ */
+
+#ifndef _ACE_H
+#define _ACE_H
+
+/* Fixed virtual address at which the ace page is placed */
+#define ACE_CODE_ADDR 0x6ffffffff000
+/* Size in bytes of the ace page; ace.S also aligns ace_code_beg to this */
+#define ACE_CODE_SIZE 4096
+/* Mask that rounds an address down to a multiple of ACE_CODE_SIZE */
+#define ACE_CODE_ADDR_MASK (~(ACE_CODE_SIZE-1))
+/* Bytes reserved at the start of the ace page for struct ace_data_struct */
+#define ACE_DATA_MAX_SIZE 256
+/* Number of 64-bit test counters kept in the ace page */
+#define ACE_TEST_NUM_COUNTERS 16
+
+#ifndef __ASSEMBLY__
+
+/*
+ * This data is located at ACE_CODE_ADDR and
+ * must not exceed ACE_DATA_MAX_SIZE.
+ *
+ * ace.S reserves ACE_DATA_MAX_SIZE zero bytes at the start of the
+ * ace page for it, and ace_init() BUG()s if the structure is larger.
+ */
+struct ace_data_struct {
+ struct mp_struct * pMprocs; /* array of mprocs, pMprocs[0] is kernel's */
+ unsigned long mprocs_count; /* Number elements in pMprocs */
+ unsigned int kernel_pool; /* Id of kernel pool, must be 0 */
+ struct mp_mem_pool_struct * pMem_pools; /* Array of mem pools */
+ unsigned long mem_pools_count;/* Number of elements in pMem_pools */
+ struct mp_msg_list_struct * pMsg_lists; /* Array of msg lists */
+ unsigned long msg_lists_count;/* Number of elements in pMsg_lists */
+};
+/* The ace data, accessed in place at the start of the ace page */
+#define pAce_data ((struct ace_data_struct *)ACE_CODE_ADDR)
+
+/*
+ * Beginning and end of the ace code in ace.S
+ *
+ * Only the addresses of these symbols are meaningful; ace_init()
+ * copies the bytes from &ace_code_beg up to &ace_code_end.
+ */
+extern char ace_code_beg;
+extern char ace_code_end;
+
+/*
+ * Initialization routine, called from init/main.c
+ */
+extern void ace_init(void);
+
+/*
+ * Ace routines for testing
+ *
+ * ace_inc_two_counters: increment test counters cnt0 and cnt1.
+ *   NOTE(review): the assembly in ace.S does not load a result into
+ *   the return register, so the returned value looks meaningless -
+ *   confirm before relying on it.
+ * ace_get_counters_snapshot: copy all ACE_TEST_NUM_COUNTERS counters
+ *   into snapshot.
+ */
+extern uint64_t ace_inc_two_counters(uint64_t cnt0, uint64_t cnt1);
+extern void ace_get_counters_snapshot(uint64_t snapshot[ACE_TEST_NUM_COUNTERS]);
+
+/**
+ * Atomic operations. For x86_64 these are
+ * inherently atomic so they do not need
+ * to be executed in the ace page. For other
+ * architectures this may need to be defined
+ * in the ace page.
+ */
+
+/*
+ * Atomic increment
+ *
+ * Adds one to *pVal with a single locked instruction.
+ * *pVal is listed as both output and input because the
+ * instruction reads and writes it.
+ */
+static __inline__ void ace_inc(volatile int *pVal)
+{
+ __asm__ __volatile__(
+ "lock incl %0"
+ :"=m" (*pVal)
+ :"m" (*pVal));
+}
+
+/*
+ * Atomic decrement
+ *
+ * Subtracts one from *pVal with a single locked instruction.
+ * *pVal is listed as both output and input because the
+ * instruction reads and writes it.
+ */
+static __inline__ void ace_dec(volatile int *pVal)
+{
+ __asm__ __volatile__(
+ "lock decl %0"
+ :"=m" (*pVal)
+ :"m" (*pVal));
+}
+
+/*
+ * Atomic decrement.
+ *
+ * Return true if the value becomes zero
+ * else return false.
+ *
+ * The result is taken from the zero flag left by the
+ * locked decrement (sete), so the test refers to exactly
+ * the value this call produced.
+ */
+static __inline__ int ace_dec_and_test(volatile int *pVal)
+{
+ unsigned char c; /* Set to 1 by sete when the decrement reached zero */
+
+ __asm__ __volatile__(
+ "lock decl %0; sete %1"
+ :"=m" (*pVal), "=qm" (c)
+ :"m" (*pVal) : "memory");
+ return c != 0;
+}
+
+/*
+ * Atomic compare and exchange, atomically
+ * execute the following algorithm:
+ *
+ * if the current value equals old_value
+ * then change it to new value but
+ * always return the current value
+ *
+ * If (*pVal == old_val) {
+ * *pVal = new;
+ * return old_val;
+ * } else {
+ * return *pVal;
+ * }
+ *
+ * The caller can tell whether the exchange happened by
+ * comparing the return value with old_val.
+ */
+static __inline__ int ace_cmpxchg(volatile int *pVal, int old_val, int new_val)
+{
+ int prev_val; /* Value found in *pVal, delivered in the accumulator */
+
+ /* old_val is tied to operand 0 ("0"), i.e. it is passed in the same register that returns prev_val */
+ __asm__ __volatile__("lock cmpxchg %1,%2"
+ : "=a"(prev_val)
+ : "r"(new_val), "m"(*pVal), "0"(old_val)
+ : "memory");
+ return prev_val;
+}
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ACE_H */
Index: linux-2.6/mm/Kconfig
===================================================================
--- linux-2.6.orig/mm/Kconfig
+++ linux-2.6/mm/Kconfig
@@ -171,3 +171,5 @@ config KSHMEM
user space programs. For instance interrupt service routines
and user space programs may share the same memory.
+source "drivers/ace/Kconfig"
+
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
reply other threads:[~2007-05-06 1:54 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=463D3564.9060307@saville.com \
--to=wink-hkg/bvl8yclbdgjk7y7tuq@public.gmane.org \
--cc=kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.