All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/4] ACE implementation, configuration and makefile
@ 2007-05-06  1:54 Wink Saville
  0 siblings, 0 replies; only message in thread
From: Wink Saville @ 2007-05-06  1:54 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Signed-off-by: Wink Saville <wink-hKg/bvL8yClBDgjK7y7TUQ@public.gmane.org>
---
  arch/x86_64/kernel/Makefile |    2
  arch/x86_64/kernel/ace.S    |  204 ++++++++++++++++++++++++++++++++++++
  arch/x86_64/kernel/entry.S  |   45 ++++++++
  drivers/Makefile            |    1
  drivers/ace/Kconfig         |   15 ++
  drivers/ace/Makefile        |    5
  drivers/ace/ace_device.c    |  246 ++++++++++++++++++++++++++++++++++++++++++++
  include/asm-x86_64/ace.h    |  124 ++++++++++++++++++++++
  mm/Kconfig                  |    2
  9 files changed, 644 insertions(+)
  create mode 100644 arch/x86_64/kernel/ace.S
  create mode 100644 drivers/ace/Kconfig
  create mode 100644 drivers/ace/Makefile
  create mode 100644 drivers/ace/ace_device.c
  create mode 100644 include/asm-x86_64/ace.h

Index: linux-2.6/arch/x86_64/kernel/Makefile
===================================================================
--- linux-2.6.orig/arch/x86_64/kernel/Makefile
+++ linux-2.6/arch/x86_64/kernel/Makefile
@@ -41,6 +41,8 @@ obj-$(CONFIG_AUDIT)		+= audit.o
  obj-$(CONFIG_MODULES)		+= module.o
  obj-$(CONFIG_PCI)		+= early-quirks.o

+obj-$(CONFIG_ACE_DEVICE)	+= ace.o
+
  obj-y				+= topology.o
  obj-y				+= intel_cacheinfo.o
  obj-y				+= pcspeaker.o
Index: linux-2.6/arch/x86_64/kernel/ace.S
===================================================================
--- /dev/null
+++ linux-2.6/arch/x86_64/kernel/ace.S
@@ -0,0 +1,204 @@
+/**
+ * ace.S
+ *
+ * This must be position independent code
+ * and must not use any stack.
+ *
+ * Copyright (C) 2006 Saville Software, Inc.
+ *
+ * This code may be used for any purpose whatsoever, but
+ * no warranty of any kind is provided.
+ *
+ * Register usage:
+ *  r11 = return address
+ *  rdi = parm1
+ *  rsi = parm2
+ *  rdx = parm3
+ *  rcx = parm4
+ *  r8  = parm5
+ *  r9  = parm6
+ *
+ * Preserved registers:
+ * rbx, rsp, rbp, r12, r13, r14, r15
+ *
+ * Stack usage:
+ *  THE STACK MAY NOT BE USED!
+ */
+
+#define __ASSEMBLY__		1
+#include <asm/ace.h>
+
+#define	ACE_SPIN_LOCK_UNLOCKED	1
+#define	ACE_CONFIG_SMP		1
+#define MP_LIST_STRUCT_SIZE	16
+
+#####################################
+# Globals we're exporting
+#####################################
+
+	.global	ace_code_beg, ace_code_end, ace_code_vtable, ace_code_vtable_end
+
+
+#####################################
+# Enter/leave an ace routine.
+#
+# ENTER_ROUTINE tag, name [, sep]
+# emits three things:
+#  - a global thunk named
+#    <tag><sep><name> in section
+#    ace_thunk.  The thunk pops the
+#    caller's return address into r11
+#    and jumps through the vtable
+#    slot of the routine.
+#  - the vtable slot vtable_<name> in
+#    section ace_vtable, holding
+#    ACE_CODE_ADDR plus the offset of
+#    <name> from ace_code_beg.
+#  - the label <name> itself, which
+#    starts the routine body in the
+#    current section.
+#####################################
+
+	.macro	ENTER_ROUTINE	tag, name, sep="_"
+	.global \tag\sep\name
+	.section ace_thunk,"ax"
+\tag\sep\name:
+	popq	%r11				/* return address -> r11, stack is now untouched */
+	jmpq	*vtable_\name(%rip)		/* continue at the address stored in the vtable slot */
+	.previous
+
+	.section ace_vtable,"ax"
+vtable_\name:
+	.quad	ACE_CODE_ADDR + \name - ace_code_beg
+	.previous
+
+\name:
+	.endm
+
+	.macro LEAVE_ROUTINE
+	/* Return to the address the thunk (or the interrupt path) left in r11. */
+	jmpq	*%r11
+	.endm
+
+#####################################
+# Begin an ace routine.
+# Generate the preamble for an
+# ace routine: the thunk, vtable slot
+# and entry label (via ENTER_ROUTINE)
+# followed, when ACE_CONFIG_SMP is
+# non-zero, by the attempt to acquire
+# ace_spinlock.
+#
+# The contended path jumps to
+# wait_spin_lock_<name>, which is
+# emitted by END_ROUTINE, so every
+# BEG_ROUTINE needs a matching
+# END_ROUTINE with the same name.
+#####################################
+
+	.macro	BEG_ROUTINE	tag, name, sep="_"
+	ENTER_ROUTINE	\tag, \name, \sep
+
+#if ACE_CONFIG_SMP
+test_spin_lock_\name:
+	lock; decq ace_spinlock(%rip)		/* 1 -> 0 means we now own the lock */
+	js	wait_spin_lock_\name		/* result went negative: someone else holds it */
+#endif
+	.endm
+
+	.macro RET_ROUTINE
+	/*
+	 * Leave an ace routine: when ACE_CONFIG_SMP is non-zero store the
+	 * unlocked value back into ace_spinlock, then jump to the return
+	 * address held in r11 (LEAVE_ROUTINE).
+	 */
+#if ACE_CONFIG_SMP
+	movq	$ACE_SPIN_LOCK_UNLOCKED, ace_spinlock(%rip)
+#endif
+	LEAVE_ROUTINE
+	.endm
+
+#####################################
+# End an ace routine.
+# Generate the postamble for an
+# ace routine, which always ends by
+# releasing ace_spinlock and jumping
+# back through r11 (RET_ROUTINE).
+#
+# When ACE_CONFIG_SMP is non-zero this
+# macro also emits the out-of-line
+# wait loop wait_spin_lock_<name>
+# used by BEG_ROUTINE when the lock
+# is contended.
+#
+# The thunk callable from C and the
+# vtable entry are generated by
+# ENTER_ROUTINE (through BEG_ROUTINE),
+# not by this macro.
+#####################################
+
+	.macro	END_ROUTINE tag, name, sep="_"
+	RET_ROUTINE
+
+#if ACE_CONFIG_SMP
+wait_spin_lock_\name:
+	pause
+	cmpq	$0, ace_spinlock(%rip)
+	jle	wait_spin_lock_\name		/* still held (value <= 0): keep spinning */
+	jmp	test_spin_lock_\name		/* looks free: retry the atomic decrement */
+#endif
+
+	.endm
+
+
+#####################################
+# Begin the ace_code on a page boundary
+#####################################
+
+	.text
+	.code64
+	.align	ACE_CODE_SIZE
+ace_code_beg:
+
+#####################################
+# Reserve space for pAce_data
+#####################################
+
+	.rept	ACE_DATA_MAX_SIZE
+	.byte	0
+	.endr
+
+
+#####################################
+# Other ace data
+#
+# ace_spinlock: one quadword, set to
+# ACE_SPIN_LOCK_UNLOCKED (1) when
+# free.  BEG_ROUTINE decrements it
+# atomically to take the lock and
+# RET_ROUTINE stores 1 to release it.
+#
+# counters: ACE_TEST_NUM_COUNTERS
+# quadword test counters, all zero
+# initially.
+#
+# Both are aligned to 64 bytes.
+#####################################
+
+	.align	64
+ace_spinlock:
+	.quad	ACE_SPIN_LOCK_UNLOCKED	/* Spin lock */
+
+	.align	64
+counters:
+	.rept	ACE_TEST_NUM_COUNTERS
+	.quad	0
+	.endr
+
+#####################################
+# Define the beginning of the vtable
+#####################################
+
+	.section ace_vtable, "ax"
+ace_code_vtable:
+	.previous
+
+	.align	64
+
+#####################################
+# Increment two of the test counters
+# rdi is index of the first and
+# rsi is the index of the second
+#
+# Each counter is read, incremented
+# and written back while ace_spinlock
+# is held.  The indexes are used
+# unchecked, so callers must keep
+# them below ACE_TEST_NUM_COUNTERS.
+# rax and rdx are overwritten; rax is
+# left holding the address of the
+# counters array, not a result.
+#####################################
+
+BEG_ROUTINE ace, inc_two_counters
+	lea	counters(%rip), %rax
+	movq	(%rax, %rdi, 8), %rdx
+	incq	%rdx
+	movq	%rdx, (%rax, %rdi, 8)
+	movq	(%rax, %rsi, 8), %rdx
+	incq	%rdx
+	movq	%rdx, (%rax, %rsi, 8)
+END_ROUTINE ace, inc_two_counters
+
+#####################################
+# Copy a snapshot of the counters
+# into the caller's buffer.
+#
+# rdi is the destination buffer; it
+# must have room for
+# ACE_TEST_NUM_COUNTERS quadwords.
+# The copy runs while ace_spinlock is
+# held.  rsi, rcx and rdi are
+# overwritten.
+# NOTE(review): rep movsq copies
+# upwards only when the direction
+# flag is clear; this code assumes
+# the caller guarantees that.
+#####################################
+
+BEG_ROUTINE ace, get_counters_snapshot
+	lea	counters(%rip), %rsi
+	movq	$ACE_TEST_NUM_COUNTERS, %rcx
+	rep movsq
+END_ROUTINE ace, get_counters_snapshot
+
+#####################################
+# Define the end of the ace code
+#####################################
+
+ace_code_end:
+	.byte	0
+
+
+#####################################
+# Define the end of the vtable
+#####################################
+
+	.section ace_vtable,"ax"
+ace_code_vtable_end:
+	.previous
+
+
+	.end
+
Index: linux-2.6/arch/x86_64/kernel/entry.S
===================================================================
--- linux-2.6.orig/arch/x86_64/kernel/entry.S
+++ linux-2.6/arch/x86_64/kernel/entry.S
@@ -464,6 +464,50 @@ ENTRY(stub_rt_sigreturn)
  	CFI_ENDPROC
  END(stub_rt_sigreturn)

+#ifdef CONFIG_ACE_DEVICE
+/*
+ * Atomic Code Execution handling
+ */
+#include <asm/ace.h>
+
+/*
+ * ace_common - finish an interrupted ace routine before handling the interrupt.
+ *
+ * Reached from HANDLE_ACE when the interrupted rip lies inside the ace page.
+ * On entry the stack holds, from the top: the saved r10, the address of the
+ * "1:" label in HANDLE_ACE, and then the frame the interrupt macro was
+ * entered with (return-rip is at 3*8(%rsp) as read by HANDLE_ACE).
+ *
+ * The code swaps the interrupted rip with r11 (the ace routine's own return
+ * address), so that the interrupt will eventually resume at the caller of
+ * the ace routine.  It then merges the interrupted context's application
+ * flags (mask 0xCD5) into the current flags, restores r9/r10, and "returns"
+ * into the ace routine at the interrupted rip with r11 = ace_return.  When
+ * the ace routine finishes with jmpq *%r11 it lands on ace_return, whose ret
+ * pops the address of "1:" and resumes normal interrupt processing.
+ *
+ * The order of pushes and pops below is load-bearing: every n*8(%rsp)
+ * offset depends on it.
+ */
+ace_common:
+	pushq	%r9				# Save r9
+	xchgq	4*8(%rsp), %r11			# Exchange the return-rip for what's in r11. When this
+						# interrupt completes it will continue at the
+						# address that was in r11.
+	pushfq					# Get the current flags
+	popq	%r10				#   to r10
+	andq	$~0xCD5, %r10			# Zero the app level bits (OF,DF,SF,ZF,AF,PF,CF)
+	movq	6*8(%rsp), %r9			# Get return-rflags
+	andq	$0xCD5, %r9			# Isolate app level flags (OF,DF,SF,ZF,AF,PF,CF)
+	orq	%r9, %r10			# Use the flags when we continue
+	pushq	%r10
+	popfq					# Install the merged flags
+	popq	%r9				# Restore r9
+	popq	%r10				# Restore r10
+	pushq	%r11				# Push return-rip which is where we'll continue the ace code
+	movq	$ace_return, %r11		# ace code will return to ace_return
+	ret					# Complete ace code
+ace_return:
+	ret					# Return to invoker (the "1:" label pushed by HANDLE_ACE)
+
+	.align	8
+ace_code_addr:
+	.quad	ACE_CODE_ADDR
+
+	.macro HANDLE_ACE
+	/*
+	 * Placed at the top of the interrupt macro.  Checks whether the
+	 * interrupted rip, rounded down with ACE_CODE_ADDR_MASK, equals
+	 * ACE_CODE_ADDR.  If so, control goes to ace_common, which lets the
+	 * ace routine run to completion and then comes back to "1:" via the
+	 * return address pushed here.  Otherwise r10 is restored and the ret
+	 * pops that same address, falling through to "1:" with the stack
+	 * exactly as it was on entry.
+	 */
+	pushq	$1f				# Push the return address
+	pushq	%r10				# r10 will be used as a temporary register
+	movq	3*8(%rsp), %r10			# Get return-rip
+	andq	$ACE_CODE_ADDR_MASK, %r10	# Align return-rip to the page boundary
+	cmp	ace_code_addr(%rip), %r10	# Is this in the ace_page
+	je	ace_common			# Jump if it was
+	popq	%r10				# Restore r10
+	ret					# Return to 1f aka: 1: below
+1:
+	.endm
+#endif
+
  /*
   * initial frame state for interrupts and exceptions
   */
@@ -494,6 +538,7 @@ END(stub_rt_sigreturn)

  /* 0(%rsp): interrupt number */
  	.macro interrupt func
+	HANDLE_ACE
  	cld
  	SAVE_ARGS
  	leaq -ARGOFFSET(%rsp),%rdi	# arg1 for handler
Index: linux-2.6/drivers/Makefile
===================================================================
--- linux-2.6.orig/drivers/Makefile
+++ linux-2.6/drivers/Makefile
@@ -80,3 +80,4 @@ obj-$(CONFIG_GENERIC_TIME)	+= clocksourc
  obj-$(CONFIG_DMA_ENGINE)	+= dma/
  obj-$(CONFIG_HID)		+= hid/
  obj-$(CONFIG_PPC_PS3)		+= ps3/
+obj-$(CONFIG_ACE_DEVICE)	+= ace/
Index: linux-2.6/drivers/ace/Kconfig
===================================================================
--- /dev/null
+++ linux-2.6/drivers/ace/Kconfig
@@ -0,0 +1,15 @@
+#
+# ACE configuration
+#
+
+menu "Atomic Code Execution (ACE)"
+
+config ACE_DEVICE
+	bool "ACE support"
+	---help---
+	  ACE allows code to be atomically executed either from kernel
+	  space or user space as if it was surrounded by spin_lock_irqsave
+	  and spin_unlock_irqrestore.
+
+endmenu
+
Index: linux-2.6/drivers/ace/Makefile
===================================================================
--- /dev/null
+++ linux-2.6/drivers/ace/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for ACE.
+#
+
+obj-$(CONFIG_ACE_DEVICE)	+= ace_device.o
Index: linux-2.6/drivers/ace/ace_device.c
===================================================================
--- /dev/null
+++ linux-2.6/drivers/ace/ace_device.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2006 Saville Software, Inc.
+ *
+ * This code may be used for any purpose whatsoever, but
+ * no warranty of any kind is provided.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/cdev.h>
+#include <linux/jiffies.h>
+#include <linux/timer.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/kshmem.h>
+
+#include <asm/ace.h>
+
+#define ACE_DEBUG
+#ifdef ACE_DEBUG
+#define DPK(fmt, args...) printk(KERN_ERR "ace " fmt, ## args)
+#else
+#define DPK(fmt, args...)
+#endif
+
+/*
+ * Per-device state for the ace character device.  A single instance,
+ * ace_dev, is defined in this file.
+ */
+struct ace_dev_struct
+{
+	struct	cdev		cdev;			/* Character device structure */
+	struct page *		ace_code_page;		/* The ace code page struct (from vmalloc_to_page) */
+	unsigned long		ace_code_kvaddr;	/* The ace code page as kernel virtual address */
+	unsigned long		ace_code_addr;		/* Address returned by kshmem_alloc_at(); expected to equal ACE_CODE_ADDR */
+	unsigned long		ace_code_size;		/* Size of ace_code_addr mapping, in bytes */
+	struct timer_list	timer;			/* Test timer, see ace_timer() */
+	unsigned long		timer_delay;		/* Delay for timer, in jiffies */
+};
+
+MODULE_AUTHOR("Wink Saville");
+MODULE_LICENSE("Dual BSD/GPL");
+
+int ace_open(struct inode *inode, struct file *pFile);
+int ace_release(struct inode *inode, struct file *pFile);
+int ace_ioctl(struct inode *pInode, struct file *pFile, unsigned int cmd, unsigned long arg);
+
+/*
+ * Module parameters
+ *
+ * A non-zero major requests that fixed device number; major=0 asks
+ * ace_device_init() to allocate one dynamically.
+ */
+static int major = 240;	/* 240 a "local/experimental" device number for the moment */
+static int minor = 0;
+
+module_param(major, int, S_IRUGO);
+module_param(minor, int, S_IRUGO);
+
+/*
+ * Globals
+ */
+struct ace_dev_struct ace_dev;
+EXPORT_SYMBOL(ace_dev);
+
+/*
+ * File operations
+ *
+ * Only open, ioctl and release are provided; ace_device_init() passes
+ * this table to cdev_init().
+ */
+struct file_operations ace_f_ops = {
+	.owner		=	THIS_MODULE,
+	.open		=	ace_open,
+	.ioctl		=	ace_ioctl,
+	.release	=	ace_release,
+};
+
+/*
+ * Initialize the ace page.
+ *
+ * Allocates the shared page at ACE_CODE_ADDR, records its page struct and
+ * kernel virtual address in ace_dev, and copies the ace code
+ * (ace_code_beg..ace_code_end) into it.
+ *
+ * All sanity checks run BEFORE the copy, so a failed or misplaced
+ * allocation, or an image larger than the page, trips a BUG_ON instead of
+ * being written through.
+ *
+ * NO-ONE may be using the ACE_CODE at the time this is called.
+ */
+void ace_init(void)
+{
+	unsigned long	ace_code_len = &ace_code_end - &ace_code_beg;
+
+	DPK("ace_init: E\n");
+
+	/*
+	 * Be sure there is enough space for the ACE_DATA and that the
+	 * whole code image fits in the page we are about to allocate.
+	 */
+	BUG_ON(sizeof(struct ace_data_struct) > ACE_DATA_MAX_SIZE);
+	BUG_ON(ace_code_len > ACE_CODE_SIZE);
+
+	/*
+	 * Allocate an ace page; it must land exactly at ACE_CODE_ADDR
+	 * because the code and the interrupt path hard-code that address.
+	 */
+	ace_dev.ace_code_size = ACE_CODE_SIZE;
+	ace_dev.ace_code_addr = (unsigned long)kshmem_alloc_at(ACE_CODE_ADDR, ace_dev.ace_code_size, PAGE_SHARED_EXEC);
+	BUG_ON(ace_dev.ace_code_addr != ACE_CODE_ADDR);
+
+	ace_dev.ace_code_page = vmalloc_to_page((unsigned char *)ace_dev.ace_code_addr);
+	ace_dev.ace_code_kvaddr = kshmem_addr_to_kvaddr(ace_dev.ace_code_addr);
+	BUG_ON(ace_dev.ace_code_page != virt_to_page(ace_dev.ace_code_kvaddr));
+
+	/*
+	 * Everything checked out: copy the code to the page
+	 */
+	memcpy((void *)ace_dev.ace_code_addr, &ace_code_beg, ace_code_len);
+
+	DPK("ace_init: X\n");
+}
+EXPORT_SYMBOL(ace_init);
+
+/*
+ * Test timer callback.
+ *
+ * arg is the struct ace_dev_struct that owns the timer.  Each expiry bumps
+ * test counters 0 and 1 through the ace page and re-arms the timer one
+ * timer_delay after the previous expiry time.
+ */
+static void ace_timer(unsigned long arg)
+{
+	struct ace_dev_struct *dev = (struct ace_dev_struct *)arg;
+	struct timer_list *tmr = &dev->timer;
+
+	ace_inc_two_counters(0, 1);
+
+	tmr->expires = tmr->expires + dev->timer_delay;
+	add_timer(tmr);
+}
+
+/*
+ * Open
+ *
+ * Stores the owning ace_dev_struct in the file's private_data and calls
+ * kshmem_user_enable().  Always returns 0.
+ */
+int ace_open(struct inode *inode, struct file *pFile)
+{
+	const int rc = 0;
+
+	DPK("ace_open: E\n");
+
+	pFile->private_data =
+		(void *)container_of(inode->i_cdev, struct ace_dev_struct, cdev);
+	kshmem_user_enable();
+
+	DPK("ace_open: X result=%d\n", rc);
+	return rc;
+}
+
+/*
+ * Release/Close
+ *
+ * Counterpart of ace_open(): calls kshmem_user_disable().  Always
+ * returns 0.
+ */
+int ace_release(struct inode *inode, struct file *pFile)
+{
+	const int rc = 0;
+
+	DPK("ace_release: E\n");
+
+	kshmem_user_disable();
+
+	DPK("ace_release: X result=%d\n", rc);
+	return rc;
+}
+
+/*
+ * Ioctl
+ *
+ * No commands are implemented yet, so every request is rejected with
+ * -ENOTTY, the conventional "inappropriate ioctl for device" error.
+ * (-EFAULT, used previously, means a bad user-space address and would
+ * mislead callers.)
+ */
+int ace_ioctl(struct inode *pInode, struct file *pFile, unsigned int cmd, unsigned long arg)
+{
+	int result = -ENOTTY;
+
+	DPK("ace_ioctl: E\n");
+
+	DPK("ace_ioctl: X result=%d\n", result);
+	return result;
+}
+
+/*
+ * Init routine for the ace device
+ *
+ * Reserves the character device number (fixed when the "major" parameter
+ * is non-zero, dynamic otherwise), makes sure the ace page is set up,
+ * registers the cdev, creates the "ace" class and class device, and starts
+ * the 1 ms test timer.
+ *
+ * Returns 0 on success or a negative errno.  On failure everything that
+ * was registered up to that point is released again.
+ */
+static int ace_device_init(void)
+{
+	int 		result;
+	dev_t 		dev_number = 0;
+	static struct class *ace_class;
+	struct class_device *class_dev;
+
+	DPK("ace_device_init: E\n");
+
+	if (major) {
+		dev_number = MKDEV(major, minor);
+		result = register_chrdev_region(dev_number, 1, "ace");
+		DPK("ace_device_init: static major result=%d\n", result);
+	} else {
+		result = alloc_chrdev_region(&dev_number, minor, 1, "ace");
+		major = MAJOR(dev_number);
+		DPK("ace_device_init: dynamic major result=%d\n", result);
+	}
+
+	if (result < 0) {
+		printk(KERN_WARNING "ace: can't get major %d\n", major);
+		goto done;
+	}
+
+	if (ace_dev.ace_code_addr == 0)
+		ace_init();
+
+	/* cdev_init() stores ace_f_ops in cdev.ops; only the owner is left to set */
+	cdev_init(&ace_dev.cdev, &ace_f_ops);
+	ace_dev.cdev.owner = THIS_MODULE;
+
+	result = cdev_add(&ace_dev.cdev, dev_number, 1);
+	if (result)
+	{
+		DPK("ace_device_init: cdev_add failed\n");
+		goto err_region;
+	}
+
+	/*
+	 * Make an ace class and create the device
+	 */
+	ace_class = class_create(THIS_MODULE, "ace");
+	if (IS_ERR(ace_class)) {
+		result = PTR_ERR(ace_class);
+		DPK("ace_device_init: class_create failed\n");
+		goto err_cdev;
+	}
+
+	class_dev = class_device_create(ace_class, NULL, dev_number, NULL, "ace");
+	if (IS_ERR(class_dev)) {
+		result = PTR_ERR(class_dev);
+		DPK("ace_device_init: class_device_create failed\n");
+		goto err_class;
+	}
+
+	/*
+	 * Start timer.  init_timer() comes first so it cannot disturb the
+	 * fields filled in afterwards.
+	 */
+	init_timer(&ace_dev.timer);
+	ace_dev.timer_delay = msecs_to_jiffies(1);
+	ace_dev.timer.function = ace_timer;
+	ace_dev.timer.data = (unsigned long)&ace_dev;
+	ace_dev.timer.expires = jiffies + ace_dev.timer_delay;
+	add_timer(&ace_dev.timer);
+
+	goto done;
+
+err_class:
+	class_destroy(ace_class);
+err_cdev:
+	cdev_del(&ace_dev.cdev);
+err_region:
+	unregister_chrdev_region(dev_number, 1);
+done:
+	DPK("ace_device_init: X result=%d major=%d minor=%d\n", result, major, minor);
+	return result;
+}
+
+/*
+ * Exit routine for ace device
+ *
+ * Stops the test timer, removes the cdev and gives back the device
+ * number.
+ *
+ * NOTE(review): the class and class device created by ace_device_init()
+ * are not torn down here because the class pointer is local to that
+ * function; it would have to move to file scope to be destroyed on exit.
+ */
+static void ace_device_exit(void)
+{
+	dev_t dev_number = MKDEV(major, minor);
+
+	DPK("ace_device_exit: E\n");
+
+	del_timer_sync(&ace_dev.timer);
+
+	/* Remove the cdev before releasing the number it was added under */
+	cdev_del(&ace_dev.cdev);
+	unregister_chrdev_region(dev_number, 1);
+
+	DPK("ace_device_exit: X\n");
+}
+
+module_init(ace_device_init);
+module_exit(ace_device_exit);
+
Index: linux-2.6/include/asm-x86_64/ace.h
===================================================================
--- /dev/null
+++ linux-2.6/include/asm-x86_64/ace.h
@@ -0,0 +1,124 @@
+/**
+ * Copyright (C) 2006 Saville Software, Inc.
+ *
+ * This code may be used for any purpose whatsoever, but
+ * no warranty of any kind is provided.
+ */
+
+#ifndef _ACE_H
+#define _ACE_H
+
+/* Fixed virtual address at which the ace page is allocated (see ace_init) */
+#define	ACE_CODE_ADDR		0x6ffffffff000
+/* Size of the ace page in bytes; also the alignment of ace_code_beg */
+#define	ACE_CODE_SIZE		4096
+/* Mask that rounds an address down to the start of its ace-sized page */
+#define	ACE_CODE_ADDR_MASK	(~(ACE_CODE_SIZE-1))
+/* Bytes reserved at the start of the ace page for struct ace_data_struct */
+#define ACE_DATA_MAX_SIZE	256
+/* Number of 64-bit test counters kept in the ace page */
+#define ACE_TEST_NUM_COUNTERS	16
+
+#ifndef __ASSEMBLY__
+
+/*
+ * This data is located at ACE_CODE_ADDR and
+ * must not exceed ACE_DATA_MAX_SIZE; ace_init()
+ * checks the size with a BUG_ON.  Access it
+ * through the pAce_data pointer defined below.
+ */
+struct ace_data_struct {
+	struct mp_struct *		pMprocs;	/* array of mprocs, pMprocs[0] is kernel's */
+	unsigned long			mprocs_count;	/* Number elements in pMprocs */
+	unsigned int			kernel_pool;	/* Id of kernel pool, must be 0 */
+	struct mp_mem_pool_struct *	pMem_pools;	/* Array of mem pools */
+	unsigned long			mem_pools_count;/* Number of elements in pMem_pools */
+	struct mp_msg_list_struct *	pMsg_lists;	/* Array of msg lists */
+	unsigned long			msg_lists_count;/* Number of elements in pMsg_lists */
+};
+#define pAce_data		((struct ace_data_struct *)ACE_CODE_ADDR)
+
+/*
+ * Beginning and end of the ace code in ace.S
+ */
+extern char			ace_code_beg;
+extern char 			ace_code_end;
+
+/*
+ * Initialization routine, called from init/main.c
+ */
+extern void			ace_init(void);
+
+/*
+ * Ace routines for testing
+ */
+extern uint64_t			ace_inc_two_counters(uint64_t cnt0, uint64_t cnt1);
+extern void			ace_get_counters_snapshot(uint64_t snapshot[ACE_TEST_NUM_COUNTERS]);
+
+/**
+ * Atomic operations. For x86_64 these are
+ * inherently atomic so they do not need
+ * to be executed in the ace page. For other
+ * architectures these may need to be defined
+ * in the ace page.
+ */
+
+/*
+ * Atomic increment
+ *
+ * Adds one to *pVal with a locked instruction.
+ */
+static __inline__ void ace_inc(volatile int *pVal)
+{
+	__asm__ __volatile__("lock; incl %0" : "+m" (*pVal));
+}
+
+/*
+ * Atomic decrement
+ *
+ * Subtracts one from *pVal with a locked instruction.
+ */
+static __inline__ void ace_dec(volatile int *pVal)
+{
+	__asm__ __volatile__("lock; decl %0" : "+m" (*pVal));
+}
+
+/*
+ * Atomic decrement and test.
+ *
+ * Subtracts one from *pVal with a locked instruction.
+ * Returns 1 if the new value is zero, otherwise 0.
+ */
+static __inline__ int ace_dec_and_test(volatile int *pVal)
+{
+	unsigned char hit_zero;
+
+	__asm__ __volatile__(
+		"lock; decl %0\n\t"
+		"sete %1"
+		: "+m" (*pVal), "=qm" (hit_zero)
+		:
+		: "memory");
+	return hit_zero != 0;
+}
+
+/*
+ * Atomic compare and exchange; atomically
+ * executes the following algorithm:
+ *
+ * if the current value equals old_val
+ * then change it to new_val; in either
+ * case return the value that was found
+ * in *pVal before the operation.
+ *
+ * if (*pVal == old_val) {
+ * 	*pVal = new_val;
+ * 	return old_val;
+ * } else {
+ * 	return *pVal;
+ * }
+ */
+static __inline__ int ace_cmpxchg(volatile int *pVal, int old_val, int new_val)
+{
+	int found;
+
+	/* eax carries old_val in and the previously stored value out */
+	__asm__ __volatile__("lock; cmpxchgl %2,%1"
+			     : "=a"(found), "+m"(*pVal)
+			     : "r"(new_val), "0"(old_val)
+			     : "memory");
+	return found;
+}
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ACE_H */
Index: linux-2.6/mm/Kconfig
===================================================================
--- linux-2.6.orig/mm/Kconfig
+++ linux-2.6/mm/Kconfig
@@ -171,3 +171,5 @@ config KSHMEM
  	  user space programs. For instance interrupt service routines
  	  and user space programs may share the same memory.

+source "drivers/ace/Kconfig"
+


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2007-05-06  1:54 UTC | newest]

Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-05-06  1:54 [PATCH 2/4] ACE implementation, configuration and makefile Wink Saville

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.