linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] powerpc: Merge align.c
@ 2005-11-14  8:00 Benjamin Herrenschmidt
  2005-11-14 19:53 ` Becky Bruce
  0 siblings, 1 reply; 20+ messages in thread
From: Benjamin Herrenschmidt @ 2005-11-14  8:00 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc64-dev, linuxppc-dev list

Need testing !!!

This patch merges align.c, the result isn't quite what was in ppc64 nor
what was in ppc32 :) It should implement all the functionalities of both
though. Kumar, since you played with that in the past, I suppose you
have some test cases for verifying that it works properly before I dig
out the 601 machine ? :)

Since it's likely that I won't be able to test all scenarios, code
inspection is very welcome.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>


Index: linux-work/arch/powerpc/kernel/Makefile
===================================================================
--- linux-work.orig/arch/powerpc/kernel/Makefile	2005-11-14 15:17:57.000000000 +1100
+++ linux-work/arch/powerpc/kernel/Makefile	2005-11-14 17:18:14.000000000 +1100
@@ -12,7 +12,7 @@
 endif
 
 obj-y				:= semaphore.o cputable.o ptrace.o syscalls.o \
-				   irq.o signal_32.o pmc.o vdso.o
+				   irq.o align.o signal_32.o pmc.o vdso.o
 obj-y				+= vdso32/
 obj-$(CONFIG_PPC64)		+= setup_64.o binfmt_elf32.o sys_ppc32.o \
 				   signal_64.o ptrace32.o systbl.o \
Index: linux-work/arch/powerpc/kernel/align.c
===================================================================
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ linux-work/arch/powerpc/kernel/align.c	2005-11-14 18:41:22.000000000 +1100
@@ -0,0 +1,513 @@
+/* align.c - handle alignment exceptions for the Power PC.
+ *
+ * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
+ * Copyright (c) 1998-1999 TiVo, Inc.
+ *   PowerPC 403GCX modifications.
+ * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
+ *   PowerPC 403GCX/405GP modifications.
+ * Copyright (c) 2001-2002 PPC64 team, IBM Corp
+ *   64-bit and Power4 support
+ * Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp
+ *                    <benh@kernel.crashing.org>
+ *   Merge ppc32 and ppc64 implementations
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/cache.h>
+#include <asm/cputable.h>
+
+struct aligninfo {
+	unsigned char len;	/* operand length in bytes; 0 in INVALID entries */
+	unsigned char flags;	/* OR of the LD/ST/SE/F/U/M/SW/HARD bits below */
+};
+
+#define IS_XFORM(inst)	(((inst) >> 26) == 31)	/* opcode (inst >> 26) is 31 */
+#define IS_DSFORM(inst)	(((inst) >> 26) >= 56)	/* opcode (inst >> 26) >= 56 */
+
+#define INVALID	{ 0, 0 }	/* len 0, no flags */
+
+#define LD	1	/* load */
+#define ST	2	/* store */
+#define	SE	4	/* sign-extend value */
+#define F	8	/* to/from fp regs */
+#define U	0x10	/* update index register */
+#define M	0x20	/* multiple load/store */
+#define SW	0x40	/* byte swap int (without F), or ... */
+#define S	0x40	/* ... single-precision fp (with F), or ... */
+#define SX	0x40	/* ... byte count in XER (with HARD); same bit as SW/S */
+#define HARD	0x80	/* string, stwcx. */
+
+#define DCBZ	0x5f	/* 8xx/82xx dcbz faults when cache not enabled */
+
+#define SWAP(a, b)	(t = (a), (a) = (b), (b) = t)	/* needs a local 't' in scope */
+
+/*
+ * The PowerPC stores certain bits of the instruction that caused the
+ * alignment exception in the DSISR register.  fix_alignment() packs them
+ * into a 7-bit index (the binary value beside each entry); this array
+ * maps it to the operand length (0 = unsupported, except dcbz) and flags.
+ */
+static struct aligninfo aligninfo[128] = {
+	{ 4, LD },		/* 00 0 0000: lwz / lwarx */
+	INVALID,		/* 00 0 0001 */
+	{ 4, ST },		/* 00 0 0010: stw */
+	INVALID,		/* 00 0 0011 */
+	{ 2, LD },		/* 00 0 0100: lhz */
+	{ 2, LD+SE },		/* 00 0 0101: lha */
+	{ 2, ST },		/* 00 0 0110: sth */
+	{ 4, LD+M },		/* 00 0 0111: lmw */
+	{ 4, LD+F+S },		/* 00 0 1000: lfs */
+	{ 8, LD+F },		/* 00 0 1001: lfd */
+	{ 4, ST+F+S },		/* 00 0 1010: stfs */
+	{ 8, ST+F },		/* 00 0 1011: stfd */
+	INVALID,		/* 00 0 1100 */
+	{ 8, LD },		/* 00 0 1101: ld/ldu/lwa */
+	INVALID,		/* 00 0 1110 */
+	{ 8, ST },		/* 00 0 1111: std/stdu */
+	{ 4, LD+U },		/* 00 1 0000: lwzu */
+	INVALID,		/* 00 1 0001 */
+	{ 4, ST+U },		/* 00 1 0010: stwu */
+	INVALID,		/* 00 1 0011 */
+	{ 2, LD+U },		/* 00 1 0100: lhzu */
+	{ 2, LD+SE+U },		/* 00 1 0101: lhau */
+	{ 2, ST+U },		/* 00 1 0110: sthu */
+	{ 4, ST+M },		/* 00 1 0111: stmw */
+	{ 4, LD+F+S+U },	/* 00 1 1000: lfsu */
+	{ 8, LD+F+U },		/* 00 1 1001: lfdu */
+	{ 4, ST+F+S+U },	/* 00 1 1010: stfsu */
+	{ 8, ST+F+U },		/* 00 1 1011: stfdu */
+	INVALID,		/* 00 1 1100 */
+	INVALID,		/* 00 1 1101 */
+	INVALID,		/* 00 1 1110 */
+	INVALID,		/* 00 1 1111 */
+	{ 8, LD },		/* 01 0 0000: ldx */
+	INVALID,		/* 01 0 0001 */
+	{ 8, ST },		/* 01 0 0010: stdx */
+	INVALID,		/* 01 0 0011 */
+	INVALID,		/* 01 0 0100 */
+	{ 4, LD+SE },		/* 01 0 0101: lwax */
+	INVALID,		/* 01 0 0110 */
+	INVALID,		/* 01 0 0111 */
+	{ 4, LD+M+HARD+SX },	/* 01 0 1000: lswx */
+	{ 4, LD+M+HARD },	/* 01 0 1001: lswi */
+	{ 4, ST+M+HARD+SX },	/* 01 0 1010: stswx */
+	{ 4, ST+M+HARD },	/* 01 0 1011: stswi */
+	INVALID,		/* 01 0 1100 */
+	{ 8, LD+U },		/* 01 0 1101: ldu */
+	INVALID,		/* 01 0 1110 */
+	{ 8, ST+U },		/* 01 0 1111: stdu */
+	{ 8, LD+U },		/* 01 1 0000: ldux */
+	INVALID,		/* 01 1 0001 */
+	{ 8, ST+U },		/* 01 1 0010: stdux */
+	INVALID,		/* 01 1 0011 */
+	INVALID,		/* 01 1 0100 */
+	{ 4, LD+SE+U },		/* 01 1 0101: lwaux */
+	INVALID,		/* 01 1 0110 */
+	INVALID,		/* 01 1 0111 */
+	INVALID,		/* 01 1 1000 */
+	INVALID,		/* 01 1 1001 */
+	INVALID,		/* 01 1 1010 */
+	INVALID,		/* 01 1 1011 */
+	INVALID,		/* 01 1 1100 */
+	INVALID,		/* 01 1 1101 */
+	INVALID,		/* 01 1 1110 */
+	INVALID,		/* 01 1 1111 */
+	INVALID,		/* 10 0 0000 */
+	INVALID,		/* 10 0 0001 */
+	INVALID,		/* 10 0 0010: stwcx. */
+	INVALID,		/* 10 0 0011 */
+	INVALID,		/* 10 0 0100 */
+	INVALID,		/* 10 0 0101 */
+	INVALID,		/* 10 0 0110 */
+	INVALID,		/* 10 0 0111 */
+	{ 4, LD+SW },		/* 10 0 1000: lwbrx */
+	INVALID,		/* 10 0 1001 */
+	{ 4, ST+SW },		/* 10 0 1010: stwbrx */
+	INVALID,		/* 10 0 1011 */
+	{ 2, LD+SW },		/* 10 0 1100: lhbrx */
+	{ 4, LD+SE },		/* 10 0 1101: lwa */
+	{ 2, ST+SW },		/* 10 0 1110: sthbrx */
+	INVALID,		/* 10 0 1111 */
+	INVALID,		/* 10 1 0000 */
+	INVALID,		/* 10 1 0001 */
+	INVALID,		/* 10 1 0010 */
+	INVALID,		/* 10 1 0011 */
+	INVALID,		/* 10 1 0100 */
+	INVALID,		/* 10 1 0101 */
+	INVALID,		/* 10 1 0110 */
+	INVALID,		/* 10 1 0111 */
+	INVALID,		/* 10 1 1000 */
+	INVALID,		/* 10 1 1001 */
+	INVALID,		/* 10 1 1010 */
+	INVALID,		/* 10 1 1011 */
+	INVALID,		/* 10 1 1100 */
+	INVALID,		/* 10 1 1101 */
+	INVALID,		/* 10 1 1110 */
+	{ 0, ST+HARD },		/* 10 1 1111: dcbz (index DCBZ, special-cased) */
+	{ 4, LD },		/* 11 0 0000: lwzx */
+	INVALID,		/* 11 0 0001 */
+	{ 4, ST },		/* 11 0 0010: stwx */
+	INVALID,		/* 11 0 0011 */
+	{ 2, LD },		/* 11 0 0100: lhzx */
+	{ 2, LD+SE },		/* 11 0 0101: lhax */
+	{ 2, ST },		/* 11 0 0110: sthx */
+	INVALID,		/* 11 0 0111 */
+	{ 4, LD+F+S },		/* 11 0 1000: lfsx */
+	{ 8, LD+F },		/* 11 0 1001: lfdx */
+	{ 4, ST+F+S },		/* 11 0 1010: stfsx */
+	{ 8, ST+F },		/* 11 0 1011: stfdx */
+	INVALID,		/* 11 0 1100 */
+	{ 8, LD+M },		/* 11 0 1101: lmd */
+	INVALID,		/* 11 0 1110 */
+	{ 8, ST+M },		/* 11 0 1111: stmd */
+	{ 4, LD+U },		/* 11 1 0000: lwzux */
+	INVALID,		/* 11 1 0001 */
+	{ 4, ST+U },		/* 11 1 0010: stwux */
+	INVALID,		/* 11 1 0011 */
+	{ 2, LD+U },		/* 11 1 0100: lhzux */
+	{ 2, LD+SE+U },		/* 11 1 0101: lhaux */
+	{ 2, ST+U },		/* 11 1 0110: sthux */
+	INVALID,		/* 11 1 0111 */
+	{ 4, LD+F+S+U },	/* 11 1 1000: lfsux */
+	{ 8, LD+F+U },		/* 11 1 1001: lfdux */
+	{ 4, ST+F+S+U },	/* 11 1 1010: stfsux */
+	{ 8, ST+F+U },		/* 11 1 1011: stfdux */
+	INVALID,		/* 11 1 1100 */
+	INVALID,		/* 11 1 1101 */
+	INVALID,		/* 11 1 1110 */
+	INVALID,		/* 11 1 1111 */
+};
+
+/*
+ * Create a DSISR value from the instruction
+ */
+static inline unsigned make_dsisr(unsigned instr)
+{
+	unsigned result;
+
+	/* register fields: bits  6:15 --> 22:31 */
+	result = (instr >> 16) & 0x000003ff;
+
+	if (!IS_XFORM(instr)) {
+		/* bit      5 -->    17 */
+		result |= (instr >> 12) & 0x00004000;
+		/* bits  1: 4 --> 18:21 */
+		result |= (instr >> 17) & 0x00003c00;
+		/* bits 30:31 --> 12:13, only when IS_DSFORM() matches */
+		if (IS_DSFORM(instr))
+			result |= (instr & 0x00000003) << 18;
+	} else {
+		/* opcode 31: operation bits come from the low end of the word */
+		/* bits 29:30 --> 15:16 */
+		result |= (instr & 0x00000006) << 14;
+		/* bit     25 -->    17 */
+		result |= (instr & 0x00000040) << 8;
+		/* bits 21:24 --> 18:21 */
+		result |= (instr & 0x00000780) << 3;
+	}
+
+
+	return result;
+}
+
+/*
+ * The dcbz (data cache block zero) instruction
+ * gives an alignment fault if used on non-cacheable
+ * memory.  We handle the fault mainly for the
+ * case when we are running with the cache disabled
+ * for debugging.
+ */
+static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
+{
+	int size, n;
+	long __user *blk;	/* regs->dar & -size: start of the block to zero */
+
+#ifndef __powerpc64__
+	size = L1_CACHE_BYTES;
+#else
+	size = ppc64_caches.dline_size;
+#endif
+	blk = (long __user *) (regs->dar & -size);
+	if (user_mode(regs) && !access_ok(VERIFY_WRITE, blk, size))
+		return -EFAULT;
+	for (n = 0; n < size / sizeof(long); ++n)	/* zero it long by long */
+		if (__put_user(0, blk + n))
+			return -EFAULT;
+	return 1;
+}
+
+/*
+ * Emulate load & store multiple and string instructions (user mode only)
+ */
+static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
+			    unsigned int reg, unsigned int nb,
+			    unsigned int flags, unsigned int instr)
+{
+	unsigned char *rptr;
+	int nb0, i;
+
+	/*
+	 * We do not try to emulate 8-byte multiples, as they aren't really
+	 * available in our operating environments, and we don't try to
+	 * emulate multiple operations in kernel land, as they should never
+	 * be used/generated there, at least not on unaligned boundaries.
+	 */
+	if (unlikely((nb > 4) || !user_mode(regs)))
+		return 0;
+
+	/* lmw, stmw, lswi/x, stswi/x */
+	nb0 = 0;
+	if (flags & HARD) {
+		if (flags & SX) {
+			nb = regs->xer & 127;
+			if (nb == 0)
+				return 1;	/* zero byte count: nothing to do */
+		} else {
+			if (__get_user(instr,
+				       (unsigned int __user *)regs->nip))
+				return -EFAULT;
+			nb = (instr >> 11) & 0x1f;	/* count field of the re-read instruction */
+			if (nb == 0)
+				nb = 32;
+		}
+		if (nb + reg * 4 > 128) {	/* would run past the 32 * 4 register bytes */
+			nb0 = nb + reg * 4 - 128;	/* remainder continues at gpr[0] */
+			nb = 128 - reg * 4;
+		}
+	} else {
+		/* lmw, stmw */
+		nb = (32 - reg) * 4;
+	}
+
+	if (!access_ok((flags & ST ? VERIFY_WRITE: VERIFY_READ), addr, nb+nb0))
+		return -EFAULT;	/* bad address */
+
+	rptr = (unsigned char *) &regs->gpr[reg];	/* NOTE(review): byte copy below looks like it assumes 4-byte GPR slots - confirm for 64-bit */
+	if (flags & LD) {
+		for (i = 0; i < nb; ++i)
+			if (__get_user(rptr[i], addr + i))
+				return -EFAULT;
+		if (nb0 > 0) {
+			rptr = (unsigned char *) &regs->gpr[0];
+			addr += nb;
+			for (i = 0; i < nb0; ++i)
+				if (__get_user(rptr[i], addr + i))
+					return -EFAULT;
+		}
+		for (; (i & 3) != 0; ++i)	/* zero-fill up to the next multiple of 4 bytes */
+			rptr[i] = 0;
+	} else {
+		for (i = 0; i < nb; ++i)
+			if (__put_user(rptr[i], addr + i))
+				return -EFAULT;
+		if (nb0 > 0) {
+			rptr = (unsigned char *) &regs->gpr[0];
+			addr += nb;
+			for (i = 0; i < nb0; ++i)
+				if (__put_user(rptr[i], addr + i))
+					return -EFAULT;
+		}
+	}
+	return 1;
+}
+
+
+/*
+ * Called on alignment exception.  Attempts to emulate the faulting access.
+ *
+ * Return 1 on success
+ * Return 0 if unable to handle the interrupt
+ * Return -EFAULT if data address is bad
+ */
+
+int fix_alignment(struct pt_regs *regs)
+{
+	unsigned int instr, nb, flags;
+	unsigned int reg, areg;
+	unsigned int dsisr;
+	unsigned char __user *addr;
+	unsigned char __user *p;
+	int ret, t;	/* t is the temporary used by SWAP() */
+	union {
+		long long ll;	/* must overlay all of v[0..7], even where long is 32 bits */
+		double dd;
+		unsigned char v[8];
+		struct {
+			unsigned hi32;
+			int	 low32;
+		} x32;
+		struct {
+			unsigned char hi48[6];
+			short	      low16;
+		} x16;
+	} data;
+
+	/*
+	 * We require a complete register set, if not, then our assembly
+	 * is broken
+	 */
+	CHECK_FULL_REGS(regs);
+
+	dsisr = regs->dsisr;
+
+	/* Some processors don't provide us with a DSISR we can use here,
+	 * let's make one up from the instruction
+	 */
+	if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) {
+		unsigned int real_instr;
+		if (unlikely(__get_user(real_instr,
+					(unsigned int __user *)regs->nip)))
+			return -EFAULT;
+		dsisr = make_dsisr(real_instr);
+	}
+
+	/* extract the operation and registers from the dsisr */
+	reg = (dsisr >> 5) & 0x1f;	/* source/dest register */
+	areg = dsisr & 0x1f;		/* register to update */
+	instr = (dsisr >> 10) & 0x7f;	/* 7-bit index into aligninfo[] ... */
+	instr |= (dsisr >> 13) & 0x60;	/* ... top two index bits may also come from here */
+
+	/* Lookup the operation in our table */
+	nb = aligninfo[instr].len;
+	flags = aligninfo[instr].flags;
+
+	/* DAR has the operand effective address */
+	addr = (unsigned char __user *)regs->dar;
+
+	/* A size of 0 indicates an instruction we don't support, with
+	 * the exception of DCBZ which is handled as a special case here
+	 */
+	if (instr == DCBZ)
+		return emulate_dcbz(regs, addr);
+	if (unlikely(nb == 0))
+		return 0;
+
+	/* Load/Store Multiple instructions are handled in their own
+	 * function
+	 */
+	if (flags & M)
+		return emulate_multiple(regs, addr, reg, nb, flags, instr);
+
+	/* Verify the address of the operand */
+	if (unlikely(user_mode(regs) &&
+		     !access_ok((flags & ST ? VERIFY_WRITE : VERIFY_READ),
+				addr, nb)))
+		return -EFAULT;
+
+	/* Force the fprs into the save area so we can reference them */
+	if (flags & F) {
+		/* userland only */
+		if (unlikely(!user_mode(regs)))
+			return 0;
+		flush_fp_to_thread(current);
+	}
+
+	/* If we are loading, get the data from user space, else
+	 * get it from register values.  The operand is right-justified
+	 * in data.v[], i.e. it always ends at v[7].
+	 */
+	if (flags & LD) {
+		data.ll = 0;
+		ret = 0;
+		p = addr;
+		switch (nb) {
+		case 8:
+			ret |= __get_user(data.v[0], p++);
+			ret |= __get_user(data.v[1], p++);
+			ret |= __get_user(data.v[2], p++);
+			ret |= __get_user(data.v[3], p++);
+		case 4:		/* case 8 falls through to here */
+			ret |= __get_user(data.v[4], p++);
+			ret |= __get_user(data.v[5], p++);
+		case 2:		/* cases 8 and 4 fall through to here */
+			ret |= __get_user(data.v[6], p++);
+			ret |= __get_user(data.v[7], p++);
+			if (unlikely(ret))
+				return -EFAULT;
+		}
+	} else if (flags & F)
+		data.dd = current->thread.fpr[reg];
+	else
+		data.ll = regs->gpr[reg];
+
+	/* Perform other misc operations like sign extension, byteswap,
+	 * or floating point single precision conversion
+	 */
+	switch (flags & ~U) {
+	case LD+SE:	/* sign extend */
+		if ( nb == 2 )
+			data.ll = data.x16.low16;
+		else	/* nb must be 4 */
+			data.ll = data.x32.low32;
+		break;
+	case LD+SW:	/* byte-swap */
+	case ST+SW:
+		if (nb == 2) {
+			SWAP(data.v[6], data.v[7]);
+		} else {
+			SWAP(data.v[4], data.v[7]);
+			SWAP(data.v[5], data.v[6]);
+		}
+		break;
+
+	/* Single-precision FP load and store require conversions... */
+	case LD+F+S:
+#ifdef CONFIG_PPC_FPU
+		preempt_disable();
+		enable_kernel_fp();
+		cvt_fd((float *)&data.v[4], &data.dd, &current->thread);
+		preempt_enable();
+#else
+		return 0;
+#endif
+		break;
+	case ST+F+S:
+#ifdef CONFIG_PPC_FPU
+		preempt_disable();
+		enable_kernel_fp();
+		cvt_df(&data.dd, (float *)&data.v[4], &current->thread);
+		preempt_enable();
+#else
+		return 0;
+#endif
+		break;
+	}
+
+	/* Store result to memory or update registers */
+	if (flags & ST) {
+		ret = 0;
+		p = addr;
+		switch (nb) {
+		case 8:
+			ret |= __put_user(data.v[0], p++);
+			ret |= __put_user(data.v[1], p++);
+			ret |= __put_user(data.v[2], p++);
+			ret |= __put_user(data.v[3], p++);
+		case 4:		/* case 8 falls through to here */
+			ret |= __put_user(data.v[4], p++);
+			ret |= __put_user(data.v[5], p++);
+		case 2:		/* cases 8 and 4 fall through to here */
+			ret |= __put_user(data.v[6], p++);
+			ret |= __put_user(data.v[7], p++);
+		}
+		if (unlikely(ret))
+			return -EFAULT;
+	} else if (flags & F)
+		current->thread.fpr[reg] = data.dd;
+	else
+		regs->gpr[reg] = data.ll;
+
+	/* Update RA as needed */
+	if (flags & U)
+		regs->gpr[areg] = regs->dar;
+
+	return 1;
+}
Index: linux-work/arch/ppc/kernel/Makefile
===================================================================
--- linux-work.orig/arch/ppc/kernel/Makefile	2005-11-11 10:14:48.000000000 +1100
+++ linux-work/arch/ppc/kernel/Makefile	2005-11-14 18:42:30.000000000 +1100
@@ -13,7 +13,7 @@
 extra-y				+= vmlinux.lds
 
 obj-y				:= entry.o traps.o idle.o time.o misc.o \
-					process.o align.o \
+					process.o \
 					setup.o \
 					ppc_htab.o
 obj-$(CONFIG_6xx)		+= l2cr.o cpu_setup_6xx.o
Index: linux-work/arch/ppc64/kernel/Makefile
===================================================================
--- linux-work.orig/arch/ppc64/kernel/Makefile	2005-11-14 15:20:05.000000000 +1100
+++ linux-work/arch/ppc64/kernel/Makefile	2005-11-14 18:42:12.000000000 +1100
@@ -11,9 +11,7 @@
 
 endif
 
-obj-y               +=	idle.o dma.o \
-			align.o \
-			iommu.o
+obj-y               +=	idle.o dma.o iommu.o
 
 pci-obj-$(CONFIG_PPC_MULTIPLATFORM)	+= pci_dn.o pci_direct_iommu.o
 
Index: linux-work/include/asm-powerpc/cputable.h
===================================================================
--- linux-work.orig/include/asm-powerpc/cputable.h	2005-11-11 10:14:49.000000000 +1100
+++ linux-work/include/asm-powerpc/cputable.h	2005-11-14 18:33:42.000000000 +1100
@@ -90,6 +90,7 @@
 #define CPU_FTR_NEED_COHERENT		ASM_CONST(0x0000000000020000)
 #define CPU_FTR_NO_BTIC			ASM_CONST(0x0000000000040000)
 #define CPU_FTR_BIG_PHYS		ASM_CONST(0x0000000000080000)
+#define CPU_FTR_NODSISRALIGN  		ASM_CONST(0x0000000000100000)
 
 #ifdef __powerpc64__
 /* Add the 64b processor unique features in the top half of the word */
@@ -97,7 +98,6 @@
 #define CPU_FTR_16M_PAGE      		ASM_CONST(0x0000000200000000)
 #define CPU_FTR_TLBIEL         		ASM_CONST(0x0000000400000000)
 #define CPU_FTR_NOEXECUTE     		ASM_CONST(0x0000000800000000)
-#define CPU_FTR_NODSISRALIGN  		ASM_CONST(0x0000001000000000)
 #define CPU_FTR_IABR  			ASM_CONST(0x0000002000000000)
 #define CPU_FTR_MMCRA  			ASM_CONST(0x0000004000000000)
 #define CPU_FTR_CTRL			ASM_CONST(0x0000008000000000)
@@ -113,7 +113,6 @@
 #define CPU_FTR_16M_PAGE      		ASM_CONST(0x0)
 #define CPU_FTR_TLBIEL         		ASM_CONST(0x0)
 #define CPU_FTR_NOEXECUTE     		ASM_CONST(0x0)
-#define CPU_FTR_NODSISRALIGN  		ASM_CONST(0x0)
 #define CPU_FTR_IABR  			ASM_CONST(0x0)
 #define CPU_FTR_MMCRA  			ASM_CONST(0x0)
 #define CPU_FTR_CTRL			ASM_CONST(0x0)
@@ -273,18 +272,21 @@
 	CPU_FTRS_POWER3_32 = CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE |
 	    CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE,
 	CPU_FTRS_POWER4_32 = CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE |
-	    CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE,
+	    CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_NODSISRALIGN,
 	CPU_FTRS_970_32 = CPU_FTR_COMMON | CPU_FTR_SPLIT_ID_CACHE |
 	    CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | CPU_FTR_ALTIVEC_COMP |
-	    CPU_FTR_MAYBE_CAN_NAP,
+	    CPU_FTR_MAYBE_CAN_NAP | CPU_FTR_NODSISRALIGN,
 	CPU_FTRS_8XX = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB,
-	CPU_FTRS_40X = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB,
-	CPU_FTRS_44X = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB,
-	CPU_FTRS_E200 = CPU_FTR_USE_TB,
-	CPU_FTRS_E500 = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB,
+	CPU_FTRS_40X = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB |
+	    CPU_FTR_NODSISRALIGN,
+	CPU_FTRS_44X = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB |
+	    CPU_FTR_NODSISRALIGN,
+	CPU_FTRS_E200 = CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN,
+	CPU_FTRS_E500 = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB |
+	    CPU_FTR_NODSISRALIGN,
 	CPU_FTRS_E500_2 = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB |
-	    CPU_FTR_BIG_PHYS,
-	CPU_FTRS_GENERIC_32 = CPU_FTR_COMMON,
+	    CPU_FTR_BIG_PHYS | CPU_FTR_NODSISRALIGN,
+	CPU_FTRS_GENERIC_32 = CPU_FTR_COMMON | CPU_FTR_NODSISRALIGN,
 #ifdef __powerpc64__
 	CPU_FTRS_POWER3 = CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB |
 	    CPU_FTR_HPTE_TABLE | CPU_FTR_IABR,

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-14  8:00 [PATCH] powerpc: Merge align.c Benjamin Herrenschmidt
@ 2005-11-14 19:53 ` Becky Bruce
  2005-11-14 20:55   ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 20+ messages in thread
From: Becky Bruce @ 2005-11-14 19:53 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc64-dev, linuxppc-dev list

Ben,

I talked to Kumar about this a little bit (I had started a merge of
this file, but got distracted!) and he doesn't have any test cases.
I'll put something together and test this out on some of the 32-bit
systems I have here in my lab.  It won't be complete, but it will be
something.......

Cheers,
B

On Nov 14, 2005, at 2:00 AM, Benjamin Herrenschmidt wrote:

> Need testing !!!
>
> This patch merges align.c, the result isn't quite what was in ppc64 nor
> what was in ppc32 :) It should implement all the functionalities of both
> though. Kumar, since you played with that in the past, I suppose you
> have some test cases for verifying that it works properly before I dig
> out the 601 machine ? :)
>
> Since it's likely that I won't be able to test all scenario, code
> inspection is much welcome.
>
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
>
>
> Index: linux-work/arch/powerpc/kernel/Makefile
> ===================================================================
> --- linux-work.orig/arch/powerpc/kernel/Makefile=A0=A0=A0=A0=A0=A0=A0 =
2005-11-14=20
> 15:17:57.000000000 +1100
> +++ linux-work/arch/powerpc/kernel/Makefile=A0=A0=A0=A0 2005-11-14=20
> 17:18:14.000000000 +1100
> @@ -12,7 +12,7 @@
> =A0endif
> =A0
> =A0obj-y=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=
=A0 :=3D semaphore.o cputable.o ptrace.o=20
> syscalls.o \
> -=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 =A0=A0 irq.o signal_32.o pmc.o vdso.o
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 =A0=A0 irq.o align.o signal_32.o pmc.o=20
> vdso.o
> =A0obj-y=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=
=A0 +=3D vdso32/
> =A0obj-$(CONFIG_PPC64)=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 +=3D setup_64.o =
binfmt_elf32.o=20
> sys_ppc32.o \
> =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 =A0=A0 signal_64.o ptrace32.o systbl.o \
> Index: linux-work/arch/powerpc/kernel/align.c
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- /dev/null=A0=A0 1970-01-01 00:00:00.000000000 +0000
> +++ linux-work/arch/powerpc/kernel/align.c=A0=A0=A0=A0=A0 2005-11-14=20=

> 18:41:22.000000000 +1100
> @@ -0,0 +1,513 @@
> +/* align.c - handle alignment exceptions for the Power PC.
> + *
> + * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au>
> + * Copyright (c) 1998-1999 TiVo, Inc.
> + *=A0=A0 PowerPC 403GCX modifications.
> + * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu>
> + *=A0=A0 PowerPC 403GCX/405GP modifications.
> + * Copyright (c) 2001-2002 PPC64 team, IBM Corp
> + *=A0=A0 64-bit and Power4 support
> + * Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp
> + *=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0 =
<benh@kernel.crashing.org>
> + *=A0=A0 Merge ppc32 and ppc64 implementations
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/mm.h>
> +#include <asm/processor.h>
> +#include <asm/uaccess.h>
> +#include <asm/system.h>
> +#include <asm/cache.h>
> +#include <asm/cputable.h>
> +
> +struct aligninfo {
> +=A0=A0=A0=A0=A0=A0 unsigned char len;
> +=A0=A0=A0=A0=A0=A0 unsigned char flags;
> +};
> +
> +#define IS_XFORM(inst) (((inst) >> 26) =3D=3D 31)
> +#define IS_DSFORM(inst)=A0=A0=A0=A0=A0=A0=A0 (((inst) >> 26) >=3D 56)
> +
> +#define INVALID=A0=A0=A0=A0=A0=A0=A0 { 0, 0 }
> +
> +#define LD=A0=A0=A0=A0 1=A0=A0=A0=A0=A0=A0 /* load */
> +#define ST=A0=A0=A0=A0 2=A0=A0=A0=A0=A0=A0 /* store */
> +#define=A0=A0=A0=A0=A0=A0=A0 SE=A0=A0=A0=A0=A0 4=A0=A0=A0=A0=A0=A0 /* =
sign-extend value */
> +#define F=A0=A0=A0=A0=A0 8=A0=A0=A0=A0=A0=A0 /* to/from fp regs */
> +#define U=A0=A0=A0=A0=A0 0x10=A0=A0=A0 /* update index register */
> +#define M=A0=A0=A0=A0=A0 0x20=A0=A0=A0 /* multiple load/store */
> +#define SW=A0=A0=A0=A0 0x40=A0=A0=A0 /* byte swap int or ... */
> +#define S=A0=A0=A0=A0=A0 0x40=A0=A0=A0 /* ... single-precision fp */
> +#define SX=A0=A0=A0=A0 0x40=A0=A0=A0 /* byte count in XER */
> +#define HARD=A0=A0 0x80=A0=A0=A0 /* string, stwcx. */
> +
> +#define DCBZ=A0=A0 0x5f=A0=A0=A0 /* 8xx/82xx dcbz faults when cache =
not enabled=20
> */
> +
> +#define SWAP(a, b)=A0=A0=A0=A0 (t =3D (a), (a) =3D (b), (b) =3D t)
> +
> +/*
> + * The PowerPC stores certain bits of the instruction that caused the
> + * alignment exception in the DSISR register.=A0 This array maps =
those
> + * bits to information about the operand length and what the
> + * instruction would do.
> + */
> +static struct aligninfo aligninfo[128] =3D {
> +=A0=A0=A0=A0=A0=A0 { 4, LD },=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* =
00 0 0000: lwz / lwarx */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 00 0 0001 */
> +=A0=A0=A0=A0=A0=A0 { 4, ST },=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* =
00 0 0010: stw */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 00 0 0011 */
> +=A0=A0=A0=A0=A0=A0 { 2, LD },=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* =
00 0 0100: lhz */
> +=A0=A0=A0=A0=A0=A0 { 2, LD+SE },=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 00 0 =
0101: lha */
> +=A0=A0=A0=A0=A0=A0 { 2, ST },=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* =
00 0 0110: sth */
> +=A0=A0=A0=A0=A0=A0 { 4, LD+M },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 00 =
0 0111: lmw */
> +=A0=A0=A0=A0=A0=A0 { 4, LD+F+S },=A0 =A0=A0=A0=A0=A0=A0=A0 /* 00 0 =
1000: lfs */
> +=A0=A0=A0=A0=A0=A0 { 8, LD+F },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 00 =
0 1001: lfd */
> +=A0=A0=A0=A0=A0=A0 { 4, ST+F+S },=A0 =A0=A0=A0=A0=A0=A0=A0 /* 00 0 =
1010: stfs */
> +=A0=A0=A0=A0=A0=A0 { 8, ST+F },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 00 =
0 1011: stfd */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 00 0 1100 */
> +=A0=A0=A0=A0=A0=A0 { 8, LD },=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* =
00 0 1101: ld/ldu/lwa */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 00 0 1110 */
> +=A0=A0=A0=A0=A0=A0 { 8, ST },=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* =
00 0 1111: std/stdu */
> +=A0=A0=A0=A0=A0=A0 { 4, LD+U },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 00 =
1 0000: lwzu */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 00 1 0001 */
> +=A0=A0=A0=A0=A0=A0 { 4, ST+U },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 00 =
1 0010: stwu */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 00 1 0011 */
> +=A0=A0=A0=A0=A0=A0 { 2, LD+U },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 00 =
1 0100: lhzu */
> +=A0=A0=A0=A0=A0=A0 { 2, LD+SE+U }, =A0=A0=A0=A0=A0=A0=A0 /* 00 1 =
0101: lhau */
> +=A0=A0=A0=A0=A0=A0 { 2, ST+U },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 00 =
1 0110: sthu */
> +=A0=A0=A0=A0=A0=A0 { 4, ST+M },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 00 =
1 0111: stmw */
> +=A0=A0=A0=A0=A0=A0 { 4, LD+F+S+U },=A0=A0=A0=A0=A0=A0=A0 /* 00 1 =
1000: lfsu */
> +=A0=A0=A0=A0=A0=A0 { 8, LD+F+U },=A0 =A0=A0=A0=A0=A0=A0=A0 /* 00 1 =
1001: lfdu */
> +=A0=A0=A0=A0=A0=A0 { 4, ST+F+S+U },=A0=A0=A0=A0=A0=A0=A0 /* 00 1 =
1010: stfsu */
> +=A0=A0=A0=A0=A0=A0 { 8, ST+F+U },=A0 =A0=A0=A0=A0=A0=A0=A0 /* 00 1 =
1011: stfdu */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 00 1 1100 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 00 1 1101 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 00 1 1110 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 00 1 1111 */
> +=A0=A0=A0=A0=A0=A0 { 8, LD },=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* =
01 0 0000: ldx */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 0 0001 */
> +=A0=A0=A0=A0=A0=A0 { 8, ST },=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* =
01 0 0010: stdx */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 0 0011 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 0 0100 */
> +=A0=A0=A0=A0=A0=A0 { 4, LD+SE },=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 01 0 =
0101: lwax */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 0 0110 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 0 0111 */
> +=A0=A0=A0=A0=A0=A0 { 4, LD+M+HARD+SX },=A0=A0=A0 /* 01 0 1000: lswx =
*/
> +=A0=A0=A0=A0=A0=A0 { 4, LD+M+HARD },=A0=A0=A0=A0=A0=A0 /* 01 0 1001: =
lswi */
> +=A0=A0=A0=A0=A0=A0 { 4, ST+M+HARD+SX },=A0=A0=A0 /* 01 0 1010: stswx =
*/
> +=A0=A0=A0=A0=A0=A0 { 4, ST+M+HARD },=A0=A0=A0=A0=A0=A0 /* 01 0 1011: =
stswi */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 0 1100 */
> +=A0=A0=A0=A0=A0=A0 { 8, LD+U },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 01 =
0 1101: ldu */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 0 1110 */
> +=A0=A0=A0=A0=A0=A0 { 8, ST+U },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 01 =
0 1111: stdu */
> +=A0=A0=A0=A0=A0=A0 { 8, LD+U },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 01 =
1 0000: ldux */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 1 0001 */
> +=A0=A0=A0=A0=A0=A0 { 8, ST+U },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 01 =
1 0010: stdux */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 1 0011 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 1 0100 */
> +=A0=A0=A0=A0=A0=A0 { 4, LD+SE+U }, =A0=A0=A0=A0=A0=A0=A0 /* 01 1 =
0101: lwaux */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 1 0110 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 1 0111 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 1 1000 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 1 1001 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 1 1010 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 1 1011 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 1 1100 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 1 1101 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 1 1110 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 01 1 1111 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 0 0000 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 0 0001 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 0 0010: stwcx. */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 0 0011 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 0 0100 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 0 0101 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 0 0110 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 0 0111 */
> +=A0=A0=A0=A0=A0=A0 { 4, LD+SW },=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 10 0 =
1000: lwbrx */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 0 1001 */
> +=A0=A0=A0=A0=A0=A0 { 4, ST+SW },=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 10 0 =
1010: stwbrx */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 0 1011 */
> +=A0=A0=A0=A0=A0=A0 { 2, LD+SW },=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 10 0 =
1100: lhbrx */
> +=A0=A0=A0=A0=A0=A0 { 4, LD+SE },=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 10 0 =
1101=A0 lwa */
> +=A0=A0=A0=A0=A0=A0 { 2, ST+SW },=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 10 0 =
1110: sthbrx */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 0 1111 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 1 0000 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 1 0001 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 1 0010 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 1 0011 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 1 0100 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 1 0101 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 1 0110 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 1 0111 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 1 1000 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 1 1001 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 1 1010 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 1 1011 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 1 1100 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 1 1101 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 10 1 1110 */
> +=A0=A0=A0=A0=A0=A0 { 0, ST+HARD }, =A0=A0=A0=A0=A0=A0=A0 /* 10 1 =
1111: dcbz */
> +=A0=A0=A0=A0=A0=A0 { 4, LD },=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* =
11 0 0000: lwzx */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 11 0 0001 */
> +=A0=A0=A0=A0=A0=A0 { 4, ST },=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* =
11 0 0010: stwx */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 11 0 0011 */
> +=A0=A0=A0=A0=A0=A0 { 2, LD },=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* =
11 0 0100: lhzx */
> +=A0=A0=A0=A0=A0=A0 { 2, LD+SE },=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 11 0 =
0101: lhax */
> +=A0=A0=A0=A0=A0=A0 { 2, ST },=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* =
11 0 0110: sthx */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 11 0 0111 */
> +=A0=A0=A0=A0=A0=A0 { 4, LD+F+S },=A0 =A0=A0=A0=A0=A0=A0=A0 /* 11 0 =
1000: lfsx */
> +=A0=A0=A0=A0=A0=A0 { 8, LD+F },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 11 =
0 1001: lfdx */
> +=A0=A0=A0=A0=A0=A0 { 4, ST+F+S },=A0 =A0=A0=A0=A0=A0=A0=A0 /* 11 0 =
1010: stfsx */
> +=A0=A0=A0=A0=A0=A0 { 8, ST+F },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 11 =
0 1011: stfdx */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 11 0 1100 */
> +=A0=A0=A0=A0=A0=A0 { 8, LD+M },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 11 =
0 1101: lmd */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 11 0 1110 */
> +=A0=A0=A0=A0=A0=A0 { 8, ST+M },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 11 =
0 1111: stmd */
> +=A0=A0=A0=A0=A0=A0 { 4, LD+U },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 11 =
1 0000: lwzux */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 11 1 0001 */
> +=A0=A0=A0=A0=A0=A0 { 4, ST+U },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 11 =
1 0010: stwux */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 11 1 0011 */
> +=A0=A0=A0=A0=A0=A0 { 2, LD+U },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 11 =
1 0100: lhzux */
> +=A0=A0=A0=A0=A0=A0 { 2, LD+SE+U }, =A0=A0=A0=A0=A0=A0=A0 /* 11 1 =
0101: lhaux */
> +=A0=A0=A0=A0=A0=A0 { 2, ST+U },=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* 11 =
1 0110: sthux */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 11 1 0111 */
> +=A0=A0=A0=A0=A0=A0 { 4, LD+F+S+U },=A0=A0=A0=A0=A0=A0=A0 /* 11 1 =
1000: lfsux */
> +=A0=A0=A0=A0=A0=A0 { 8, LD+F+U },=A0 =A0=A0=A0=A0=A0=A0=A0 /* 11 1 =
1001: lfdux */
> +=A0=A0=A0=A0=A0=A0 { 4, ST+F+S+U },=A0=A0=A0=A0=A0=A0=A0 /* 11 1 =
1010: stfsux */
> +=A0=A0=A0=A0=A0=A0 { 8, ST+F+U },=A0 =A0=A0=A0=A0=A0=A0=A0 /* 11 1 =
1011: stfdux */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 11 1 1100 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 11 1 1101 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 11 1 1110 */
> +=A0=A0=A0=A0=A0=A0 INVALID,=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
/* 11 1 1111 */
> +};
> +
> +/*
> + * Create a DSISR value from the instruction
> + */
> +static inline unsigned make_dsisr(unsigned instr)
> +{
> +=A0=A0=A0=A0=A0=A0 unsigned dsisr;
> +
> +
> +=A0=A0=A0=A0=A0=A0 /* bits=A0 6:15 --> 22:31 */
> +=A0=A0=A0=A0=A0=A0 dsisr =3D (instr & 0x03ff0000) >> 16;
> +
> +=A0=A0=A0=A0=A0=A0 if ( IS_XFORM(instr) ) {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* bits 29:30 --> 15:16 */
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 dsisr |=3D (instr & =
0x00000006) << 14;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* bit=A0=A0=A0=A0 25 -->=A0=A0=
=A0 17 */
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 dsisr |=3D (instr & =
0x00000040) << 8;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* bits 21:24 --> 18:21 */
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 dsisr |=3D (instr & =
0x00000780) << 3;
> +=A0=A0=A0=A0=A0=A0 }
> +=A0=A0=A0=A0=A0=A0 else {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* bit=A0=A0=A0=A0=A0 5 =
-->=A0=A0=A0 17 */
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 dsisr |=3D (instr & =
0x04000000) >> 12;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* bits=A0 1: 4 --> 18:21 */
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 dsisr |=3D (instr & =
0x78000000) >> 17;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* bits 30:31 --> 12:13 */
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if ( IS_DSFORM(instr) )
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 dsisr =
|=3D (instr & 0x00000003) << 18;
> +=A0=A0=A0=A0=A0=A0 }
> +
> +=A0=A0=A0=A0=A0=A0 return dsisr;
> +}
> +
> +/*
> + * The dcbz (data cache block zero) instruction
> + * gives an alignment fault if used on non-cacheable
> + * memory.=A0 We handle the fault mainly for the
> + * case when we are running with the cache disabled
> + * for debugging.
> + */
> +static int emulate_dcbz(struct pt_regs *regs, unsigned char __user=20
> *addr)
> +{
> +=A0=A0=A0=A0=A0=A0 long __user *p;
> +=A0=A0=A0=A0=A0=A0 int i, size;
> +
> +#ifdef __powerpc64__
> +=A0=A0=A0=A0=A0=A0 size =3D ppc64_caches.dline_size;
> +#else
> +=A0=A0=A0=A0=A0=A0 size =3D L1_CACHE_BYTES;
> +#endif
> +=A0=A0=A0=A0=A0=A0 p =3D (long __user *) (regs->dar & -size);
> +=A0=A0=A0=A0=A0=A0 if (user_mode(regs) && !access_ok(VERIFY_WRITE, p, =
size))
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 return -EFAULT;
> +=A0=A0=A0=A0=A0=A0 for (i =3D 0; i < size / sizeof(long); ++i)
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if (__put_user(0, p+i))
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 return =
-EFAULT;
> +=A0=A0=A0=A0=A0=A0 return 1;
> +}
> +
> +/*
> + * Emulate load & store multiple instructions
> + */
> +static int emulate_multiple(struct pt_regs *regs, unsigned char=20
> __user *addr,
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=
 unsigned int reg, unsigned int nb,
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=
 unsigned int flags, unsigned int instr)
> +{
> +=A0=A0=A0=A0=A0=A0 unsigned char *rptr;
> +=A0=A0=A0=A0=A0=A0 int nb0, i;
> +
> +=A0=A0=A0=A0=A0=A0 /*
> +=A0=A0=A0=A0=A0=A0=A0 * We do not try to emulate 8 bytes multiple as =
they aren't=20
> really
> +=A0=A0=A0=A0=A0=A0=A0 * available in our operating environments and =
we don't try to
> +=A0=A0=A0=A0=A0=A0=A0 * emulate multiples operations in kernel land =
as they should=20
> never
> +=A0=A0=A0=A0=A0=A0=A0 * be used/generated there at least not on =
unaligned boundaries
> +=A0=A0=A0=A0=A0=A0=A0 */
> +=A0=A0=A0=A0=A0=A0 if (unlikely((nb > 4) || !user_mode(regs)))
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 return 0;
> +
> +=A0=A0=A0=A0=A0=A0 /* lmw, stmw, lswi/x, stswi/x */
> +=A0=A0=A0=A0=A0=A0 nb0 =3D 0;
> +=A0=A0=A0=A0=A0=A0 if (flags & HARD) {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if (flags & SX) {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 nb =3D =
regs->xer & 127;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if (nb =
=3D=3D 0)
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 return 1;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 } else {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if =
(__get_user(instr,
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0 (unsigned int __user=20
> *)regs->nip))
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 return -EFAULT;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 nb =3D =
(instr >> 11) & 0x1f;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if (nb =
=3D=3D 0)
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 nb =3D 32;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 }
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if (nb + reg * 4 > 128) {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 nb0 =3D =
nb + reg * 4 - 128;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 nb =3D =
128 - reg * 4;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 }
> +=A0=A0=A0=A0=A0=A0 } else {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* lwm, stmw */
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 nb =3D (32 - reg) * 4;
> +=A0=A0=A0=A0=A0=A0 }
> +
> +=A0=A0=A0=A0=A0=A0 if (!access_ok((flags & ST ? VERIFY_WRITE: =
VERIFY_READ), addr,=20
> nb+nb0))
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 return -EFAULT; /* bad =
address */
> +
> +=A0=A0=A0=A0=A0=A0 rptr =3D (unsigned char *) &regs->gpr[reg];
> +=A0=A0=A0=A0=A0=A0 if (flags & LD) {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 for (i =3D 0; i < nb; ++i)
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if =
(__get_user(rptr[i], addr + i))
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 return -EFAULT;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if (nb0 > 0) {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 rptr =3D=
 (unsigned char *) &regs->gpr[0];
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 addr =
+=3D nb;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 for (i =
=3D 0; i < nb0; ++i)
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 if (__get_user(rptr[i], addr + i))
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 return -EFAULT;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 }
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 for (; (i & 3) !=3D 0; ++i)
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
rptr[i] =3D 0;
> +=A0=A0=A0=A0=A0=A0 } else {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 for (i =3D 0; i < nb; ++i)
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if =
(__put_user(rptr[i], addr + i))
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 return -EFAULT;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if (nb0 > 0) {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 rptr =3D=
 (unsigned char *) &regs->gpr[0];
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 addr =
+=3D nb;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 for (i =
=3D 0; i < nb0; ++i)
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 if (__put_user(rptr[i], addr + i))
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 return -EFAULT;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 }
> +=A0=A0=A0=A0=A0=A0 }
> +=A0=A0=A0=A0=A0=A0 return 1;
> +}
> +
> +
> +/*
> + * Called on alignment exception. Attempts to fixup
> + *
> + * Return 1 on success
> + * Return 0 if unable to handle the interrupt
> + * Return -EFAULT if data address is bad
> + */
> +
> +int fix_alignment(struct pt_regs *regs)
> +{
> +=A0=A0=A0=A0=A0=A0 unsigned int instr, nb, flags;
> +=A0=A0=A0=A0=A0=A0 unsigned int reg, areg;
> +=A0=A0=A0=A0=A0=A0 unsigned int dsisr;
> +=A0=A0=A0=A0=A0=A0 unsigned char __user *addr;
> +=A0=A0=A0=A0=A0=A0 unsigned char __user *p;
> +=A0=A0=A0=A0=A0=A0 int ret, t;
> +=A0=A0=A0=A0=A0=A0 union {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 long ll;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 double dd;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 unsigned char v[8];
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 struct {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
unsigned hi32;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
int=A0=A0=A0=A0=A0 low32;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 } x32;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 struct {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
unsigned char hi48[6];
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
short=A0=A0 =A0=A0=A0=A0=A0 low16;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 } x16;
> +=A0=A0=A0=A0=A0=A0 } data;
> +
> +=A0=A0=A0=A0=A0=A0 /*
> +=A0=A0=A0=A0=A0=A0=A0 * We require a complete register set, if not, =
then our=20
> assembly
> +=A0=A0=A0=A0=A0=A0=A0 * is broken
> +=A0=A0=A0=A0=A0=A0=A0 */
> +=A0=A0=A0=A0=A0=A0 CHECK_FULL_REGS(regs);
> +
> +=A0=A0=A0=A0=A0=A0 dsisr =3D regs->dsisr;
> +
> +=A0=A0=A0=A0=A0=A0 /* Some processors don't provide us with a DSISR =
we can use=20
> here,
> +=A0=A0=A0=A0=A0=A0=A0 * let's make one up from the instruction
> +=A0=A0=A0=A0=A0=A0=A0 */
> +=A0=A0=A0=A0=A0=A0 if (cpu_has_feature(CPU_FTR_NODSISRALIGN)) {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 unsigned int real_instr;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if =
(unlikely(__get_user(real_instr,
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 (unsigned int __user=20
> *)regs->nip)))
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 return =
-EFAULT;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 dsisr =3D =
make_dsisr(real_instr);
> +=A0=A0=A0=A0=A0=A0 }
> +
> +=A0=A0=A0=A0=A0=A0 /* extract the operation and registers from the =
dsisr */
> +=A0=A0=A0=A0=A0=A0 reg =3D (dsisr >> 5) & 0x1f;=A0=A0=A0=A0=A0 /* =
source/dest register */
> +=A0=A0=A0=A0=A0=A0 areg =3D dsisr & 0x1f;=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0=
 /* register to update */
> +=A0=A0=A0=A0=A0=A0 instr =3D (dsisr >> 10) & 0x7f;
> +=A0=A0=A0=A0=A0=A0 instr |=3D (dsisr >> 13) & 0x60;
> +
> +=A0=A0=A0=A0=A0=A0 /* Lookup the operation in our table */
> +=A0=A0=A0=A0=A0=A0 nb =3D aligninfo[instr].len;
> +=A0=A0=A0=A0=A0=A0 flags =3D aligninfo[instr].flags;
> +
> +=A0=A0=A0=A0=A0=A0 /* DAR has the operand effective address */
> +=A0=A0=A0=A0=A0=A0 addr =3D (unsigned char __user *)regs->dar;
> +
> +=A0=A0=A0=A0=A0=A0 /* A size of 0 indicates an instruction we don't =
support, with
> +=A0=A0=A0=A0=A0=A0=A0 * the exception of DCBZ which is handled as a =
special case=20
> here
> +=A0=A0=A0=A0=A0=A0=A0 */
> +=A0=A0=A0=A0=A0=A0 if (instr =3D=3D DCBZ)
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 return emulate_dcbz(regs, =
addr);
> +=A0=A0=A0=A0=A0=A0 if (unlikely(nb =3D=3D 0))
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 return 0;
> +
> +=A0=A0=A0=A0=A0=A0 /* Load/Store Multiple instructions are handled in =
their own
> +=A0=A0=A0=A0=A0=A0=A0 * function
> +=A0=A0=A0=A0=A0=A0=A0 */
> +=A0=A0=A0=A0=A0=A0 if (flags & M)
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 return =
emulate_multiple(regs, addr, reg, nb, flags,=20
> instr);
> +
> +=A0=A0=A0=A0=A0=A0 /* Verify the address of the operand */
> +=A0=A0=A0=A0=A0=A0 if (unlikely(user_mode(regs) &&
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0 =
!access_ok((flags & ST ? VERIFY_WRITE :=20
> VERIFY_READ),
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 addr, nb)))
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 return -EFAULT;
> +
> +=A0=A0=A0=A0=A0=A0 /* Force the fprs into the save area so we can =
reference them=20
> */
> +=A0=A0=A0=A0=A0=A0 if (flags & F) {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 /* userland only */
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if =
(unlikely(!user_mode(regs)))
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 return =
0;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 flush_fp_to_thread(current);
> +=A0=A0=A0=A0=A0=A0 }
> +
> +=A0=A0=A0=A0=A0=A0 /* If we are loading, get the data from user =
space, else
> +=A0=A0=A0=A0=A0=A0=A0 * get it from register values
> +=A0=A0=A0=A0=A0=A0=A0 */
> +=A0=A0=A0=A0=A0=A0 if (flags & LD) {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 data.ll =3D 0;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret =3D 0;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 p =3D addr;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 switch (nb) {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 case 8:
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __get_user(data.v[0], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __get_user(data.v[1], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __get_user(data.v[2], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __get_user(data.v[3], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 case 4:
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __get_user(data.v[4], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __get_user(data.v[5], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 case 2:
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __get_user(data.v[6], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __get_user(data.v[7], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if =
(unlikely(ret))
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 return -EFAULT;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 }
> +=A0=A0=A0=A0=A0=A0 } else if (flags & F)
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 data.dd =3D =
current->thread.fpr[reg];
> +=A0=A0=A0=A0=A0=A0 else
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 data.ll =3D regs->gpr[reg];
> +
> +=A0=A0=A0=A0=A0=A0 /* Perform other misc operations like sign =
extension, byteswap,
> +=A0=A0=A0=A0=A0=A0=A0 * or floating point single precision conversion
> +=A0=A0=A0=A0=A0=A0=A0 */
> +=A0=A0=A0=A0=A0=A0 switch (flags & ~U) {
> +=A0=A0=A0=A0=A0=A0 case LD+SE:=A0=A0=A0=A0 /* sign extend */
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if ( nb =3D=3D 2 )
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
data.ll =3D data.x16.low16;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 else=A0=A0=A0 /* nb must be =
4 */
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
data.ll =3D data.x32.low32;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 break;
> +=A0=A0=A0=A0=A0=A0 case LD+S:=A0=A0=A0=A0=A0 /* byte-swap */
> +=A0=A0=A0=A0=A0=A0 case ST+S:
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if (nb =3D=3D 2) {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
SWAP(data.v[6], data.v[7]);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 } else {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
SWAP(data.v[4], data.v[7]);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
SWAP(data.v[5], data.v[6]);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 }
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 break;
> +
> +=A0=A0=A0=A0=A0=A0 /* Single-precision FP load and store require =
conversions... */
> +=A0=A0=A0=A0=A0=A0 case LD+F+S:
> +#ifdef CONFIG_PPC_FPU
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 preempt_disable();
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 enable_kernel_fp();
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 cvt_fd((float *)&data.v[4], =
&data.dd,=20
> &current->thread);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 preempt_enable();
> +#else
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 return 0;
> +#endif
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 break;
> +=A0=A0=A0=A0=A0=A0 case ST+F+S:
> +#ifdef CONFIG_PPC_FPU
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 preempt_disable();
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 enable_kernel_fp();
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 cvt_df(&data.dd, (float =
*)&data.v[4],=20
> &current->thread);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 preempt_enable();
> +#else
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 return 0;
> +#endif
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 break;
> +=A0=A0=A0=A0=A0=A0 }
> +
> +=A0=A0=A0=A0=A0=A0 /* Store result to memory or update registers */
> +=A0=A0=A0=A0=A0=A0 if (flags & ST) {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret =3D 0;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 p =3D addr;
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 switch (nb) {
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 case 8:
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __put_user(data.v[0], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __put_user(data.v[1], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __put_user(data.v[2], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __put_user(data.v[3], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 case 4:
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __put_user(data.v[4], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __put_user(data.v[5], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 case 2:
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __put_user(data.v[6], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ret |=3D=
 __put_user(data.v[7], p++);
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 }
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 if (unlikely(ret))
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 return =
-EFAULT;
> +=A0=A0=A0=A0=A0=A0 } else if (flags & F)
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 current->thread.fpr[reg] =3D =
data.dd;
> +=A0=A0=A0=A0=A0=A0 else
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 regs->gpr[reg] =3D data.ll;
> +
> +=A0=A0=A0=A0=A0=A0 /* Update RA as needed */
> +=A0=A0=A0=A0=A0=A0 if (flags & U)
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 regs->gpr[areg] =3D =
regs->dar;
> +
> +=A0=A0=A0=A0=A0=A0 return 1;
> +}
> Index: linux-work/arch/ppc/kernel/Makefile
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- linux-work.orig/arch/ppc/kernel/Makefile=A0=A0=A0 2005-11-11=20
> 10:14:48.000000000 +1100
> +++ linux-work/arch/ppc/kernel/Makefile 2005-11-14 18:42:30.000000000=20=

> +1100
> @@ -13,7 +13,7 @@
> =A0extra-y=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=
=A0 =A0=A0=A0=A0=A0=A0=A0 +=3D vmlinux.lds
> =A0
> =A0obj-y=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=
=A0 :=3D entry.o traps.o idle.o time.o=20
> misc.o \
> -=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 process.o align.o \
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 process.o \
> =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 setup.o \
> =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 ppc_htab.o
> =A0obj-$(CONFIG_6xx)=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 +=3D l2cr.o =
cpu_setup_6xx.o
> Index: linux-work/arch/ppc64/kernel/Makefile
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- linux-work.orig/arch/ppc64/kernel/Makefile=A0 2005-11-14=20
> 15:20:05.000000000 +1100
> +++ linux-work/arch/ppc64/kernel/Makefile=A0=A0=A0=A0=A0=A0 2005-11-14=20=

> 18:42:12.000000000 +1100
> @@ -11,9 +11,7 @@
> =A0
> =A0endif
> =A0
> -obj-y=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0 +=3D idle.o dma.o \
> -=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
align.o \
> -=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
iommu.o
> +obj-y=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0=A0 +=3D idle.o dma.o =
iommu.o
> =A0
> =A0pci-obj-$(CONFIG_PPC_MULTIPLATFORM)=A0=A0=A0 +=3D pci_dn.o =
pci_direct_iommu.o
> =A0
> Index: linux-work/include/asm-powerpc/cputable.h
> =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=
=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
> --- linux-work.orig/include/asm-powerpc/cputable.h=A0=A0=A0=A0=A0 =
2005-11-11=20
> 10:14:49.000000000 +1100
> +++ linux-work/include/asm-powerpc/cputable.h=A0=A0 2005-11-14=20
> 18:33:42.000000000 +1100
> @@ -90,6 +90,7 @@
> =A0#define CPU_FTR_NEED_COHERENT=A0 =A0=A0=A0=A0=A0=A0=A0 =
ASM_CONST(0x0000000000020000)
> =A0#define CPU_FTR_NO_BTIC=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0=20
> ASM_CONST(0x0000000000040000)
> =A0#define CPU_FTR_BIG_PHYS=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
ASM_CONST(0x0000000000080000)
> +#define CPU_FTR_NODSISRALIGN=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
ASM_CONST(0x0000000000100000)
> =A0
> =A0#ifdef __powerpc64__
> =A0/* Add the 64b processor unique features in the top half of the =
word=20
> */
> @@ -97,7 +98,6 @@
> =A0#define CPU_FTR_16M_PAGE=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
ASM_CONST(0x0000000200000000)
> =A0#define CPU_FTR_TLBIEL=A0=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0=20
> ASM_CONST(0x0000000400000000)
> =A0#define CPU_FTR_NOEXECUTE=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
ASM_CONST(0x0000000800000000)
> -#define CPU_FTR_NODSISRALIGN=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
ASM_CONST(0x0000001000000000)
> =A0#define CPU_FTR_IABR=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0=
 ASM_CONST(0x0000002000000000)
> =A0#define CPU_FTR_MMCRA=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0=20
> ASM_CONST(0x0000004000000000)
> =A0#define CPU_FTR_CTRL=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0=
 ASM_CONST(0x0000008000000000)
> @@ -113,7 +113,6 @@
> =A0#define CPU_FTR_16M_PAGE=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
ASM_CONST(0x0)
> =A0#define CPU_FTR_TLBIEL=A0=A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 ASM_CONST(0x0)
> =A0#define CPU_FTR_NOEXECUTE=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
ASM_CONST(0x0)
> -#define CPU_FTR_NODSISRALIGN=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
ASM_CONST(0x0)
> =A0#define CPU_FTR_IABR=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0=
 ASM_CONST(0x0)
> =A0#define CPU_FTR_MMCRA=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =
=A0=A0=A0=A0=A0=A0=A0 ASM_CONST(0x0)
> =A0#define CPU_FTR_CTRL=A0=A0 =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0=A0=A0=A0=A0=
 ASM_CONST(0x0)
> @@ -273,18 +272,21 @@
> =A0=A0=A0=A0=A0=A0=A0 CPU_FTRS_POWER3_32 =3D CPU_FTR_COMMON | =
CPU_FTR_SPLIT_ID_CACHE |
> =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE,
> =A0=A0=A0=A0=A0=A0=A0 CPU_FTRS_POWER4_32 =3D CPU_FTR_COMMON | =
CPU_FTR_SPLIT_ID_CACHE |
> -=A0=A0=A0=A0=A0=A0 =A0=A0=A0 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE,
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | =
CPU_FTR_NODSISRALIGN,
> =A0=A0=A0=A0=A0=A0=A0 CPU_FTRS_970_32 =3D CPU_FTR_COMMON | =
CPU_FTR_SPLIT_ID_CACHE |
> =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0 CPU_FTR_USE_TB | CPU_FTR_HPTE_TABLE | =
CPU_FTR_ALTIVEC_COMP=20
> |
> -=A0=A0=A0=A0=A0=A0 =A0=A0=A0 CPU_FTR_MAYBE_CAN_NAP,
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0 CPU_FTR_MAYBE_CAN_NAP | =
CPU_FTR_NODSISRALIGN,
> =A0=A0=A0=A0=A0=A0=A0 CPU_FTRS_8XX =3D CPU_FTR_SPLIT_ID_CACHE | =
CPU_FTR_USE_TB,
> -=A0=A0=A0=A0=A0=A0 CPU_FTRS_40X =3D CPU_FTR_SPLIT_ID_CACHE | =
CPU_FTR_USE_TB,
> -=A0=A0=A0=A0=A0=A0 CPU_FTRS_44X =3D CPU_FTR_SPLIT_ID_CACHE | =
CPU_FTR_USE_TB,
> -=A0=A0=A0=A0=A0=A0 CPU_FTRS_E200 =3D CPU_FTR_USE_TB,
> -=A0=A0=A0=A0=A0=A0 CPU_FTRS_E500 =3D CPU_FTR_SPLIT_ID_CACHE | =
CPU_FTR_USE_TB,
> +=A0=A0=A0=A0=A0=A0 CPU_FTRS_40X =3D CPU_FTR_SPLIT_ID_CACHE | =
CPU_FTR_USE_TB |
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0 CPU_FTR_NODSISRALIGN,
> +=A0=A0=A0=A0=A0=A0 CPU_FTRS_44X =3D CPU_FTR_SPLIT_ID_CACHE | =
CPU_FTR_USE_TB |
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0 CPU_FTR_NODSISRALIGN,
> +=A0=A0=A0=A0=A0=A0 CPU_FTRS_E200 =3D CPU_FTR_USE_TB | =
CPU_FTR_NODSISRALIGN,
> +=A0=A0=A0=A0=A0=A0 CPU_FTRS_E500 =3D CPU_FTR_SPLIT_ID_CACHE | =
CPU_FTR_USE_TB |
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0 CPU_FTR_NODSISRALIGN,
> =A0=A0=A0=A0=A0=A0=A0 CPU_FTRS_E500_2 =3D CPU_FTR_SPLIT_ID_CACHE | =
CPU_FTR_USE_TB |
> -=A0=A0=A0=A0=A0=A0 =A0=A0=A0 CPU_FTR_BIG_PHYS,
> -=A0=A0=A0=A0=A0=A0 CPU_FTRS_GENERIC_32 =3D CPU_FTR_COMMON,
> +=A0=A0=A0=A0=A0=A0 =A0=A0=A0 CPU_FTR_BIG_PHYS | CPU_FTR_NODSISRALIGN,
> +=A0=A0=A0=A0=A0=A0 CPU_FTRS_GENERIC_32 =3D CPU_FTR_COMMON | =
CPU_FTR_NODSISRALIGN,
> =A0#ifdef __powerpc64__
> =A0=A0=A0=A0=A0=A0=A0 CPU_FTRS_POWER3 =3D CPU_FTR_SPLIT_ID_CACHE | =
CPU_FTR_USE_TB |
> =A0=A0=A0=A0=A0=A0=A0 =A0=A0=A0 CPU_FTR_HPTE_TABLE | CPU_FTR_IABR,
>
>
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-dev

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-14 19:53 ` Becky Bruce
@ 2005-11-14 20:55   ` Benjamin Herrenschmidt
  2005-11-15  5:10     ` Becky Bruce
  0 siblings, 1 reply; 20+ messages in thread
From: Benjamin Herrenschmidt @ 2005-11-14 20:55 UTC (permalink / raw)
  To: Becky Bruce; +Cc: linuxppc64-dev, linuxppc-dev list

On Mon, 2005-11-14 at 13:53 -0600, Becky Bruce wrote:
> Ben,
> 
> I talked to Kumar about this a little bit (I had started a merge of 
> this file, but got distracted!) and he doesn't have any test cases.  
> I'll put something together and test this out on some of the 32-bit 
> systems I have here in my lab.  It won't be complete, but it will be 
> something.......

Thanks,
Ben.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-14 20:55   ` Benjamin Herrenschmidt
@ 2005-11-15  5:10     ` Becky Bruce
  2005-11-15  5:35       ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 20+ messages in thread
From: Becky Bruce @ 2005-11-15  5:10 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc64-dev, linuxppc-dev list

Ben,

I've just done some basic testing of lmw/stmw, lwz/stw, lhx/sth,
lfs/stfs, and lfd/stfd misaligned across a doubleword boundary, and
everything looks good so far.  I'll check out the byte reversals and a
few other forms tomorrow.

Cheers,
B

On Nov 14, 2005, at 2:55 PM, Benjamin Herrenschmidt wrote:

> On Mon, 2005-11-14 at 13:53 -0600, Becky Bruce wrote:
> > Ben,
> >
> > I talked to Kumar about this a little bit (I had started a merge of
> > this file, but got distracted!) and he doesn't have any test cases.=A0=

> > I'll put something together and test this out on some of the 32-bit
> > systems I have here in my lab.=A0 It won't be complete, but it will =
be
> > something.......
>
> Thanks,
> Ben.
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-15  5:10     ` Becky Bruce
@ 2005-11-15  5:35       ` Benjamin Herrenschmidt
  2005-11-16  2:19         ` Becky Bruce
  0 siblings, 1 reply; 20+ messages in thread
From: Benjamin Herrenschmidt @ 2005-11-15  5:35 UTC (permalink / raw)
  To: Becky Bruce; +Cc: linuxppc64-dev, linuxppc-dev list

On Mon, 2005-11-14 at 23:10 -0600, Becky Bruce wrote:
> Ben,
> 
> I've just done some basic testing of lmw/stmw, lwz/stw, lhx/sth, 
> lfs/stfs, and lfd/stfd misaligned across a doubleword boundary, and 
> everything looks good so far.   I'll check out the byte reversals and a 
> few other forms tomorrow.

Excellent, thanks ! BTW. Make sure you test these on CPUs that actually
trap on misaligned accesses :) Best is probably to do the misaligned
access across a page boundary, that's what most CPUs can do.

Ben.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-15  5:35       ` Benjamin Herrenschmidt
@ 2005-11-16  2:19         ` Becky Bruce
  2005-11-16  2:34           ` Benjamin Herrenschmidt
  2005-11-16  9:36           ` Gabriel Paubert
  0 siblings, 2 replies; 20+ messages in thread
From: Becky Bruce @ 2005-11-16  2:19 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc64-dev, linuxppc-dev list

Ben,

Yeah,  I clearly shouldn't run testcases at 11pm, because I got in a
rush and only confirmed that lmw/stmw were actually taking the
exception.  Those 2 are working beautifully.  To test the others, I
need to run on a different board which, of course,  isn't bootable at
the moment.  As soon as I can get that up and running, I'll try some of
the other cases and let you know how it goes......

BTW, Based on the pile of docs I have here, I think the list of
alignment-exception-causing events on FSL's current parts (603, 603e,
750, 74x, 74xx, e500) is:

- lmw/stmw (all procs, non-word aligned)
- single and double precision floating point ld/st ops (non-E500, non
data size aligned)
- dcbz to WT or CI memory (all procs)
- dcbz with cache disabled (all procs but 603e?)
- misaligned little endian accesses (603e)
- lwarx/stwcx (all procs)
- multiple/string with LE set (750, 603e, 7450, 7400)
- eciwx/ecowx (750, 7450, 7400)
- a couple of others related to vector processing

If anybody knows offhand of something missing there, let me know.

Cheers,
B



On Nov 14, 2005, at 11:35 PM, Benjamin Herrenschmidt wrote:

> On Mon, 2005-11-14 at 23:10 -0600, Becky Bruce wrote:
> > Ben,
> >
> > I've just done some basic testing of lmw/stmw, lwz/stw, lhx/sth,
> > lfs/stfs, and lfd/stfd misaligned across a doubleword boundary, and
> > everything looks good so far.   I'll check out the byte reversals and a
> > few other forms tomorrow.
>
> Excellent, thanks ! BTW. Make sure you test these one CPUs that
> actually
> trap on misaligned accesses :) Best is probably to do the misaligned
> access accross a page boundary, that's what most CPUs can do.
>
> Ben.
>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-16  2:19         ` Becky Bruce
@ 2005-11-16  2:34           ` Benjamin Herrenschmidt
  2005-11-16  3:23             ` Becky Bruce
  2005-11-16  4:26             ` Dan Malek
  2005-11-16  9:36           ` Gabriel Paubert
  1 sibling, 2 replies; 20+ messages in thread
From: Benjamin Herrenschmidt @ 2005-11-16  2:34 UTC (permalink / raw)
  To: Becky Bruce; +Cc: linuxppc64-dev, linuxppc-dev list

On Tue, 2005-11-15 at 20:19 -0600, Becky Bruce wrote:
> Ben,
> 
> Yeah,  I clearly shouldn't run testcases at 11pm, because I got in a 
> rush and only confirmed that lmw/stmw were actually taking the 
> exception.  Those 2 are working beautifully.  To test the others, I 
> need to run on a different board which, of course,  isn't bootable at 
> the moment.  As soon as I can get that up and running, I'll try some of 
> the other cases and let you know how it goes......
> 
> BTW, Based on the pile of docs I have here, I think the list of 
> alignment-exception-causing events on FSL's current parts (603, 603e, 
> 750, 74x, 74xx, e500) is:
> 
> - lmw/stmw (all procs, non-word aligned)
> - single and double precision floating point ld/st ops (non-E500, non 
> data size aligned)
> - dcbz to WT or CI memory (all procs)
> - dcbz with cache disabled (all procs but 603e?)
> - misaligned little endian accesses (603e)
> - lwarx/stwcx (all procs)
> - multiple/string with LE set (750, 603e, 7450, 7400)
> - eciwx/ecowx (750, 7450, 7400)
> - a couple of others related to vector processing
> 
> If anybody knows offhand of something missing there, let me know.

What about lwz/stw crossing page boundaries ? Is this handled in HW ?

Ben.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-16  2:34           ` Benjamin Herrenschmidt
@ 2005-11-16  3:23             ` Becky Bruce
  2005-11-16 16:54               ` Andrey Volkov
  2005-11-16  4:26             ` Dan Malek
  1 sibling, 1 reply; 20+ messages in thread
From: Becky Bruce @ 2005-11-16  3:23 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc64-dev, linuxppc-dev list

On Nov 15, 2005, at 8:34 PM, Benjamin Herrenschmidt wrote:
> >
> > BTW, Based on the pile of docs I have here, I think the list of
> > alignment-exception-causing events on FSL's current parts (603, 603e,
> > 750, 74x, 74xx, e500) is:
> >
> > - lmw/stmw (all procs, non-word aligned)
> > - single and double precision floating point ld/st ops (non-E500, non
> > data size aligned)
> > - dcbz to WT or CI memory (all procs)
> > - dcbz with cache disabled (all procs but 603e?)
> > - misaligned little endian accesses (603e)
> > - lwarx/stwcx (all procs)
> > - multiple/string with LE set (750, 603e, 7450, 7400)
> > - eciwx/ecowx (750, 7450, 7400)
> > - a couple of others related to vector processing
> >
> > If anybody knows offhand of something missing there, let me know.
>
> What about lwz/stw cropssing page boundaries ? Is this handled in HW ?
>
> Ben.

Apparently so, much to my surprise - I ran the testcase with those 
instructions misaligned across a page boundary last night and got no 
alignment exception.  I was surprised, and asked my husband about it 
(he worked on the load/store units for a bunch of our parts), and he 
says these guys never cause an exception for any of FSL's current parts 
as far as he knows.  This is supported by our documentation as well - 
the only place I see these listed is on 603e, where they can cause an 
exception if the page is mapped little endian.

-B

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-16  2:34           ` Benjamin Herrenschmidt
  2005-11-16  3:23             ` Becky Bruce
@ 2005-11-16  4:26             ` Dan Malek
  2005-11-16  5:00               ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 20+ messages in thread
From: Dan Malek @ 2005-11-16  4:26 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev list, linuxppc64-dev


On Nov 15, 2005, at 9:34 PM, Benjamin Herrenschmidt wrote:

> What about lwz/stw cropssing page boundaries ? Is this handled in HW ?

Yep.  All of these hardware alignment support features on
the Freescale processors are the reasons they are used
so extensively in data communication processing (where
unaligned data can sometimes occur).  All of the load/store
alignment issues are handled in the cache subsystem, so
to the external world all you really see are cache line
operations.  In the event of uncached data operations, you
get the performance penalty of two bus accesses, where
some of the data is discarded.

	-- Dan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-16  4:26             ` Dan Malek
@ 2005-11-16  5:00               ` Benjamin Herrenschmidt
  2005-11-16  5:35                 ` Dan Malek
  0 siblings, 1 reply; 20+ messages in thread
From: Benjamin Herrenschmidt @ 2005-11-16  5:00 UTC (permalink / raw)
  To: Dan Malek; +Cc: linuxppc-dev list, linuxppc64-dev

On Tue, 2005-11-15 at 23:26 -0500, Dan Malek wrote:
> On Nov 15, 2005, at 9:34 PM, Benjamin Herrenschmidt wrote:
> 
> > What about lwz/stw cropssing page boundaries ? Is this handled in HW ?
> 
> Yep.  All of these hardware alignment support features on
> the Freescale processors are the reasons they are used
> so extensively in data communication processing (where
> unaligned data can sometimes occur).  All of the load/store
> alignment issues are handled in the cache subsystem, so
> to the external world all you really see are cache line
> operations.  In the event of uncached data operations, you
> get the performance penalty of two bus accesses, where
> some of the data is discarded.

Oh well, I suppose I'll have to dig out paulus' 601 based mac :)

Becky, can you send me a copy of your testcase ?

Ben.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-16  5:00               ` Benjamin Herrenschmidt
@ 2005-11-16  5:35                 ` Dan Malek
  2005-11-16  6:13                   ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 20+ messages in thread
From: Dan Malek @ 2005-11-16  5:35 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev list, linuxppc64-dev


On Nov 16, 2005, at 12:00 AM, Benjamin Herrenschmidt wrote:

> Oh well, I suppose I'll have to dig out paulus' 601 based mac :)

If we don't have any contemporary processors that need
this solution, can we just put it aside until someone
has hardware that requires it?

Thanks.

	-- Dan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-16  5:35                 ` Dan Malek
@ 2005-11-16  6:13                   ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 20+ messages in thread
From: Benjamin Herrenschmidt @ 2005-11-16  6:13 UTC (permalink / raw)
  To: Dan Malek; +Cc: linuxppc-dev list, linuxppc64-dev

On Wed, 2005-11-16 at 00:35 -0500, Dan Malek wrote:
> On Nov 16, 2005, at 12:00 AM, Benjamin Herrenschmidt wrote:
> 
> > Oh well, I suppose I'll have to dig out paulus' 601 based mac :)
> 
> If we don't have any contemporary processors that need
> this solution, can we just put on aside until someone
> has hardware that requires it?

I do not want to break an existing functionality with the merged file,
though for now, I suppose the merged file will only apply to
ARCH=powerpc, I can keep the old align.c in arch/ppc/kernel until it has
been properly tested on old machines.

Ben.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-16  2:19         ` Becky Bruce
  2005-11-16  2:34           ` Benjamin Herrenschmidt
@ 2005-11-16  9:36           ` Gabriel Paubert
  2005-11-16 15:15             ` Kumar Gala
  1 sibling, 1 reply; 20+ messages in thread
From: Gabriel Paubert @ 2005-11-16  9:36 UTC (permalink / raw)
  To: Becky Bruce; +Cc: linuxppc-dev list, linuxppc64-dev

On Tue, Nov 15, 2005 at 08:19:58PM -0600, Becky Bruce wrote:
> Ben,
> 
> Yeah,  I clearly shouldn't run testcases at 11pm, because I got in a 
> rush and only confirmed that lmw/stmw were actually taking the 
> exception.  Those 2 are working beautifully.  To test the others, I 
> need to run on a different board which, of course,  isn't bootable at 
> the moment.  As soon as I can get that up and running, I'll try some of 
> the other cases and let you know how it goes......
> 
> BTW, Based on the pile of docs I have here, I think the list of 
> alignment-exception-causing events on FSL's current parts (603, 603e, 
> 750, 74x, 74xx, e500) is:

The 603 is still in production? And is the upcoming 8641 exactly 
the same as the 74xx series in this respect? 

> 
> - lmw/stmw (all procs, non-word aligned)

Do we really want to emulate these instructions? 

Their purpose is to minimize code size in functions prologue and
epilogue. If you hit an alignment exception with lmw/stmw, your 
stack is probably misaligned for some stupid reason or bug (back 
chain pointer corrupted because of some buffer overflow comes to 
mind, and you want to know ASAP).

> - single and double precision floating point ld/st ops (non-E500, non 
> data size aligned)

Hmm, you can load a double from any 4 byte aligned address AFAIR.

> - dcbz to WT or CI memory (all procs)
> - dcbz with cache disabled (all procs but 603e?)
> - misaligned little endian accesses (603e)

I understand that you mention it for completeness since we 
don't care about LE mode AFAICT. But I believe that there
were some differences between 603 and 603e in this area.

However we do care about byte reversal instructions, which 
probably behave like the corresponding normal instruction
(i.e., lwbrx has the same rules as lwzx, etc.)

> - lwarx/stwcx (all procs)

And ldarx/stdcx. on 64 bit, but these ones should not 
be emulated. So it's easy ;-)

> - multiple/string with LE set (750, 603e, 7450, 7400)

Again LE mode is probably irrelevant.

> - eciwx/ecowx (750, 7450, 7400)

Have these instructions ever been used for something 
under Linux?

> - a couple of others related to vector processing

Which ones? The Altivec load and store instructions
simply mask the low order bits AFAIR.

> If anybody knows offhand of something missing there, let me know.

Nothing, but did you check when crossing a segment (256MB) boundary.
I seem to remember that some processors performed misaligned 
load/store across pages but not across segments.

	Regards,
	Gabriel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-16  9:36           ` Gabriel Paubert
@ 2005-11-16 15:15             ` Kumar Gala
  2005-11-16 16:31               ` Becky Bruce
  2005-11-16 19:20               ` Dan Malek
  0 siblings, 2 replies; 20+ messages in thread
From: Kumar Gala @ 2005-11-16 15:15 UTC (permalink / raw)
  To: Gabriel Paubert; +Cc: linuxppc64-dev, linuxppc-dev list


On Nov 16, 2005, at 3:36 AM, Gabriel Paubert wrote:

> On Tue, Nov 15, 2005 at 08:19:58PM -0600, Becky Bruce wrote:
>> Ben,
>>
>> Yeah,  I clearly shouldn't run testcases at 11pm, because I got in a
>> rush and only confirmed that lmw/stmw were actually taking the
>> exception.  Those 2 are working beautifully.  To test the others, I
>> need to run on a different board which, of course,  isn't bootable at
>> the moment.  As soon as I can get that up and running, I'll try  
>> some of
>> the other cases and let you know how it goes......
>>
>> BTW, Based on the pile of docs I have here, I think the list of
>> alignment-exception-causing events on FSL's current parts (603, 603e,
>> 750, 74x, 74xx, e500) is:
>
> The 603 is still in production? And is the upcoming 8641 exactly
> the same as the 74xx series in this respect?

603 is used in all 82xx/83xx processors from Freescale. The 8641 is  
the same core as 7448.

>> - single and double precision floating point ld/st ops (non-E500, non
>> data size aligned)
>
> Hmm, you can load a double from any 4 byte aligned address AFAIR.

This is only because every processor handles the misalignment for  
you.  It's completely valid for someone to build a PPC that has an  
alignment exception in this case.

>> - dcbz to WT or CI memory (all procs)
>> - dcbz with cache disabled (all procs but 603e?)
>> - misaligned little endian accesses (603e)
>
> I understand that you mention it for completeness since we
> don't care about LE mode AFAICT. But I believe that there
> were some differences between 603 and 603e in this area.
>
> However we do care about byte reversal instructions, which
> probably believe like the corresponding normal instruction
> (i.e., lwbrx has the same rules as lwzx, etc.)
>
>> - lwarx/stwcx (all procs)
>
> And ldarx/stdcx. on 64 bit, but these ones should not
> be emulated. So it's easy ;-)
>
>> - multiple/string with LE set (750, 603e, 7450, 7400)
>
> Again LE mode is probably irrelevant.

Agree with that. We don't support LE on classic.

>> - eciwx/ecowx (750, 7450, 7400)
>
> Have these instructions ever been used for something
> under Linux?

I don't believe so.

>> - a couple of others related to vector processing
>
> Which ones? The Altivec load and store instructions
> simply mask the low order bits AFAIR.

SPE misalignment is something to look at.

>> If anybody knows offhand of something missing there, let me know.
>
> Nothing, but did you check when crossing a segment (256MB) boundary.
> I seem to remember that some processors performed misaligned
> load/store across pages but not across segments.

- kumar

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-16 15:15             ` Kumar Gala
@ 2005-11-16 16:31               ` Becky Bruce
  2005-11-16 19:24                 ` Dan Malek
  2005-11-16 19:20               ` Dan Malek
  1 sibling, 1 reply; 20+ messages in thread
From: Becky Bruce @ 2005-11-16 16:31 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev list, linuxppc64-dev

> >
> > The 603 is still in production? And is the upcoming 8641 exactly
> > the same as the 74xx series in this respect?
>
> 603 is used in all 82xx/83xx processors from Freescale. The 8641 is
> the same core as 7448.

The differences between 603 and 603e wrt alignment exceptions, as far
as I can tell, are:
- 603 does not take exception on misaligned LE accesses except for
strings and multiples
- 603 takes an alignment exception on ecowx/eciwx, 603e does not
- 603 generates an alignment when a ld/st crosses a segment boundary
and the T bit is different in the 2 segments

I should have listed these out above, sorry!

>
> >> - single and double precision floating point ld/st ops (non-E500, non
> >> data size aligned)
> >
> > Hmm, you can load a double from any 4 byte aligned address AFAIR.
>
> This is only because every processor handles the misalignment for
> you.  Its completely valid for someone to build a PPC that has an
> alignment exception in this case.

You're right, I should have said "word-aligned", not "data size
aligned".  While a load of a doubleword from a word aligned address is
considered misaligned by the hardware, it doesn't generate an exception
in any parts we have now that I know of.

> > However we do care about byte reversal instructions, which
> > probably believe like the corresponding normal instruction
> > (i.e., lwbrx has the same rules as lwzx, etc.)

Yep, they would work the same way, which for all of FSL's current parts
would mean no exception.

> >
> >> - lwarx/stwcx (all procs)
> >
> > And ldarx/stdcx. on 64 bit, but these ones should not
> > be emulated. So it's easy ;-)
> >
> >> - multiple/string with LE set (750, 603e, 7450, 7400)
> >
> > Again LE mode is probably irrelevant.
>
> Agree with that. We dont support LE on classic.

Yep.  Just listed for completeness.

>
>
> >> - eciwx/ecowx (750, 7450, 7400)
> >
> > Have these instructions ever been used for something
> > under Linux?
>
> I dont believe so.

These guys are legacy - I don't think anyone uses them, and the
alignment exception doesn't (and, IMHO shouldn't) care about them at
all.   They're just listed here for completeness.

>
> >> - a couple of others related to vector processing
> >
> > Which ones? The Altivec load and store instructions
> > simply mask the low order bits AFAIR.
>
> SPE misalignment is something to look at.

I'll look into it when I have a moment to breathe......  There are 2
conditions here that aren't currently handled (from the manual):
- SPFP and SPE instructions are not aligned on a natural boundary
(defined by the size of the data element being accessed)
- physical address of certain evld/st instructions is not aligned on a
64-bit boundary.

>
> >> If anybody knows offhand of something missing there, let me know.
> >
> > Nothing, but did you check when crossing a segment (256MB) boundary.
> > I seem to remember that some processors performed misaligned
> > load/store across pages but not across segments.

As far as I can tell, the only one that cares about segment boundaries
is 603 (604, 604e, and 601 may care, but I don't consider those
"current", and I don't have any working hardware).  And it only takes
an exception if there's a difference in the T-bit across the segments.

Cheers!
-B

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-16  3:23             ` Becky Bruce
@ 2005-11-16 16:54               ` Andrey Volkov
  0 siblings, 0 replies; 20+ messages in thread
From: Andrey Volkov @ 2005-11-16 16:54 UTC (permalink / raw)
  To: Becky Bruce; +Cc: linuxppc-dev list, linuxppc64-dev

Becky Bruce wrote:
> On Nov 15, 2005, at 8:34 PM, Benjamin Herrenschmidt wrote:
> 
>> >
>> > BTW, Based on the pile of docs I have here, I think the list of
>> > alignment-exception-causing events on FSL's current parts (603, 603e,
>> > 750, 74x, 74xx, e500) is:
>> >
>> > - lmw/stmw (all procs, non-word aligned)
>> > - single and double precision floating point ld/st ops (non-E500, non
>> > data size aligned)
>> > - dcbz to WT or CI memory (all procs)
>> > - dcbz with cache disabled (all procs but 603e?)
>> > - misaligned little endian accesses (603e)
>> > - lwarx/stwcx (all procs)
>> > - multiple/string with LE set (750, 603e, 7450, 7400)
>> > - eciwx/ecowx (750, 7450, 7400)
>> > - a couple of others related to vector processing
>> >
>> > If anybody knows offhand of something missing there, let me know.
>>
>> What about lwz/stw cropssing page boundaries ? Is this handled in HW ?
>>
>> Ben.
> 
> 
> Apparently so, much to my surprise - I ran the testcase with those
> instructions misaligned across a page boundary last night and got no
> alignment exception.  I was surprised, and asked my husband about it (he
> worked on the load/store units for a bunch of our parts), and he says
> these guys never cause an exception for any of FSL's current parts as
> far as he knows.  This is supported by our documentation as well - the
> only place I see these listed is on 603e, where they can cause an
> exception if the page is mapped little endian.
> 
Try this for 603e (BE):
 memcpy(xxxx3, xxxx0, 8);

I get invalid behavior (0 in second dword) on MPC5200 for external flash
access.

--
Regards
Andrey Volkov

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-16 15:15             ` Kumar Gala
  2005-11-16 16:31               ` Becky Bruce
@ 2005-11-16 19:20               ` Dan Malek
  2005-11-16 19:45                 ` Gabriel Paubert
  1 sibling, 1 reply; 20+ messages in thread
From: Dan Malek @ 2005-11-16 19:20 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc64-dev, linuxppc-dev list


On Nov 16, 2005, at 10:15 AM, Kumar Gala wrote:

> 603 is used in all 82xx/83xx processors from Freescale. The 8641 is 
> the same core as 7448.

The 82xx uses G2_LE, and 83xx is e300, which are
similar to the old 603 but do have some subtle
improvements that make them better cores.


	-- Dan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-16 16:31               ` Becky Bruce
@ 2005-11-16 19:24                 ` Dan Malek
  0 siblings, 0 replies; 20+ messages in thread
From: Dan Malek @ 2005-11-16 19:24 UTC (permalink / raw)
  To: Becky Bruce; +Cc: linuxppc-dev list, linuxppc64-dev


On Nov 16, 2005, at 11:31 AM, Becky Bruce wrote:

> As far as I can tell, the only one that cares about segment boundaries 
> is 603

Why would 603 care about segment boundaries?  I couldn't
find any documentation old enough that indicated such a thing :-)

Thanks.

	-- Dan

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-16 19:20               ` Dan Malek
@ 2005-11-16 19:45                 ` Gabriel Paubert
  2005-11-16 20:36                   ` Dan Malek
  0 siblings, 1 reply; 20+ messages in thread
From: Gabriel Paubert @ 2005-11-16 19:45 UTC (permalink / raw)
  To: Dan Malek; +Cc: linuxppc64-dev, linuxppc-dev list

On Wed, Nov 16, 2005 at 02:20:43PM -0500, Dan Malek wrote:
> 
> On Nov 16, 2005, at 10:15 AM, Kumar Gala wrote:
> 
> >603 is used in all 82xx/83xx processors from Freescale. The 8641 is 
> >the same core as 7448.
> 
> The 82xx uses G2_LE, and 83xx is e300, which are
> similar to the old 603 but do have some subtle
> improvements that make them better cores.

I originally asked because I believed that these cores are 
actually closer to the 603e than to the original 603.

But take this with a pinch of salt, I might be wrong.

	Gabriel

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] powerpc: Merge align.c
  2005-11-16 19:45                 ` Gabriel Paubert
@ 2005-11-16 20:36                   ` Dan Malek
  0 siblings, 0 replies; 20+ messages in thread
From: Dan Malek @ 2005-11-16 20:36 UTC (permalink / raw)
  To: Gabriel Paubert; +Cc: linuxppc64-dev, linuxppc-dev list


On Nov 16, 2005, at 2:45 PM, Gabriel Paubert wrote:

> I originally asked because I believed that these cores are
> actually closer to the 603e than to the original 603.

That's correct.  In fact, I think the original 8260 and
perhaps the 5200 were 603e cores.  As I mentioned,
the newer ones are subtly different, but better than
the 603e ;-)

Thanks.

	-- Dan

^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2005-11-16 20:36 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-11-14  8:00 [PATCH] powerpc: Merge align.c Benjamin Herrenschmidt
2005-11-14 19:53 ` Becky Bruce
2005-11-14 20:55   ` Benjamin Herrenschmidt
2005-11-15  5:10     ` Becky Bruce
2005-11-15  5:35       ` Benjamin Herrenschmidt
2005-11-16  2:19         ` Becky Bruce
2005-11-16  2:34           ` Benjamin Herrenschmidt
2005-11-16  3:23             ` Becky Bruce
2005-11-16 16:54               ` Andrey Volkov
2005-11-16  4:26             ` Dan Malek
2005-11-16  5:00               ` Benjamin Herrenschmidt
2005-11-16  5:35                 ` Dan Malek
2005-11-16  6:13                   ` Benjamin Herrenschmidt
2005-11-16  9:36           ` Gabriel Paubert
2005-11-16 15:15             ` Kumar Gala
2005-11-16 16:31               ` Becky Bruce
2005-11-16 19:24                 ` Dan Malek
2005-11-16 19:20               ` Dan Malek
2005-11-16 19:45                 ` Gabriel Paubert
2005-11-16 20:36                   ` Dan Malek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).