LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [v2][PATCH 4/6] book3e/kgdb: Fix a single stgep case of lazy IRQ
From: Tiejun Chen @ 2013-01-21  7:44 UTC (permalink / raw)
  To: benh, galak; +Cc: linuxppc-dev, linux-kernel, jason.wessel
In-Reply-To: <1358754255-31484-1-git-send-email-tiejun.chen@windriver.com>

When we're in kgdb_singlestep(), we have to work around to get
thread_info by copying from the kernel stack before calling
kgdb_handle_exception(), then copying it back afterwards.

But for PPC64, we have a lazy interrupt implementation. So after
copying thread info frome kernle stack, if we need to replay an
interrupt, we shouldn't restore that previous backup thread info
to make sure we can replay an interrupt lately with a proper
thread info.

This patch use __check_irq_replay() to guarantee this process.

Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>
---
 arch/powerpc/kernel/irq.c  |   10 ++++++++++
 arch/powerpc/kernel/kgdb.c |    3 ++-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 4f97fe3..bb8d27a 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -339,7 +339,17 @@ bool prep_irq_for_idle(void)
 	return true;
 }
 
+notrace unsigned int check_irq_replay(void)
+{
+	return __check_irq_replay();
+}
+#else
+notrace unsigned int check_irq_replay(void)
+{
+	return 0;
+}
 #endif /* CONFIG_PPC64 */
+EXPORT_SYMBOL(check_irq_replay);
 
 int arch_show_interrupts(struct seq_file *p, int prec)
 {
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index eb30a40..2f22807 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -151,6 +151,7 @@ static int kgdb_handle_breakpoint(struct pt_regs *regs)
 	return 1;
 }
 
+extern notrace unsigned int check_irq_replay(void);
 static int kgdb_singlestep(struct pt_regs *regs)
 {
 	struct thread_info *thread_info, *exception_thread_info;
@@ -181,7 +182,7 @@ static int kgdb_singlestep(struct pt_regs *regs)
 
 	kgdb_handle_exception(0, SIGTRAP, 0, regs);
 
-	if (thread_info != exception_thread_info)
+	if ((thread_info != exception_thread_info) && (!check_irq_replay()))
 		/* Restore current_thread_info lastly. */
 		memcpy(exception_thread_info, backup_current_thread_info, sizeof *thread_info);
 
-- 
1.7.9.5

^ permalink raw reply related

* [v2][PATCH 3/6] book3e/kgdb: update thread's dbcr0
From: Tiejun Chen @ 2013-01-21  7:44 UTC (permalink / raw)
  To: benh, galak; +Cc: linuxppc-dev, linux-kernel, jason.wessel
In-Reply-To: <1358754255-31484-1-git-send-email-tiejun.chen@windriver.com>

gdb always need to generate a single step properly to invoke
a kgdb state. But with lazy interrupt, book3e can't always
trigger a debug exception with a single step since the current
is blocked for handling those pending exception, then we miss
that expected dbcr configuration at last to generate a debug
exception.

So here we also update thread's dbcr0 to make sure the current
can go back with that missed dbcr0 configuration.

Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>
---
 arch/powerpc/kernel/kgdb.c |   13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 8747447..eb30a40 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -409,7 +409,7 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
 			       struct pt_regs *linux_regs)
 {
 	char *ptr = &remcom_in_buffer[1];
-	unsigned long addr;
+	unsigned long addr, dbcr0;
 
 	switch (remcom_in_buffer[0]) {
 		/*
@@ -426,8 +426,15 @@ int kgdb_arch_handle_exception(int vector, int signo, int err_code,
 		/* set the trace bit if we're stepping */
 		if (remcom_in_buffer[0] == 's') {
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
-			mtspr(SPRN_DBCR0,
-			      mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM);
+			dbcr0 = mfspr(SPRN_DBCR0) | DBCR0_IC | DBCR0_IDM;
+			mtspr(SPRN_DBCR0, dbcr0);
+#ifdef CONFIG_PPC_BOOK3E_64
+			/* With lazy interrut we have to update thread dbcr0 here
+			 * to make sure we can set debug properly at last to invoke
+			 * kgdb again to work well.
+			 */
+			current->thread.dbcr0 = dbcr0;
+#endif
 			linux_regs->msr |= MSR_DE;
 #else
 			linux_regs->msr |= MSR_SE;
-- 
1.7.9.5

^ permalink raw reply related

* [v2][PATCH 2/6] powerpc/book3e: store critical/machine/debug exception thread info
From: Tiejun Chen @ 2013-01-21  7:44 UTC (permalink / raw)
  To: benh, galak; +Cc: linuxppc-dev, linux-kernel, jason.wessel
In-Reply-To: <1358754255-31484-1-git-send-email-tiejun.chen@windriver.com>

We need to store thread info to these exception thread info like something
we already did for PPC32.

Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>
---
 arch/powerpc/kernel/exceptions-64e.S |   15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 767f856..423a936 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -58,6 +58,18 @@
 	std	r10,PACA_##level##_STACK(r13);
 #endif
 
+/* Store something to exception thread info */
+#define	BOOK3E_STORE_EXC_LEVEL_THEAD_INFO(type)					\
+	ld	r1,PACAKSAVE(r13);						\
+	CURRENT_THREAD_INFO(r14, r14);						\
+	CURRENT_THREAD_INFO(r15, r1);						\
+	ld	r10,TI_FLAGS(r14);		     				\
+	std	r10,TI_FLAGS(r15);			     			\
+	ld	r10,TI_PREEMPT(r14);		     				\
+	std	r10,TI_PREEMPT(r15);		     				\
+	ld	r10,TI_TASK(r14);			     			\
+	std	r10,TI_TASK(r15);
+
 /* Exception prolog code for all exceptions */
 #define EXCEPTION_PROLOG(n, intnum, type, addition)	    		    \
 	mtspr	SPRN_SPRG_##type##_SCRATCH,r13;	/* get spare registers */   \
@@ -95,6 +107,7 @@
 	BOOK3E_LOAD_EXC_LEVEL_STACK(CRIT);					\
 	ld	r1,PACA_CRIT_STACK(r13);				    \
 	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE;					\
+	BOOK3E_STORE_EXC_LEVEL_THEAD_INFO(CRIT);				\
 1:
 #define SPRN_CRIT_SRR0	SPRN_CSRR0
 #define SPRN_CRIT_SRR1	SPRN_CSRR1
@@ -105,6 +118,7 @@
 	BOOK3E_LOAD_EXC_LEVEL_STACK(DBG);					\
 	ld	r1,PACA_DBG_STACK(r13);					    \
 	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE;					\
+	BOOK3E_STORE_EXC_LEVEL_THEAD_INFO(DBG);					\
 1:
 #define SPRN_DBG_SRR0	SPRN_DSRR0
 #define SPRN_DBG_SRR1	SPRN_DSRR1
@@ -115,6 +129,7 @@
 	BOOK3E_LOAD_EXC_LEVEL_STACK(MC);					\
 	ld	r1,PACA_MC_STACK(r13);					    \
 	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE;					\
+	BOOK3E_STORE_EXC_LEVEL_THEAD_INFO(MC);					\
 1:
 #define SPRN_MC_SRR0	SPRN_MCSRR0
 #define SPRN_MC_SRR1	SPRN_MCSRR1
-- 
1.7.9.5

^ permalink raw reply related

* [v2][PATCH 1/6] powerpc/book3e: load critical/machine/debug exception stack
From: Tiejun Chen @ 2013-01-21  7:44 UTC (permalink / raw)
  To: benh, galak; +Cc: linuxppc-dev, linux-kernel, jason.wessel
In-Reply-To: <1358754255-31484-1-git-send-email-tiejun.chen@windriver.com>

We always alloc critical/machine/debug check exceptions. This is
different from the normal exception. So we should load these exception
stack properly like we did for booke.

Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>
---
 arch/powerpc/kernel/exceptions-64e.S |   40 +++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index ae54553..767f856 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -36,6 +36,28 @@
  */
 #define	SPECIAL_EXC_FRAME_SIZE	INT_FRAME_SIZE
 
+/* only on book3e */
+#define DBG_STACK_BASE		dbgirq_ctx
+#define MC_STACK_BASE		mcheckirq_ctx
+#define CRIT_STACK_BASE		critirq_ctx
+
+#ifdef CONFIG_SMP
+#define BOOK3E_LOAD_EXC_LEVEL_STACK(level)		\
+	mfspr	r14,SPRN_PIR;				\
+	slwi	r14,r14,3;				\
+	LOAD_REG_IMMEDIATE(r10, level##_STACK_BASE);	\
+	add	r10,r10,r14;				\
+	ld	r10,0(r10);				\
+	addi	r10,r10,THREAD_SIZE;			\
+	std	r10,PACA_##level##_STACK(r13);
+#else
+#define BOOK3E_LOAD_EXC_LEVEL_STACK(level)		\
+	LOAD_REG_IMMEDIATE(r10, level##_STACK_BASE);	\
+	ld	r10,0(r10);				\
+	addi	r10,r10,THREAD_SIZE;			\
+	std	r10,PACA_##level##_STACK(r13);
+#endif
+
 /* Exception prolog code for all exceptions */
 #define EXCEPTION_PROLOG(n, intnum, type, addition)	    		    \
 	mtspr	SPRN_SPRG_##type##_SCRATCH,r13;	/* get spare registers */   \
@@ -68,20 +90,32 @@
 #define SPRN_GDBELL_SRR1	SPRN_GSRR1
 
 #define CRIT_SET_KSTACK						            \
+	andi.	r10,r11,MSR_PR;							\
+	bne	1f;								\
+	BOOK3E_LOAD_EXC_LEVEL_STACK(CRIT);					\
 	ld	r1,PACA_CRIT_STACK(r13);				    \
-	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE;
+	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE;					\
+1:
 #define SPRN_CRIT_SRR0	SPRN_CSRR0
 #define SPRN_CRIT_SRR1	SPRN_CSRR1
 
 #define DBG_SET_KSTACK						            \
+	andi.	r10,r11,MSR_PR;							\
+	bne	1f;								\
+	BOOK3E_LOAD_EXC_LEVEL_STACK(DBG);					\
 	ld	r1,PACA_DBG_STACK(r13);					    \
-	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE;
+	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE;					\
+1:
 #define SPRN_DBG_SRR0	SPRN_DSRR0
 #define SPRN_DBG_SRR1	SPRN_DSRR1
 
 #define MC_SET_KSTACK						            \
+	andi.	r10,r11,MSR_PR;							\
+	bne	1f;								\
+	BOOK3E_LOAD_EXC_LEVEL_STACK(MC);					\
 	ld	r1,PACA_MC_STACK(r13);					    \
-	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE;
+	subi	r1,r1,SPECIAL_EXC_FRAME_SIZE;					\
+1:
 #define SPRN_MC_SRR0	SPRN_MCSRR0
 #define SPRN_MC_SRR1	SPRN_MCSRR1
 
-- 
1.7.9.5

^ permalink raw reply related

* [v2][PATCH 0/6] powerpc/book3e: make kgdb to work well
From: Tiejun Chen @ 2013-01-21  7:44 UTC (permalink / raw)
  To: benh, galak; +Cc: linuxppc-dev, linux-kernel, jason.wessel

This patchset is used to support kgdb/gdb on book3e.

v2:

* Make sure we cover CONFIG_PPC_BOOK3E_64 safely
* Use LOAD_REG_IMMEDIATE() to load properly
	the value of the constant expression in load debug exception stack 
* Copy thread infor form the kernel stack coming from usr
* Rebase latest powerpc git tree

v1:
* Copy thread info only when we are from !user mode since we'll get kernel stack
  coming from usr directly.
* remove save/restore EX_R14/EX_R15 since DBG_EXCEPTION_PROLOG already covered
  this.
* use CURRENT_THREAD_INFO() conveniently to get thread.
* fix some typos
* add a patch to make sure gdb can generate a single step properly to invoke a
  kgdb state.
* add a patch to if we need to replay an interrupt, we shouldn't restore that
  previous backup thread info to make sure we can replay an interrupt lately
  with a proper thread info.
* rebase latest powerpc git tree

v0:
This patchset is used to support kgdb for book3e.


Tiejun Chen (6):
      powerpc/book3e: load critical/machine/debug exception stack
      powerpc/book3e: store critical/machine/debug exception thread info
      book3e/kgdb: update thread's dbcr0
      book3e/kgdb: Fix a single stgep case of lazy IRQ
      powerpc/book3e: support kgdb for kernel space
      kgdb/kgdbts: support ppc64

 arch/powerpc/kernel/exceptions-64e.S |   60 +++++++++++++++++++++++++++++++---
 arch/powerpc/kernel/irq.c            |   10 ++++++
 arch/powerpc/kernel/kgdb.c           |   16 ++++++---
 drivers/misc/kgdbts.c                |    2 ++
 4 files changed, 80 insertions(+), 8 deletions(-)

Tiejun

^ permalink raw reply

* Re: Anyone have a PrPmc 280/2800 handy?
From: Paul Gortmaker @ 2013-01-20  1:59 UTC (permalink / raw)
  To: Mark A. Greer; +Cc: linuxppc-dev
In-Reply-To: <20130117223343.GC32446@animalcreek.com>

On Thu, Jan 17, 2013 at 5:33 PM, Mark A. Greer <mgreer@animalcreek.com> wrote:
> Hello.
>
> I want to make some fixups to the marvell hostbridge and
> mpsc code and would like to test them on a PrPmc280/2800.
> The problem is, I don't have one anymore.

Hi Mark,

Is there another platform that uses mpsc that can be used for
testing?  I ask because I did spend a lot of time with PrPMC-280
and motload/powerboot; some standalone, some on ATCA-F101.
And with that hindsight, I can't help thinking that the right thing
to do some eight years later is to simply consider removing the
support for these old platforms.  Would that make your validation
easier?  If so, then I'd recommend it.  This is of course, just my opinion.

Paul.
--

>
> If you have one handy and don't mind running a quick boot
> test for me,  please let me know.
>
> Thanks,
>
> Mark
> --
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* ml510 booting problem.
From: rakesh l @ 2013-01-19 10:41 UTC (permalink / raw)
  To: linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 1144 bytes --]

Hello All,

              I am trying to port 3.6 linux kernel on ml510 board and i
have already ported 3.6 kernel on ml403 board but i am unable to do the
same thing on ml510. I really don't know what i am missing here.

Tools used :

1) xilinx 13.1 edk
 2) linux kernel from git.xilinx.com (downloaded 3.6 kernel)
 3) eldk 5.3 (cross compilers tools)

Flow go's like this :

1) I have added necessary peripherals in the edk and generated bit stream
and exported it to sdk. In sdk i have creted a .dts file.
 2) The obtained dts file i have placed in arch/powerpc/boot/dts folder
 3) i also got ramdisk image from wiki.xilinx.com/powerpc-linux. (placed in
/arch/powerpc/boot)

compiled kernel using default configuration. i have also enabled soft fpu's

After compiling i got .elf file in arch/powerpc/boot . i took that elf and
download.bit in edk project and clubbed it in xmd console inorder to get
.ace file .
obtained .ace file is loaded in CF and trying to port through rs232, but i
am seeing nothing on the console. what to do.... what i am missing here. I
am breaking my head from past 1 month. Please do reply.

With Regards,
Rakesh.

[-- Attachment #2: Type: text/html, Size: 1507 bytes --]

^ permalink raw reply

* [PATCH] perf: Fix compile warnings in tests/attr.c
From: Sukadev Bhattiprolu @ 2013-01-19  1:30 UTC (permalink / raw)
  To: acme, Anton Blanchard, paulus; +Cc: linuxppc-dev, jolsa, linux-kernel

>From 4d266e5040c33103f5d226b0d16b89f8ef79e3ad Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Fri, 18 Jan 2013 11:14:28 -0800
Subject: [PATCH] perf: Fix compile warnings in tests/attr.c

Replace '%llu' in printf()s with 'PRIu64' in 'tools/perf/tests/attr.c'
to fix compile warnings (which become errors due to -Werror).

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
 tools/perf/tests/attr.c |   20 ++++++++++----------
 1 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index f61dd3f..5cbb2b3 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -65,7 +65,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
 	FILE *file;
 	char path[PATH_MAX];
 
-	snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir,
+	snprintf(path, PATH_MAX, "%s/event-%d-%" PRIu64 "-%d", dir,
 		 attr->type, attr->config, fd);
 
 	file = fopen(path, "w+");
@@ -74,7 +74,7 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
 		return -1;
 	}
 
-	if (fprintf(file, "[event-%d-%llu-%d]\n",
+	if (fprintf(file, "[event-%d-%" PRIu64 "-%d]\n",
 		    attr->type, attr->config, fd) < 0) {
 		perror("test attr - failed to write event file");
 		fclose(file);
@@ -91,10 +91,10 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
 	/* struct perf_event_attr */
 	WRITE_ASS(type,   PRIu32);
 	WRITE_ASS(size,   PRIu32);
-	WRITE_ASS(config,  "llu");
-	WRITE_ASS(sample_period, "llu");
-	WRITE_ASS(sample_type,   "llu");
-	WRITE_ASS(read_format,   "llu");
+	WRITE_ASS(config,  PRIu64);
+	WRITE_ASS(sample_period, PRIu64);
+	WRITE_ASS(sample_type,   PRIu64);
+	WRITE_ASS(read_format,   PRIu64);
 	WRITE_ASS(disabled,       "d");
 	WRITE_ASS(inherit,        "d");
 	WRITE_ASS(pinned,         "d");
@@ -119,10 +119,10 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
 	WRITE_ASS(exclude_callchain_user, "d");
 	WRITE_ASS(wakeup_events, PRIu32);
 	WRITE_ASS(bp_type, PRIu32);
-	WRITE_ASS(config1, "llu");
-	WRITE_ASS(config2, "llu");
-	WRITE_ASS(branch_sample_type, "llu");
-	WRITE_ASS(sample_regs_user,   "llu");
+	WRITE_ASS(config1, PRIu64);
+	WRITE_ASS(config2, PRIu64);
+	WRITE_ASS(branch_sample_type, PRIu64);
+	WRITE_ASS(sample_regs_user,   PRIu64);
 	WRITE_ASS(sample_stack_user,  PRIu32);
 
 	fclose(file);
-- 
1.7.1

^ permalink raw reply related

* Re: [PATCH] powerpc/85xx: dts - add ranges property for SEC
From: Kim Phillips @ 2013-01-18 20:40 UTC (permalink / raw)
  To: Po Liu; +Cc: linuxppc-dev
In-Reply-To: <1358500573-19477-1-git-send-email-po.liu@freescale.com>

On Fri, 18 Jan 2013 17:16:13 +0800
Po Liu <po.liu@freescale.com> wrote:

> This facilitates getting the physical address of the SEC node.
> 
> Signed-off-by: Liu po <po.liu@freescale.com>
> ---
Reviewed-by: Kim Phillips <kim.phillips@freescale.com>

Kim

^ permalink raw reply

* Re: [PATCH 6/6][v3] perf: Document the ABI of perf sysfs entries
From: Arnaldo Carvalho de Melo @ 2013-01-18 19:20 UTC (permalink / raw)
  To: Sukadev Bhattiprolu
  Cc: Andi Kleen, Peter Zijlstra, robert.richter, Greg KH,
	Anton Blanchard, linux-kernel, Stephane Eranian, linuxppc-dev,
	Ingo Molnar, Paul Mackerras, Jiri Olsa
In-Reply-To: <20130118174654.GA12575@us.ibm.com>

Em Fri, Jan 18, 2013 at 09:46:54AM -0800, Sukadev Bhattiprolu escreveu:
> Jiri Olsa [jolsa@redhat.com] wrote:
> | Maybe it'd worth to mention, that it does not need to be just 'event',
> | but anything from /sys/bus/event_source/devices/<dev>/format directory,
> | like
> | 
> | 		The can be multiple terms like 'event=0xNNNN' specified
> | 		and separated with comma. All available terms are located
> | 		in /sys/bus/event_source/devices/<dev>/format file.
> | 
> | Please feel free to rephrase or use proper English ;-)
> | 
> | otherwise it's ok,
> | jirka
> 
> Agree.  Thanks for the review. Here is the updated patch.

Ok, I'll wait for Jiri's final acked-by, then you can resubmit the patch
series and I'll apply it to my perf/core branch.

- Arnaldo

^ permalink raw reply

* [PATCH] pseries/iommu: ensure TCEs are cleared with non-huge DDW
From: Nishanth Aravamudan @ 2013-01-18 19:17 UTC (permalink / raw)
  To: benh; +Cc: nfont, paulus, linuxppc-dev, miltonm, anton

There are now two kinds of DMA windows that might be presented by
PowerVM DDW support -- huge windows (that can map all of system memory
regardless of the LPAR configuration) and non-huge windows (which
can't). They are implemented slightly differently in PowerVM, and thus
have different characteristics. The most obvious is that slot isolate
doesn't clear the TCEs/window for us with non-huge windows. Thus, when a
DLPAR operation occurs on a slot using a non-huge window, TCEs are still
present (the notifier chain doesn't currently remove them explicitly)
and the DLPAR fails. Fix this by calling remove_ddw() first, which will
unmap the DDW TCEs.

Note: a corresponding change to drmgr is needed to actually successfully
DLPAR, such that the device-tree update (which causes the notifier chain
to fire) occurs before slot isolate.

Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>

diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index a55b685..b4bb9e1 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -1296,6 +1296,7 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
 
 	switch (action) {
 	case OF_RECONFIG_DETACH_NODE:
+		remove_ddw(np);
 		if (pci && pci->iommu_table)
 			iommu_free_table(pci->iommu_table, np->full_name);
 
@@ -1308,16 +1309,6 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
 			}
 		}
 		spin_unlock(&direct_window_list_lock);
-
-		/*
-		 * Because the notifier runs after isolation of the
-		 * slot, we are guaranteed any DMA window has already
-		 * been revoked and the TCEs have been marked invalid,
-		 * so we don't need a call to remove_ddw(np). However,
-		 * if an additional notifier action is added before the
-		 * isolate call, we should update this code for
-		 * completeness with such a call.
-		 */
 		break;
 	default:
 		err = NOTIFY_DONE;

^ permalink raw reply related

* [PATCH] pseries/iommu: Fix iteration in DDW TCE clearrange
From: Nishanth Aravamudan @ 2013-01-18 19:16 UTC (permalink / raw)
  To: benh; +Cc: nfont, paulus, linuxppc-dev, miltonm, anton

tce_clearrange_multi_pSeriesLP is attempting to iterate over all TCEs in
a given range. However, is it not advancing the dma_offset value passed
to plpar_tce_stuff via the next value. This prevents DLPAR from
completing, because TCEs are still present at slot isolation time.
    
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>

diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index e2685ba..a55b685 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -382,6 +382,7 @@ static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
 		rc = plpar_tce_stuff((u64)be32_to_cpu(maprange->liobn),
 					     dma_offset,
 					     0, limit);
+		next += limit * tce_size;
 		num_tce -= limit;
 	} while (num_tce > 0 && !rc);
 

^ permalink raw reply related

* Re: [PATCH 6/6][v3] perf: Document the ABI of perf sysfs entries
From: Sukadev Bhattiprolu @ 2013-01-18 17:46 UTC (permalink / raw)
  To: Jiri Olsa
  Cc: Andi Kleen, Peter Zijlstra, robert.richter, Greg KH,
	Anton Blanchard, linux-kernel, Stephane Eranian, linuxppc-dev,
	Ingo Molnar, Paul Mackerras, Arnaldo Carvalho de Melo
In-Reply-To: <20130117141145.GA3817@krava.brq.redhat.com>

Jiri Olsa [jolsa@redhat.com] wrote:
| Maybe it'd worth to mention, that it does not need to be just 'event',
| but anything from /sys/bus/event_source/devices/<dev>/format directory,
| like
| 
| 		The can be multiple terms like 'event=0xNNNN' specified
| 		and separated with comma. All available terms are located
| 		in /sys/bus/event_source/devices/<dev>/format file.
| 
| Please feel free to rephrase or use proper English ;-)
| 
| otherwise it's ok,
| jirka

Agree.  Thanks for the review. Here is the updated patch.

---
>From 1e3cc6b3ef87f533985b10574af472361e39eecd Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Tue, 8 Jan 2013 22:31:49 -0800
Subject: [PATCH] perf: Document the ABI of perf sysfs entries

This patchset addes two new sets of files to sysfs for POWER architecture.

	- perf event config format in /sys/devices/cpu/format/event
	- generic and POWER-specific perf events in /sys/devices/cpu/events/

The format of the first file is already documented in:

	sysfs-bus-event_source-devices-format

Document the format of the second set of files '/sys/devices/cpu/events/*'
which would also become part of the ABI.

Changelog[v3.2]: (small changes to this one patch).
	[Jiri Olsa]: Mention that multiple event= like terms can be specified
	in the 'events' file.

Changelog[v3.1]:
	(small changes to just this one patch).
	[Jiri Olsa]: Remove the documentation for the 'config format' file
	as it is already documented in 'Documentation/ABI/testing/'.
	[Jiri Olsa]: Move the documentation of 'events' also to 'testing'
	from 'stable'.

Changelog[v3]:
	[Greg KH] Include ABI documentation.

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
---
 .../testing/sysfs-bus-event_source-devices-events  |   62 ++++++++++++++++++++
 1 files changed, 62 insertions(+), 0 deletions(-)
 delete mode 100644 Documentation/ABI/stable/sysfs-devices-cpu-events
 create mode 100644 Documentation/ABI/testing/sysfs-bus-event_source-devices-events

diff --git a/Documentation/ABI/stable/sysfs-devices-cpu-events b/Documentation/ABI/stable/sysfs-devices-cpu-events
deleted file mode 100644
index e69de29..0000000
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
new file mode 100644
index 0000000..0adeb52
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events
@@ -0,0 +1,62 @@
+What:		/sys/devices/cpu/events/
+		/sys/devices/cpu/events/branch-misses
+		/sys/devices/cpu/events/cache-references
+		/sys/devices/cpu/events/cache-misses
+		/sys/devices/cpu/events/stalled-cycles-frontend
+		/sys/devices/cpu/events/branch-instructions
+		/sys/devices/cpu/events/stalled-cycles-backend
+		/sys/devices/cpu/events/instructions
+		/sys/devices/cpu/events/cpu-cycles
+
+Date:		2013/01/08
+
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+
+Description:	Generic performance monitoring events
+
+		A collection of performance monitoring events that may be
+		supported by many/most CPUs. These events can be monitored
+		using the 'perf(1)' tool.
+
+		The contents of each file would look like:
+
+			event=0xNNNN
+
+		where 'N' is a hex digit and the number '0xNNNN' shows the
+		"raw code" for the perf event identified by the file's
+		"basename".
+
+
+What: 		/sys/devices/cpu/events/PM_LD_MISS_L1
+		/sys/devices/cpu/events/PM_LD_REF_L1
+		/sys/devices/cpu/events/PM_CYC
+		/sys/devices/cpu/events/PM_BRU_FIN
+		/sys/devices/cpu/events/PM_GCT_NOSLOT_CYC
+		/sys/devices/cpu/events/PM_BRU_MPRED
+		/sys/devices/cpu/events/PM_INST_CMPL
+		/sys/devices/cpu/events/PM_CMPLU_STALL
+
+Date:		2013/01/08
+
+Contact:	Linux kernel mailing list <linux-kernel@vger.kernel.org>
+		Linux Powerpc mailing list <linuxppc-dev@ozlabs.org>
+
+Description:	POWER-systems specific performance monitoring events
+
+		A collection of performance monitoring events that may be
+		supported by the POWER CPU. These events can be monitored
+		using the 'perf(1)' tool.
+
+		These events may not be supported by other CPUs.
+
+		The contents of each file would look like:
+
+			event=0xNNNN
+
+		where 'N' is a hex digit and the number '0xNNNN' shows the
+		"raw code" for the perf event identified by the file's
+		"basename".
+
+		Further, multiple terms like 'event=0xNNNN' can be specified
+		and separated with comma. All available terms are defined in
+		the /sys/bus/event_source/devices/<dev>/format file.
-- 
1.7.1

^ permalink raw reply related

* Re: [RFC PATCH 0/6] USB: Add multiple PHYs of same type
From: kishon @ 2013-01-18 11:54 UTC (permalink / raw)
  To: balbi
  Cc: linux-doc, tony, linux, linux-sh, alexander.shishkin, stern,
	devicetree-discuss, linuxppc-dev, rob.herring, horms,
	haojian.zhuang, linux-omap, linux-arm-kernel, eric.y.miao,
	b-cousson, gregkh, linux-usb, linux-kernel, cbou, rob, dwmw2
In-Reply-To: <20130118114859.GC4379@arwen.pp.htv.fi>

On Friday 18 January 2013 05:18 PM, Felipe Balbi wrote:
> On Wed, Jan 16, 2013 at 08:30:56PM +0530, Kishon Vijay Abraham I wrote:
>> New platforms are being added which has multiple PHY's (of same type) and
>> which has multiple USB controllers. The binding information has to be
>> present in the PHY library (otg.c) in order for it to return the
>> appropriate PHY whenever the USB controller request for the PHY. So
>> added a new API to pass the binding information. This API should be
>> called by platform specific initialization code.
>>
>> So the binding should be done something like
>> usb_bind_phy("musb-hdrc.0.auto", 0, "omap-usb2.1.auto"); specifying the USB
>> controller device name, index, and the PHY device name.
>> I have done this binding for OMAP platforms, but it should be done for
>> all the platforms.
>>
>> After this design, the phy can be got by passing the USB controller device
>> pointer and the index.
>>
>> Developed this patch series on
>> git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git xceiv
>> after applying "usb: musb: add driver for control module" patch series.
>>
>> Did basic enumeration testing in omap4 panda, omap4 sdp and omap3 beagle.
>
> please resend without RFC so I can apply.

I'll resend after addressing Roger's comments in a while.

Thanks
Kishon

^ permalink raw reply

* Re: [RFC PATCH 0/6] USB: Add multiple PHYs of same type
From: Felipe Balbi @ 2013-01-18 11:48 UTC (permalink / raw)
  To: Kishon Vijay Abraham I
  Cc: linux-doc, tony, linux, linux-sh, alexander.shishkin, stern,
	devicetree-discuss, linuxppc-dev, rob.herring, horms,
	haojian.zhuang, linux-omap, linux-arm-kernel, eric.y.miao,
	b-cousson, gregkh, linux-usb, linux-kernel, balbi, cbou, rob,
	dwmw2
In-Reply-To: <1358348462-27693-1-git-send-email-kishon@ti.com>

[-- Attachment #1: Type: text/plain, Size: 1201 bytes --]

On Wed, Jan 16, 2013 at 08:30:56PM +0530, Kishon Vijay Abraham I wrote:
> New platforms are being added which has multiple PHY's (of same type) and
> which has multiple USB controllers. The binding information has to be
> present in the PHY library (otg.c) in order for it to return the
> appropriate PHY whenever the USB controller request for the PHY. So
> added a new API to pass the binding information. This API should be
> called by platform specific initialization code.
> 
> So the binding should be done something like
> usb_bind_phy("musb-hdrc.0.auto", 0, "omap-usb2.1.auto"); specifying the USB
> controller device name, index, and the PHY device name.
> I have done this binding for OMAP platforms, but it should be done for
> all the platforms.
> 
> After this design, the phy can be got by passing the USB controller device
> pointer and the index.
> 
> Developed this patch series on
> git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git xceiv
> after applying "usb: musb: add driver for control module" patch series.
> 
> Did basic enumeration testing in omap4 panda, omap4 sdp and omap3 beagle.

please resend without RFC so I can apply.

-- 
balbi

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [RFC PATCH 1/6] usb: otg: Add an API to bind the USB controller and PHY
From: Felipe Balbi @ 2013-01-18 11:48 UTC (permalink / raw)
  To: kishon
  Cc: linux-doc, tony, linux, linux-sh, alexander.shishkin, stern,
	devicetree-discuss, linuxppc-dev, rob.herring, horms,
	haojian.zhuang, linux-omap, linux-arm-kernel, Roger Quadros,
	eric.y.miao, b-cousson, gregkh, linux-usb, linux-kernel, balbi,
	cbou, rob, dwmw2
In-Reply-To: <50F7DD2C.90500@ti.com>

[-- Attachment #1: Type: text/plain, Size: 870 bytes --]

On Thu, Jan 17, 2013 at 04:44:52PM +0530, kishon wrote:
> >>@@ -171,6 +188,11 @@ static inline void devm_usb_put_phy(struct device *dev, struct usb_phy *x)
> >>  {
> >>  }
> >>
> >>+static inline struct usb_phy_bind *usb_bind_phy(const char *dev_name, u8 index,
> >>+				const char *phy_dev_name)
> >>+{
> >>+	return NULL;
> >>+}
> >>  #endif
> >>
> >>  static inline int
> >>
> >
> >Controllers like ehci-omap which don't need OTG functionality would
> >benefit from this API. Can we make these PHY APIs not dependent on OTG /
> >OTG_UTILS?
> 
> Actually much of whatever is in otg.c can be used by controllers
> which don't have OTG functionality (except otg_state_string). I
> vaguely remember, there was a patch that renamed otg.c to phy.c etc..
> I'm not sure what happened to that.

right, that has to be done eventually ;-)

-- 
balbi

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* [PATCH] powerpc/85xx: dts - add ranges property for SEC
From: Po Liu @ 2013-01-18  9:16 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Po Liu

This facilitates getting the physical address of the SEC node.

Signed-off-by: Liu po <po.liu@freescale.com>
---
 arch/powerpc/boot/dts/fsl/pq3-sec4.4-0.dtsi |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/boot/dts/fsl/pq3-sec4.4-0.dtsi b/arch/powerpc/boot/dts/fsl/pq3-sec4.4-0.dtsi
index d4c9d5d..ef98fbf 100644
--- a/arch/powerpc/boot/dts/fsl/pq3-sec4.4-0.dtsi
+++ b/arch/powerpc/boot/dts/fsl/pq3-sec4.4-0.dtsi
@@ -36,6 +36,7 @@ crypto@30000 {
 	compatible = "fsl,sec-v4.4", "fsl,sec-v4.0";
 	#address-cells = <1>;
 	#size-cells = <1>;
+	ranges		 = <0x0 0x30000 0x10000>
 	reg		 = <0x30000 0x10000>;
 	interrupts	 = <58 2 0 0>;
 
-- 
1.6.4

^ permalink raw reply related

* Re: [PATCH v3 1/2] memory-hotplug: introduce CONFIG_HAVE_BOOTMEM_INFO_NODE and revert register_page_bootmem_info_node() when platform not support
From: Lin Feng @ 2013-01-18  7:58 UTC (permalink / raw)
  To: Michal Hocko
  Cc: linux-ia64, aquini, heiko.carstens, tangchen, dhowells,
	paul.gortmaker, paulus, jbeulich, hpa, sparclinux, linux-s390,
	x86, isimatu.yasuaki, mingo, gerald.schaefer, fenghua.yu,
	jiang.liu, wency, mel, tglx, kamezawa.hiroyu, tony.luck, laijs,
	linux-mm, gregkh, linux-kernel, minchan, schwidefsky, linux390,
	akpm, linuxppc-dev, davem
In-Reply-To: <20130117130509.GE20538@dhcp22.suse.cz>

Hi Michal,

On 01/17/2013 09:05 PM, Michal Hocko wrote:
> On Thu 17-01-13 18:37:10, Lin Feng wrote:
> [...]
>>> > > I am still not sure I understand the relation to MEMORY_HOTREMOVE.
>>> > > Is register_page_bootmem_info_node required/helpful even if
>>> > > !CONFIG_MEMORY_HOTREMOVE?
>> > From old kenrel's view register_page_bootmem_info_node() is defined in 
>> > CONFIG_MEMORY_HOTPLUG_SPARSE, it registers some info for 
>> > memory hotplug/remove. If we don't use MEMORY_HOTPLUG feature, this
>> > function is empty, we don't need the info at all.
>> > So this info is not required/helpful if !CONFIG_MEMORY_HOTREMOVE.
> OK, then I suggest moving it under CONFIG_MEMORY_HOTREMOVE guards rather
> than CONFIG_MEMORY_HOTPLUG.
I can't agree more ;-) 
I also find that page_isolation.c selected by MEMORY_ISOLATION under MEMORY_HOTPLUG
is also such case, I fix it by the way.

thanks,
linfeng
> 

^ permalink raw reply

* [PATCH 17/17] powerpc: Documentation for transactional memory on powerpc
From: Michael Neuling @ 2013-01-18  5:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Michael Neuling, linuxppc-dev, Matt Evans
In-Reply-To: <1358488117-17363-1-git-send-email-mikey@neuling.org>

Signed-off-by: Matt Evans <matt@ozlabs.org>
Signed-off-by: Michael Neuling <mikey@neuling.org>
---
 Documentation/powerpc/transactional_memory.txt |  175 ++++++++++++++++++++++++
 1 file changed, 175 insertions(+)
 create mode 100644 Documentation/powerpc/transactional_memory.txt

diff --git a/Documentation/powerpc/transactional_memory.txt b/Documentation/powerpc/transactional_memory.txt
new file mode 100644
index 0000000..c907be4
--- /dev/null
+++ b/Documentation/powerpc/transactional_memory.txt
@@ -0,0 +1,175 @@
+Transactional Memory support
+============================
+
+POWER kernel support for this feature is currently limited to supporting
+its use by user programs.  It is not currently used by the kernel itself.
+
+This file aims to sum up how it is supported by Linux and what behaviour you
+can expect from your user programs.
+
+
+Basic overview
+==============
+
+Hardware Transactional Memory is supported on POWER8 processors, and is a
+feature that enables a different form of atomic memory access.  Several new
+instructions are presented to delimit transactions; transactions are
+guaranteed to either complete atomically or roll back and undo any partial
+changes.
+
+A simple transaction looks like this:
+
+begin_move_money:
+  tbegin
+  beq   abort_handler
+
+  ld    r4, SAVINGS_ACCT(r3)
+  ld    r5, CURRENT_ACCT(r3)
+  subi  r5, r5, 1
+  addi  r4, r4, 1
+  std   r4, SAVINGS_ACCT(r3)
+  std   r5, CURRENT_ACCT(r3)
+
+  tend
+
+  b     continue
+
+abort_handler:
+  ... test for odd failures ...
+
+  /* Retry the transaction if it failed because it conflicted with
+   * someone else: */
+  b     begin_move_money
+
+
+The 'tbegin' instruction denotes the start point, and 'tend' the end point.
+Between these points the processor is in 'Transactional' state; any memory
+references will complete in one go if there are no conflicts with other
+transactional or non-transactional accesses within the system.  In this
+example, the transaction completes as though it were normal straight-line code
+IF no other processor has touched SAVINGS_ACCT(r3) or CURRENT_ACCT(r3); an
+atomic move of money from the current account to the savings account has been
+performed.  Even though the normal ld/std instructions are used (note no
+lwarx/stwcx), either *both* SAVINGS_ACCT(r3) and CURRENT_ACCT(r3) will be
+updated, or neither will be updated.
+
+If, in the meantime, there is a conflict with the locations accessed by the
+transaction, the transaction will be aborted by the CPU.  Register and memory
+state will roll back to that at the 'tbegin', and control will continue from
+'tbegin+4'.  The branch to abort_handler will be taken this second time; the
+abort handler can check the cause of the failure, and retry.
+
+Checkpointed registers include all GPRs, FPRs, VRs/VSRs, LR, CCR/CR, CTR, FPCSR
+and a few other status/flag regs; see the ISA for details.
+
+Causes of transaction aborts
+============================
+
+- Conflicts with cache lines used by other processors
+- Signals
+- Context switches
+- See the ISA for full documentation of everything that will abort transactions.
+
+
+Syscalls
+========
+
+Performing syscalls from within transaction is not recommended, and can lead
+to unpredictable results.
+
+Syscalls do not by design abort transactions, but beware: The kernel code will
+not be running in transactional state.  The effect of syscalls will always
+remain visible, but depending on the call they may abort your transaction as a
+side-effect, read soon-to-be-aborted transactional data that should not remain
+invisible, etc.  If you constantly retry a transaction that constantly aborts
+itself by calling a syscall, you'll have a livelock & make no progress.
+
+Simple syscalls (e.g. sigprocmask()) "could" be OK.  Even things like write()
+from, say, printf() should be OK as long as the kernel does not access any
+memory that was accessed transactionally.
+
+Consider any syscalls that happen to work as debug-only -- not recommended for
+production use.  Best to queue them up till after the transaction is over.
+
+
+Signals
+=======
+
+Delivery of signals (both sync and async) during transactions provides a second
+thread state (ucontext/mcontext) to represent the second transactional register
+state.  Signal delivery 'treclaim's to capture both register states, so signals
+abort transactions.  The usual ucontext_t passed to the signal handler
+represents the checkpointed/original register state; the signal appears to have
+arisen at 'tbegin+4'.
+
+If the sighandler ucontext has uc_link set, a second ucontext has been
+delivered.  For future compatibility the MSR.TS field should be checked to
+determine the transactional state -- if so, the second ucontext in uc->uc_link
+represents the active transactional registers at the point of the signal.
+
+For 64-bit processes, uc->uc_mcontext.regs->msr is a full 64-bit MSR and its TS
+field shows the transactional mode.
+
+For 32-bit processes, the mcontext's MSR register is only 32 bits; the top 32
+bits are stored in the MSR of the second ucontext, i.e. in
+uc->uc_link->uc_mcontext.regs->msr.  The top word contains the transactional
+state TS.
+
+However, basic signal handlers don't need to be aware of transactions
+and simply returning from the handler will deal with things correctly:
+
+Transaction-aware signal handlers can read the transactional register state
+from the second ucontext.  This will be necessary for crash handlers to
+determine, for example, the address of the instruction causing the SIGSEGV.
+
+Example signal handler:
+
+    void crash_handler(int sig, siginfo_t *si, void *uc)
+    {
+      ucontext_t *ucp = uc;
+      ucontext_t *transactional_ucp = ucp->uc_link;
+
+      if (ucp_link) {
+        u64 msr = ucp->uc_mcontext.regs->msr;
+        /* May have transactional ucontext! */
+#ifndef __powerpc64__
+        msr |= ((u64)transactional_ucp->uc_mcontext.regs->msr) << 32;
+#endif
+        if (MSR_TM_ACTIVE(msr)) {
+           /* Yes, we crashed during a transaction.  Oops. */
+   fprintf(stderr, "Transaction to be restarted at 0x%llx, but "
+                           "crashy instruction was at 0x%llx\n",
+                           ucp->uc_mcontext.regs->nip,
+                           transactional_ucp->uc_mcontext.regs->nip);
+        }
+      }
+
+      fix_the_problem(ucp->dar);
+    }
+
+
+Failure cause codes used by kernel
+==================================
+
+These are defined in <asm/reg.h>, and distinguish different reasons why the
+kernel aborted a transaction:
+
+ TM_CAUSE_RESCHED       Thread was rescheduled.
+ TM_CAUSE_FAC_UNAV      FP/VEC/VSX unavailable trap.
+ TM_CAUSE_SYSCALL       Currently unused; future syscalls that must abort
+                        transactions for consistency will use this.
+ TM_CAUSE_SIGNAL        Signal delivered.
+ TM_CAUSE_MISC          Currently unused.
+
+These can be checked by the user program's abort handler as TEXASR[0:7].
+
+
+GDB
+===
+
+GDB and ptrace are not currently TM-aware.  If one stops during a transaction,
+it looks like the transaction has just started (the checkpointed state is
+presented).  The transaction cannot then be continued and will take the failure
+handler route.  Furthermore, the transactional 2nd register state will be
+inaccessible.  GDB can currently be used on programs using TM, but not sensibly
+in parts within transactions.
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 16/17] powerpc: Add transactional memory to pseries and ppc64 defconfigs
From: Michael Neuling @ 2013-01-18  5:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Michael Neuling, linuxppc-dev, Matt Evans
In-Reply-To: <1358488117-17363-1-git-send-email-mikey@neuling.org>

Signed-off-by: Matt Evans <matt@ozlabs.org>
Signed-off-by: Michael Neuling <mikey@neuling.org>
---
 arch/powerpc/configs/ppc64_defconfig   |    2 ++
 arch/powerpc/configs/pseries_defconfig |    2 ++
 2 files changed, 4 insertions(+)

diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 6cbdeb4..01e2e86 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -49,6 +49,8 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y
 CONFIG_CPU_FREQ_PMAC64=y
 CONFIG_HZ_100=y
 CONFIG_BINFMT_MISC=m
+CONFIG_TRANSACTIONAL_MEM=y
+CONFIG_HOTPLUG_CPU=y
 CONFIG_KEXEC=y
 CONFIG_IRQ_ALL_CPUS=y
 CONFIG_MEMORY_HOTREMOVE=y
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 04fa7f2..a50484f 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -43,6 +43,8 @@ CONFIG_RTAS_FLASH=m
 CONFIG_IBMEBUS=y
 CONFIG_HZ_100=y
 CONFIG_BINFMT_MISC=m
+CONFIG_TRANSACTIONAL_MEM=y
+CONFIG_HOTPLUG_CPU=y
 CONFIG_KEXEC=y
 CONFIG_IRQ_ALL_CPUS=y
 CONFIG_MEMORY_HOTPLUG=y
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 15/17] powerpc: Add config option for transactional memory
From: Michael Neuling @ 2013-01-18  5:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Michael Neuling, linuxppc-dev, Matt Evans
In-Reply-To: <1358488117-17363-1-git-send-email-mikey@neuling.org>

Kconfig option for transactional memory on powerpc.

Signed-off-by: Matt Evans <matt@ozlabs.org>
Signed-off-by: Michael Neuling <mikey@neuling.org>
---
 arch/powerpc/Kconfig |    8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 684fa64..349ed12 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -309,6 +309,14 @@ config MATH_EMULATION
 	  unit, which will allow programs that use floating-point
 	  instructions to run.
 
+config TRANSACTIONAL_MEM
+       bool "Transactional Memory support"
+       depends on PPC64
+       depends on SMP
+       default n
+       ---help---
+         Support user-mode Transactional Memory.
+
 config 8XX_MINIMAL_FPEMU
 	bool "Minimal math emulation for 8xx"
 	depends on 8xx && !MATH_EMULATION
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 14/17] powerpc: Add transactional memory to POWER8 cpu features
From: Michael Neuling @ 2013-01-18  5:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Michael Neuling, linuxppc-dev, Matt Evans
In-Reply-To: <1358488117-17363-1-git-send-email-mikey@neuling.org>

Signed-off-by: Matt Evans <matt@ozlabs.org>
Signed-off-by: Michael Neuling <mikey@neuling.org>
---
 arch/powerpc/include/asm/cputable.h |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 71a498b..5ab77f4 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -421,7 +421,7 @@ extern const char *powerpc_base_platform;
 	    CPU_FTR_DSCR | CPU_FTR_SAO  | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
-	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR)
+	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | CPU_FTR_TM_COMP)
 #define CPU_FTRS_CELL	(CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 13/17] powerpc: Add new transactional memory state to the signal context
From: Michael Neuling @ 2013-01-18  5:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Michael Neuling, linuxppc-dev, Matt Evans
In-Reply-To: <1358488117-17363-1-git-send-email-mikey@neuling.org>

This adds the new transactional memory archtected state to the signal context
in both 32 and 64 bit.

Signed-off-by: Matt Evans <matt@ozlabs.org>
Signed-off-by: Michael Neuling <mikey@neuling.org>
---
 arch/powerpc/include/asm/reg.h  |    1 +
 arch/powerpc/kernel/signal.h    |    8 +
 arch/powerpc/kernel/signal_32.c |  500 ++++++++++++++++++++++++++++++++++++++-
 arch/powerpc/kernel/signal_64.c |  337 +++++++++++++++++++++++++-
 4 files changed, 830 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index d19581ae4..41894dc 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -120,6 +120,7 @@
 #define TM_CAUSE_FAC_UNAV	0xfa
 #define TM_CAUSE_SYSCALL	0xf9 /* Persistent */
 #define TM_CAUSE_MISC		0xf6
+#define TM_CAUSE_SIGNAL		0xf4
 
 #if defined(CONFIG_PPC_BOOK3S_64)
 #define MSR_64BIT	MSR_SF
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index e00acb4..ec84c90 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -25,13 +25,21 @@ extern int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
 
 extern unsigned long copy_fpr_to_user(void __user *to,
 				      struct task_struct *task);
+extern unsigned long copy_transact_fpr_to_user(void __user *to,
+					       struct task_struct *task);
 extern unsigned long copy_fpr_from_user(struct task_struct *task,
 					void __user *from);
+extern unsigned long copy_transact_fpr_from_user(struct task_struct *task,
+						 void __user *from);
 #ifdef CONFIG_VSX
 extern unsigned long copy_vsx_to_user(void __user *to,
 				      struct task_struct *task);
+extern unsigned long copy_transact_vsx_to_user(void __user *to,
+					       struct task_struct *task);
 extern unsigned long copy_vsx_from_user(struct task_struct *task,
 					void __user *from);
+extern unsigned long copy_transact_vsx_from_user(struct task_struct *task,
+						 void __user *from);
 #endif
 
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 804e323..9dc2472 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -43,6 +43,7 @@
 #include <asm/sigcontext.h>
 #include <asm/vdso.h>
 #include <asm/switch_to.h>
+#include <asm/tm.h>
 #ifdef CONFIG_PPC64
 #include "ppc32.h"
 #include <asm/unistd.h>
@@ -293,6 +294,10 @@ long sys_sigaction(int sig, struct old_sigaction __user *act,
 struct sigframe {
 	struct sigcontext sctx;		/* the sigcontext */
 	struct mcontext	mctx;		/* all the register values */
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	struct sigcontext sctx_transact;
+	struct mcontext	mctx_transact;
+#endif
 	/*
 	 * Programs using the rs6000/xcoff abi can save up to 19 gp
 	 * regs and 18 fp regs below sp before decrementing it.
@@ -321,6 +326,9 @@ struct rt_sigframe {
 	struct siginfo info;
 #endif
 	struct ucontext	uc;
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	struct ucontext	uc_transact;
+#endif
 	/*
 	 * Programs using the rs6000/xcoff abi can save up to 19 gp
 	 * regs and 18 fp regs below sp before decrementing it.
@@ -381,6 +389,61 @@ unsigned long copy_vsx_from_user(struct task_struct *task,
 		task->thread.fpr[i][TS_VSRLOWOFFSET] = buf[i];
 	return 0;
 }
+
+#ifdef CONFIG_TRANSACTIONAL_MEM
+unsigned long copy_transact_fpr_to_user(void __user *to,
+				  struct task_struct *task)
+{
+	double buf[ELF_NFPREG];
+	int i;
+
+	/* save FPR copy to local buffer then write to the thread_struct */
+	for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+		buf[i] = task->thread.TS_TRANS_FPR(i);
+	memcpy(&buf[i], &task->thread.transact_fpscr, sizeof(double));
+	return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double));
+}
+
+unsigned long copy_transact_fpr_from_user(struct task_struct *task,
+					  void __user *from)
+{
+	double buf[ELF_NFPREG];
+	int i;
+
+	if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double)))
+		return 1;
+	for (i = 0; i < (ELF_NFPREG - 1) ; i++)
+		task->thread.TS_TRANS_FPR(i) = buf[i];
+	memcpy(&task->thread.transact_fpscr, &buf[i], sizeof(double));
+
+	return 0;
+}
+
+unsigned long copy_transact_vsx_to_user(void __user *to,
+				  struct task_struct *task)
+{
+	double buf[ELF_NVSRHALFREG];
+	int i;
+
+	/* save FPR copy to local buffer then write to the thread_struct */
+	for (i = 0; i < ELF_NVSRHALFREG; i++)
+		buf[i] = task->thread.transact_fpr[i][TS_VSRLOWOFFSET];
+	return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double));
+}
+
+unsigned long copy_transact_vsx_from_user(struct task_struct *task,
+					  void __user *from)
+{
+	double buf[ELF_NVSRHALFREG];
+	int i;
+
+	if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double)))
+		return 1;
+	for (i = 0; i < ELF_NVSRHALFREG ; i++)
+		task->thread.transact_fpr[i][TS_VSRLOWOFFSET] = buf[i];
+	return 0;
+}
+#endif /* CONFIG_TRANSACTIONAL_MEM */
 #else
 inline unsigned long copy_fpr_to_user(void __user *to,
 				      struct task_struct *task)
@@ -395,6 +458,22 @@ inline unsigned long copy_fpr_from_user(struct task_struct *task,
 	return __copy_from_user(task->thread.fpr, from,
 			      ELF_NFPREG * sizeof(double));
 }
+
+#ifdef CONFIG_TRANSACTIONAL_MEM
+inline unsigned long copy_transact_fpr_to_user(void __user *to,
+					 struct task_struct *task)
+{
+	return __copy_to_user(to, task->thread.transact_fpr,
+			      ELF_NFPREG * sizeof(double));
+}
+
+inline unsigned long copy_transact_fpr_from_user(struct task_struct *task,
+						 void __user *from)
+{
+	return __copy_from_user(task->thread.transact_fpr, from,
+				ELF_NFPREG * sizeof(double));
+}
+#endif /* CONFIG_TRANSACTIONAL_MEM */
 #endif
 
 /*
@@ -483,6 +562,156 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
 	return 0;
 }
 
+#ifdef CONFIG_TRANSACTIONAL_MEM
+/*
+ * Save the current user registers on the user stack.
+ * We only save the altivec/spe registers if the process has used
+ * altivec/spe instructions at some point.
+ * We also save the transactional registers to a second ucontext in the
+ * frame.
+ *
+ * See save_user_regs() and signal_64.c:setup_tm_sigcontexts().
+ */
+static int save_tm_user_regs(struct pt_regs *regs,
+			     struct mcontext __user *frame,
+			     struct mcontext __user *tm_frame, int sigret)
+{
+	unsigned long msr = regs->msr;
+
+	/* tm_reclaim rolls back all reg states, updating thread.ckpt_regs,
+	 * thread.transact_fpr[], thread.transact_vr[], etc.
+	 */
+	tm_enable();
+	tm_reclaim(&current->thread, msr, TM_CAUSE_SIGNAL);
+
+	/* Make sure floating point registers are stored in regs */
+	flush_fp_to_thread(current);
+
+	/* Save both sets of general registers */
+	if (save_general_regs(&current->thread.ckpt_regs, frame)
+	    || save_general_regs(regs, tm_frame))
+		return 1;
+
+	/* Stash the top half of the 64bit MSR into the 32bit MSR word
+	 * of the transactional mcontext.  This way we have a backward-compatible
+	 * MSR in the 'normal' (checkpointed) mcontext and additionally one can
+	 * also look at what type of transaction (T or S) was active at the
+	 * time of the signal.
+	 */
+	if (__put_user((msr >> 32), &tm_frame->mc_gregs[PT_MSR]))
+		return 1;
+
+#ifdef CONFIG_ALTIVEC
+	/* save altivec registers */
+	if (current->thread.used_vr) {
+		flush_altivec_to_thread(current);
+		if (__copy_to_user(&frame->mc_vregs, current->thread.vr,
+				   ELF_NVRREG * sizeof(vector128)))
+			return 1;
+		if (msr & MSR_VEC) {
+			if (__copy_to_user(&tm_frame->mc_vregs,
+					   current->thread.transact_vr,
+					   ELF_NVRREG * sizeof(vector128)))
+				return 1;
+		} else {
+			if (__copy_to_user(&tm_frame->mc_vregs,
+					   current->thread.vr,
+					   ELF_NVRREG * sizeof(vector128)))
+				return 1;
+		}
+
+		/* set MSR_VEC in the saved MSR value to indicate that
+		 * frame->mc_vregs contains valid data
+		 */
+		msr |= MSR_VEC;
+	}
+
+	/* We always copy to/from vrsave, it's 0 if we don't have or don't
+	 * use altivec. Since VSCR only contains 32 bits saved in the least
+	 * significant bits of a vector, we "cheat" and stuff VRSAVE in the
+	 * most significant bits of that same vector. --BenH
+	 */
+	if (__put_user(current->thread.vrsave,
+		       (u32 __user *)&frame->mc_vregs[32]))
+		return 1;
+	if (msr & MSR_VEC) {
+		if (__put_user(current->thread.transact_vrsave,
+			       (u32 __user *)&tm_frame->mc_vregs[32]))
+			return 1;
+	} else {
+		if (__put_user(current->thread.vrsave,
+			       (u32 __user *)&tm_frame->mc_vregs[32]))
+			return 1;
+	}
+#endif /* CONFIG_ALTIVEC */
+
+	if (copy_fpr_to_user(&frame->mc_fregs, current))
+		return 1;
+	if (msr & MSR_FP) {
+		if (copy_transact_fpr_to_user(&tm_frame->mc_fregs, current))
+			return 1;
+	} else {
+		if (copy_fpr_to_user(&tm_frame->mc_fregs, current))
+			return 1;
+	}
+
+#ifdef CONFIG_VSX
+	/*
+	 * Copy VSR 0-31 upper half from thread_struct to local
+	 * buffer, then write that to userspace.  Also set MSR_VSX in
+	 * the saved MSR value to indicate that frame->mc_vregs
+	 * contains valid data
+	 */
+	if (current->thread.used_vsr) {
+		__giveup_vsx(current);
+		if (copy_vsx_to_user(&frame->mc_vsregs, current))
+			return 1;
+		if (msr & MSR_VSX) {
+			if (copy_transact_vsx_to_user(&tm_frame->mc_vsregs,
+						      current))
+				return 1;
+		} else {
+			if (copy_vsx_to_user(&tm_frame->mc_vsregs, current))
+				return 1;
+		}
+
+		msr |= MSR_VSX;
+	}
+#endif /* CONFIG_VSX */
+#ifdef CONFIG_SPE
+	/* SPE regs are not checkpointed with TM, so this section is
+	 * simply the same as in save_user_regs().
+	 */
+	if (current->thread.used_spe) {
+		flush_spe_to_thread(current);
+		if (__copy_to_user(&frame->mc_vregs, current->thread.evr,
+				   ELF_NEVRREG * sizeof(u32)))
+			return 1;
+		/* set MSR_SPE in the saved MSR value to indicate that
+		 * frame->mc_vregs contains valid data */
+		msr |= MSR_SPE;
+	}
+
+	/* We always copy to/from spefscr */
+	if (__put_user(current->thread.spefscr, (u32 __user *)&frame->mc_vregs + ELF_NEVRREG))
+		return 1;
+#endif /* CONFIG_SPE */
+
+	if (__put_user(msr, &frame->mc_gregs[PT_MSR]))
+		return 1;
+	if (sigret) {
+		/* Set up the sigreturn trampoline: li r0,sigret; sc */
+		if (__put_user(0x38000000UL + sigret, &frame->tramp[0])
+		    || __put_user(0x44000002UL, &frame->tramp[1]))
+			return 1;
+		flush_icache_range((unsigned long) &frame->tramp[0],
+				   (unsigned long) &frame->tramp[2]);
+	}
+
+	return 0;
+}
+#endif
+
 /*
  * Restore the current user register values from the user stack,
  * (except for MSR).
@@ -588,6 +817,139 @@ static long restore_user_regs(struct pt_regs *regs,
 	return 0;
 }
 
+#ifdef CONFIG_TRANSACTIONAL_MEM
+/*
+ * Restore the current user register values from the user stack, except for
+ * MSR, and recheckpoint the original checkpointed register state for processes
+ * in transactions.
+ */
+static long restore_tm_user_regs(struct pt_regs *regs,
+				 struct mcontext __user *sr,
+				 struct mcontext __user *tm_sr)
+{
+	long err;
+	unsigned long msr;
+#ifdef CONFIG_VSX
+	int i;
+#endif
+
+	/*
+	 * restore general registers but not including MSR or SOFTE. Also
+	 * take care of keeping r2 (TLS) intact if not a signal.
+	 * See comment in signal_64.c:restore_tm_sigcontexts();
+	 * TFHAR is restored from the checkpointed NIP; TEXASR and TFIAR
+	 * were set by the signal delivery.
+	 */
+	err = restore_general_regs(regs, tm_sr);
+	err |= restore_general_regs(&current->thread.ckpt_regs, sr);
+
+	err |= __get_user(current->thread.tm_tfhar, &sr->mc_gregs[PT_NIP]);
+
+	err |= __get_user(msr, &sr->mc_gregs[PT_MSR]);
+	if (err)
+		return 1;
+
+	/* Restore the previous little-endian mode */
+	regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+
+	/*
+	 * Do this before updating the thread state in
+	 * current->thread.fpr/vr/evr.  That way, if we get preempted
+	 * and another task grabs the FPU/Altivec/SPE, it won't be
+	 * tempted to save the current CPU state into the thread_struct
+	 * and corrupt what we are writing there.
+	 */
+	discard_lazy_cpu_state();
+
+#ifdef CONFIG_ALTIVEC
+	regs->msr &= ~MSR_VEC;
+	if (msr & MSR_VEC) {
+		/* restore altivec registers from the stack */
+		if (__copy_from_user(current->thread.vr, &sr->mc_vregs,
+				     sizeof(sr->mc_vregs)) ||
+		    __copy_from_user(current->thread.transact_vr,
+				     &tm_sr->mc_vregs,
+				     sizeof(sr->mc_vregs)))
+			return 1;
+	} else if (current->thread.used_vr) {
+		memset(current->thread.vr, 0, ELF_NVRREG * sizeof(vector128));
+		memset(current->thread.transact_vr, 0,
+		       ELF_NVRREG * sizeof(vector128));
+	}
+
+	/* Always get VRSAVE back */
+	if (__get_user(current->thread.vrsave,
+		       (u32 __user *)&sr->mc_vregs[32]) ||
+	    __get_user(current->thread.transact_vrsave,
+		       (u32 __user *)&tm_sr->mc_vregs[32]))
+		return 1;
+#endif /* CONFIG_ALTIVEC */
+
+	regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1);
+
+	if (copy_fpr_from_user(current, &sr->mc_fregs) ||
+	    copy_transact_fpr_from_user(current, &tm_sr->mc_fregs))
+		return 1;
+
+#ifdef CONFIG_VSX
+	regs->msr &= ~MSR_VSX;
+	if (msr & MSR_VSX) {
+		/*
+		 * Restore altivec registers from the stack to a local
+		 * buffer, then write this out to the thread_struct
+		 */
+		if (copy_vsx_from_user(current, &sr->mc_vsregs) ||
+		    copy_transact_vsx_from_user(current, &tm_sr->mc_vsregs))
+			return 1;
+	} else if (current->thread.used_vsr)
+		for (i = 0; i < 32 ; i++) {
+			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
+			current->thread.transact_fpr[i][TS_VSRLOWOFFSET] = 0;
+		}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_SPE
+	/* SPE regs are not checkpointed with TM, so this section is
+	 * simply the same as in restore_user_regs().
+	 */
+	regs->msr &= ~MSR_SPE;
+	if (msr & MSR_SPE) {
+		if (__copy_from_user(current->thread.evr, &sr->mc_vregs,
+				     ELF_NEVRREG * sizeof(u32)))
+			return 1;
+	} else if (current->thread.used_spe)
+		memset(current->thread.evr, 0, ELF_NEVRREG * sizeof(u32));
+
+	/* Always get SPEFSCR back */
+	if (__get_user(current->thread.spefscr, (u32 __user *)&sr->mc_vregs
+		       + ELF_NEVRREG))
+		return 1;
+#endif /* CONFIG_SPE */
+
+	/* Now, recheckpoint.  This loads up all of the checkpointed (older)
+	 * registers, including FP and V[S]Rs.  After recheckpointing, the
+	 * transactional versions should be loaded.
+	 */
+	tm_enable();
+	/* This loads the checkpointed FP/VEC state, if used */
+	tm_recheckpoint(&current->thread, msr);
+	/* The task has moved into TM state S, so ensure MSR reflects this */
+	regs->msr = (regs->msr & ~MSR_TS_MASK) | MSR_TS_S;
+
+	/* This loads the speculative FP/VEC state, if used */
+	if (msr & MSR_FP) {
+		do_load_up_transact_fpu(&current->thread);
+		regs->msr |= (MSR_FP | current->thread.fpexc_mode);
+	}
+	if (msr & MSR_VEC) {
+		do_load_up_transact_altivec(&current->thread);
+		regs->msr |= MSR_VEC;
+	}
+
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_PPC64
 long compat_sys_rt_sigaction(int sig, const struct sigaction32 __user *act,
 		struct sigaction32 __user *oact, size_t sigsetsize)
@@ -827,6 +1189,8 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
 	struct mcontext __user *frame;
 	void __user *addr;
 	unsigned long newsp = 0;
+	int sigret;
+	unsigned long tramp;
 
 	/* Set up Signal Frame */
 	/* Put a Real Time Context onto stack */
@@ -838,7 +1202,6 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
 	/* Put the siginfo & fill in most of the ucontext */
 	if (copy_siginfo_to_user(&rt_sf->info, info)
 	    || __put_user(0, &rt_sf->uc.uc_flags)
-	    || __put_user(0, &rt_sf->uc.uc_link)
 	    || __put_user(current->sas_ss_sp, &rt_sf->uc.uc_stack.ss_sp)
 	    || __put_user(sas_ss_flags(regs->gpr[1]),
 			  &rt_sf->uc.uc_stack.ss_flags)
@@ -852,14 +1215,37 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
 	frame = &rt_sf->uc.uc_mcontext;
 	addr = frame;
 	if (vdso32_rt_sigtramp && current->mm->context.vdso_base) {
-		if (save_user_regs(regs, frame, 0, 1))
-			goto badframe;
-		regs->link = current->mm->context.vdso_base + vdso32_rt_sigtramp;
+		sigret = 0;
+		tramp = current->mm->context.vdso_base + vdso32_rt_sigtramp;
 	} else {
-		if (save_user_regs(regs, frame, __NR_rt_sigreturn, 1))
+		sigret = __NR_rt_sigreturn;
+		tramp = (unsigned long) frame->tramp;
+	}
+
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	if (MSR_TM_ACTIVE(regs->msr)) {
+		if (save_tm_user_regs(regs, &rt_sf->uc.uc_mcontext,
+				      &rt_sf->uc_transact.uc_mcontext, sigret))
 			goto badframe;
-		regs->link = (unsigned long) frame->tramp;
 	}
+	else
+#endif
+		if (save_user_regs(regs, frame, sigret, 1))
+			goto badframe;
+	regs->link = tramp;
+
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	if (MSR_TM_ACTIVE(regs->msr)) {
+		if (__put_user((unsigned long)&rt_sf->uc_transact,
+			       &rt_sf->uc.uc_link)
+		    || __put_user(to_user_ptr(&rt_sf->uc_transact.uc_mcontext),
+				  &rt_sf->uc_transact.uc_regs))
+			goto badframe;
+	}
+	else
+#endif
+		if (__put_user(0, &rt_sf->uc.uc_link))
+			goto badframe;
 
 	current->thread.fpscr.val = 0;	/* turn off all fp exceptions */
 
@@ -878,6 +1264,13 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka,
 	regs->nip = (unsigned long) ka->sa.sa_handler;
 	/* enter the signal handler in big-endian mode */
 	regs->msr &= ~MSR_LE;
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	/* Remove TM bits from thread's MSR.  The MSR in the sigcontext
+	 * just indicates to userland that we were doing a transaction, but we
+	 * don't want to return in transactional state:
+	 */
+	regs->msr &= ~MSR_TS_MASK;
+#endif
 	return 1;
 
 badframe:
@@ -925,6 +1318,35 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int
 	return 0;
 }
 
+#ifdef CONFIG_TRANSACTIONAL_MEM
+static int do_setcontext_tm(struct ucontext __user *ucp,
+			    struct ucontext __user *tm_ucp,
+			    struct pt_regs *regs)
+{
+	sigset_t set;
+	struct mcontext __user *mcp;
+	struct mcontext __user *tm_mcp;
+	u32 cmcp;
+	u32 tm_cmcp;
+
+	if (get_sigset_t(&set, &ucp->uc_sigmask))
+		return -EFAULT;
+
+	if (__get_user(cmcp, &ucp->uc_regs) ||
+	    __get_user(tm_cmcp, &tm_ucp->uc_regs))
+		return -EFAULT;
+	mcp = (struct mcontext __user *)(u64)cmcp;
+	tm_mcp = (struct mcontext __user *)(u64)tm_cmcp;
+	/* no need to check access_ok(mcp), since mcp < 4GB */
+
+	set_current_blocked(&set);
+	if (restore_tm_user_regs(regs, mcp, tm_mcp))
+		return -EFAULT;
+
+	return 0;
+}
+#endif
+
 long sys_swapcontext(struct ucontext __user *old_ctx,
 		     struct ucontext __user *new_ctx,
 		     int ctx_size, int r6, int r7, int r8, struct pt_regs *regs)
@@ -1020,7 +1442,12 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
 		     struct pt_regs *regs)
 {
 	struct rt_sigframe __user *rt_sf;
-
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	struct ucontext __user *uc_transact;
+	unsigned long msr_hi;
+	unsigned long tmp;
+	int tm_restore = 0;
+#endif
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
 
@@ -1028,6 +1455,34 @@ long sys_rt_sigreturn(int r3, int r4, int r5, int r6, int r7, int r8,
 		(regs->gpr[1] + __SIGNAL_FRAMESIZE + 16);
 	if (!access_ok(VERIFY_READ, rt_sf, sizeof(*rt_sf)))
 		goto bad;
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	if (__get_user(tmp, &rt_sf->uc.uc_link))
+		goto bad;
+	uc_transact = (struct ucontext __user *)(uintptr_t)tmp;
+	if (uc_transact) {
+		u32 cmcp;
+		struct mcontext __user *mcp;
+
+		if (__get_user(cmcp, &uc_transact->uc_regs))
+			return -EFAULT;
+		mcp = (struct mcontext __user *)(u64)cmcp;
+		/* The top 32 bits of the MSR are stashed in the transactional
+		 * ucontext. */
+		if (__get_user(msr_hi, &mcp->mc_gregs[PT_MSR]))
+			goto bad;
+
+		if (MSR_TM_SUSPENDED(msr_hi<<32)) {
+			/* We only recheckpoint on return if we're
+			 * transaction.
+			 */
+			tm_restore = 1;
+			if (do_setcontext_tm(&rt_sf->uc, uc_transact, regs))
+				goto bad;
+		}
+	}
+	if (!tm_restore)
+		/* Fall through, for non-TM restore */
+#endif
 	if (do_setcontext(&rt_sf->uc, regs, 1))
 		goto bad;
 
@@ -1179,6 +1634,8 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
 	struct sigcontext __user *sc;
 	struct sigframe __user *frame;
 	unsigned long newsp = 0;
+	int sigret;
+	unsigned long tramp;
 
 	/* Set up Signal Frame */
 	frame = get_sigframe(ka, regs, sizeof(*frame), 1);
@@ -1201,14 +1658,25 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
 		goto badframe;
 
 	if (vdso32_sigtramp && current->mm->context.vdso_base) {
-		if (save_user_regs(regs, &frame->mctx, 0, 1))
-			goto badframe;
-		regs->link = current->mm->context.vdso_base + vdso32_sigtramp;
+		sigret = 0;
+		tramp = current->mm->context.vdso_base + vdso32_sigtramp;
 	} else {
-		if (save_user_regs(regs, &frame->mctx, __NR_sigreturn, 1))
+		sigret = __NR_sigreturn;
+		tramp = (unsigned long) frame->mctx.tramp;
+	}
+
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	if (MSR_TM_ACTIVE(regs->msr)) {
+		if (save_tm_user_regs(regs, &frame->mctx, &frame->mctx_transact,
+				      sigret))
 			goto badframe;
-		regs->link = (unsigned long) frame->mctx.tramp;
 	}
+	else
+#endif
+		if (save_user_regs(regs, &frame->mctx, sigret, 1))
+			goto badframe;
+
+	regs->link = tramp;
 
 	current->thread.fpscr.val = 0;	/* turn off all fp exceptions */
 
@@ -1223,7 +1691,13 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka,
 	regs->nip = (unsigned long) ka->sa.sa_handler;
 	/* enter the signal handler in big-endian mode */
 	regs->msr &= ~MSR_LE;
-
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	/* Remove TM bits from thread's MSR.  The MSR in the sigcontext
+	 * just indicates to userland that we were doing a transaction, but we
+	 * don't want to return in transactional state:
+	 */
+	regs->msr &= ~MSR_TS_MASK;
+#endif
 	return 1;
 
 badframe:
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 1ca045d..5d349bd 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -34,6 +34,7 @@
 #include <asm/syscalls.h>
 #include <asm/vdso.h>
 #include <asm/switch_to.h>
+#include <asm/tm.h>
 
 #include "signal.h"
 
@@ -56,6 +57,9 @@
 struct rt_sigframe {
 	/* sys_rt_sigreturn requires the ucontext be the first field */
 	struct ucontext uc;
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	struct ucontext uc_transact;
+#endif
 	unsigned long _unused[2];
 	unsigned int tramp[TRAMP_SIZE];
 	struct siginfo __user *pinfo;
@@ -145,6 +149,145 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
 	return err;
 }
 
+#ifdef CONFIG_TRANSACTIONAL_MEM
+/*
+ * As above, but Transactional Memory is in use, so deliver sigcontexts
+ * containing checkpointed and transactional register states.
+ *
+ * To do this, we treclaim to gather both sets of registers and set up the
+ * 'normal' sigcontext registers with rolled-back register values such that a
+ * simple signal handler sees a correct checkpointed register state.
+ * If interested, a TM-aware sighandler can examine the transactional registers
+ * in the 2nd sigcontext to determine the real origin of the signal.
+ */
+static long setup_tm_sigcontexts(struct sigcontext __user *sc,
+				 struct sigcontext __user *tm_sc,
+				 struct pt_regs *regs,
+				 int signr, sigset_t *set, unsigned long handler)
+{
+	/* When CONFIG_ALTIVEC is set, we _always_ setup v_regs even if the
+	 * process never used altivec yet (MSR_VEC is zero in pt_regs of
+	 * the context). This is very important because we must ensure we
+	 * don't lose the VRSAVE content that may have been set prior to
+	 * the process doing its first vector operation
+	 * Userland shall check AT_HWCAP to know wether it can rely on the
+	 * v_regs pointer or not.
+	 */
+#ifdef CONFIG_ALTIVEC
+	elf_vrreg_t __user *v_regs = (elf_vrreg_t __user *)
+		(((unsigned long)sc->vmx_reserve + 15) & ~0xful);
+	elf_vrreg_t __user *tm_v_regs = (elf_vrreg_t __user *)
+		(((unsigned long)tm_sc->vmx_reserve + 15) & ~0xful);
+#endif
+	unsigned long msr = regs->msr;
+	long err = 0;
+
+	BUG_ON(!MSR_TM_ACTIVE(regs->msr));
+
+	/* tm_reclaim rolls back all reg states, saving checkpointed (older)
+	 * GPRs to thread.ckpt_regs and (if used) FPRs to (newer)
+	 * thread.transact_fp and/or VRs to (newer) thread.transact_vr.
+	 * THEN we save out FP/VRs, if necessary, to the checkpointed (older)
+	 * thread.fr[]/vr[]s.  The transactional (newer) GPRs are on the
+	 * stack, in *regs.
+	 */
+	tm_enable();
+	tm_reclaim(&current->thread, msr, TM_CAUSE_SIGNAL);
+
+	flush_fp_to_thread(current);
+
+#ifdef CONFIG_ALTIVEC
+	err |= __put_user(v_regs, &sc->v_regs);
+	err |= __put_user(tm_v_regs, &tm_sc->v_regs);
+
+	/* save altivec registers */
+	if (current->thread.used_vr) {
+		flush_altivec_to_thread(current);
+		/* Copy 33 vec registers (vr0..31 and vscr) to the stack */
+		err |= __copy_to_user(v_regs, current->thread.vr,
+				      33 * sizeof(vector128));
+		/* If VEC was enabled there are transactional VRs valid too,
+		 * else they're a copy of the checkpointed VRs.
+		 */
+		if (msr & MSR_VEC)
+			err |= __copy_to_user(tm_v_regs,
+					      current->thread.transact_vr,
+					      33 * sizeof(vector128));
+		else
+			err |= __copy_to_user(tm_v_regs,
+					      current->thread.vr,
+					      33 * sizeof(vector128));
+
+		/* set MSR_VEC in the MSR value in the frame to indicate
+		 * that sc->v_reg contains valid data.
+		 */
+		msr |= MSR_VEC;
+	}
+	/* We always copy to/from vrsave, it's 0 if we don't have or don't
+	 * use altivec.
+	 */
+	err |= __put_user(current->thread.vrsave, (u32 __user *)&v_regs[33]);
+	if (msr & MSR_VEC)
+		err |= __put_user(current->thread.transact_vrsave,
+				  (u32 __user *)&tm_v_regs[33]);
+	else
+		err |= __put_user(current->thread.vrsave,
+				  (u32 __user *)&tm_v_regs[33]);
+
+#else /* CONFIG_ALTIVEC */
+	err |= __put_user(0, &sc->v_regs);
+	err |= __put_user(0, &tm_sc->v_regs);
+#endif /* CONFIG_ALTIVEC */
+
+	/* copy fpr regs and fpscr */
+	err |= copy_fpr_to_user(&sc->fp_regs, current);
+	if (msr & MSR_FP)
+		err |= copy_transact_fpr_to_user(&tm_sc->fp_regs, current);
+	else
+		err |= copy_fpr_to_user(&tm_sc->fp_regs, current);
+
+#ifdef CONFIG_VSX
+	/*
+	 * Copy VSX low doubleword to local buffer for formatting,
+	 * then out to userspace.  Update v_regs to point after the
+	 * VMX data.
+	 */
+	if (current->thread.used_vsr) {
+		__giveup_vsx(current);
+		v_regs += ELF_NVRREG;
+		tm_v_regs += ELF_NVRREG;
+
+		err |= copy_vsx_to_user(v_regs, current);
+
+		if (msr & MSR_VSX)
+			err |= copy_transact_vsx_to_user(tm_v_regs, current);
+		else
+			err |= copy_vsx_to_user(tm_v_regs, current);
+
+		/* set MSR_VSX in the MSR value in the frame to
+		 * indicate that sc->vs_reg) contains valid data.
+		 */
+		msr |= MSR_VSX;
+	}
+#endif /* CONFIG_VSX */
+
+	err |= __put_user(&sc->gp_regs, &sc->regs);
+	err |= __put_user(&tm_sc->gp_regs, &tm_sc->regs);
+	WARN_ON(!FULL_REGS(regs));
+	err |= __copy_to_user(&tm_sc->gp_regs, regs, GP_REGS_SIZE);
+	err |= __copy_to_user(&sc->gp_regs,
+			      &current->thread.ckpt_regs, GP_REGS_SIZE);
+	err |= __put_user(msr, &tm_sc->gp_regs[PT_MSR]);
+	err |= __put_user(msr, &sc->gp_regs[PT_MSR]);
+	err |= __put_user(signr, &sc->signal);
+	err |= __put_user(handler, &sc->handler);
+	if (set != NULL)
+		err |=  __put_user(set->sig[0], &sc->oldmask);
+
+	return err;
+}
+#endif
+
 /*
  * Restore the sigcontext from the signal frame.
  */
@@ -241,6 +384,153 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
 	return err;
 }
 
+#ifdef CONFIG_TRANSACTIONAL_MEM
+/*
+ * Restore the two sigcontexts from the frame of a transactional processes.
+ */
+
+static long restore_tm_sigcontexts(struct pt_regs *regs,
+				   struct sigcontext __user *sc,
+				   struct sigcontext __user *tm_sc)
+{
+#ifdef CONFIG_ALTIVEC
+	elf_vrreg_t __user *v_regs, *tm_v_regs;
+#endif
+	unsigned long err = 0;
+	unsigned long msr;
+#ifdef CONFIG_VSX
+	int i;
+#endif
+	/* copy the GPRs */
+	err |= __copy_from_user(regs->gpr, tm_sc->gp_regs, sizeof(regs->gpr));
+	err |= __copy_from_user(&current->thread.ckpt_regs, sc->gp_regs,
+				sizeof(regs->gpr));
+
+	/*
+	 * TFHAR is restored from the checkpointed 'wound-back' ucontext's NIP.
+	 * TEXASR was set by the signal delivery reclaim, as was TFIAR.
+	 * Users doing anything abhorrent like thread-switching w/ signals for
+	 * TM-Suspended code will have to back TEXASR/TFIAR up themselves.
+	 * For the case of getting a signal and simply returning from it,
+	 * we don't need to re-copy them here.
+	 */
+	err |= __get_user(regs->nip, &tm_sc->gp_regs[PT_NIP]);
+	err |= __get_user(current->thread.tm_tfhar, &sc->gp_regs[PT_NIP]);
+
+	/* get MSR separately, transfer the LE bit if doing signal return */
+	err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
+	regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
+
+	/* The following non-GPR non-FPR non-VR state is also checkpointed: */
+	err |= __get_user(regs->ctr, &tm_sc->gp_regs[PT_CTR]);
+	err |= __get_user(regs->link, &tm_sc->gp_regs[PT_LNK]);
+	err |= __get_user(regs->xer, &tm_sc->gp_regs[PT_XER]);
+	err |= __get_user(regs->ccr, &tm_sc->gp_regs[PT_CCR]);
+	err |= __get_user(current->thread.ckpt_regs.ctr,
+			  &sc->gp_regs[PT_CTR]);
+	err |= __get_user(current->thread.ckpt_regs.link,
+			  &sc->gp_regs[PT_LNK]);
+	err |= __get_user(current->thread.ckpt_regs.xer,
+			  &sc->gp_regs[PT_XER]);
+	err |= __get_user(current->thread.ckpt_regs.ccr,
+			  &sc->gp_regs[PT_CCR]);
+
+	/* These regs are not checkpointed; they can go in 'regs'. */
+	err |= __get_user(regs->trap, &sc->gp_regs[PT_TRAP]);
+	err |= __get_user(regs->dar, &sc->gp_regs[PT_DAR]);
+	err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
+	err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
+
+	/*
+	 * Do this before updating the thread state in
+	 * current->thread.fpr/vr.  That way, if we get preempted
+	 * and another task grabs the FPU/Altivec, it won't be
+	 * tempted to save the current CPU state into the thread_struct
+	 * and corrupt what we are writing there.
+	 */
+	discard_lazy_cpu_state();
+
+	/*
+	 * Force reload of FP/VEC.
+	 * This has to be done before copying stuff into current->thread.fpr/vr
+	 * for the reasons explained in the previous comment.
+	 */
+	regs->msr &= ~(MSR_FP | MSR_FE0 | MSR_FE1 | MSR_VEC | MSR_VSX);
+
+#ifdef CONFIG_ALTIVEC
+	err |= __get_user(v_regs, &sc->v_regs);
+	err |= __get_user(tm_v_regs, &tm_sc->v_regs);
+	if (err)
+		return err;
+	if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128)))
+		return -EFAULT;
+	if (tm_v_regs && !access_ok(VERIFY_READ,
+				    tm_v_regs, 34 * sizeof(vector128)))
+		return -EFAULT;
+	/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
+	if (v_regs != 0 && tm_v_regs != 0 && (msr & MSR_VEC) != 0) {
+		err |= __copy_from_user(current->thread.vr, v_regs,
+					33 * sizeof(vector128));
+		err |= __copy_from_user(current->thread.transact_vr, tm_v_regs,
+					33 * sizeof(vector128));
+	}
+	else if (current->thread.used_vr) {
+		memset(current->thread.vr, 0, 33 * sizeof(vector128));
+		memset(current->thread.transact_vr, 0, 33 * sizeof(vector128));
+	}
+	/* Always get VRSAVE back */
+	if (v_regs != 0 && tm_v_regs != 0) {
+		err |= __get_user(current->thread.vrsave,
+				  (u32 __user *)&v_regs[33]);
+		err |= __get_user(current->thread.transact_vrsave,
+				  (u32 __user *)&tm_v_regs[33]);
+	}
+	else {
+		current->thread.vrsave = 0;
+		current->thread.transact_vrsave = 0;
+	}
+#endif /* CONFIG_ALTIVEC */
+	/* restore floating point */
+	err |= copy_fpr_from_user(current, &sc->fp_regs);
+	err |= copy_transact_fpr_from_user(current, &tm_sc->fp_regs);
+#ifdef CONFIG_VSX
+	/*
+	 * Get additional VSX data. Update v_regs to point after the
+	 * VMX data.  Copy VSX low doubleword from userspace to local
+	 * buffer for formatting, then into the taskstruct.
+	 */
+	if (v_regs && ((msr & MSR_VSX) != 0)) {
+		v_regs += ELF_NVRREG;
+		tm_v_regs += ELF_NVRREG;
+		err |= copy_vsx_from_user(current, v_regs);
+		err |= copy_transact_vsx_from_user(current, tm_v_regs);
+	} else {
+		for (i = 0; i < 32 ; i++) {
+			current->thread.fpr[i][TS_VSRLOWOFFSET] = 0;
+			current->thread.transact_fpr[i][TS_VSRLOWOFFSET] = 0;
+		}
+	}
+#endif
+	tm_enable();
+	/* This loads the checkpointed FP/VEC state, if used */
+	tm_recheckpoint(&current->thread, msr);
+	/* The task has moved into TM state S, so ensure MSR reflects this: */
+	regs->msr = (regs->msr & ~MSR_TS_MASK) | __MASK(33);
+
+	/* This loads the speculative FP/VEC state, if used */
+	if (msr & MSR_FP) {
+		do_load_up_transact_fpu(&current->thread);
+		regs->msr |= (MSR_FP | current->thread.fpexc_mode);
+	}
+	if (msr & MSR_VEC) {
+		do_load_up_transact_altivec(&current->thread);
+		regs->msr |= MSR_VEC;
+	}
+
+	return err;
+}
+#endif
+
 /*
  * Setup the trampoline code on the stack
  */
@@ -355,6 +645,9 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
 {
 	struct ucontext __user *uc = (struct ucontext __user *)regs->gpr[1];
 	sigset_t set;
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	unsigned long msr;
+#endif
 
 	/* Always make any pending restarted system calls return -EINTR */
 	current_thread_info()->restart_block.fn = do_no_restart_syscall;
@@ -365,6 +658,21 @@ int sys_rt_sigreturn(unsigned long r3, unsigned long r4, unsigned long r5,
 	if (__copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
 		goto badframe;
 	set_current_blocked(&set);
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
+		goto badframe;
+	if (MSR_TM_SUSPENDED(msr)) {
+		/* We recheckpoint on return. */
+		struct ucontext __user *uc_transact;
+		if (__get_user(uc_transact, &uc->uc_link))
+			goto badframe;
+		if (restore_tm_sigcontexts(regs, &uc->uc_mcontext,
+					   &uc_transact->uc_mcontext))
+			goto badframe;
+	}
+	else
+	/* Fall through, for non-TM restore */
+#endif
 	if (restore_sigcontext(regs, NULL, 1, &uc->uc_mcontext))
 		goto badframe;
 
@@ -415,19 +723,42 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info,
 
 	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
-	err |= __put_user(0, &frame->uc.uc_link);
 	err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
 	err |= __put_user(sas_ss_flags(regs->gpr[1]),
 			  &frame->uc.uc_stack.ss_flags);
 	err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
-	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, signr, NULL,
-				(unsigned long)ka->sa.sa_handler, 1);
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	if (MSR_TM_ACTIVE(regs->msr)) {
+		/* The ucontext_t passed to userland points to the second
+		 * ucontext_t (for transactional state) with its uc_link ptr.
+		 */
+		err |= __put_user(&frame->uc_transact, &frame->uc.uc_link);
+		err |= setup_tm_sigcontexts(&frame->uc.uc_mcontext,
+					    &frame->uc_transact.uc_mcontext,
+					    regs, signr,
+					    NULL,
+					    (unsigned long)ka->sa.sa_handler);
+	} else
+#endif
+	{
+		err |= __put_user(0, &frame->uc.uc_link);
+		err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, signr,
+					NULL, (unsigned long)ka->sa.sa_handler,
+					1);
+	}
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
 	if (err)
 		goto badframe;
 
 	/* Make sure signal handler doesn't get spurious FP exceptions */
 	current->thread.fpscr.val = 0;
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	/* Remove TM bits from thread's MSR.  The MSR in the sigcontext
+	 * just indicates to userland that we were doing a transaction, but we
+	 * don't want to return in transactional state:
+	 */
+	regs->msr &= ~MSR_TS_MASK;
+#endif
 
 	/* Set up to return from userspace. */
 	if (vdso64_rt_sigtramp && current->mm->context.vdso_base) {
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 12/17] powerpc: Hook in new transactional memory code
From: Michael Neuling @ 2013-01-18  5:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Michael Neuling, linuxppc-dev, Matt Evans
In-Reply-To: <1358488117-17363-1-git-send-email-mikey@neuling.org>

This hooks the new transactional memory code into context switching, FP/VMX/VMX
unavailable and exception return.

Signed-off-by: Matt Evans <matt@ozlabs.org>
Signed-off-by: Michael Neuling <mikey@neuling.org>
---
 arch/powerpc/kernel/exceptions-64s.S |   48 ++++++++++++++++++++++++++++++++--
 arch/powerpc/kernel/fpu.S            |    1 -
 arch/powerpc/kernel/process.c        |   15 +++++++++--
 arch/powerpc/kernel/traps.c          |   32 +++++++++++++++++++++++
 arch/powerpc/kernel/vector.S         |    1 -
 arch/powerpc/mm/hash_utils_64.c      |   13 +++++++++
 6 files changed, 104 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index d0cc657..c4a20d1 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1147,9 +1147,24 @@ fp_unavailable_common:
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.kernel_fp_unavailable_exception
 	BUG_OPCODE
-1:	bl	.load_up_fpu
+1:
+#ifdef CONFIG_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+	srdi	r0, r12, MSR_TS_LG
+	andi.	r0, r0, 3
+	bne-	2f
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
+	bl	.load_up_fpu
+	std     r12,_MSR(r1)
 	b	fast_exception_return
-
+#ifdef CONFIG_TRANSACTIONAL_MEM
+2:	/* User process was in a transaction */
+	bl	.save_nvgprs
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.fp_unavailable_tm
+	b	.ret_from_except
+#endif
 	.align	7
 	.globl altivec_unavailable_common
 altivec_unavailable_common:
@@ -1157,8 +1172,23 @@ altivec_unavailable_common:
 #ifdef CONFIG_ALTIVEC
 BEGIN_FTR_SECTION
 	beq	1f
+#ifdef CONFIG_TRANSACTIONAL_MEM
+  BEGIN_FTR_SECTION_NESTED(69)
+	srdi	r0, r12, MSR_TS_LG
+	andi.	r0, r0, 3
+	bne-	2f
+  END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
+#endif
 	bl	.load_up_altivec
+	std	r12,_MSR(r1)
 	b	fast_exception_return
+#ifdef CONFIG_TRANSACTIONAL_MEM
+2:	/* User process was in a transaction */
+	bl	.save_nvgprs
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.altivec_unavailable_tm
+	b	.ret_from_except
+#endif
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif
@@ -1175,7 +1205,21 @@ vsx_unavailable_common:
 #ifdef CONFIG_VSX
 BEGIN_FTR_SECTION
 	beq	1f
+#ifdef CONFIG_TRANSACTIONAL_MEM
+  BEGIN_FTR_SECTION_NESTED(69)
+	srdi	r0, r12, MSR_TS_LG
+	andi.	r0, r0, 3
+	bne-	2f
+  END_FTR_SECTION_NESTED(CPU_FTR_TM, CPU_FTR_TM, 69)
+#endif
 	b	.load_up_vsx
+#ifdef CONFIG_TRANSACTIONAL_MEM
+2:	/* User process was in a transaction */
+	bl	.save_nvgprs
+	addi	r3,r1,STACK_FRAME_OVERHEAD
+	bl	.vsx_unavailable_tm
+	b	.ret_from_except
+#endif
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 6ab0e87..08b6a12f 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -170,7 +170,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	lwz	r4,THREAD_FPEXC_MODE(r5)
 	ori	r12,r12,MSR_FP
 	or	r12,r12,r4
-	std	r12,_MSR(r1)
 #endif
 	lfd	fr0,THREAD_FPSCR(r5)
 	MTFSF_L(fr0)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 14fabd1..bccc81b 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -510,7 +510,7 @@ out_and_saveregs:
 	tm_save_sprs(thr);
 }
 
-static inline void __maybe_unused tm_recheckpoint_new_task(struct task_struct *new)
+static inline void tm_recheckpoint_new_task(struct task_struct *new)
 {
 	unsigned long msr;
 
@@ -585,6 +585,8 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	struct ppc64_tlb_batch *batch;
 #endif
 
+	__switch_to_tm(prev);
+
 #ifdef CONFIG_SMP
 	/* avoid complexity of lazy save/restore of fpu
 	 * by just saving it every time we switch out if
@@ -700,6 +702,9 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	 * of sync. Hard disable here.
 	 */
 	hard_irq_disable();
+
+	tm_recheckpoint_new_task(new);
+
 	last = _switch(old_thread, new_thread);
 
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -1075,7 +1080,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 		regs->msr = MSR_USER32;
 	}
 #endif
-
 	discard_lazy_cpu_state();
 #ifdef CONFIG_VSX
 	current->thread.used_vsr = 0;
@@ -1095,6 +1099,13 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 	current->thread.spefscr = 0;
 	current->thread.used_spe = 0;
 #endif /* CONFIG_SPE */
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	if (cpu_has_feature(CPU_FTR_TM))
+		regs->msr |= MSR_TM;
+	current->thread.tm_tfhar = 0;
+	current->thread.tm_texasr = 0;
+	current->thread.tm_tfiar = 0;
+#endif /* CONFIG_TRANSACTIONAL_MEM */
 }
 
 #define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 311770d..486712b 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1032,6 +1032,38 @@ void __kprobes program_check_exception(struct pt_regs *regs)
 		_exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
 		return;
 	}
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	if (reason & REASON_TM) {
+		/* This is a TM "Bad Thing Exception" program check.
+		 * This occurs when:
+		 * -  An rfid/hrfid/mtmsrd attempts to cause an illegal
+		 *    transition in TM states.
+		 * -  A trechkpt is attempted when transactional.
+		 * -  A treclaim is attempted when non transactional.
+		 * -  A tend is illegally attempted.
+		 * -  writing a TM SPR when transactional.
+		 */
+		if (!user_mode(regs) &&
+		    report_bug(regs->nip, regs) == BUG_TRAP_TYPE_WARN) {
+			regs->nip += 4;
+			return;
+		}
+		/* If usermode caused this, it's done something illegal and
+		 * gets a SIGILL slap on the wrist.  We call it an illegal
+		 * operand to distinguish from the instruction just being bad
+		 * (e.g. executing a 'tend' on a CPU without TM!); it's an
+		 * illegal /placement/ of a valid instruction.
+		 */
+		if (user_mode(regs)) {
+			_exception(SIGILL, regs, ILL_ILLOPN, regs->nip);
+			return;
+		} else {
+			printk(KERN_EMERG "Unexpected TM Bad Thing exception "
+			       "at %lx (msr 0x%x)\n", regs->nip, reason);
+			die("Unrecoverable exception", regs, SIGABRT);
+		}
+	}
+#endif
 
 	/* We restore the interrupt state now */
 	if (!arch_irq_disabled_regs(regs))
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 330fc8c..9b47306 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -123,7 +123,6 @@ _GLOBAL(load_up_altivec)
 	ld	r4,PACACURRENT(r13)
 	addi	r5,r4,THREAD		/* Get THREAD */
 	oris	r12,r12,MSR_VEC@h
-	std	r12,_MSR(r1)
 #endif
 	li	r4,1
 	li	r10,THREAD_VSCR
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 3a292be..03a4c4a 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -55,6 +55,7 @@
 #include <asm/code-patching.h>
 #include <asm/fadump.h>
 #include <asm/firmware.h>
+#include <asm/tm.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -1171,6 +1172,18 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
 		DBG_LOW(" sub %ld: hash=%lx, hidx=%lx\n", index, slot, hidx);
 		ppc_md.hpte_invalidate(slot, vpn, psize, ssize, local);
 	} pte_iterate_hashed_end();
+
+#ifdef CONFIG_TRANSACTIONAL_MEM
+	/* Transactions are not aborted by tlbiel, only tlbie.
+	 * Without, syncing a page back to a block device w/ PIO could pick up
+	 * transactional data (bad!) so we force an abort here.  Before the
+	 * sync the page will be made read-only, which will flush_hash_page.
+	 */
+	if (local && cpu_has_feature(CPU_FTR_TM)) {
+		tm_enable();
+		tm_abort(TM_CAUSE_TLBI);
+	}
+#endif
 }
 
 void flush_hash_range(unsigned long number, int local)
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 11/17] powerpc: Assembler routines for FP/VSX/VMX unavailable during a transaction
From: Michael Neuling @ 2013-01-18  5:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: Michael Neuling, linuxppc-dev, Matt Evans
In-Reply-To: <1358488117-17363-1-git-send-email-mikey@neuling.org>

We do lazy FP but not lazy TM (ie. userspace starts with MSR TM=1 FP=0).  Hence
if userspace does an FP instruction during a transaction, we'll take an
fp unavailable exception.

This adds functions needed to handle this case.  We have to inject the current
FP state into the checkpoint so that the hardware can decide what to do with
the transaction.  We can't inject only the FP so we have to do a full treclaim
and recheckpoint to inject just the FP state.  This will cause the transaction
to be marked as aborted by the hardware.

This just add the routines needed to do this for FP, VMX and VSX.  It doesn't
hook them into the rest of the code yet.

Signed-off-by: Matt Evans <matt@ozlabs.org>
Signed-off-by: Michael Neuling <mikey@neuling.org>
---
 arch/powerpc/kernel/traps.c |   95 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 95 insertions(+)

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index dda0517..311770d 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -58,6 +58,7 @@
 #include <asm/rio.h>
 #include <asm/fadump.h>
 #include <asm/switch_to.h>
+#include <asm/tm.h>
 #include <asm/debug.h>
 
 #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
@@ -1192,6 +1193,100 @@ void tm_unavailable_exception(struct pt_regs *regs)
 	die("Unexpected TM unavailable exception", regs, SIGABRT);
 }
 
+#ifdef CONFIG_TRANSACTIONAL_MEM
+
+extern void do_load_up_fpu(struct pt_regs *regs);
+
+void fp_unavailable_tm(struct pt_regs *regs)
+{
+	/* Note:  This does not handle any kind of FP laziness. */
+
+	/* We restore the interrupt state now */
+	if (!arch_irq_disabled_regs(regs))
+		local_irq_enable();
+
+	TM_DEBUG("FP Unavailable trap whilst transactional at 0x%lx, MSR=%lx\n",
+		 regs->nip, regs->msr);
+	tm_enable();
+
+        /* We can only have got here if the task started using FP after
+         * beginning the transaction.  So, the transactional regs are just a
+         * copy of the checkpointed ones.  But, we still need to recheckpoint
+         * as we're enabling FP for the process; it will return, abort the
+         * transaction, and probably retry but now with FP enabled.  So the
+         * checkpointed FP registers need to be loaded.
+	 */
+	tm_reclaim(&current->thread, current->thread.regs->msr,
+		   TM_CAUSE_FAC_UNAV);
+	/* Reclaim didn't save out any FPRs to transact_fprs. */
+
+	/* Enable FP for the task: */
+	regs->msr |= (MSR_FP | current->thread.fpexc_mode);
+
+	/* This loads and recheckpoints the FP registers from
+	 * thread.fpr[].  They will remain in registers after the
+	 * checkpoint so we don't need to reload them after.
+	 */
+	tm_recheckpoint(&current->thread, regs->msr);
+}
+
+#ifdef CONFIG_ALTIVEC
+extern void do_load_up_altivec(struct pt_regs *regs);
+
+void altivec_unavailable_tm(struct pt_regs *regs)
+{
+	/* See the comments in fp_unavailable_tm().  This function operates
+	 * the same way.
+	 */
+
+	/* We restore the interrupt state now */
+	if (!arch_irq_disabled_regs(regs))
+		local_irq_enable();
+
+	TM_DEBUG("Vector Unavailable trap whilst transactional at 0x%lx,"
+		 "MSR=%lx\n",
+		 regs->nip, regs->msr);
+	tm_enable();
+	tm_reclaim(&current->thread, current->thread.regs->msr,
+		   TM_CAUSE_FAC_UNAV);
+	regs->msr |= MSR_VEC;
+	tm_recheckpoint(&current->thread, regs->msr);
+	current->thread.used_vr = 1;
+}
+#endif
+
+#ifdef CONFIG_VSX
+void vsx_unavailable_tm(struct pt_regs *regs)
+{
+	/* See the comments in fp_unavailable_tm().  This works similarly,
+	 * though we're loading both FP and VEC registers in here.
+	 *
+	 * If FP isn't in use, load FP regs.  If VEC isn't in use, load VEC
+	 * regs.  Either way, set MSR_VSX.
+	 */
+
+	/* We restore the interrupt state now */
+	if (!arch_irq_disabled_regs(regs))
+		local_irq_enable();
+
+	TM_DEBUG("VSX Unavailable trap whilst transactional at 0x%lx,"
+		 "MSR=%lx\n",
+		 regs->nip, regs->msr);
+
+	tm_enable();
+	/* This reclaims FP and/or VR regs if they're already enabled */
+	tm_reclaim(&current->thread, current->thread.regs->msr,
+		   TM_CAUSE_FAC_UNAV);
+
+	regs->msr |= MSR_VEC | MSR_FP | current->thread.fpexc_mode |
+		MSR_VSX;
+	/* This loads & recheckpoints FP and VRs. */
+	tm_recheckpoint(&current->thread, regs->msr);
+	current->thread.used_vsr = 1;
+}
+#endif
+#endif /* CONFIG_TRANSACTIONAL_MEM */
+
 void performance_monitor_exception(struct pt_regs *regs)
 {
 	__get_cpu_var(irq_stat).pmu_irqs++;
-- 
1.7.10.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox