LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 3/6] powerpc/booke64: add DO_KVM kernel hooks
From: Mihai Caraman @ 2012-08-06 13:27 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Mihai Caraman, kvm, kvm-ppc
In-Reply-To: <1344259628-31161-1-git-send-email-mihai.caraman@freescale.com>

Hook DO_KVM macro into 64-bit booke for KVM integration. Extend interrupt
handlers' parameter list with interrupt vector numbers to accomodate the macro.
Only the bolted version of tlb miss handers is addressed now.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
---
 arch/powerpc/kernel/exceptions-64e.S |   93 +++++++++++++++++++++------------
 arch/powerpc/mm/tlb_low_64e.S        |   14 ++++-
 2 files changed, 70 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 0f58a95..3b1ad1b 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -25,6 +25,8 @@
 #include <asm/ppc-opcode.h>
 #include <asm/mmu.h>
 #include <asm/hw_irq.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_booke_hv_asm.h>
 
 /* XXX This will ultimately add space for a special exception save
  *     structure used to save things like SRR0/SRR1, SPRGs, MAS, etc...
@@ -35,12 +37,14 @@
 #define	SPECIAL_EXC_FRAME_SIZE	INT_FRAME_SIZE
 
 /* Exception prolog code for all exceptions */
-#define EXCEPTION_PROLOG(n, type, addition)				    \
+#define EXCEPTION_PROLOG(n, intnum, type, addition)	    		    \
 	mtspr	SPRN_SPRG_##type##_SCRATCH,r13;	/* get spare registers */   \
 	mfspr	r13,SPRN_SPRG_PACA;	/* get PACA */			    \
 	std	r10,PACA_EX##type+EX_R10(r13);				    \
 	std	r11,PACA_EX##type+EX_R11(r13);				    \
 	mfcr	r10;			/* save CR */			    \
+	mfspr	r11,SPRN_##type##_SRR1;/* what are we coming from */	    \
+	DO_KVM	intnum,SPRN_##type##_SRR1;    /* KVM hook */		    \
 	addition;			/* additional code for that exc. */ \
 	std	r1,PACA_EX##type+EX_R1(r13); /* save old r1 in the PACA */  \
 	stw	r10,PACA_EX##type+EX_CR(r13); /* save old CR in the PACA */ \
@@ -81,20 +85,20 @@
 #define SPRN_MC_SRR0	SPRN_MCSRR0
 #define SPRN_MC_SRR1	SPRN_MCSRR1
 
-#define NORMAL_EXCEPTION_PROLOG(n, addition)				    \
-	EXCEPTION_PROLOG(n, GEN, addition##_GEN(n))
+#define NORMAL_EXCEPTION_PROLOG(n, intnum, addition)			    \
+	EXCEPTION_PROLOG(n, intnum, GEN, addition##_GEN(n))
 
-#define CRIT_EXCEPTION_PROLOG(n, addition)				    \
-	EXCEPTION_PROLOG(n, CRIT, addition##_CRIT(n))
+#define CRIT_EXCEPTION_PROLOG(n, intnum, addition)			    \
+	EXCEPTION_PROLOG(n, intnum, CRIT, addition##_CRIT(n))
 
-#define DBG_EXCEPTION_PROLOG(n, addition)				    \
-	EXCEPTION_PROLOG(n, DBG, addition##_DBG(n))
+#define DBG_EXCEPTION_PROLOG(n, intnum, addition)			    \
+	EXCEPTION_PROLOG(n, intnum, DBG, addition##_DBG(n))
 
-#define MC_EXCEPTION_PROLOG(n, addition)				    \
-	EXCEPTION_PROLOG(n, MC, addition##_MC(n))
+#define MC_EXCEPTION_PROLOG(n, intnum, addition)			    \
+	EXCEPTION_PROLOG(n, intnum, MC, addition##_MC(n))
 
-#define GDBELL_EXCEPTION_PROLOG(n, addition)				    \
-	EXCEPTION_PROLOG(n, GDBELL, addition##_GDBELL(n))
+#define GDBELL_EXCEPTION_PROLOG(n, intnum, addition)			    \
+	EXCEPTION_PROLOG(n, intnum, GDBELL, addition##_GDBELL(n))
 
 /* Variants of the "addition" argument for the prolog
  */
@@ -240,9 +244,9 @@ exc_##n##_bad_stack:							    \
 1:
 
 
-#define MASKABLE_EXCEPTION(trapnum, label, hdlr, ack)			\
+#define MASKABLE_EXCEPTION(trapnum, intnum, label, hdlr, ack)		\
 	START_EXCEPTION(label);						\
-	NORMAL_EXCEPTION_PROLOG(trapnum, PROLOG_ADDITION_MASKABLE)	\
+	NORMAL_EXCEPTION_PROLOG(trapnum, intnum, PROLOG_ADDITION_MASKABLE)\
 	EXCEPTION_COMMON(trapnum, PACA_EXGEN, INTS_DISABLE)		\
 	ack(r8);							\
 	CHECK_NAPPING();						\
@@ -293,7 +297,8 @@ interrupt_end_book3e:
 
 /* Critical Input Interrupt */
 	START_EXCEPTION(critical_input);
-	CRIT_EXCEPTION_PROLOG(0x100, PROLOG_ADDITION_NONE)
+	CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL,
+			      PROLOG_ADDITION_NONE)
 //	EXCEPTION_COMMON(0x100, PACA_EXCRIT, INTS_DISABLE)
 //	bl	special_reg_save_crit
 //	CHECK_NAPPING();
@@ -304,7 +309,8 @@ interrupt_end_book3e:
 
 /* Machine Check Interrupt */
 	START_EXCEPTION(machine_check);
-	MC_EXCEPTION_PROLOG(0x200, PROLOG_ADDITION_NONE)
+	MC_EXCEPTION_PROLOG(0x200, BOOKE_INTERRUPT_MACHINE_CHECK,
+			    PROLOG_ADDITION_NONE)
 //	EXCEPTION_COMMON(0x200, PACA_EXMC, INTS_DISABLE)
 //	bl	special_reg_save_mc
 //	addi	r3,r1,STACK_FRAME_OVERHEAD
@@ -315,7 +321,8 @@ interrupt_end_book3e:
 
 /* Data Storage Interrupt */
 	START_EXCEPTION(data_storage)
-	NORMAL_EXCEPTION_PROLOG(0x300, PROLOG_ADDITION_2REGS)
+	NORMAL_EXCEPTION_PROLOG(0x300, BOOKE_INTERRUPT_DATA_STORAGE,
+				PROLOG_ADDITION_2REGS)
 	mfspr	r14,SPRN_DEAR
 	mfspr	r15,SPRN_ESR
 	EXCEPTION_COMMON(0x300, PACA_EXGEN, INTS_DISABLE)
@@ -323,18 +330,21 @@ interrupt_end_book3e:
 
 /* Instruction Storage Interrupt */
 	START_EXCEPTION(instruction_storage);
-	NORMAL_EXCEPTION_PROLOG(0x400, PROLOG_ADDITION_2REGS)
+	NORMAL_EXCEPTION_PROLOG(0x400, BOOKE_INTERRUPT_INST_STORAGE,
+				PROLOG_ADDITION_2REGS)
 	li	r15,0
 	mr	r14,r10
 	EXCEPTION_COMMON(0x400, PACA_EXGEN, INTS_DISABLE)
 	b	storage_fault_common
 
 /* External Input Interrupt */
-	MASKABLE_EXCEPTION(0x500, external_input, .do_IRQ, ACK_NONE)
+	MASKABLE_EXCEPTION(0x500, BOOKE_INTERRUPT_EXTERNAL,
+			   external_input, .do_IRQ, ACK_NONE)
 
 /* Alignment */
 	START_EXCEPTION(alignment);
-	NORMAL_EXCEPTION_PROLOG(0x600, PROLOG_ADDITION_2REGS)
+	NORMAL_EXCEPTION_PROLOG(0x600, BOOKE_INTERRUPT_ALIGNMENT,
+				PROLOG_ADDITION_2REGS)
 	mfspr	r14,SPRN_DEAR
 	mfspr	r15,SPRN_ESR
 	EXCEPTION_COMMON(0x600, PACA_EXGEN, INTS_KEEP)
@@ -342,7 +352,8 @@ interrupt_end_book3e:
 
 /* Program Interrupt */
 	START_EXCEPTION(program);
-	NORMAL_EXCEPTION_PROLOG(0x700, PROLOG_ADDITION_1REG)
+	NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
+				PROLOG_ADDITION_1REG)
 	mfspr	r14,SPRN_ESR
 	EXCEPTION_COMMON(0x700, PACA_EXGEN, INTS_DISABLE)
 	std	r14,_DSISR(r1)
@@ -354,7 +365,8 @@ interrupt_end_book3e:
 
 /* Floating Point Unavailable Interrupt */
 	START_EXCEPTION(fp_unavailable);
-	NORMAL_EXCEPTION_PROLOG(0x800, PROLOG_ADDITION_NONE)
+	NORMAL_EXCEPTION_PROLOG(0x800, BOOKE_INTERRUPT_FP_UNAVAIL,
+				PROLOG_ADDITION_NONE)
 	/* we can probably do a shorter exception entry for that one... */
 	EXCEPTION_COMMON(0x800, PACA_EXGEN, INTS_KEEP)
 	ld	r12,_MSR(r1)
@@ -369,14 +381,17 @@ interrupt_end_book3e:
 	b	.ret_from_except
 
 /* Decrementer Interrupt */
-	MASKABLE_EXCEPTION(0x900, decrementer, .timer_interrupt, ACK_DEC)
+	MASKABLE_EXCEPTION(0x900, BOOKE_INTERRUPT_DECREMENTER,
+			   decrementer, .timer_interrupt, ACK_DEC)
 
 /* Fixed Interval Timer Interrupt */
-	MASKABLE_EXCEPTION(0x980, fixed_interval, .unknown_exception, ACK_FIT)
+	MASKABLE_EXCEPTION(0x980, BOOKE_INTERRUPT_FIT,
+			   fixed_interval, .unknown_exception, ACK_FIT)
 
 /* Watchdog Timer Interrupt */
 	START_EXCEPTION(watchdog);
-	CRIT_EXCEPTION_PROLOG(0x9f0, PROLOG_ADDITION_NONE)
+	CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG,
+			      PROLOG_ADDITION_NONE)
 //	EXCEPTION_COMMON(0x9f0, PACA_EXCRIT, INTS_DISABLE)
 //	bl	special_reg_save_crit
 //	CHECK_NAPPING();
@@ -395,7 +410,8 @@ interrupt_end_book3e:
 
 /* Auxiliary Processor Unavailable Interrupt */
 	START_EXCEPTION(ap_unavailable);
-	NORMAL_EXCEPTION_PROLOG(0xf20, PROLOG_ADDITION_NONE)
+	NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL,
+				PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0xf20, PACA_EXGEN, INTS_DISABLE)
 	bl	.save_nvgprs
 	addi	r3,r1,STACK_FRAME_OVERHEAD
@@ -404,7 +420,8 @@ interrupt_end_book3e:
 
 /* Debug exception as a critical interrupt*/
 	START_EXCEPTION(debug_crit);
-	CRIT_EXCEPTION_PROLOG(0xd00, PROLOG_ADDITION_2REGS)
+	CRIT_EXCEPTION_PROLOG(0xd00, BOOKE_INTERRUPT_DEBUG,
+			      PROLOG_ADDITION_2REGS)
 
 	/*
 	 * If there is a single step or branch-taken exception in an
@@ -469,7 +486,8 @@ kernel_dbg_exc:
 
 /* Debug exception as a debug interrupt*/
 	START_EXCEPTION(debug_debug);
-	DBG_EXCEPTION_PROLOG(0xd08, PROLOG_ADDITION_2REGS)
+	DBG_EXCEPTION_PROLOG(0xd00, BOOKE_INTERRUPT_DEBUG,
+						 PROLOG_ADDITION_2REGS)
 
 	/*
 	 * If there is a single step or branch-taken exception in an
@@ -530,18 +548,21 @@ kernel_dbg_exc:
 	b	.ret_from_except
 
 	START_EXCEPTION(perfmon);
-	NORMAL_EXCEPTION_PROLOG(0x260, PROLOG_ADDITION_NONE)
+	NORMAL_EXCEPTION_PROLOG(0x260, BOOKE_INTERRUPT_PERFORMANCE_MONITOR,
+				PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x260, PACA_EXGEN, INTS_DISABLE)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.performance_monitor_exception
 	b	.ret_from_except_lite
 
 /* Doorbell interrupt */
-	MASKABLE_EXCEPTION(0x280, doorbell, .doorbell_exception, ACK_NONE)
+	MASKABLE_EXCEPTION(0x280, BOOKE_INTERRUPT_DOORBELL,
+			   doorbell, .doorbell_exception, ACK_NONE)
 
 /* Doorbell critical Interrupt */
 	START_EXCEPTION(doorbell_crit);
-	CRIT_EXCEPTION_PROLOG(0x2a0, PROLOG_ADDITION_NONE)
+	CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL,
+			      PROLOG_ADDITION_NONE)
 //	EXCEPTION_COMMON(0x2a0, PACA_EXCRIT, INTS_DISABLE)
 //	bl	special_reg_save_crit
 //	CHECK_NAPPING();
@@ -555,7 +576,8 @@ kernel_dbg_exc:
  *	This general exception use GSRRx save/restore registers
  */
 	START_EXCEPTION(guest_doorbell);
-	GDBELL_EXCEPTION_PROLOG(0x2c0, PROLOG_ADDITION_NONE)
+	GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL,
+			        PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x2c0, PACA_EXGEN, INTS_KEEP)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.save_nvgprs
@@ -565,7 +587,8 @@ kernel_dbg_exc:
 
 /* Guest Doorbell critical Interrupt */
 	START_EXCEPTION(guest_doorbell_crit);
-	CRIT_EXCEPTION_PROLOG(0x2e0, PROLOG_ADDITION_NONE)
+	CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT,
+			      PROLOG_ADDITION_NONE)
 //	EXCEPTION_COMMON(0x2e0, PACA_EXCRIT, INTS_DISABLE)
 //	bl	special_reg_save_crit
 //	CHECK_NAPPING();
@@ -576,7 +599,8 @@ kernel_dbg_exc:
 
 /* Hypervisor call */
 	START_EXCEPTION(hypercall);
-	NORMAL_EXCEPTION_PROLOG(0x310, PROLOG_ADDITION_NONE)
+	NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL,
+			        PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x310, PACA_EXGEN, INTS_KEEP)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.save_nvgprs
@@ -586,7 +610,8 @@ kernel_dbg_exc:
 
 /* Embedded Hypervisor priviledged  */
 	START_EXCEPTION(ehpriv);
-	NORMAL_EXCEPTION_PROLOG(0x320, PROLOG_ADDITION_NONE)
+	NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV,
+			        PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x320, PACA_EXGEN, INTS_KEEP)
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.save_nvgprs
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S
index f09d48e..d884fa4 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/tlb_low_64e.S
@@ -20,6 +20,8 @@
 #include <asm/pgtable.h>
 #include <asm/exception-64e.h>
 #include <asm/ppc-opcode.h>
+#include <asm/kvm_asm.h>
+#include <asm/kvm_booke_hv_asm.h>
 
 #ifdef CONFIG_PPC_64K_PAGES
 #define VPTE_PMD_SHIFT	(PTE_INDEX_SIZE+1)
@@ -37,12 +39,18 @@
  *                                                                    *
  **********************************************************************/
 
-.macro tlb_prolog_bolted addr
+.macro tlb_prolog_bolted intnum addr
 	mtspr	SPRN_SPRG_TLB_SCRATCH,r13
 	mfspr	r13,SPRN_SPRG_PACA
 	std	r10,PACA_EXTLB+EX_TLB_R10(r13)
 	mfcr	r10
 	std	r11,PACA_EXTLB+EX_TLB_R11(r13)
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+	mfspr	r11, SPRN_SRR1
+END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
+#endif
+	DO_KVM	\intnum, SPRN_SRR1
 	std	r16,PACA_EXTLB+EX_TLB_R16(r13)
 	mfspr	r16,\addr		/* get faulting address */
 	std	r14,PACA_EXTLB+EX_TLB_R14(r13)
@@ -66,7 +74,7 @@
 
 /* Data TLB miss */
 	START_EXCEPTION(data_tlb_miss_bolted)
-	tlb_prolog_bolted SPRN_DEAR
+	tlb_prolog_bolted BOOKE_INTERRUPT_DTLB_MISS SPRN_DEAR
 
 	/* We need _PAGE_PRESENT and  _PAGE_ACCESSED set */
 
@@ -214,7 +222,7 @@ itlb_miss_fault_bolted:
 
 /* Instruction TLB miss */
 	START_EXCEPTION(instruction_tlb_miss_bolted)
-	tlb_prolog_bolted SPRN_SRR0
+	tlb_prolog_bolted BOOKE_INTERRUPT_ITLB_MISS SPRN_SRR0
 
 	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
 	srdi	r15,r16,60		/* get region */
-- 
1.7.4.1

^ permalink raw reply related

* Re: [PATCH V4 3/3] powerpc/fsl-pci: Unify pci/pcie initialization code
From: Scott Wood @ 2012-08-06 14:03 UTC (permalink / raw)
  To: Tabi Timur-B04825
  Cc: Wood Scott-B07421, linuxppc-dev@lists.ozlabs.org, Li Yang-R58472,
	Jia Hongtao-B38951
In-Reply-To: <CAOZdJXWFPn2XqyTR4jUkUZ5NV-h7HRWcnm392qUYJ98tg8H7Ow@mail.gmail.com>

On 08/05/2012 11:11 PM, Tabi Timur-B04825 wrote:
> On Fri, Aug 3, 2012 at 11:09 AM, Scott Wood <scottwood@freescale.com> wrote:
> 
>> p1022ds OTOH is weird enough that it deserves its own board file.
> 
> What's so weird about the P1022DS?

The localbus muxing.

> Also, why do we need a default PCI bus if one isn't specified in the
> device tree?

Because there are bugs in the Linux PPC PCI code when you don't have a
primary bus -- see recent upstream linuxppc-dev discussion.

-Scott

^ permalink raw reply

* Re: [PATCH V5 3/3] powerpc/fsl-pci: Unify pci/pcie initialization code
From: Scott Wood @ 2012-08-06 15:09 UTC (permalink / raw)
  To: Jia Hongtao-B38951
  Cc: Wood Scott-B07421, linuxppc-dev@lists.ozlabs.org, Li Yang-R58472
In-Reply-To: <412C8208B4A0464FA894C5F0C278CD5D01A36D0A@039-SN1MPN1-002.039d.mgd.msft.net>

On 08/05/2012 10:07 PM, Jia Hongtao-B38951 wrote:
> 
> 
>> -----Original Message-----
>> From: Wood Scott-B07421
>> Sent: Saturday, August 04, 2012 12:28 AM
>> To: Jia Hongtao-B38951
>> Cc: linuxppc-dev@lists.ozlabs.org; galak@kernel.crashing.org; Li Yang-
>> R58472; Wood Scott-B07421
>> Subject: Re: [PATCH V5 3/3] powerpc/fsl-pci: Unify pci/pcie
>> initialization code
>>
>> On 08/03/2012 05:14 AM, Jia Hongtao wrote:
>>> -void __devinit fsl_pci_init(void)
>>> +/* Checkout if PCI contains ISA node */
>>> +static int of_pci_has_isa(struct device_node *pci_node)
>>> +{
>>> +	struct device_node *np;
>>> +	int ret = 0;
>>> +
>>> +	if (!pci_node)
>>> +		return 0;
>>> +
>>> +	read_lock(&devtree_lock);
>>> +	np = pci_node->allnext;
>>> +
>>> +	/* Only scan the children of PCI node */
>>> +	for (; np != pci_node->sibling; np = np->allnext) {
>>> +		if (np->type && (of_node_cmp(np->type, "isa") == 0)
>>> +		    && of_node_get(np)) {
>>> +			ret = 1;
>>> +			break;
>>> +		}
>>> +	}
>>> +
>>> +	of_node_put(pci_node);
>>> +	read_unlock(&devtree_lock);
>>> +
>>> +	return ret;
>>> +}
>>
>> Why do you keep insisting on substituting your ISA search code here?
>> What advantages does it have over the code that is already there?  It
>> unnecessarily digs into the internals of the tree representation.
>>
> 
> I want ISA search is done from probe.

Too bad.  You're breaking the case where there's no ISA node.

> Also this way is more efficient due
> to we just search the children of PCI.

It is not more efficient, because you're doing the search for every PCIe
bus rather than once.  Not that it matters in this context.

>>> +
>>> +static int __devinit fsl_pci_probe(struct platform_device *pdev)
>>>  {
>>>  	int ret;
>>> -	struct device_node *node;
>>>  	struct pci_controller *hose;
>>> -	dma_addr_t max = 0xffffffff;
>>> +	int is_primary = 0;
>>>
>>> -	/* Callers can specify the primary bus using other means. */
>>>  	if (!fsl_pci_primary) {
>>> -		/* If a PCI host bridge contains an ISA node, it's primary.
>> */
>>> -		node = of_find_node_by_type(NULL, "isa");
>>> -		while ((fsl_pci_primary = of_get_parent(node))) {
>>> -			of_node_put(node);
>>> -			node = fsl_pci_primary;
>>> -
>>> -			if (of_match_node(pci_ids, node))
>>> -				break;
>>> -		}
>>> +		is_primary = of_pci_has_isa(pdev->dev.of_node);
>>> +		if (is_primary)
>>> +			fsl_pci_primary = pdev->dev.of_node;
>>>  	}
>>
>> As I explained before, this has to be done globally, not from the probe
>> function, so we can assign a default primary bus if there isn't any ISA.
>>  There are bugs in the Linux PPC PCI code relating to not having any
>> primary bus.
>>
>> -Scott
> 
> In my way of searching ISA you can also assign a default primary bus in board
> specific files. 

That was meant for when the board file had an alternate way of searching
for the primary bus (e.g. look for i8259), not as a replacement for the
mechanism that guarantees there's a primary bus.

You are causing a regression in the qemu_e500.c platform.

> I read your code and found that if there is no ISA node you will assign the
> first PCI bus scanned as primary. It's not all right. Take ge_imp3a as an
> example: The second PCI bus (9000) is primary not the first one.

Does that board have ISA on it, that isn't described by the device tree?
 If so, before converting to the new init mechanism, the board code will
need to set fsl_pci_primary based on its own knowledge of where that ISA
is.  If it doesn't have ISA, it doesn't matter which one we designate as
primary.

> I doubt that there are bugs if no primary assigned.

Yeah, I just implemented the fallback for fun.  Come on.

It was recently discussed on this list.  PCI under QEMU did not work
without it.

> Like mpc85xx_rdb assigned
> no primary at all. Some other boards has no primary ether like p1022ds, p1021mds,
> p1010rdb, p1023rds, all corenet boards (p2041_rdb, p3041_ds, p4080_ds, p5020_ds,
> p5040_ds). If no primary is a bug then all these boards above are not correctly
> setting up.

Those boards are not being correctly set up.  On real hardware things
work by chance, but not under QEMU.

-Scott

^ permalink raw reply

* Re: [PATCH V4 3/3] powerpc/fsl-pci: Unify pci/pcie initialization code
From: Scott Wood @ 2012-08-06 15:15 UTC (permalink / raw)
  To: Jia Hongtao-B38951
  Cc: Wood Scott-B07421, linuxppc-dev@lists.ozlabs.org, Li Yang-R58472
In-Reply-To: <412C8208B4A0464FA894C5F0C278CD5D01A36CE7@039-SN1MPN1-002.039d.mgd.msft.net>

On 08/05/2012 09:39 PM, Jia Hongtao-B38951 wrote:
> 
> 
>> -----Original Message-----
>> From: Wood Scott-B07421
>> Sent: Saturday, August 04, 2012 12:04 AM
>> To: Jia Hongtao-B38951
>> Cc: Kumar Gala; linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421; Li
>> Yang-R58472
>> Subject: Re: [PATCH V4 3/3] powerpc/fsl-pci: Unify pci/pcie
>> initialization code
>>
>> On 08/02/2012 10:39 PM, Jia Hongtao-B38951 wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Kumar Gala [mailto:galak@kernel.crashing.org]
>>>> Sent: Thursday, August 02, 2012 8:24 PM
>>>> To: Jia Hongtao-B38951
>>>> Cc: linuxppc-dev@lists.ozlabs.org; Wood Scott-B07421; Li Yang-R58472
>>>> Subject: Re: [PATCH V4 3/3] powerpc/fsl-pci: Unify pci/pcie
>>>> initialization code
>>>>
>>>> You need to convert all boards to use fsl_pci_init before this patch.
>>>> Otherwise we'll end up with PCI getting initialized twice on boards.
>>>>
>>>> - k
>>>
>>> If we covert all boards with platform driver in this patch PCI will
>>> be initialized only once without converting all boards to use
>>> fsl_pci_init first.
>>
>> Then we'd have to pick apart core changes from board changes when
>> reviewing.
>>
>>> If we convert all boards to use fsl_pci_init before this patch and
>>> convert them to use platform driver again after this patch. Then
>>> between this patch and next pci will be initialized twice too.
>>
>> Why?  That one patch should both create the platform driver and remove
>> the init from fsl_pci_init() -- except things like primary bus detection
>> which has to happen globally.
>>
>> -Scott
> 
> "One patch both create the platform driver and remove the init from
> fsl_pci_init()" means we should create platform driver and applied to
> all boards. If so why not just directly convert all boards using platform
> driver?

Because it's harder to review when you have a bunch of board code in the
patch in addition to core changes.

Because you might want people to actually test on the boards in question
when converting, especially given the change in how primary buses are
determined, and that some boards may need to provide their own alternative.

-Scott

^ permalink raw reply

* Re: [PATCH 3/3 v4] powerpc/mpic: FSL MPIC error interrupt support.
From: Kumar Gala @ 2012-08-06 15:52 UTC (permalink / raw)
  To: Varun Sethi; +Cc: Bogdan Hamciuc, linuxppc-dev
In-Reply-To: <1344257045-11200-1-git-send-email-Varun.Sethi@freescale.com>


On Aug 6, 2012, at 7:44 AM, Varun Sethi wrote:

> All SOC device error interrupts are muxed and delivered to the core
> as a single MPIC error interrupt. Currently all the device drivers
> requiring access to device errors have to register for the MPIC error
> interrupt as a shared interrupt.
>=20
> With this patch we add interrupt demuxing capability in the mpic =
driver,
> allowing device drivers to register for their individual error =
interrupts.
> This is achieved by handling error interrupts in a cascaded fashion.
>=20
> MPIC error interrupt is handled by the "error_int_handler", which
> subsequently demuxes it using the EISR and delivers it to the =
respective
> drivers.=20
>=20
> The error interrupt capability is dependent on the MPIC EIMR register,
> which was introduced in FSL MPIC version 4.1 (P4080 rev2). So, error
> interrupt demuxing capability is dependent on the MPIC version and can
> be used for versions >=3D 4.1.
>=20
> Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
> Signed-off-by: Bogdan Hamciuc <bogdan.hamciuc@freescale.com>
> [In the initial version of the patch we were using handle_simple_irq
> as the handler for cascaded error interrupts, this resulted
> in issues in case of threaded isrs (with RT kernel). This issue was
> debugged by Bogdan and decision was taken to use the handle_level_irq
> handler]
> ---
> arch/powerpc/include/asm/mpic.h    |   16 ++++
> arch/powerpc/sysdev/Makefile       |    2 +-
> arch/powerpc/sysdev/fsl_mpic_err.c |  153 =
++++++++++++++++++++++++++++++++++++
> arch/powerpc/sysdev/mpic.c         |   45 ++++++++++-
> arch/powerpc/sysdev/mpic.h         |   22 +++++
> 5 files changed, 236 insertions(+), 2 deletions(-)
> create mode 100644 arch/powerpc/sysdev/fsl_mpic_err.c
>=20
> diff --git a/arch/powerpc/include/asm/mpic.h =
b/arch/powerpc/include/asm/mpic.h
> index e14d35d..6c8e53b 100644
> --- a/arch/powerpc/include/asm/mpic.h
> +++ b/arch/powerpc/include/asm/mpic.h
> @@ -118,6 +118,9 @@
> #define MPIC_MAX_CPUS		32
> #define MPIC_MAX_ISU		32
>=20
> +#define MPIC_MAX_ERR      32
> +#define MPIC_FSL_ERR_INT  16
> +
> /*
>  * Tsi108 implementation of MPIC has many differences from the =
original one
>  */
> @@ -270,6 +273,7 @@ struct mpic
> 	struct irq_chip		hc_ipi;
> #endif
> 	struct irq_chip		hc_tm;
> +	struct irq_chip		hc_err;
> 	const char		*name;
> 	/* Flags */
> 	unsigned int		flags;
> @@ -283,6 +287,8 @@ struct mpic
> 	/* vector numbers used for internal sources (ipi/timers) */
> 	unsigned int		ipi_vecs[4];
> 	unsigned int		timer_vecs[8];
> +	/* vector numbers used for FSL MPIC error interrupts */
> +	unsigned int		err_int_vecs[MPIC_MAX_ERR];
>=20
> 	/* Spurious vector to program into unused sources */
> 	unsigned int		spurious_vec;
> @@ -306,6 +312,11 @@ struct mpic
> 	struct mpic_reg_bank	cpuregs[MPIC_MAX_CPUS];
> 	struct mpic_reg_bank	isus[MPIC_MAX_ISU];
>=20
> +	/* ioremap'ed base for error interrupt registers */
> +	u32 __iomem	*err_regs;
> +	/* error interrupt config */
> +	u32			err_int_config_done;

I thought we were going to remove this as it don't really provide any =
value.

> +
> 	/* Protected sources */
> 	unsigned long		*protected;
>=20
> @@ -370,6 +381,11 @@ struct mpic
> #define MPIC_NO_RESET			0x00004000
> /* Freescale MPIC (compatible includes "fsl,mpic") */
> #define MPIC_FSL			0x00008000
> +/* Freescale MPIC supports EIMR (error interrupt mask register).
> + * This flag is set for MPIC version >=3D 4.1 (version determined
> + * from the BRR1 register).
> +*/
> +#define MPIC_FSL_HAS_EIMR		0x00010000
>=20
> /* MPIC HW modification ID */
> #define MPIC_REGSET_MASK		0xf0000000

- k=

^ permalink raw reply

* Re: [PATCH 2/4] powerpc/booke: Merge the 32 bit e5500/e500mc cpu setup code.
From: Kumar Gala @ 2012-08-06 15:58 UTC (permalink / raw)
  To: Sethi Varun-B16395
  Cc: linuxppc-dev@lists.ozlabs.org, agraf@suse.de,
	kvm-ppc@vger.kernel.org
In-Reply-To: <C5ECD7A89D1DC44195F34B25E172658D15ADB3@039-SN2MPN1-013.039d.mgd.msft.net>


On Aug 4, 2012, at 1:31 PM, Sethi Varun-B16395 wrote:

>=20
>=20
>> -----Original Message-----
>> From: Kumar Gala [mailto:galak@kernel.crashing.org]
>> Sent: Friday, August 03, 2012 10:04 PM
>> To: Sethi Varun-B16395
>> Cc: agraf@suse.de; benh@kernel.crashing.org; linuxppc-
>> dev@lists.ozlabs.org; kvm-ppc@vger.kernel.org
>> Subject: Re: [PATCH 2/4] powerpc/booke: Merge the 32 bit e5500/e500mc =
cpu
>> setup code.
>>=20
>>=20
>> On Jul 9, 2012, at 7:58 AM, Varun Sethi wrote:
>>=20
>>> Merge the 32 bit cpu setup code for e500mc/e5500 and define the
>> "cpu_restore"
>>> routine (for e5500/e6500) only for the 64 bit case. The cpu_restore
>>> routine is used in the 64 bit case for setting up the secondary =
cores.
>>>=20
>>> Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
>>> ---
>>> arch/powerpc/kernel/cpu_setup_fsl_booke.S |    1 +
>>> arch/powerpc/kernel/cputable.c            |    4 ++++
>>> 2 files changed, 5 insertions(+), 0 deletions(-)
>>>=20
>>> diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
>>> b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
>>> index a55d028..5e87737 100644
>>> --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
>>> +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
>>> @@ -75,6 +75,7 @@ _GLOBAL(__setup_cpu_e500v2)
>>> 	mtlr	r4
>>> 	blr
>>> _GLOBAL(__setup_cpu_e500mc)
>>> +_GLOBAL(__setup_cpu_e5500)
>>=20
>> This is a bit confusing, as we now have duplicated =
__setup_cpu_e5500()
>> between the ppc32 and ppc64 cases.
>>=20
>> If you build this patch for corenet32_smp_defconfig it fails.
> [Sethi Varun-B16395] I am able to build without any issue with the =
same config.
>=20
> -Varun

If you build corenet32_smp_defconfig at commit:

commit c5537ef2d672d2cf48d4e4ac754781c8db112843
Author: Varun Sethi <Varun.Sethi@freescale.com>
Date:   Mon Jul 9 18:28:21 2012 +0530

    powerpc/booke: Merge the 32 bit e5500/e500mc cpu setup code.
   =20
You get the following build error:

arch/powerpc/kernel/cpu_setup_fsl_booke.S: Assembler messages:
arch/powerpc/kernel/cpu_setup_fsl_booke.S:110: Error: symbol =
`__setup_cpu_e5500' is already defined

- k=

^ permalink raw reply

* Re: [PATCH 1/1] booke/wdt: fix incorrect WDIOC_GETSUPPORT return path
From: Kumar Gala @ 2012-08-06 16:13 UTC (permalink / raw)
  To: tiejun.chen, Timur Tabi; +Cc: linux-watchdog, linuxppc-dev@ozlabs.org list
In-Reply-To: <501F35C2.6090005@windriver.com>


On Aug 5, 2012, at 10:10 PM, tiejun.chen wrote:

> On 07/30/2012 04:15 PM, Tiejun Chen wrote:
>> We miss that correct WDIOC_GETSUPPORT return path when perform
>> copy_to_user() properly.
> 
> Any comments?
> 
> Thanks
> Tiejun

Adding Timur, as he's touched watchdog last.

- k

>> Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>
>> ---
>> drivers/watchdog/booke_wdt.c |    7 ++++---
>> 1 files changed, 4 insertions(+), 3 deletions(-)
>> 
>> diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c
>> index 3fe82d0..2be7f29 100644
>> --- a/drivers/watchdog/booke_wdt.c
>> +++ b/drivers/watchdog/booke_wdt.c
>> @@ -162,12 +162,13 @@ static long booke_wdt_ioctl(struct file *file,
>> 				unsigned int cmd, unsigned long arg)
>> {
>> 	u32 tmp = 0;
>> -	u32 __user *p = (u32 __user *)arg;
>> +	void __user *argp = (u32 __user *)arg;
>> +	u32 __user *p = argp;
>> 
>> 	switch (cmd) {
>> 	case WDIOC_GETSUPPORT:
>> -		if (copy_to_user((void *)arg, &ident, sizeof(ident)))
>> -			return -EFAULT;
>> +		return copy_to_user(argp, &ident,
>> +				sizeof(ident)) ? -EFAULT : 0;
>> 	case WDIOC_GETSTATUS:
>> 		return put_user(0, p);
>> 	case WDIOC_GETBOOTSTATUS:
>> 

^ permalink raw reply

* RE: [PATCH 3/3 v4] powerpc/mpic: FSL MPIC error interrupt support.
From: Sethi Varun-B16395 @ 2012-08-06 16:22 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev@lists.ozlabs.org, Hamciuc Bogdan-BHAMCIU1
In-Reply-To: <FAD36277-EA2D-49D7-BD98-2F132CA886AA@kernel.crashing.org>



> -----Original Message-----
> From: Kumar Gala [mailto:galak@kernel.crashing.org]
> Sent: Monday, August 06, 2012 9:23 PM
> To: Sethi Varun-B16395
> Cc: linuxppc-dev@lists.ozlabs.org; Hamciuc Bogdan-BHAMCIU1
> Subject: Re: [PATCH 3/3 v4] powerpc/mpic: FSL MPIC error interrupt
> support.
>=20
>=20
> On Aug 6, 2012, at 7:44 AM, Varun Sethi wrote:
>=20
> > All SOC device error interrupts are muxed and delivered to the core as
> > a single MPIC error interrupt. Currently all the device drivers
> > requiring access to device errors have to register for the MPIC error
> > interrupt as a shared interrupt.
> >
> > With this patch we add interrupt demuxing capability in the mpic
> > driver, allowing device drivers to register for their individual error
> interrupts.
> > This is achieved by handling error interrupts in a cascaded fashion.
> >
> > MPIC error interrupt is handled by the "error_int_handler", which
> > subsequently demuxes it using the EISR and delivers it to the
> > respective drivers.
> >
> > The error interrupt capability is dependent on the MPIC EIMR register,
> > which was introduced in FSL MPIC version 4.1 (P4080 rev2). So, error
> > interrupt demuxing capability is dependent on the MPIC version and can
> > be used for versions >=3D 4.1.
> >
> > Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
> > Signed-off-by: Bogdan Hamciuc <bogdan.hamciuc@freescale.com> [In the
> > initial version of the patch we were using handle_simple_irq as the
> > handler for cascaded error interrupts, this resulted in issues in case
> > of threaded isrs (with RT kernel). This issue was debugged by Bogdan
> > and decision was taken to use the handle_level_irq handler]
> > ---
> > arch/powerpc/include/asm/mpic.h    |   16 ++++
> > arch/powerpc/sysdev/Makefile       |    2 +-
> > arch/powerpc/sysdev/fsl_mpic_err.c |  153
> ++++++++++++++++++++++++++++++++++++
> > arch/powerpc/sysdev/mpic.c         |   45 ++++++++++-
> > arch/powerpc/sysdev/mpic.h         |   22 +++++
> > 5 files changed, 236 insertions(+), 2 deletions(-) create mode 100644
> > arch/powerpc/sysdev/fsl_mpic_err.c
> >
> > diff --git a/arch/powerpc/include/asm/mpic.h
> > b/arch/powerpc/include/asm/mpic.h index e14d35d..6c8e53b 100644
> > --- a/arch/powerpc/include/asm/mpic.h
> > +++ b/arch/powerpc/include/asm/mpic.h
> > @@ -118,6 +118,9 @@
> > #define MPIC_MAX_CPUS		32
> > #define MPIC_MAX_ISU		32
> >
> > +#define MPIC_MAX_ERR      32
> > +#define MPIC_FSL_ERR_INT  16
> > +
> > /*
> >  * Tsi108 implementation of MPIC has many differences from the
> > original one  */ @@ -270,6 +273,7 @@ struct mpic
> > 	struct irq_chip		hc_ipi;
> > #endif
> > 	struct irq_chip		hc_tm;
> > +	struct irq_chip		hc_err;
> > 	const char		*name;
> > 	/* Flags */
> > 	unsigned int		flags;
> > @@ -283,6 +287,8 @@ struct mpic
> > 	/* vector numbers used for internal sources (ipi/timers) */
> > 	unsigned int		ipi_vecs[4];
> > 	unsigned int		timer_vecs[8];
> > +	/* vector numbers used for FSL MPIC error interrupts */
> > +	unsigned int		err_int_vecs[MPIC_MAX_ERR];
> >
> > 	/* Spurious vector to program into unused sources */
> > 	unsigned int		spurious_vec;
> > @@ -306,6 +312,11 @@ struct mpic
> > 	struct mpic_reg_bank	cpuregs[MPIC_MAX_CPUS];
> > 	struct mpic_reg_bank	isus[MPIC_MAX_ISU];
> >
> > +	/* ioremap'ed base for error interrupt registers */
> > +	u32 __iomem	*err_regs;
> > +	/* error interrupt config */
> > +	u32			err_int_config_done;
>=20
> I thought we were going to remove this as it don't really provide any
> value.
>=20
[Sethi Varun-B16395] We need a way to determine that irq handle got registe=
red for=20
Mpic error interrupt, only then can we go ahead and assign individual (casc=
aded)
error interrupts. Initially we were doing the same thing while translating=
=20
error interrupt specifier, now we are registering the handler in mpic_init.

-Varun=20

^ permalink raw reply

* RE: [PATCH 2/4] powerpc/booke: Merge the 32 bit e5500/e500mc cpu setup code.
From: Sethi Varun-B16395 @ 2012-08-06 16:24 UTC (permalink / raw)
  To: Kumar Gala
  Cc: linuxppc-dev@lists.ozlabs.org, agraf@suse.de,
	kvm-ppc@vger.kernel.org
In-Reply-To: <D762F063-8210-41EF-B583-F277967EA279@kernel.crashing.org>



> -----Original Message-----
> From: Kumar Gala [mailto:galak@kernel.crashing.org]
> Sent: Monday, August 06, 2012 9:28 PM
> To: Sethi Varun-B16395
> Cc: agraf@suse.de; benh@kernel.crashing.org; linuxppc-
> dev@lists.ozlabs.org; kvm-ppc@vger.kernel.org
> Subject: Re: [PATCH 2/4] powerpc/booke: Merge the 32 bit e5500/e500mc cpu
> setup code.
>=20
>=20
> On Aug 4, 2012, at 1:31 PM, Sethi Varun-B16395 wrote:
>=20
> >
> >
> >> -----Original Message-----
> >> From: Kumar Gala [mailto:galak@kernel.crashing.org]
> >> Sent: Friday, August 03, 2012 10:04 PM
> >> To: Sethi Varun-B16395
> >> Cc: agraf@suse.de; benh@kernel.crashing.org; linuxppc-
> >> dev@lists.ozlabs.org; kvm-ppc@vger.kernel.org
> >> Subject: Re: [PATCH 2/4] powerpc/booke: Merge the 32 bit e5500/e500mc
> >> cpu setup code.
> >>
> >>
> >> On Jul 9, 2012, at 7:58 AM, Varun Sethi wrote:
> >>
> >>> Merge the 32 bit cpu setup code for e500mc/e5500 and define the
> >> "cpu_restore"
> >>> routine (for e5500/e6500) only for the 64 bit case. The cpu_restore
> >>> routine is used in the 64 bit case for setting up the secondary
> cores.
> >>>
> >>> Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
> >>> ---
> >>> arch/powerpc/kernel/cpu_setup_fsl_booke.S |    1 +
> >>> arch/powerpc/kernel/cputable.c            |    4 ++++
> >>> 2 files changed, 5 insertions(+), 0 deletions(-)
> >>>
> >>> diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
> >>> b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
> >>> index a55d028..5e87737 100644
> >>> --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
> >>> +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
> >>> @@ -75,6 +75,7 @@ _GLOBAL(__setup_cpu_e500v2)
> >>> 	mtlr	r4
> >>> 	blr
> >>> _GLOBAL(__setup_cpu_e500mc)
> >>> +_GLOBAL(__setup_cpu_e5500)
> >>
> >> This is a bit confusing, as we now have duplicated
> >> __setup_cpu_e5500() between the ppc32 and ppc64 cases.
> >>
> >> If you build this patch for corenet32_smp_defconfig it fails.
> > [Sethi Varun-B16395] I am able to build without any issue with the same
> config.
> >
> > -Varun
>=20
> If you build corenet32_smp_defconfig at commit:
>=20
> commit c5537ef2d672d2cf48d4e4ac754781c8db112843
> Author: Varun Sethi <Varun.Sethi@freescale.com>
> Date:   Mon Jul 9 18:28:21 2012 +0530
>=20
>     powerpc/booke: Merge the 32 bit e5500/e500mc cpu setup code.
>=20
> You get the following build error:
>=20
> arch/powerpc/kernel/cpu_setup_fsl_booke.S: Assembler messages:
> arch/powerpc/kernel/cpu_setup_fsl_booke.S:110: Error: symbol
> `__setup_cpu_e5500' is already defined
>=20
Oh.., didn't realize that. Thanks for fixing this.

-Varun

^ permalink raw reply

* Re: [PATCH v6 5/8] fsl-dma: change release process of dma descriptor for supporting async_tx
From: Ira W. Snyder @ 2012-08-06 17:51 UTC (permalink / raw)
  To: qiang.liu
  Cc: arnd, vinod.koul, gregkh, linux-kernel, dan.j.williams, herbert,
	linux-crypto, dan.j.williams, linuxppc-dev, davem
In-Reply-To: <1344248073-9276-1-git-send-email-qiang.liu@freescale.com>

On Mon, Aug 06, 2012 at 06:14:33PM +0800, qiang.liu@freescale.com wrote:
> From: Qiang Liu <qiang.liu@freescale.com>
> 
> Fix the potential risk when enable config NET_DMA and ASYNC_TX.
> Async_tx is lack of support in current release process of dma descriptor,
> all descriptors will be released whatever is acked or no-acked by async_tx,
> so there is a potential race condition when dma engine is uesd by others
> clients (e.g. when enable NET_DMA to offload TCP).
> 
> In our case, a race condition which is raised when use both of talitos
> and dmaengine to offload xor is because napi scheduler will sync all
> pending requests in dma channels, it affects the process of raid operations
> due to ack_tx is not checked in fsl dma. The no-acked descriptor is freed
> which is submitted just now, as a dependent tx, this freed descriptor trigger
> BUG_ON(async_tx_test_ack(depend_tx)) in async_tx_submit().
> 
> TASK = ee1a94a0[1390] 'md0_raid5' THREAD: ecf40000 CPU: 0
> GPR00: 00000001 ecf41ca0 ee44/921a94a0 0000003f 00000001 c00593e4 00000000 00000001
> GPR08: 00000000 a7a7a7a7 00000001 045/920000002 42028042 100a38d4 ed576d98 00000000
> GPR16: ed5a11b0 00000000 2b162000 00000200 046/920000000 2d555000 ed3015e8 c15a7aa0
> GPR24: 00000000 c155fc40 00000000 ecb63220 ecf41d28 e47/92f640bb0 ef640c30 ecf41ca0
> NIP [c02b048c] async_tx_submit+0x6c/0x2b4
> LR [c02b068c] async_tx_submit+0x26c/0x2b4
> Call Trace:
> [ecf41ca0] [c02b068c] async_tx_submit+0x26c/0x2b448/92 (unreliable)
> [ecf41cd0] [c02b0a4c] async_memcpy+0x240/0x25c
> [ecf41d20] [c0421064] async_copy_data+0xa0/0x17c
> [ecf41d70] [c0421cf4] __raid_run_ops+0x874/0xe10
> [ecf41df0] [c0426ee4] handle_stripe+0x820/0x25e8
> [ecf41e90] [c0429080] raid5d+0x3d4/0x5b4
> [ecf41f40] [c04329b8] md_thread+0x138/0x16c
> [ecf41f90] [c008277c] kthread+0x8c/0x90
> [ecf41ff0] [c0011630] kernel_thread+0x4c/0x68
> 
> Another modification in this patch is the change of completed descriptors,
> there is a potential risk which caused by exception interrupt, all descriptors
> in ld_running list are seemed completed when an interrupt raised, it works fine
> under normal condition, but if there is an exception occured, it cannot work
> as our excepted. Hardware should not be depend on s/w list, the right way is
> to read current descriptor address register to find the last completed
> descriptor. If an interrupt is raised by an error, all descriptors in ld_running
> should not be seemed finished, or these unfinished descriptors in ld_running
> will be released wrongly.
> 
> A simple way to reproduce,
> Enable dmatest first, then insert some bad descriptors which can trigger
> Programming Error interrupts before the good descriptors. Last, the good
> descriptors will be freed before they are processsed because of the exception
> intrerrupt.
> 
> Note: the bad descriptors are only for simulating an exception interrupt.
> This case can illustrate the potential risk in current fsl-dma very well.
> 
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Dan Williams <dan.j.williams@gmail.com>
> Cc: Vinod Koul <vinod.koul@intel.com>
> Cc: Li Yang <leoli@freescale.com>
> Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
> Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>

There are two minor nitpicks below. Other than that, the patch looks
excellent to me.

Ira

> ---
>  drivers/dma/fsldma.c |  232 ++++++++++++++++++++++++++++++++++----------------
>  drivers/dma/fsldma.h |   17 +++-
>  2 files changed, 174 insertions(+), 75 deletions(-)
> 
> diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
> index 36490a3..938d8c1 100644
> --- a/drivers/dma/fsldma.c
> +++ b/drivers/dma/fsldma.c
> @@ -472,6 +472,110 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor(struct fsldma_chan *chan)
>  }
> 
>  /**
> + * fsldma_clean_completed_descriptor - free all descriptors which
> + * has been completed and acked
> + * @chan: Freescale DMA channel
> + *
> + * This function is used on all completed and acked descriptors.
> + * All descriptors should only be freed in this function.
> + */
> +static void
> +fsldma_clean_completed_descriptor(struct fsldma_chan *chan)
> +{
> +	struct fsl_desc_sw *desc, *_desc;
> +
> +	/* Run the callback for each descriptor, in order */
> +	list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node)
> +		if (async_tx_test_ack(&desc->async_tx))
> +			fsl_dma_free_descriptor(chan, desc);
> +}
> +
> +/**
> + * fsldma_run_tx_complete_actions - cleanup a single link descriptor
> + * @chan: Freescale DMA channel
> + * @desc: descriptor to cleanup and free
> + * @cookie: Freescale DMA transaction identifier
> + *
> + * This function is used on a descriptor which has been executed by the DMA
> + * controller. It will run any callbacks, submit any dependencies.
> + */
> +static dma_cookie_t fsldma_run_tx_complete_actions(struct fsldma_chan *chan,
> +		struct fsl_desc_sw *desc, dma_cookie_t cookie)
> +{
> +	struct dma_async_tx_descriptor *txd = &desc->async_tx;
> +	struct device *dev = chan->common.device->dev;
> +	dma_addr_t src = get_desc_src(chan, desc);
> +	dma_addr_t dst = get_desc_dst(chan, desc);
> +	u32 len = get_desc_cnt(chan, desc);
> +
> +	BUG_ON(txd->cookie < 0);
> +
> +	if (txd->cookie > 0) {
> +		cookie = txd->cookie;
> +
> +		/* Run the link descriptor callback function */
> +		if (txd->callback) {
> +#ifdef FSL_DMA_LD_DEBUG
> +			chan_dbg(chan, "LD %p callback\n", desc);
> +#endif
> +			txd->callback(txd->callback_param);
> +		}
> +
> +		/* Unmap the dst buffer, if requested */
> +		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
> +			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
> +				dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
> +			else
> +				dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
> +		}
> +
> +		/* Unmap the src buffer, if requested */
> +		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
> +			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
> +				dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
> +			else
> +				dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
> +		}
> +	}
> +
> +	/* Run any dependencies */
> +	dma_run_dependencies(txd);
> +
> +	return cookie;
> +}
> +
> +/**
> + * fsldma_clean_running_descriptor - move the completed descriptor from
> + * ld_running to ld_completed
> + * @chan: Freescale DMA channel
> + * @desc: the descriptor which is completed
> + *
> + * Free the descriptor directly if acked by async_tx api, or move it to
> + * queue ld_completed.
> + */
> +static void
> +fsldma_clean_running_descriptor(struct fsldma_chan *chan,
> +		struct fsl_desc_sw *desc)
> +{
> +	/* Remove from the list of transactions */
> +	list_del(&desc->node);

Minor nitpick. Add a blank line here.

> +	/*
> +	 * the client is allowed to attach dependent operations
> +	 * until 'ack' is set
> +	 */
> +	if (!async_tx_test_ack(&desc->async_tx)) {
> +		/*
> +		 * Move this descriptor to the list of descriptors which is
> +		 * completed, but still awaiting the 'ack' bit to be set.
> +		 */
> +		list_add_tail(&desc->node, &chan->ld_completed);
> +		return;
> +	}
> +
> +	dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
> +}
> +
> +/**
>   * fsl_chan_xfer_ld_queue - transfer any pending transactions
>   * @chan : Freescale DMA channel
>   *
> @@ -539,51 +643,58 @@ static void fsl_chan_xfer_ld_queue(struct fsldma_chan *chan)
>  }
> 
>  /**
> - * fsldma_cleanup_descriptor - cleanup and free a single link descriptor
> + * fsldma_cleanup_descriptors - cleanup link descriptors which are completed
> + * and move them to ld_completed to free until flag 'ack' is set
>   * @chan: Freescale DMA channel
> - * @desc: descriptor to cleanup and free
>   *
> - * This function is used on a descriptor which has been executed by the DMA
> - * controller. It will run any callbacks, submit any dependencies, and then
> - * free the descriptor.
> + * This function is used on descriptors which have been executed by the DMA
> + * controller. It will run any callbacks, submit any dependencies, then
> + * free these descriptors if flag 'ack' is set.
>   */
> -static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
> -				      struct fsl_desc_sw *desc)
> +static void fsldma_cleanup_descriptors(struct fsldma_chan *chan)
>  {
> -	struct dma_async_tx_descriptor *txd = &desc->async_tx;
> -	struct device *dev = chan->common.device->dev;
> -	dma_addr_t src = get_desc_src(chan, desc);
> -	dma_addr_t dst = get_desc_dst(chan, desc);
> -	u32 len = get_desc_cnt(chan, desc);
> +	struct fsl_desc_sw *desc, *_desc;
> +	dma_cookie_t cookie = 0;
> +	dma_addr_t curr_phys = get_cdar(chan);
> +	int seen_current = 0;
> 
> -	/* Run the link descriptor callback function */
> -	if (txd->callback) {
> -#ifdef FSL_DMA_LD_DEBUG
> -		chan_dbg(chan, "LD %p callback\n", desc);
> -#endif
> -		txd->callback(txd->callback_param);
> -	}
> +	fsldma_clean_completed_descriptor(chan);
> 
> -	/* Run any dependencies */
> -	dma_run_dependencies(txd);
> +	/* Run the callback for each descriptor, in order */
> +	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
> +		/*
> +		 * do not advance past the current descriptor loaded into the
> +		 * hardware channel, subsequent descriptors are either in
> +		 * process or have not been submitted
> +		 */
> +		if (seen_current)
> +			break;
> 
> -	/* Unmap the dst buffer, if requested */
> -	if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
> -		if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
> -			dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
> -		else
> -			dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
> -	}
> +		/*
> +		 * stop the search if we reach the current descriptor and the
> +		 * channel is busy
> +		 */
> +		if (desc->async_tx.phys == curr_phys) {
> +			seen_current = 1;
> +			if (!dma_is_idle(chan))
> +				break;
> +		}

I wonder if this is better:

if (desc->async_tx.phys == get_cdar(chan)) {
	seen_current = 1;
	if (!dma_is_idle(chan))
		break;
}

Your version works fine, it just might stop earlier than necessary. This
is just a nitpick.

> 
> -	/* Unmap the src buffer, if requested */
> -	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
> -		if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
> -			dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
> -		else
> -			dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
> +		cookie = fsldma_run_tx_complete_actions(chan, desc, cookie);
> +
> +		fsldma_clean_running_descriptor(chan, desc);
>  	}
> 
> -	fsl_dma_free_descriptor(chan, desc);
> +	/*
> +	 * Start any pending transactions automatically
> +	 *
> +	 * In the ideal case, we keep the DMA controller busy while we go
> +	 * ahead and free the descriptors below.
> +	 */
> +	fsl_chan_xfer_ld_queue(chan);
> +
> +	if (cookie > 0)
> +		chan->common.completed_cookie = cookie;
>  }
> 
>  /**
> @@ -654,8 +765,10 @@ static void fsl_dma_free_chan_resources(struct dma_chan *dchan)
> 
>  	chan_dbg(chan, "free all channel resources\n");
>  	spin_lock_irqsave(&chan->desc_lock, flags);
> +	fsldma_cleanup_descriptors(chan);
>  	fsldma_free_desc_list(chan, &chan->ld_pending);
>  	fsldma_free_desc_list(chan, &chan->ld_running);
> +	fsldma_free_desc_list(chan, &chan->ld_completed);
>  	spin_unlock_irqrestore(&chan->desc_lock, flags);
> 
>  	dma_pool_destroy(chan->desc_pool);
> @@ -893,6 +1006,7 @@ static int fsl_dma_device_control(struct dma_chan *dchan,
>  		/* Remove and free all of the descriptors in the LD queue */
>  		fsldma_free_desc_list(chan, &chan->ld_pending);
>  		fsldma_free_desc_list(chan, &chan->ld_running);
> +		fsldma_free_desc_list(chan, &chan->ld_completed);
>  		chan->idle = true;
> 
>  		spin_unlock_irqrestore(&chan->desc_lock, flags);
> @@ -956,11 +1070,15 @@ static enum dma_status fsl_tx_status(struct dma_chan *dchan,
>  	enum dma_status ret;
>  	unsigned long flags;
> 
> -	spin_lock_irqsave(&chan->desc_lock, flags);
>  	ret = dma_cookie_status(dchan, cookie, txstate);
> +	if (ret == DMA_SUCCESS)
> +		return ret;
> +
> +	spin_lock_irqsave(&chan->desc_lock, flags);
> +	fsldma_cleanup_descriptors(chan);
>  	spin_unlock_irqrestore(&chan->desc_lock, flags);
> 
> -	return ret;
> +	return dma_cookie_status(dchan, cookie, txstate);
>  }
> 
>  /*----------------------------------------------------------------------------*/
> @@ -1037,52 +1155,19 @@ static irqreturn_t fsldma_chan_irq(int irq, void *data)
>  static void dma_do_tasklet(unsigned long data)
>  {
>  	struct fsldma_chan *chan = (struct fsldma_chan *)data;
> -	struct fsl_desc_sw *desc, *_desc;
> -	LIST_HEAD(ld_cleanup);
>  	unsigned long flags;
> 
>  	chan_dbg(chan, "tasklet entry\n");
> 
>  	spin_lock_irqsave(&chan->desc_lock, flags);
> 
> -	/* update the cookie if we have some descriptors to cleanup */
> -	if (!list_empty(&chan->ld_running)) {
> -		dma_cookie_t cookie;
> -
> -		desc = to_fsl_desc(chan->ld_running.prev);
> -		cookie = desc->async_tx.cookie;
> -		dma_cookie_complete(&desc->async_tx);
> -
> -		chan_dbg(chan, "completed_cookie=%d\n", cookie);
> -	}
> -
> -	/*
> -	 * move the descriptors to a temporary list so we can drop the lock
> -	 * during the entire cleanup operation
> -	 */
> -	list_splice_tail_init(&chan->ld_running, &ld_cleanup);
> -
>  	/* the hardware is now idle and ready for more */
>  	chan->idle = true;
> 
> -	/*
> -	 * Start any pending transactions automatically
> -	 *
> -	 * In the ideal case, we keep the DMA controller busy while we go
> -	 * ahead and free the descriptors below.
> -	 */
> -	fsl_chan_xfer_ld_queue(chan);
> -	spin_unlock_irqrestore(&chan->desc_lock, flags);
> -
> -	/* Run the callback for each descriptor, in order */
> -	list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) {
> +	/* Run all cleanup for descriptors which have been completed */
> +	fsldma_cleanup_descriptors(chan);
> 
> -		/* Remove from the list of transactions */
> -		list_del(&desc->node);
> -
> -		/* Run all cleanup for this descriptor */
> -		fsldma_cleanup_descriptor(chan, desc);
> -	}
> +	spin_unlock_irqrestore(&chan->desc_lock, flags);
> 
>  	chan_dbg(chan, "tasklet exit\n");
>  }
> @@ -1264,6 +1349,7 @@ static int __devinit fsl_dma_chan_probe(struct fsldma_device *fdev,
>  	spin_lock_init(&chan->desc_lock);
>  	INIT_LIST_HEAD(&chan->ld_pending);
>  	INIT_LIST_HEAD(&chan->ld_running);
> +	INIT_LIST_HEAD(&chan->ld_completed);
>  	chan->idle = true;
> 
>  	chan->common.device = &fdev->common;
> diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
> index f5c3879..a58275a 100644
> --- a/drivers/dma/fsldma.h
> +++ b/drivers/dma/fsldma.h
> @@ -138,8 +138,21 @@ struct fsldma_chan {
>  	char name[8];			/* Channel name */
>  	struct fsldma_chan_regs __iomem *regs;
>  	spinlock_t desc_lock;		/* Descriptor operation lock */
> -	struct list_head ld_pending;	/* Link descriptors queue */
> -	struct list_head ld_running;	/* Link descriptors queue */
> +	/*
> +	 * Descriptors which are queued to run, but have not yet been
> +	 * submitted to the hardware for execution
> +	 */
> +	struct list_head ld_pending;
> +	/*
> +	 * Descriptors which are currently being executed by the hardware
> +	 */
> +	struct list_head ld_running;
> +	/*
> +	 * Descriptors which have finished execution by the hardware. These
> +	 * descriptors have already had their cleanup actions run. They are
> +	 * waiting for the ACK bit to be set by the async_tx API.
> +	 */
> +	struct list_head ld_completed;	/* Link descriptors queue */
>  	struct dma_chan common;		/* DMA common channel */
>  	struct dma_pool *desc_pool;	/* Descriptors pool */
>  	struct device *dev;		/* Channel device */
> --
> 1.7.5.1
> 
> 
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* Re: [PATCH 1/1] booke/wdt: fix incorrect WDIOC_GETSUPPORT return path
From: Tabi Timur-B04825 @ 2012-08-06 23:27 UTC (permalink / raw)
  To: Tiejun Chen; +Cc: linux-watchdog@vger.kernel.org, linuxppc-dev@ozlabs.org
In-Reply-To: <1343636122-23273-1-git-send-email-tiejun.chen@windriver.com>

On Mon, Jul 30, 2012 at 3:15 AM, Tiejun Chen <tiejun.chen@windriver.com> wr=
ote:
> We miss that correct WDIOC_GETSUPPORT return path when perform
> copy_to_user() properly.

Thanks for catching this.  I'm amazed that this driver still has bugs like =
this.

> diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c
> index 3fe82d0..2be7f29 100644
> --- a/drivers/watchdog/booke_wdt.c
> +++ b/drivers/watchdog/booke_wdt.c
> @@ -162,12 +162,13 @@ static long booke_wdt_ioctl(struct file *file,
>                                 unsigned int cmd, unsigned long arg)
>  {
>         u32 tmp =3D 0;
> -       u32 __user *p =3D (u32 __user *)arg;
> +       void __user *argp =3D (u32 __user *)arg;
> +       u32 __user *p =3D argp;

You don't need to create 'argp'.  The existing 'p' variable will work
in the copy_to_user() call.

> +               return copy_to_user(argp, &ident,
> +                               sizeof(ident)) ? -EFAULT : 0;

This can fit in one line, especially if you use 'p' instead of 'argp'.

--=20
Timur Tabi
Linux kernel developer at Freescale=

^ permalink raw reply

* Re: [PATCH 1/1] booke/wdt: fix incorrect WDIOC_GETSUPPORT return path
From: Tabi Timur-B04825 @ 2012-08-06 23:36 UTC (permalink / raw)
  To: Tiejun Chen; +Cc: linux-watchdog@vger.kernel.org, linuxppc-dev@ozlabs.org
In-Reply-To: <CAOZdJXWftkUSw57-Qbe9JFP9NwodkHprKr6_1xi6ZzzLN8A4wA@mail.gmail.com>

On Mon, Aug 6, 2012 at 2:12 PM, Tabi Timur-B04825 <b04825@freescale.com> wr=
ote:
> On Mon, Jul 30, 2012 at 3:15 AM, Tiejun Chen <tiejun.chen@windriver.com> =
wrote:
>> We miss that correct WDIOC_GETSUPPORT return path when perform
>> copy_to_user() properly.
>
> Thanks for catching this.  I'm amazed that this driver still has bugs lik=
e this.

While you're at it, I found a few related bugs.  Can you fix these, also?

1.	case WDIOC_SETOPTIONS:
		if (get_user(tmp, p))
			return -EINVAL;

This should return -EFAULT.

2. 	case WDIOC_GETBOOTSTATUS:
		/* XXX: something is clearing TSR */
		tmp =3D mfspr(SPRN_TSR) & TSR_WRS(3);
		/* returns CARDRESET if last reset was caused by the WDT */
		return (tmp ? WDIOF_CARDRESET : 0);

This should use put_user() to return the value, instead of returning
it as a return code.

You can title the new patch something like, "booke/wdt: some ioctls do
not return values properly"

--=20
Timur Tabi
Linux kernel developer at Freescale=

^ permalink raw reply

* Re: [PATCH v6 0/8] Raid: enable talitos xor offload for improving performance
From: Kim Phillips @ 2012-08-07  1:35 UTC (permalink / raw)
  To: qiang.liu
  Cc: arnd, vinod.koul, gregkh, linux-kernel, dan.j.williams, herbert,
	linux-crypto, dan.j.williams, linuxppc-dev
In-Reply-To: <1344247815-1104-1-git-send-email-qiang.liu@freescale.com>

On Mon, 6 Aug 2012 18:10:15 +0800
<qiang.liu@freescale.com> wrote:

> Changes in v6:
> 	- swap the order of original patch 3/6 and 4/6;
> 	- merge Ira's patch to reduce the size of original patch;
> 	- merge Ira's patch of carma in 8/8;
> 	- update documents and descriptions according to Ira's advice;

fwiw, I gave v5 a test-drive, setting up a RAID5 array on ramdisks
[1], and this patchseries, along with FSL_DMA && NET_DMA set seems
to be holding water, so this series gets my:

Tested-by: Kim Phillips <kim.phillips@freescale.com>

Thanks,

Kim

[1] mdadm --create --verbose --force /dev/md0 --level=raid5 --raid-devices=4 /dev/ram[0123]

^ permalink raw reply

* Re: [PATCH 1/1] booke/wdt: fix incorrect WDIOC_GETSUPPORT return path
From: tiejun.chen @ 2012-08-07  1:56 UTC (permalink / raw)
  To: Tabi Timur-B04825; +Cc: linux-watchdog@vger.kernel.org, linuxppc-dev@ozlabs.org
In-Reply-To: <CAOZdJXWYyc_=iikFwSLy4gX7Grag=jxvWhYrP4Y9U3QZcwieBw@mail.gmail.com>

On 08/07/2012 09:19 AM, Tabi Timur-B04825 wrote:
> On Mon, Aug 6, 2012 at 2:12 PM, Tabi Timur-B04825 <b04825@freescale.com> wrote:
>> On Mon, Jul 30, 2012 at 3:15 AM, Tiejun Chen <tiejun.chen@windriver.com> wrote:
>>> We miss that correct WDIOC_GETSUPPORT return path when perform
>>> copy_to_user() properly.
>>
>> Thanks for catching this.  I'm amazed that this driver still has bugs like this.
> 
> While you're at it, I found a few related bugs.  Can you fix these, also?
> 
> 1.	case WDIOC_SETOPTIONS:
> 		if (get_user(tmp, p))
> 			return -EINVAL;
> 
> This should return -EFAULT.
> 
> 2. 	case WDIOC_GETBOOTSTATUS:
> 		/* XXX: something is clearing TSR */
> 		tmp = mfspr(SPRN_TSR) & TSR_WRS(3);
> 		/* returns CARDRESET if last reset was caused by the WDT */
> 		return (tmp ? WDIOF_CARDRESET : 0);
> 
> This should use put_user() to return the value, instead of returning
> it as a return code.
> 
> You can title the new patch something like, "booke/wdt: some ioctls do
> not return values properly"

Will regenerate this patch including these error as v2.

Thanks
Tiejun

^ permalink raw reply

* [v2 PATCH 1/1] booke/wdt: some ioctls do not return values properly
From: Tiejun Chen @ 2012-08-07  1:59 UTC (permalink / raw)
  To: B04825; +Cc: linuxppc-dev, linux-watchdog

Fix some booke wdt ioctls return value error.

Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>
---
 drivers/watchdog/booke_wdt.c |    7 +++----
 1 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/watchdog/booke_wdt.c b/drivers/watchdog/booke_wdt.c
index 3fe82d0..5b06d31 100644
--- a/drivers/watchdog/booke_wdt.c
+++ b/drivers/watchdog/booke_wdt.c
@@ -166,18 +166,17 @@ static long booke_wdt_ioctl(struct file *file,
 
 	switch (cmd) {
 	case WDIOC_GETSUPPORT:
-		if (copy_to_user((void *)arg, &ident, sizeof(ident)))
-			return -EFAULT;
+		return copy_to_user(p, &ident, sizeof(ident)) ? -EFAULT : 0;
 	case WDIOC_GETSTATUS:
 		return put_user(0, p);
 	case WDIOC_GETBOOTSTATUS:
 		/* XXX: something is clearing TSR */
 		tmp = mfspr(SPRN_TSR) & TSR_WRS(3);
 		/* returns CARDRESET if last reset was caused by the WDT */
-		return (tmp ? WDIOF_CARDRESET : 0);
+		return put_user((tmp ? WDIOF_CARDRESET : 0), p);
 	case WDIOC_SETOPTIONS:
 		if (get_user(tmp, p))
-			return -EINVAL;
+			return -EFAULT;
 		if (tmp == WDIOS_ENABLECARD) {
 			booke_wdt_ping();
 			break;
-- 
1.5.6

^ permalink raw reply related

* Re: [v2 PATCH 1/1] booke/wdt: some ioctls do not return values properly
From: Tabi Timur-B04825 @ 2012-08-07  2:20 UTC (permalink / raw)
  To: Tiejun Chen; +Cc: linuxppc-dev@ozlabs.org, linux-watchdog@vger.kernel.org
In-Reply-To: <1344304780-13555-1-git-send-email-tiejun.chen@windriver.com>

On Mon, Aug 6, 2012 at 8:59 PM, Tiejun Chen <tiejun.chen@windriver.com> wro=
te:
> Fix some booke wdt ioctls return value error.
>
> Signed-off-by: Tiejun Chen <tiejun.chen@windriver.com>

It's not the greatest patch description, but it'll do.

Acked-by: Timur Tabi <timur@freescale.com>

--=20
Timur Tabi
Linux kernel developer at Freescale=

^ permalink raw reply

* RE: [PATCH v6 6/8] fsl-dma: use spin_lock_bh to instead of spin_lock_irqsave
From: Liu Qiang-B32616 @ 2012-08-07  2:45 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Li Yang-R58472, vinod.koul@intel.com, gregkh@linuxfoundation.org,
	Tabi Timur-B04825, linux-kernel@vger.kernel.org,
	Phillips Kim-R1AAHA, dan.j.williams@gmail.com,
	herbert@gondor.hengli.com.au, linux-crypto@vger.kernel.org,
	dan.j.williams@intel.com, linuxppc-dev@lists.ozlabs.org,
	davem@davemloft.net
In-Reply-To: <201208061157.17667.arnd@arndb.de>

> -----Original Message-----
> From: Arnd Bergmann [mailto:arnd@arndb.de]
> Sent: Monday, August 06, 2012 7:57 PM
> To: Liu Qiang-B32616
> Cc: linux-crypto@vger.kernel.org; linuxppc-dev@lists.ozlabs.org;
> dan.j.williams@intel.com; linux-kernel@vger.kernel.org;
> dan.j.williams@gmail.com; vinod.koul@intel.com; Phillips Kim-R1AAHA;
> herbert@gondor.hengli.com.au; davem@davemloft.net;
> gregkh@linuxfoundation.org; Li Yang-R58472; Tabi Timur-B04825
> Subject: Re: [PATCH v6 6/8] fsl-dma: use spin_lock_bh to instead of
> spin_lock_irqsave
>=20
> On Monday 06 August 2012, qiang.liu@freescale.com wrote:
> >
> > From: Qiang Liu <qiang.liu@freescale.com>
> >
> > The use of spin_lock_irqsave() is a stronger locking mechanism than is
> > required throughout the driver. The minimum locking required should be
> > used instead. Interrupts will be turned off and context will be saved,
> > there is needless to use irqsave.
> >
> > Change all instances of spin_lock_irqsave() to spin_lock_bh().
> > All manipulation of protected fields is done using tasklet context or
> > weaker, which makes spin_lock_bh() the correct choice.
> >
> > Cc: Dan Williams <dan.j.williams@intel.com>
> > Cc: Dan Williams <dan.j.williams@gmail.com>
> > Cc: Vinod Koul <vinod.koul@intel.com>
> > Cc: Li Yang <leoli@freescale.com>
> > Cc: Timur Tabi <timur@freescale.com>
> > Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
> > Acked-by: Ira W. Snyder <iws@ovro.caltech.edu>
>=20
> Acked-by: Arnd Bergmann <arnd@arndb.de>
>=20
> You could actually change the use of spin_lock_bh inside of the tasklet
> function (dma_do_tasklet) do just spin_lock(), because softirqs are
> already disabled there, but your version is also ok.
Yes, you are right, it will disable softirq.
Thank you very much.

^ permalink raw reply

* RE: [PATCH v6 5/8] fsl-dma: change release process of dma descriptor for supporting async_tx
From: Liu Qiang-B32616 @ 2012-08-07  3:22 UTC (permalink / raw)
  To: Ira W. Snyder
  Cc: arnd@arndb.de, vinod.koul@intel.com, gregkh@linuxfoundation.org,
	linux-kernel@vger.kernel.org, dan.j.williams@gmail.com,
	herbert@gondor.hengli.com.au, linux-crypto@vger.kernel.org,
	dan.j.williams@intel.com, linuxppc-dev@lists.ozlabs.org,
	davem@davemloft.net
In-Reply-To: <20120806175115.GA23815@ovro.caltech.edu>

> -----Original Message-----
> From: Ira W. Snyder [mailto:iws@ovro.caltech.edu]
> Sent: Tuesday, August 07, 2012 1:51 AM
> To: Liu Qiang-B32616
> Cc: linux-crypto@vger.kernel.org; linuxppc-dev@lists.ozlabs.org;
> dan.j.williams@intel.com; linux-kernel@vger.kernel.org;
> dan.j.williams@gmail.com; vinod.koul@intel.com; arnd@arndb.de;
> gregkh@linuxfoundation.org; herbert@gondor.hengli.com.au;
> davem@davemloft.net
> Subject: Re: [PATCH v6 5/8] fsl-dma: change release process of dma
> descriptor for supporting async_tx
>=20
> On Mon, Aug 06, 2012 at 06:14:33PM +0800, qiang.liu@freescale.com wrote:
> > From: Qiang Liu <qiang.liu@freescale.com>
> >
> > Fix the potential risk when enable config NET_DMA and ASYNC_TX.
> > Async_tx is lack of support in current release process of dma
> descriptor,
> > all descriptors will be released whatever is acked or no-acked by
> async_tx,
> > so there is a potential race condition when dma engine is uesd by
> others
> > clients (e.g. when enable NET_DMA to offload TCP).
> >
> > In our case, a race condition which is raised when use both of talitos
> > and dmaengine to offload xor is because napi scheduler will sync all
> > pending requests in dma channels, it affects the process of raid
> operations
> > due to ack_tx is not checked in fsl dma. The no-acked descriptor is
> freed
> > which is submitted just now, as a dependent tx, this freed descriptor
> trigger
> > BUG_ON(async_tx_test_ack(depend_tx)) in async_tx_submit().
> >
> > TASK =3D ee1a94a0[1390] 'md0_raid5' THREAD: ecf40000 CPU: 0
> > GPR00: 00000001 ecf41ca0 ee44/921a94a0 0000003f 00000001 c00593e4
> 00000000 00000001
> > GPR08: 00000000 a7a7a7a7 00000001 045/920000002 42028042 100a38d4
> ed576d98 00000000
> > GPR16: ed5a11b0 00000000 2b162000 00000200 046/920000000 2d555000
> ed3015e8 c15a7aa0
> > GPR24: 00000000 c155fc40 00000000 ecb63220 ecf41d28 e47/92f640bb0
> ef640c30 ecf41ca0
> > NIP [c02b048c] async_tx_submit+0x6c/0x2b4
> > LR [c02b068c] async_tx_submit+0x26c/0x2b4
> > Call Trace:
> > [ecf41ca0] [c02b068c] async_tx_submit+0x26c/0x2b448/92 (unreliable)
> > [ecf41cd0] [c02b0a4c] async_memcpy+0x240/0x25c
> > [ecf41d20] [c0421064] async_copy_data+0xa0/0x17c
> > [ecf41d70] [c0421cf4] __raid_run_ops+0x874/0xe10
> > [ecf41df0] [c0426ee4] handle_stripe+0x820/0x25e8
> > [ecf41e90] [c0429080] raid5d+0x3d4/0x5b4
> > [ecf41f40] [c04329b8] md_thread+0x138/0x16c
> > [ecf41f90] [c008277c] kthread+0x8c/0x90
> > [ecf41ff0] [c0011630] kernel_thread+0x4c/0x68
> >
> > Another modification in this patch is the change of completed
> descriptors,
> > there is a potential risk which caused by exception interrupt, all
> descriptors
> > in ld_running list are seemed completed when an interrupt raised, it
> works fine
> > under normal condition, but if there is an exception occured, it cannot
> work
> > as our excepted. Hardware should not be depend on s/w list, the right
> way is
> > to read current descriptor address register to find the last completed
> > descriptor. If an interrupt is raised by an error, all descriptors in
> ld_running
> > should not be seemed finished, or these unfinished descriptors in
> ld_running
> > will be released wrongly.
> >
> > A simple way to reproduce,
> > Enable dmatest first, then insert some bad descriptors which can
> trigger
> > Programming Error interrupts before the good descriptors. Last, the
> good
> > descriptors will be freed before they are processsed because of the
> exception
> > intrerrupt.
> >
> > Note: the bad descriptors are only for simulating an exception
> interrupt.
> > This case can illustrate the potential risk in current fsl-dma very
> well.
> >
> > Cc: Dan Williams <dan.j.williams@intel.com>
> > Cc: Dan Williams <dan.j.williams@gmail.com>
> > Cc: Vinod Koul <vinod.koul@intel.com>
> > Cc: Li Yang <leoli@freescale.com>
> > Signed-off-by: Qiang Liu <qiang.liu@freescale.com>
> > Signed-off-by: Ira W. Snyder <iws@ovro.caltech.edu>
>=20
> There are two minor nitpicks below. Other than that, the patch looks
> excellent to me.
>=20
> Ira
>=20
> > ---
> >  drivers/dma/fsldma.c |  232 ++++++++++++++++++++++++++++++++++--------
> --------
> >  drivers/dma/fsldma.h |   17 +++-
> >  2 files changed, 174 insertions(+), 75 deletions(-)
> >
> > diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
> > index 36490a3..938d8c1 100644
> > --- a/drivers/dma/fsldma.c
> > +++ b/drivers/dma/fsldma.c
> > @@ -472,6 +472,110 @@ static struct fsl_desc_sw
> *fsl_dma_alloc_descriptor(struct fsldma_chan *chan)
> >  }
> >
> >  /**
> > + * fsldma_clean_completed_descriptor - free all descriptors which
> > + * has been completed and acked
> > + * @chan: Freescale DMA channel
> > + *
> > + * This function is used on all completed and acked descriptors.
> > + * All descriptors should only be freed in this function.
> > + */
> > +static void
> > +fsldma_clean_completed_descriptor(struct fsldma_chan *chan)
> > +{
> > +	struct fsl_desc_sw *desc, *_desc;
> > +
> > +	/* Run the callback for each descriptor, in order */
> > +	list_for_each_entry_safe(desc, _desc, &chan->ld_completed, node)
> > +		if (async_tx_test_ack(&desc->async_tx))
> > +			fsl_dma_free_descriptor(chan, desc);
> > +}
> > +
> > +/**
> > + * fsldma_run_tx_complete_actions - cleanup a single link descriptor
> > + * @chan: Freescale DMA channel
> > + * @desc: descriptor to cleanup and free
> > + * @cookie: Freescale DMA transaction identifier
> > + *
> > + * This function is used on a descriptor which has been executed by
> the DMA
> > + * controller. It will run any callbacks, submit any dependencies.
> > + */
> > +static dma_cookie_t fsldma_run_tx_complete_actions(struct fsldma_chan
> *chan,
> > +		struct fsl_desc_sw *desc, dma_cookie_t cookie)
> > +{
> > +	struct dma_async_tx_descriptor *txd =3D &desc->async_tx;
> > +	struct device *dev =3D chan->common.device->dev;
> > +	dma_addr_t src =3D get_desc_src(chan, desc);
> > +	dma_addr_t dst =3D get_desc_dst(chan, desc);
> > +	u32 len =3D get_desc_cnt(chan, desc);
> > +
> > +	BUG_ON(txd->cookie < 0);
> > +
> > +	if (txd->cookie > 0) {
> > +		cookie =3D txd->cookie;
> > +
> > +		/* Run the link descriptor callback function */
> > +		if (txd->callback) {
> > +#ifdef FSL_DMA_LD_DEBUG
> > +			chan_dbg(chan, "LD %p callback\n", desc);
> > +#endif
> > +			txd->callback(txd->callback_param);
> > +		}
> > +
> > +		/* Unmap the dst buffer, if requested */
> > +		if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
> > +			if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
> > +				dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
> > +			else
> > +				dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
> > +		}
> > +
> > +		/* Unmap the src buffer, if requested */
> > +		if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
> > +			if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
> > +				dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
> > +			else
> > +				dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
> > +		}
> > +	}
> > +
> > +	/* Run any dependencies */
> > +	dma_run_dependencies(txd);
> > +
> > +	return cookie;
> > +}
> > +
> > +/**
> > + * fsldma_clean_running_descriptor - move the completed descriptor
> from
> > + * ld_running to ld_completed
> > + * @chan: Freescale DMA channel
> > + * @desc: the descriptor which is completed
> > + *
> > + * Free the descriptor directly if acked by async_tx api, or move it
> to
> > + * queue ld_completed.
> > + */
> > +static void
> > +fsldma_clean_running_descriptor(struct fsldma_chan *chan,
> > +		struct fsl_desc_sw *desc)
> > +{
> > +	/* Remove from the list of transactions */
> > +	list_del(&desc->node);
>=20
> Minor nitpick. Add a blank line here.
My fault. I will correct it.

>=20
> > +	/*
> > +	 * the client is allowed to attach dependent operations
> > +	 * until 'ack' is set
> > +	 */
> > +	if (!async_tx_test_ack(&desc->async_tx)) {
> > +		/*
> > +		 * Move this descriptor to the list of descriptors which is
> > +		 * completed, but still awaiting the 'ack' bit to be set.
> > +		 */
> > +		list_add_tail(&desc->node, &chan->ld_completed);
> > +		return;
> > +	}
> > +
> > +	dma_pool_free(chan->desc_pool, desc, desc->async_tx.phys);
> > +}
> > +
> > +/**
> >   * fsl_chan_xfer_ld_queue - transfer any pending transactions
> >   * @chan : Freescale DMA channel
> >   *
> > @@ -539,51 +643,58 @@ static void fsl_chan_xfer_ld_queue(struct
> fsldma_chan *chan)
> >  }
> >
> >  /**
> > - * fsldma_cleanup_descriptor - cleanup and free a single link
> descriptor
> > + * fsldma_cleanup_descriptors - cleanup link descriptors which are
> completed
> > + * and move them to ld_completed to free until flag 'ack' is set
> >   * @chan: Freescale DMA channel
> > - * @desc: descriptor to cleanup and free
> >   *
> > - * This function is used on a descriptor which has been executed by
> the DMA
> > - * controller. It will run any callbacks, submit any dependencies, and
> then
> > - * free the descriptor.
> > + * This function is used on descriptors which have been executed by
> the DMA
> > + * controller. It will run any callbacks, submit any dependencies,
> then
> > + * free these descriptors if flag 'ack' is set.
> >   */
> > -static void fsldma_cleanup_descriptor(struct fsldma_chan *chan,
> > -				      struct fsl_desc_sw *desc)
> > +static void fsldma_cleanup_descriptors(struct fsldma_chan *chan)
> >  {
> > -	struct dma_async_tx_descriptor *txd =3D &desc->async_tx;
> > -	struct device *dev =3D chan->common.device->dev;
> > -	dma_addr_t src =3D get_desc_src(chan, desc);
> > -	dma_addr_t dst =3D get_desc_dst(chan, desc);
> > -	u32 len =3D get_desc_cnt(chan, desc);
> > +	struct fsl_desc_sw *desc, *_desc;
> > +	dma_cookie_t cookie =3D 0;
> > +	dma_addr_t curr_phys =3D get_cdar(chan);
> > +	int seen_current =3D 0;
> >
> > -	/* Run the link descriptor callback function */
> > -	if (txd->callback) {
> > -#ifdef FSL_DMA_LD_DEBUG
> > -		chan_dbg(chan, "LD %p callback\n", desc);
> > -#endif
> > -		txd->callback(txd->callback_param);
> > -	}
> > +	fsldma_clean_completed_descriptor(chan);
> >
> > -	/* Run any dependencies */
> > -	dma_run_dependencies(txd);
> > +	/* Run the callback for each descriptor, in order */
> > +	list_for_each_entry_safe(desc, _desc, &chan->ld_running, node) {
> > +		/*
> > +		 * do not advance past the current descriptor loaded into the
> > +		 * hardware channel, subsequent descriptors are either in
> > +		 * process or have not been submitted
> > +		 */
> > +		if (seen_current)
> > +			break;
> >
> > -	/* Unmap the dst buffer, if requested */
> > -	if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
> > -		if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE)
> > -			dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE);
> > -		else
> > -			dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE);
> > -	}
> > +		/*
> > +		 * stop the search if we reach the current descriptor and the
> > +		 * channel is busy
> > +		 */
> > +		if (desc->async_tx.phys =3D=3D curr_phys) {
> > +			seen_current =3D 1;
> > +			if (!dma_is_idle(chan))
> > +				break;
> > +		}
>=20
> I wonder if this is better:
>=20
> if (desc->async_tx.phys =3D=3D get_cdar(chan)) {
> 	seen_current =3D 1;
> 	if (!dma_is_idle(chan))
> 		break;
> }
>=20
> Your version works fine, it just might stop earlier than necessary. This
> is just a nitpick.
2 reasons make me cannot correct it as you expected in v6,
First, there is a conflict when we read current address register, dma deviv=
e will refill current address continually, there is an arbitration mechanis=
m when we read this register, so it may not improve throughput.
Second, as I know, most normal interrupts means the whole list are complete=
d but not for only single descriptor, so there should not be obvious improv=
ement with the latest address. The current address is always same, it point=
s the last descriptor of the list.
Sorry, I should explain it clearly in v5. Thanks.

>=20
> >
> > -	/* Unmap the src buffer, if requested */
> > -	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
> > -		if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE)
> > -			dma_unmap_single(dev, src, len, DMA_TO_DEVICE);
> > -		else
> > -			dma_unmap_page(dev, src, len, DMA_TO_DEVICE);
> > +		cookie =3D fsldma_run_tx_complete_actions(chan, desc, cookie);
> > +
> > +		fsldma_clean_running_descriptor(chan, desc);
> >  	}
> >
> > -	fsl_dma_free_descriptor(chan, desc);
> > +	/*
> > +	 * Start any pending transactions automatically
> > +	 *
> > +	 * In the ideal case, we keep the DMA controller busy while we go
> > +	 * ahead and free the descriptors below.
> > +	 */
> > +	fsl_chan_xfer_ld_queue(chan);
> > +
> > +	if (cookie > 0)
> > +		chan->common.completed_cookie =3D cookie;
> >  }
> >
> >  /**
> > @@ -654,8 +765,10 @@ static void fsl_dma_free_chan_resources(struct
> dma_chan *dchan)
> >
> >  	chan_dbg(chan, "free all channel resources\n");
> >  	spin_lock_irqsave(&chan->desc_lock, flags);
> > +	fsldma_cleanup_descriptors(chan);
> >  	fsldma_free_desc_list(chan, &chan->ld_pending);
> >  	fsldma_free_desc_list(chan, &chan->ld_running);
> > +	fsldma_free_desc_list(chan, &chan->ld_completed);
> >  	spin_unlock_irqrestore(&chan->desc_lock, flags);
> >
> >  	dma_pool_destroy(chan->desc_pool);
> > @@ -893,6 +1006,7 @@ static int fsl_dma_device_control(struct dma_chan
> *dchan,
> >  		/* Remove and free all of the descriptors in the LD queue */
> >  		fsldma_free_desc_list(chan, &chan->ld_pending);
> >  		fsldma_free_desc_list(chan, &chan->ld_running);
> > +		fsldma_free_desc_list(chan, &chan->ld_completed);
> >  		chan->idle =3D true;
> >
> >  		spin_unlock_irqrestore(&chan->desc_lock, flags);
> > @@ -956,11 +1070,15 @@ static enum dma_status fsl_tx_status(struct
> dma_chan *dchan,
> >  	enum dma_status ret;
> >  	unsigned long flags;
> >
> > -	spin_lock_irqsave(&chan->desc_lock, flags);
> >  	ret =3D dma_cookie_status(dchan, cookie, txstate);
> > +	if (ret =3D=3D DMA_SUCCESS)
> > +		return ret;
> > +
> > +	spin_lock_irqsave(&chan->desc_lock, flags);
> > +	fsldma_cleanup_descriptors(chan);
> >  	spin_unlock_irqrestore(&chan->desc_lock, flags);
> >
> > -	return ret;
> > +	return dma_cookie_status(dchan, cookie, txstate);
> >  }
> >
> >  /*--------------------------------------------------------------------
> --------*/
> > @@ -1037,52 +1155,19 @@ static irqreturn_t fsldma_chan_irq(int irq,
> void *data)
> >  static void dma_do_tasklet(unsigned long data)
> >  {
> >  	struct fsldma_chan *chan =3D (struct fsldma_chan *)data;
> > -	struct fsl_desc_sw *desc, *_desc;
> > -	LIST_HEAD(ld_cleanup);
> >  	unsigned long flags;
> >
> >  	chan_dbg(chan, "tasklet entry\n");
> >
> >  	spin_lock_irqsave(&chan->desc_lock, flags);
> >
> > -	/* update the cookie if we have some descriptors to cleanup */
> > -	if (!list_empty(&chan->ld_running)) {
> > -		dma_cookie_t cookie;
> > -
> > -		desc =3D to_fsl_desc(chan->ld_running.prev);
> > -		cookie =3D desc->async_tx.cookie;
> > -		dma_cookie_complete(&desc->async_tx);
> > -
> > -		chan_dbg(chan, "completed_cookie=3D%d\n", cookie);
> > -	}
> > -
> > -	/*
> > -	 * move the descriptors to a temporary list so we can drop the lock
> > -	 * during the entire cleanup operation
> > -	 */
> > -	list_splice_tail_init(&chan->ld_running, &ld_cleanup);
> > -
> >  	/* the hardware is now idle and ready for more */
> >  	chan->idle =3D true;
> >
> > -	/*
> > -	 * Start any pending transactions automatically
> > -	 *
> > -	 * In the ideal case, we keep the DMA controller busy while we go
> > -	 * ahead and free the descriptors below.
> > -	 */
> > -	fsl_chan_xfer_ld_queue(chan);
> > -	spin_unlock_irqrestore(&chan->desc_lock, flags);
> > -
> > -	/* Run the callback for each descriptor, in order */
> > -	list_for_each_entry_safe(desc, _desc, &ld_cleanup, node) {
> > +	/* Run all cleanup for descriptors which have been completed */
> > +	fsldma_cleanup_descriptors(chan);
> >
> > -		/* Remove from the list of transactions */
> > -		list_del(&desc->node);
> > -
> > -		/* Run all cleanup for this descriptor */
> > -		fsldma_cleanup_descriptor(chan, desc);
> > -	}
> > +	spin_unlock_irqrestore(&chan->desc_lock, flags);
> >
> >  	chan_dbg(chan, "tasklet exit\n");
> >  }
> > @@ -1264,6 +1349,7 @@ static int __devinit fsl_dma_chan_probe(struct
> fsldma_device *fdev,
> >  	spin_lock_init(&chan->desc_lock);
> >  	INIT_LIST_HEAD(&chan->ld_pending);
> >  	INIT_LIST_HEAD(&chan->ld_running);
> > +	INIT_LIST_HEAD(&chan->ld_completed);
> >  	chan->idle =3D true;
> >
> >  	chan->common.device =3D &fdev->common;
> > diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
> > index f5c3879..a58275a 100644
> > --- a/drivers/dma/fsldma.h
> > +++ b/drivers/dma/fsldma.h
> > @@ -138,8 +138,21 @@ struct fsldma_chan {
> >  	char name[8];			/* Channel name */
> >  	struct fsldma_chan_regs __iomem *regs;
> >  	spinlock_t desc_lock;		/* Descriptor operation lock */
> > -	struct list_head ld_pending;	/* Link descriptors queue */
> > -	struct list_head ld_running;	/* Link descriptors queue */
> > +	/*
> > +	 * Descriptors which are queued to run, but have not yet been
> > +	 * submitted to the hardware for execution
> > +	 */
> > +	struct list_head ld_pending;
> > +	/*
> > +	 * Descriptors which are currently being executed by the hardware
> > +	 */
> > +	struct list_head ld_running;
> > +	/*
> > +	 * Descriptors which have finished execution by the hardware. These
> > +	 * descriptors have already had their cleanup actions run. They are
> > +	 * waiting for the ACK bit to be set by the async_tx API.
> > +	 */
> > +	struct list_head ld_completed;	/* Link descriptors queue */
> >  	struct dma_chan common;		/* DMA common channel */
> >  	struct dma_pool *desc_pool;	/* Descriptors pool */
> >  	struct device *dev;		/* Channel device */
> > --
> > 1.7.5.1
> >
> >
> > _______________________________________________
> > Linuxppc-dev mailing list
> > Linuxppc-dev@lists.ozlabs.org
> > https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* RE: [PATCH v6 0/8] Raid: enable talitos xor offload for improving performance
From: Liu Qiang-B32616 @ 2012-08-07  3:27 UTC (permalink / raw)
  To: Phillips Kim-R1AAHA
  Cc: Li Yang-R58472, arnd@arndb.de, vinod.koul@intel.com,
	gregkh@linuxfoundation.org, linux-kernel@vger.kernel.org,
	dan.j.williams@gmail.com, herbert@gondor.hengli.com.au,
	linux-crypto@vger.kernel.org, dan.j.williams@intel.com,
	linuxppc-dev@lists.ozlabs.org
In-Reply-To: <20120806203506.bcf31cc63a2d1f55a9695f13@freescale.com>

> -----Original Message-----
> From: Phillips Kim-R1AAHA
> Sent: Tuesday, August 07, 2012 9:35 AM
> To: Liu Qiang-B32616
> Cc: linux-crypto@vger.kernel.org; vinod.koul@intel.com;
> dan.j.williams@intel.com; herbert@gondor.hengli.com.au; arnd@arndb.de;
> gregkh@linuxfoundation.org; linuxppc-dev@lists.ozlabs.org; linux-
> kernel@vger.kernel.org; dan.j.williams@gmail.com; Li Yang-R58472
> Subject: Re: [PATCH v6 0/8] Raid: enable talitos xor offload for
> improving performance
>=20
> On Mon, 6 Aug 2012 18:10:15 +0800
> <qiang.liu@freescale.com> wrote:
>=20
> > Changes in v6:
> > 	- swap the order of original patch 3/6 and 4/6;
> > 	- merge Ira's patch to reduce the size of original patch;
> > 	- merge Ira's patch of carma in 8/8;
> > 	- update documents and descriptions according to Ira's advice;
>=20
> fwiw, I gave v5 a test-drive, setting up a RAID5 array on ramdisks [1],
> and this patchseries, along with FSL_DMA && NET_DMA set seems to be
> holding water, so this series gets my:
>=20
> Tested-by: Kim Phillips <kim.phillips@freescale.com>
Thanks, Kim. I will add this line in v7:)

>=20
> Thanks,
>=20
> Kim
>=20
> [1] mdadm --create --verbose --force /dev/md0 --level=3Draid5 --raid-
> devices=3D4 /dev/ram[0123]

^ permalink raw reply

* RE: [PATCH v2] PCI: use dev->irq instead of dev->pin to enable non MSI/INTx interrupt
From: Zang Roy-R61911 @ 2012-08-07  3:45 UTC (permalink / raw)
  To: Liu Shengzhou-B36685, bhelgaas@google.com,
	linux-pci@vger.kernel.org, akpm@linux-foundation.org, Kumar Gala
  Cc: Wood Scott-B07421, linuxppc-dev@lists.ozlabs.org,
	Liu Shengzhou-B36685
In-Reply-To: <3F453DDFF675A64A89321A1F35281021771A8B@039-SN1MPN1-003.039d.mgd.msft.net>



> -----Original Message-----
> From: Linuxppc-dev [mailto:linuxppc-dev-bounces+tie-
> fei.zang=3Dfreescale.com@lists.ozlabs.org] On Behalf Of Liu Shengzhou-B36=
685
> Sent: Thursday, July 26, 2012 11:45 AM
> To: bhelgaas@google.com; linux-pci@vger.kernel.org; akpm@linux-
> foundation.org
> Cc: Wood Scott-B07421; linuxppc-dev@lists.ozlabs.org; Liu Shengzhou-B3668=
5
> Subject: RE: [PATCH v2] PCI: use dev->irq instead of dev->pin to enable n=
on
> MSI/INTx interrupt
>=20
> Hello,
>=20
> A gentle reminder!
> Any comments are appreciated.

Who can help to review and pick up this patch?
Thanks.
Roy

^ permalink raw reply

* Re: [RFC PATCH V6 15/19] memory-hotplug: implement register_page_bootmem_info_section of sparse-vmemmap
From: Wen Congyang @ 2012-08-07  3:48 UTC (permalink / raw)
  To: isimatu.yasuaki
  Cc: linux-s390, linux-ia64, linux-acpi, len.brown, linux-sh,
	linux-kernel, cmetcalf, linux-mm, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, linuxppc-dev, akpm, liuj97
In-Reply-To: <1343980161-14254-16-git-send-email-wency@cn.fujitsu.com>

At 08/03/2012 03:49 PM, wency@cn.fujitsu.com Wrote:
> From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> 
> For removing memmap region of sparse-vmemmap which is allocated bootmem,
> memmap region of sparse-vmemmap needs to be registered by get_page_bootmem().
> So the patch searches pages of virtual mapping and registers the pages by
> get_page_bootmem().
> 
> Note: register_page_bootmem_memmap() is not implemented for ia64, ppc, s390,
> and sparc.
> 
> CC: David Rientjes <rientjes@google.com>
> CC: Jiang Liu <liuj97@gmail.com>
> CC: Len Brown <len.brown@intel.com>
> CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> CC: Paul Mackerras <paulus@samba.org>
> CC: Christoph Lameter <cl@linux.com>
> Cc: Minchan Kim <minchan.kim@gmail.com>
> CC: Andrew Morton <akpm@linux-foundation.org>
> CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> CC: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> ---
>  arch/ia64/mm/discontig.c       |    6 ++++
>  arch/powerpc/mm/init_64.c      |    6 ++++
>  arch/s390/mm/vmem.c            |    6 ++++
>  arch/sparc/mm/init_64.c        |    6 ++++
>  arch/x86/mm/init_64.c          |   52 ++++++++++++++++++++++++++++++++++++++++
>  include/linux/memory_hotplug.h |    2 +
>  include/linux/mm.h             |    3 +-
>  mm/memory_hotplug.c            |   23 +++++++++++++++--
>  8 files changed, 100 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
> index c641333..33943db 100644
> --- a/arch/ia64/mm/discontig.c
> +++ b/arch/ia64/mm/discontig.c
> @@ -822,4 +822,10 @@ int __meminit vmemmap_populate(struct page *start_page,
>  {
>  	return vmemmap_populate_basepages(start_page, size, node);
>  }
> +
> +void register_page_bootmem_memmap(unsigned long section_nr,
> +				  struct page *start_page, unsigned long size)
> +{
> +	/* TODO */
> +}
>  #endif
> diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
> index 620b7ac..3690c44 100644
> --- a/arch/powerpc/mm/init_64.c
> +++ b/arch/powerpc/mm/init_64.c
> @@ -298,5 +298,11 @@ int __meminit vmemmap_populate(struct page *start_page,
>  
>  	return 0;
>  }
> +
> +void register_page_bootmem_memmap(unsigned long section_nr,
> +				  struct page *start_page, unsigned long size)
> +{
> +	/* TODO */
> +}
>  #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>  
> diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
> index 6f896e7..eda55cd 100644
> --- a/arch/s390/mm/vmem.c
> +++ b/arch/s390/mm/vmem.c
> @@ -227,6 +227,12 @@ out:
>  	return ret;
>  }
>  
> +void register_page_bootmem_memmap(unsigned long section_nr,
> +				  struct page *start_page, unsigned long size)
> +{
> +	/* TODO */
> +}
> +
>  /*
>   * Add memory segment to the segment list if it doesn't overlap with
>   * an already present segment.
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index 6026fdd..53f7604 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -2059,6 +2059,12 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
>  	}
>  	return 0;
>  }
> +
> +void register_page_bootmem_memmap(unsigned long section_nr,
> +				  struct page *start_page, unsigned long size)
> +{
> +	/* TODO */
> +}
>  #endif /* CONFIG_SPARSEMEM_VMEMMAP */
>  
>  static void prot_init_common(unsigned long page_none,
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index e0d88ba..0075592 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -1138,6 +1138,58 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
>  	return 0;
>  }
>  
> +void register_page_bootmem_memmap(unsigned long section_nr,
> +				  struct page *start_page, unsigned long size)
> +{
> +	unsigned long addr = (unsigned long)start_page;
> +	unsigned long end = (unsigned long)(start_page + size);
> +	unsigned long next;
> +	pgd_t *pgd;
> +	pud_t *pud;
> +	pmd_t *pmd;
> +
> +	for (; addr < end; addr = next) {
> +		pte_t *pte = NULL;
> +
> +		pgd = pgd_offset_k(addr);
> +		if (pgd_none(*pgd)) {
> +			next = (addr + PAGE_SIZE) & PAGE_MASK;
> +			continue;
> +		}
> +		get_page_bootmem(section_nr, pgd_page(*pgd), MIX_SECTION_INFO);
> +
> +		pud = pud_offset(pgd, addr);
> +		if (pud_none(*pud)) {
> +			next = (addr + PAGE_SIZE) & PAGE_MASK;
> +			continue;
> +		}
> +		get_page_bootmem(section_nr, pud_page(*pud), MIX_SECTION_INFO);
> +
> +		if (!cpu_has_pse) {
> +			next = (addr + PAGE_SIZE) & PAGE_MASK;
> +			pmd = pmd_offset(pud, addr);
> +			if (pmd_none(*pmd))
> +				continue;
> +			get_page_bootmem(section_nr, pmd_page(*pmd),
> +					 MIX_SECTION_INFO);
> +
> +			pte = pte_offset_kernel(pmd, addr);
> +			if (pte_none(*pte))
> +				continue;
> +			get_page_bootmem(section_nr, pte_page(*pte),
> +					 SECTION_INFO);
> +		} else {
> +			next = pmd_addr_end(addr, end);
> +
> +			pmd = pmd_offset(pud, addr);
> +			if (pmd_none(*pmd))
> +				continue;
> +			get_page_bootmem(section_nr, pmd_page(*pmd),
> +					 SECTION_INFO);
> +		}
> +	}
> +}
> +
>  void __meminit vmemmap_populate_print_last(void)
>  {
>  	if (p_start) {
> diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
> index 1133e63..2d18235 100644
> --- a/include/linux/memory_hotplug.h
> +++ b/include/linux/memory_hotplug.h
> @@ -164,6 +164,8 @@ static inline void arch_refresh_nodedata(int nid, pg_data_t *pgdat)
>  
>  extern void register_page_bootmem_info_node(struct pglist_data *pgdat);
>  extern void put_page_bootmem(struct page *page);
> +extern void get_page_bootmem(unsigned long ingo, struct page *page,
> +			     unsigned long type);
>  
>  /*
>   * Lock for memory hotplug guarantees 1) all callbacks for memory hotplug
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 311be90..c607913 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1618,7 +1618,8 @@ int vmemmap_populate_basepages(struct page *start_page,
>  						unsigned long pages, int node);
>  int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
>  void vmemmap_populate_print_last(void);
> -
> +void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
> +				  unsigned long size);
>  
>  enum mf_flags {
>  	MF_COUNT_INCREASED = 1 << 0,
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 3a264a5..4589f5b 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -91,8 +91,8 @@ static void release_memory_resource(struct resource *res)
>  }
>  
>  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
> -static void get_page_bootmem(unsigned long info,  struct page *page,
> -			     unsigned long type)
> +void get_page_bootmem(unsigned long info,  struct page *page,
> +		      unsigned long type)
>  {
>  	unsigned long page_type;
>  
> @@ -164,8 +164,25 @@ static void register_page_bootmem_info_section(unsigned long start_pfn)
>  
>  }
>  #else
> -static inline void register_page_bootmem_info_section(unsigned long start_pfn)
> +static void register_page_bootmem_info_section(unsigned long start_pfn)
>  {
> +	unsigned long mapsize, section_nr;
> +	struct mem_section *ms;
> +	struct page *page, *memmap;
> +
> +	if (!pfn_valid(start_pfn))
> +		return;
> +
> +	section_nr = pfn_to_section_nr(start_pfn);
> +	ms = __nr_to_section(section_nr);
> +
> +	memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
> +
> +	page = virt_to_page(memmap);
> +	mapsize = sizeof(struct page) * PAGES_PER_SECTION;
> +	mapsize = PAGE_ALIGN(mapsize) >> PAGE_SHIFT;
> +
> +	register_page_bootmem_memmap(section_nr, memmap, PAGES_PER_SECTION);

You only handle memmap here. I think usemap should be also handled here.

Thanks
Wen Congyang

>  }
>  #endif
>  

^ permalink raw reply

* Re: [PATCH V5 3/3] powerpc/fsl-pci: Unify pci/pcie initialization code
From: Li Yang @ 2012-08-07  4:20 UTC (permalink / raw)
  To: Scott Wood
  Cc: Wood Scott-B07421, linuxppc-dev@lists.ozlabs.org, Li Yang-R58472,
	Jia Hongtao-B38951
In-Reply-To: <501FDE40.1060906@freescale.com>

On Mon, Aug 6, 2012 at 11:09 PM, Scott Wood <scottwood@freescale.com> wrote:
> On 08/05/2012 10:07 PM, Jia Hongtao-B38951 wrote:
>>
>>
>>> -----Original Message-----
>>> From: Wood Scott-B07421
>>> Sent: Saturday, August 04, 2012 12:28 AM
>>> To: Jia Hongtao-B38951
>>> Cc: linuxppc-dev@lists.ozlabs.org; galak@kernel.crashing.org; Li Yang-
>>> R58472; Wood Scott-B07421
>>> Subject: Re: [PATCH V5 3/3] powerpc/fsl-pci: Unify pci/pcie
>>> initialization code
>>>
>>> On 08/03/2012 05:14 AM, Jia Hongtao wrote:
>>>> -void __devinit fsl_pci_init(void)
>>>> +/* Checkout if PCI contains ISA node */
>>>> +static int of_pci_has_isa(struct device_node *pci_node)
>>>> +{
>>>> +   struct device_node *np;
>>>> +   int ret = 0;
>>>> +
>>>> +   if (!pci_node)
>>>> +           return 0;
>>>> +
>>>> +   read_lock(&devtree_lock);
>>>> +   np = pci_node->allnext;
>>>> +
>>>> +   /* Only scan the children of PCI node */
>>>> +   for (; np != pci_node->sibling; np = np->allnext) {
>>>> +           if (np->type && (of_node_cmp(np->type, "isa") == 0)
>>>> +               && of_node_get(np)) {
>>>> +                   ret = 1;
>>>> +                   break;
>>>> +           }
>>>> +   }
>>>> +
>>>> +   of_node_put(pci_node);
>>>> +   read_unlock(&devtree_lock);
>>>> +
>>>> +   return ret;
>>>> +}
>>>
>>> Why do you keep insisting on substituting your ISA search code here?
>>> What advantages does it have over the code that is already there?  It
>>> unnecessarily digs into the internals of the tree representation.
>>>
>>
>> I want ISA search is done from probe.
>
> Too bad.  You're breaking the case where there's no ISA node.
>

We can also take care of special cases with our approach if needed.
But it's not correct to assume the first PCI controller is the primary
one if there is no ISA node.  Your approach is still a band-aid to me.
 We can come back to this issue when we do find a proper solution.

>> Also this way is more efficient due
>> to we just search the children of PCI.
>
> It is not more efficient, because you're doing the search for every PCIe
> bus rather than once.  Not that it matters in this context.

We end up scanning at most a few PCI nodes instead of the whole device
tree for the primary.

>
>>>> +
>>>> +static int __devinit fsl_pci_probe(struct platform_device *pdev)
>>>>  {
>>>>     int ret;
>>>> -   struct device_node *node;
>>>>     struct pci_controller *hose;
>>>> -   dma_addr_t max = 0xffffffff;
>>>> +   int is_primary = 0;
>>>>
>>>> -   /* Callers can specify the primary bus using other means. */
>>>>     if (!fsl_pci_primary) {
>>>> -           /* If a PCI host bridge contains an ISA node, it's primary.
>>> */
>>>> -           node = of_find_node_by_type(NULL, "isa");
>>>> -           while ((fsl_pci_primary = of_get_parent(node))) {
>>>> -                   of_node_put(node);
>>>> -                   node = fsl_pci_primary;
>>>> -
>>>> -                   if (of_match_node(pci_ids, node))
>>>> -                           break;
>>>> -           }
>>>> +           is_primary = of_pci_has_isa(pdev->dev.of_node);
>>>> +           if (is_primary)
>>>> +                   fsl_pci_primary = pdev->dev.of_node;
>>>>     }
>>>
>>> As I explained before, this has to be done globally, not from the probe
>>> function, so we can assign a default primary bus if there isn't any ISA.
>>>  There are bugs in the Linux PPC PCI code relating to not having any
>>> primary bus.
>>>
>>> -Scott
>>
>> In my way of searching ISA you can also assign a default primary bus in board
>> specific files.
>
> That was meant for when the board file had an alternate way of searching
> for the primary bus (e.g. look for i8259), not as a replacement for the
> mechanism that guarantees there's a primary bus.
>
> You are causing a regression in the qemu_e500.c platform.

Can we fix the qemu device tree to address the problem if we do make
it a rule to use the ISA node to indicate the primary bus?

- Leo

^ permalink raw reply

* Re: [RFC PATCH V6 16/19] memory-hotplug: free memmap of sparse-vmemmap
From: Wen Congyang @ 2012-08-07  5:17 UTC (permalink / raw)
  To: wency
  Cc: linux-s390, linux-ia64, linux-acpi, len.brown, linux-sh,
	linux-kernel, cmetcalf, linux-mm, isimatu.yasuaki, paulus,
	minchan.kim, kosaki.motohiro, rientjes, cl, linuxppc-dev, akpm,
	liuj97
In-Reply-To: <1343980161-14254-17-git-send-email-wency@cn.fujitsu.com>

At 08/03/2012 03:49 PM, wency@cn.fujitsu.com Wrote:
> From: Wen Congyang <wency@cn.fujitsu.com>

This line is wrong. This patch is from Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>

> 
> All pages of virtual mapping in removed memory cannot be freed, since some pages
> used as PGD/PUD includes not only removed memory but also other memory. So the
> patch checks whether page can be freed or not.
> 
> How to check whether page can be freed or not?
>  1. When removing memory, the page structs of the revmoved memory are filled
>     with 0FD.
>  2. All page structs are filled with 0xFD on PT/PMD, PT/PMD can be cleared.
>     In this case, the page used as PT/PMD can be freed.
> 
> Applying patch, __remove_section() of CONFIG_SPARSEMEM_VMEMMAP is integrated
> into one. So __remove_section() of CONFIG_SPARSEMEM_VMEMMAP is deleted.
> 
> Note:  vmemmap_kfree() and vmemmap_free_bootmem() are not implemented for ia64,
> ppc, s390, and sparc.
> 
> CC: David Rientjes <rientjes@google.com>
> CC: Jiang Liu <liuj97@gmail.com>
> CC: Len Brown <len.brown@intel.com>
> CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> CC: Paul Mackerras <paulus@samba.org>
> CC: Christoph Lameter <cl@linux.com>
> Cc: Minchan Kim <minchan.kim@gmail.com>
> CC: Andrew Morton <akpm@linux-foundation.org>
> CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> CC: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> ---
>  arch/ia64/mm/discontig.c  |    8 +++
>  arch/powerpc/mm/init_64.c |    8 +++
>  arch/s390/mm/vmem.c       |    8 +++
>  arch/sparc/mm/init_64.c   |    8 +++
>  arch/x86/mm/init_64.c     |  119 +++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/mm.h        |    2 +
>  mm/memory_hotplug.c       |   17 +------
>  mm/sparse.c               |    5 +-
>  8 files changed, 158 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
> index 33943db..0d23b69 100644
> --- a/arch/ia64/mm/discontig.c
> +++ b/arch/ia64/mm/discontig.c
> @@ -823,6 +823,14 @@ int __meminit vmemmap_populate(struct page *start_page,
>  	return vmemmap_populate_basepages(start_page, size, node);
>  }
>  
> +void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
> +{
> +}
> +
> +void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
> +{
> +}
> +
>  void register_page_bootmem_memmap(unsigned long section_nr,
>  				  struct page *start_page, unsigned long size)
>  {
> diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
> index 3690c44..835a2b3 100644
> --- a/arch/powerpc/mm/init_64.c
> +++ b/arch/powerpc/mm/init_64.c
> @@ -299,6 +299,14 @@ int __meminit vmemmap_populate(struct page *start_page,
>  	return 0;
>  }
>  
> +void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
> +{
> +}
> +
> +void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
> +{
> +}
> +
>  void register_page_bootmem_memmap(unsigned long section_nr,
>  				  struct page *start_page, unsigned long size)
>  {
> diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
> index eda55cd..4b42b0b 100644
> --- a/arch/s390/mm/vmem.c
> +++ b/arch/s390/mm/vmem.c
> @@ -227,6 +227,14 @@ out:
>  	return ret;
>  }
>  
> +void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
> +{
> +}
> +
> +void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
> +{
> +}
> +
>  void register_page_bootmem_memmap(unsigned long section_nr,
>  				  struct page *start_page, unsigned long size)
>  {
> diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
> index 53f7604..d444f25 100644
> --- a/arch/sparc/mm/init_64.c
> +++ b/arch/sparc/mm/init_64.c
> @@ -2060,6 +2060,14 @@ int __meminit vmemmap_populate(struct page *start, unsigned long nr, int node)
>  	return 0;
>  }
>  
> +void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
> +{
> +}
> +
> +void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
> +{
> +}
> +
>  void register_page_bootmem_memmap(unsigned long section_nr,
>  				  struct page *start_page, unsigned long size)
>  {
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index 0075592..4e8f8a4 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -1138,6 +1138,125 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
>  	return 0;
>  }
>  
> +#define PAGE_INUSE 0xFD
> +
> +unsigned long find_and_clear_pte_page(unsigned long addr, unsigned long end,
> +			    struct page **pp, int *page_size)
> +{
> +	pgd_t *pgd;
> +	pud_t *pud;
> +	pmd_t *pmd;
> +	pte_t *pte;
> +	void *page_addr;
> +	unsigned long next;
> +
> +	*pp = NULL;
> +
> +	pgd = pgd_offset_k(addr);
> +	if (pgd_none(*pgd))
> +		return pgd_addr_end(addr, end);
> +
> +	pud = pud_offset(pgd, addr);
> +	if (pud_none(*pud))
> +		return pud_addr_end(addr, end);
> +
> +	if (!cpu_has_pse) {
> +		next = (addr + PAGE_SIZE) & PAGE_MASK;
> +		pmd = pmd_offset(pud, addr);
> +		if (pmd_none(*pmd))
> +			return next;
> +
> +		pte = pte_offset_kernel(pmd, addr);
> +		if (pte_none(*pte))
> +			return next;
> +
> +		*page_size = PAGE_SIZE;
> +		*pp = pte_page(*pte);
> +	} else {
> +		next = pmd_addr_end(addr, end);
> +
> +		pmd = pmd_offset(pud, addr);
> +		if (pmd_none(*pmd))
> +			return next;
> +
> +		*page_size = PMD_SIZE;
> +		*pp = pmd_page(*pmd);
> +	}
> +
> +	/*
> +	 * Removed page structs are filled with 0xFD.
> +	 */
> +	memset((void *)addr, PAGE_INUSE, next - addr);
> +
> +	page_addr = page_address(*pp);
> +
> +	/*
> +	 * Check the page is filled with 0xFD or not.
> +	 * memchr_inv() returns the address. In this case, we cannot
> +	 * clear PTE/PUD entry, since the page is used by other.
> +	 * So we cannot also free the page.
> +	 *
> +	 * memchr_inv() returns NULL. In this case, we can clear
> +	 * PTE/PUD entry, since the page is not used by other.
> +	 * So we can also free the page.
> +	 */
> +	if (memchr_inv(page_addr, PAGE_INUSE, *page_size)) {
> +		*pp = NULL;
> +		return next;
> +	}
> +
> +	if (!cpu_has_pse)
> +		pte_clear(&init_mm, addr, pte);
> +	else
> +		pmd_clear(pmd);
> +
> +	return next;
> +}
> +
> +void vmemmap_kfree(struct page *memmap, unsigned long nr_pages)
> +{
> +	unsigned long addr = (unsigned long)memmap;
> +	unsigned long end = (unsigned long)(memmap + nr_pages);
> +	unsigned long next;
> +	struct page *page;
> +	int page_size;
> +
> +	for (; addr < end; addr = next) {
> +		page = NULL;
> +		page_size = 0;
> +		next = find_and_clear_pte_page(addr, end, &page, &page_size);
> +		if (!page)
> +			continue;
> +
> +		free_pages((unsigned long)page_address(page),
> +			    get_order(page_size));
> +		__flush_tlb_one(addr);
> +	}
> +}
> +
> +void vmemmap_free_bootmem(struct page *memmap, unsigned long nr_pages)
> +{
> +	unsigned long addr = (unsigned long)memmap;
> +	unsigned long end = (unsigned long)(memmap + nr_pages);
> +	unsigned long next;
> +	struct page *page;
> +	int page_size;
> +	unsigned long magic;
> +
> +	for (; addr < end; addr = next) {
> +		page = NULL;
> +		page_size = 0;
> +		next = find_and_clear_pte_page(addr, end, &page, &page_size);
> +		if (!page)
> +			continue;
> +
> +		magic = (unsigned long) page->lru.next;
> +		if (magic == SECTION_INFO)
> +			put_page_bootmem(page);
> +		flush_tlb_kernel_range(addr, end);
> +	}
> +}
> +
>  void register_page_bootmem_memmap(unsigned long section_nr,
>  				  struct page *start_page, unsigned long size)
>  {
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index c607913..fb0d1fc 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1620,6 +1620,8 @@ int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
>  void vmemmap_populate_print_last(void);
>  void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
>  				  unsigned long size);
> +void vmemmap_kfree(struct page *memmpa, unsigned long nr_pages);
> +void vmemmap_free_bootmem(struct page *memmpa, unsigned long nr_pages);
>  
>  enum mf_flags {
>  	MF_COUNT_INCREASED = 1 << 0,
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 4589f5b..a1f3490 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -300,19 +300,6 @@ static int __meminit __add_section(int nid, struct zone *zone,
>  	return register_new_memory(nid, __pfn_to_section(phys_start_pfn));
>  }
>  
> -#ifdef CONFIG_SPARSEMEM_VMEMMAP
> -static int __remove_section(struct zone *zone, struct mem_section *ms)
> -{
> -	int ret = -EINVAL;
> -
> -	if (!valid_section(ms))
> -		return ret;
> -
> -	ret = unregister_memory_section(ms);
> -
> -	return ret;
> -}
> -#else
>  static int __remove_section(struct zone *zone, struct mem_section *ms)
>  {
>  	unsigned long flags;
> @@ -329,9 +316,9 @@ static int __remove_section(struct zone *zone, struct mem_section *ms)
>  	pgdat_resize_lock(pgdat, &flags);
>  	sparse_remove_one_section(zone, ms);
>  	pgdat_resize_unlock(pgdat, &flags);
> -	return 0;
> +
> +	return ret;
>  }
> -#endif
>  
>  /*
>   * Reasonably generic function for adding memory.  It is
> diff --git a/mm/sparse.c b/mm/sparse.c
> index fac95f2..ab9d755 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -613,12 +613,13 @@ static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
>  	/* This will make the necessary allocations eventually. */
>  	return sparse_mem_map_populate(pnum, nid);
>  }
> -static void __kfree_section_memmap(struct page *memmap, unsigned long nr_pages)
> +static void __kfree_section_memmap(struct page *page, unsigned long nr_pages)
>  {
> -	return; /* XXX: Not implemented yet */
> +	vmemmap_kfree(page, nr_pages);
>  }
>  static void free_map_bootmem(struct page *page, unsigned long nr_pages)
>  {
> +	vmemmap_free_bootmem(page, nr_pages);
>  }
>  #else
>  static struct page *__kmalloc_section_memmap(unsigned long nr_pages)

^ permalink raw reply

* Re: [RFC PATCH V6 13/19] memory-hotplug: check page type in get_page_bootmem
From: Wen Congyang @ 2012-08-07  5:31 UTC (permalink / raw)
  To: isimatu.yasuaki
  Cc: linux-s390, linux-ia64, linux-acpi, len.brown, linux-sh,
	linux-kernel, cmetcalf, linux-mm, paulus, minchan.kim,
	kosaki.motohiro, rientjes, cl, linuxppc-dev, akpm, liuj97
In-Reply-To: <1343980161-14254-14-git-send-email-wency@cn.fujitsu.com>

At 08/03/2012 03:49 PM, wency@cn.fujitsu.com Wrote:
> From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> 
> There is a possibility that get_page_bootmem() is called to the same page many
> times. So when get_page_bootmem is called to the same page, the function only
> increments page->_count.
> 
> CC: David Rientjes <rientjes@google.com>
> CC: Jiang Liu <liuj97@gmail.com>
> CC: Len Brown <len.brown@intel.com>
> CC: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> CC: Paul Mackerras <paulus@samba.org>
> CC: Christoph Lameter <cl@linux.com>
> Cc: Minchan Kim <minchan.kim@gmail.com>
> CC: Andrew Morton <akpm@linux-foundation.org>
> CC: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
> CC: Wen Congyang <wency@cn.fujitsu.com>
> Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
> ---
>  mm/memory_hotplug.c |   15 +++++++++++----
>  1 files changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index 5f9f8c7..710e593 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -95,10 +95,17 @@ static void release_memory_resource(struct resource *res)
>  static void get_page_bootmem(unsigned long info,  struct page *page,
>  			     unsigned long type)
>  {
> -	page->lru.next = (struct list_head *) type;
> -	SetPagePrivate(page);
> -	set_page_private(page, info);
> -	atomic_inc(&page->_count);
> +	unsigned long page_type;
> +
> +	page_type = (unsigned long) page->lru.next;
> +	if (type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
> +	    type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE){

I think it should be page_type not type here.

Thanks
Wen Congyang

> +		page->lru.next = (struct list_head *) type;
> +		SetPagePrivate(page);
> +		set_page_private(page, info);
> +		atomic_inc(&page->_count);
> +	} else
> +		atomic_inc(&page->_count);
>  }
>  
>  /* reference to __meminit __free_pages_bootmem is valid

^ permalink raw reply

* RE: [PATCH V4 3/3] powerpc/fsl-pci: Unify pci/pcie initialization code
From: Jia Hongtao-B38951 @ 2012-08-07  6:23 UTC (permalink / raw)
  To: Wood Scott-B07421; +Cc: linuxppc-dev@lists.ozlabs.org, Li Yang-R58472
In-Reply-To: <501FDF9B.4030304@freescale.com>

PiAtLS0tLU9yaWdpbmFsIE1lc3NhZ2UtLS0tLQ0KPiBGcm9tOiBXb29kIFNjb3R0LUIwNzQyMQ0K
PiBTZW50OiBNb25kYXksIEF1Z3VzdCAwNiwgMjAxMiAxMToxNiBQTQ0KPiBUbzogSmlhIEhvbmd0
YW8tQjM4OTUxDQo+IENjOiBXb29kIFNjb3R0LUIwNzQyMTsgS3VtYXIgR2FsYTsgbGludXhwcGMt
ZGV2QGxpc3RzLm96bGFicy5vcmc7IExpDQo+IFlhbmctUjU4NDcyDQo+IFN1YmplY3Q6IFJlOiBb
UEFUQ0ggVjQgMy8zXSBwb3dlcnBjL2ZzbC1wY2k6IFVuaWZ5IHBjaS9wY2llDQo+IGluaXRpYWxp
emF0aW9uIGNvZGUNCj4gDQo+IE9uIDA4LzA1LzIwMTIgMDk6MzkgUE0sIEppYSBIb25ndGFvLUIz
ODk1MSB3cm90ZToNCj4gPg0KPiA+DQo+ID4+IC0tLS0tT3JpZ2luYWwgTWVzc2FnZS0tLS0tDQo+
ID4+IEZyb206IFdvb2QgU2NvdHQtQjA3NDIxDQo+ID4+IFNlbnQ6IFNhdHVyZGF5LCBBdWd1c3Qg
MDQsIDIwMTIgMTI6MDQgQU0NCj4gPj4gVG86IEppYSBIb25ndGFvLUIzODk1MQ0KPiA+PiBDYzog
S3VtYXIgR2FsYTsgbGludXhwcGMtZGV2QGxpc3RzLm96bGFicy5vcmc7IFdvb2QgU2NvdHQtQjA3
NDIxOyBMaQ0KPiA+PiBZYW5nLVI1ODQ3Mg0KPiA+PiBTdWJqZWN0OiBSZTogW1BBVENIIFY0IDMv
M10gcG93ZXJwYy9mc2wtcGNpOiBVbmlmeSBwY2kvcGNpZQ0KPiA+PiBpbml0aWFsaXphdGlvbiBj
b2RlDQo+ID4+DQo+ID4+IE9uIDA4LzAyLzIwMTIgMTA6MzkgUE0sIEppYSBIb25ndGFvLUIzODk1
MSB3cm90ZToNCj4gPj4+DQo+ID4+Pg0KPiA+Pj4+IC0tLS0tT3JpZ2luYWwgTWVzc2FnZS0tLS0t
DQo+ID4+Pj4gRnJvbTogS3VtYXIgR2FsYSBbbWFpbHRvOmdhbGFrQGtlcm5lbC5jcmFzaGluZy5v
cmddDQo+ID4+Pj4gU2VudDogVGh1cnNkYXksIEF1Z3VzdCAwMiwgMjAxMiA4OjI0IFBNDQo+ID4+
Pj4gVG86IEppYSBIb25ndGFvLUIzODk1MQ0KPiA+Pj4+IENjOiBsaW51eHBwYy1kZXZAbGlzdHMu
b3psYWJzLm9yZzsgV29vZCBTY290dC1CMDc0MjE7IExpDQo+ID4+Pj4gWWFuZy1SNTg0NzINCj4g
Pj4+PiBTdWJqZWN0OiBSZTogW1BBVENIIFY0IDMvM10gcG93ZXJwYy9mc2wtcGNpOiBVbmlmeSBw
Y2kvcGNpZQ0KPiA+Pj4+IGluaXRpYWxpemF0aW9uIGNvZGUNCj4gPj4+Pg0KPiA+Pj4+IFlvdSBu
ZWVkIHRvIGNvbnZlcnQgYWxsIGJvYXJkcyB0byB1c2UgZnNsX3BjaV9pbml0IGJlZm9yZSB0aGlz
IHBhdGNoLg0KPiA+Pj4+IE90aGVyd2lzZSB3ZSdsbCBlbmQgdXAgd2l0aCBQQ0kgZ2V0dGluZyBp
bml0aWFsaXplZCB0d2ljZSBvbiBib2FyZHMuDQo+ID4+Pj4NCj4gPj4+PiAtIGsNCj4gPj4+DQo+
ID4+PiBJZiB3ZSBjb3ZlcnQgYWxsIGJvYXJkcyB3aXRoIHBsYXRmb3JtIGRyaXZlciBpbiB0aGlz
IHBhdGNoIFBDSSB3aWxsDQo+ID4+PiBiZSBpbml0aWFsaXplZCBvbmx5IG9uY2Ugd2l0aG91dCBj
b252ZXJ0aW5nIGFsbCBib2FyZHMgdG8gdXNlDQo+ID4+PiBmc2xfcGNpX2luaXQgZmlyc3QuDQo+
ID4+DQo+ID4+IFRoZW4gd2UnZCBoYXZlIHRvIHBpY2sgYXBhcnQgY29yZSBjaGFuZ2VzIGZyb20g
Ym9hcmQgY2hhbmdlcyB3aGVuDQo+ID4+IHJldmlld2luZy4NCj4gPj4NCj4gPj4+IElmIHdlIGNv
bnZlcnQgYWxsIGJvYXJkcyB0byB1c2UgZnNsX3BjaV9pbml0IGJlZm9yZSB0aGlzIHBhdGNoIGFu
ZA0KPiA+Pj4gY29udmVydCB0aGVtIHRvIHVzZSBwbGF0Zm9ybSBkcml2ZXIgYWdhaW4gYWZ0ZXIg
dGhpcyBwYXRjaC4gVGhlbg0KPiA+Pj4gYmV0d2VlbiB0aGlzIHBhdGNoIGFuZCBuZXh0IHBjaSB3
aWxsIGJlIGluaXRpYWxpemVkIHR3aWNlIHRvby4NCj4gPj4NCj4gPj4gV2h5PyAgVGhhdCBvbmUg
cGF0Y2ggc2hvdWxkIGJvdGggY3JlYXRlIHRoZSBwbGF0Zm9ybSBkcml2ZXIgYW5kDQo+ID4+IHJl
bW92ZSB0aGUgaW5pdCBmcm9tIGZzbF9wY2lfaW5pdCgpIC0tIGV4Y2VwdCB0aGluZ3MgbGlrZSBw
cmltYXJ5IGJ1cw0KPiA+PiBkZXRlY3Rpb24gd2hpY2ggaGFzIHRvIGhhcHBlbiBnbG9iYWxseS4N
Cj4gPj4NCj4gPj4gLVNjb3R0DQo+ID4NCj4gPiAiT25lIHBhdGNoIGJvdGggY3JlYXRlIHRoZSBw
bGF0Zm9ybSBkcml2ZXIgYW5kIHJlbW92ZSB0aGUgaW5pdCBmcm9tDQo+ID4gZnNsX3BjaV9pbml0
KCkiIG1lYW5zIHdlIHNob3VsZCBjcmVhdGUgcGxhdGZvcm0gZHJpdmVyIGFuZCBhcHBsaWVkIHRv
DQo+ID4gYWxsIGJvYXJkcy4gSWYgc28gd2h5IG5vdCBqdXN0IGRpcmVjdGx5IGNvbnZlcnQgYWxs
IGJvYXJkcyB1c2luZw0KPiA+IHBsYXRmb3JtIGRyaXZlcj8NCj4gDQo+IEJlY2F1c2UgaXQncyBo
YXJkZXIgdG8gcmV2aWV3IHdoZW4geW91IGhhdmUgYSBidW5jaCBvZiBib2FyZCBjb2RlIGluIHRo
ZQ0KPiBwYXRjaCBpbiBhZGRpdGlvbiB0byBjb3JlIGNoYW5nZXMuDQo+IA0KPiBCZWNhdXNlIHlv
dSBtaWdodCB3YW50IHBlb3BsZSB0byBhY3R1YWxseSB0ZXN0IG9uIHRoZSBib2FyZHMgaW4gcXVl
c3Rpb24NCj4gd2hlbiBjb252ZXJ0aW5nLCBlc3BlY2lhbGx5IGdpdmVuIHRoZSBjaGFuZ2UgaW4g
aG93IHByaW1hcnkgYnVzZXMgYXJlDQo+IGRldGVybWluZWQsIGFuZCB0aGF0IHNvbWUgYm9hcmRz
IG1heSBuZWVkIHRvIHByb3ZpZGUgdGhlaXIgb3duDQo+IGFsdGVybmF0aXZlLg0KPiANCj4gLVNj
b3R0DQoNCkJ1dCBpZiB3ZSBzZXBhcmF0ZSB0aGUgY29yZSBjaGFuZ2VzIGFuZCB0aGUgYm9hcmRz
IHVwZGF0ZSwgYmV0d2VlbiB0aGlzIHR3bw0KcGF0Y2hlcyBQQ0kgd2lsbCBiZSBpbml0aWFsaXpl
ZCB0d2ljZS4NCg0KLUhvbmd0YW8uDQoNCg==

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox