Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/1] arm64: dts: msm8996: Add SCM DT node
From: Sarangdhar Joshi @ 2016-10-29  0:14 UTC (permalink / raw)
  To: linux-arm-kernel

Add SCM DT node to enable SCM functionality on MSM8996.

Signed-off-by: Sarangdhar Joshi <spjoshi@codeaurora.org>
---
 arch/arm64/boot/dts/qcom/msm8996.dtsi | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 9e960c1..4927097 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -164,6 +164,12 @@
 
 	};
 
+	firmware {
+		scm {
+			compatible = "qcom,scm-msm8996";
+		};
+	};
+
 	tcsr_mutex: hwlock {
 		compatible = "qcom,tcsr-mutex";
 		syscon = <&tcsr_mutex_regs 0 0x1000>;
-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

^ permalink raw reply related

* [PATCH v2 3/4] arm64: arch_timer: Work around Erratum Hisilicon-161601
From: Ding Tianhong @ 2016-10-29  2:05 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161028160000.GB1076@arm.com>



On 2016/10/29 0:00, Will Deacon wrote:
> Hi Ding,
> 
> On Thu, Oct 27, 2016 at 03:34:10PM +0800, Ding Tianhong wrote:
>> Erratum Hisilicon-161601 says that the ARM generic timer counter "has the
>> potential to contain an erroneous value when the timer value changes".
>> Accesses to TVAL (both read and write) are also affected due to the implicit counter
>> read.  Accesses to CVAL are not affected.
>>
>> The workaround is to reread the system count registers until the value of the second
>> read is larger than the first one by less than 32, the system counter can be guaranteed
>> not to return wrong value twice by back-to-back read and the error value is always larger
>> than the correct one by 32. Writes to TVAL are replaced with an equivalent write to CVAL.
>>
>> The workaround is enabled if the hisilicon,erratum-161601 property is found in
>> the timer node in the device tree.  This can be overridden with the
>> clocksource.arm_arch_timer.hisilicon-161601 boot parameter, which allows KVM
>> users to enable the workaround until a mechanism is implemented to
>> automatically communicate this information.
>>
>> Fix some description for fsl erratum a008585.
>>
>> v2: Significant rework based on feedback, including seperate the fsl erratum a008585
>>     to another patch, update the erratum name and remove unwanted code.
>>
>> Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
>> ---
> 
> [...]
> 
>> diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig
>> index 8a753fd..4aafb6a 100644
>> --- a/drivers/clocksource/Kconfig
>> +++ b/drivers/clocksource/Kconfig
>> @@ -312,8 +312,20 @@ config FSL_ERRATUM_A008585
>>  	help
>>  	  This option enables a workaround for Freescale/NXP Erratum
>>  	  A-008585 ("ARM generic timer may contain an erroneous
>> -	  value").  The workaround will only be active if the
>> +	  value").  The workaround will be active if the
>>  	  fsl,erratum-a008585 property is found in the timer node.
>> +	  This can be overridden with the clocksource.arm_arch_timer.fsl-a008585
>> +	  boot parameter.
>> +
>> +config HISILICON_ERRATUM_161601
>> +	bool "Workaround for Hisilicon Erratum 161601"
>> +	default y
>> +	depends on ARM_ARCH_TIMER && ARM64
>> +	help
>> +	  This option enables a workaround for Hisilicon Erratum
>> +	  161601. The workaround will be active if the hisilicon,erratum-161601
>> +	  property is found in the timer node. This can be overridden with
>> +	  the clocksource.arm_arch_timer.hisilicon-161601 boot parameter.
> 
> I'm really not keen on having a kernel commandline parameter for this.
> It's not something we've done for other, similar errata (e.g. CNTFRQ
> reporting the wrong value) and I think it's a slippery slope to having
> more of these workarounds controlled at boot-time. If you have a board
> that is affected by this, it's always going to need the workaround. Why
> would you turn it off?
> 

OK, miss it.

> Will
> 
> .
> 

^ permalink raw reply

* [PATCH v2 3/4] arm64: arch_timer: Work around Erratum Hisilicon-161601
From: Ding Tianhong @ 2016-10-29  2:50 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <8267644f-c488-2d02-3dd0-c7d0ed23babf@arm.com>



On 2016/10/27 20:23, Marc Zyngier wrote:
> On 27/10/16 13:17, Ding Tianhong wrote:
>>
>>
>> On 2016/10/27 18:58, Marc Zyngier wrote:
>>> On 27/10/16 08:34, Ding Tianhong wrote:
>>>> Erratum Hisilicon-161601 says that the ARM generic timer counter "has the
>>>> potential to contain an erroneous value when the timer value changes".
>>>> Accesses to TVAL (both read and write) are also affected due to the implicit counter
>>>> read.  Accesses to CVAL are not affected.
>>>>
>>>> The workaround is to reread the system count registers until the value of the second
>>>> read is larger than the first one by less than 32, the system counter can be guaranteed
>>>> not to return wrong value twice by back-to-back read and the error value is always larger
>>>> than the correct one by 32. Writes to TVAL are replaced with an equivalent write to CVAL.
>>>>
>>>> The workaround is enabled if the hisilicon,erratum-161601 property is found in
>>>> the timer node in the device tree.  This can be overridden with the
>>>> clocksource.arm_arch_timer.hisilicon-161601 boot parameter, which allows KVM
>>>> users to enable the workaround until a mechanism is implemented to
>>>> automatically communicate this information.
>>>>
>>>> Fix some description for fsl erratum a008585.
>>>>
>>>> v2: Significant rework based on feedback, including seperate the fsl erratum a008585
>>>>     to another patch, update the erratum name and remove unwanted code.
>>>>
>>>> Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
>>>> ---
>>>>  Documentation/arm64/silicon-errata.txt |  1 +
>>>>  Documentation/kernel-parameters.txt    |  9 ++++
>>>>  arch/arm64/include/asm/arch_timer.h    | 28 ++++++++++-
>>>>  drivers/clocksource/Kconfig            | 14 +++++-
>>>>  drivers/clocksource/arm_arch_timer.c   | 88 ++++++++++++++++++++++++++--------
>>>>  5 files changed, 118 insertions(+), 22 deletions(-)
>>>>
>>>> diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt
>>>> index 405da11..70c5d5e 100644
>>>> --- a/Documentation/arm64/silicon-errata.txt
>>>> +++ b/Documentation/arm64/silicon-errata.txt
>>>> @@ -63,3 +63,4 @@ stable kernels.
>>>>  | Cavium         | ThunderX SMMUv2 | #27704          | N/A		       |
>>>>  |                |                 |                 |                         |
>>>>  | Freescale/NXP  | LS2080A/LS1043A | A-008585        | FSL_ERRATUM_A008585     |
>>>> +| Hisilicon      | Hip05/Hip06/Hip07 | #161601       | HISILICON_ERRATUM_161601|
>>>
>>> I've already commented on the alignment. Please read my initial review.
>>>
>>
>> It sees misunderstood, fix it this time.
>>
>>>> diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
>>>> index 6fa1d8a..735b4b6 100644
>>>> --- a/Documentation/kernel-parameters.txt
>>>> +++ b/Documentation/kernel-parameters.txt
>>>> @@ -707,6 +707,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
>>>>  			erratum.  If unspecified, the workaround is
>>>>  			enabled based on the device tree.
>>>>  
>>>> +	clocksource.arm_arch_timer.hisilicon-161601=
>>>> +			[ARM64]
>>>> +			Format: <bool>
>>>> +			Enable/disable the workaround of Hisilicon
>>>> +			erratum 161601.  This can be useful for KVM
>>>> +			guests, if the guest device tree doesn't show the
>>>> +			erratum.  If unspecified, the workaround is
>>>> +			enabled based on the device tree.
>>>> +
>>>>  	clearcpuid=BITNUM [X86]
>>>>  			Disable CPUID feature X for the kernel. See
>>>>  			arch/x86/include/asm/cpufeatures.h for the valid bit
>>>> diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
>>>> index 118719d8..49b3041 100644
>>>> --- a/arch/arm64/include/asm/arch_timer.h
>>>> +++ b/arch/arm64/include/asm/arch_timer.h
>>>> @@ -29,7 +29,7 @@
>>>>  
>>>>  #include <clocksource/arm_arch_timer.h>
>>>>  
>>>> -#if IS_ENABLED(CONFIG_FSL_ERRATUM_A008585)
>>>> +#if IS_ENABLED(CONFIG_FSL_ERRATUM_A008585) || IS_ENABLED(CONFIG_HISILICON_ERRATUM_161601)
>>>>  extern struct static_key_false arch_timer_read_ool_enabled;
>>>>  #define needs_timer_erratum_workaround() \
>>>>  	static_branch_unlikely(&arch_timer_read_ool_enabled)
>>>> @@ -65,11 +65,35 @@ extern struct arch_timer_erratum_workaround *erratum_workaround;
>>>>  	_new;						\
>>>>  })
>>>>  
>>>> +
>>>> +
>>>> +/*
>>>> + * The number of retries is an arbitrary value well beyond the highest number
>>>> + * of iterations the loop has been observed to take.
>>>> + * Verify whether the value of the second read is larger than the first by
>>>> + * less than 32 is the only way to confirm the value is correct, the system
>>>> + * counter can be guaranteed not to return wrong value twice by back-to-back read
>>>> + * and the error value is always larger than the correct one by 32.
>>>> + */
>>>> +#define __hisi_161601_read_reg(reg) ({				\
>>>> +	u64 _old, _new;						\
>>>> +	int _retries = 200;					\
>>>
>>> Please document how this value was found (either in the code or in the
>>> commit message).
>>
>> It really difficult to give the accurate standard, theoretically the error should not happened
>> twice together, so maybe 2 is enough here, I just give a arbitrary value.
>>
>>>
>>>> +								\
>>>> +	do {							\
>>>> +		_old = read_sysreg(reg);			\
>>>> +		_new = read_sysreg(reg);			\
>>>> +		_retries--;					\
>>>> +	} while (unlikely((_new - _old) >> 5) && _retries);	\
>>>> +								\
>>>> +	WARN_ON_ONCE(!_retries);				\
>>>> +	_new;							\
>>>> +})
>>>
>>> Same remark as in the previous patch.
>>>
>>
>> I think the sentence *Verify whether the value of the second read is larger than the first by
>> less than 32 is the only way to confirm the value is correct* could explain why should *(_new - _old) >> 5*.
>> it is the same as (_new - _old)/32, also mean the _new should never bigger than _old more than 32.
> 
> This is not about the explanation of the erratum, but about the location
> of the #define, which can be made private to the .c file instead of
> being globally visible.
> 

Ok, update it .

Thanks
Ding

> Thanks,
> 
> 	M.
> 

^ permalink raw reply

* [PATCH 1/2] PCI: iproc: Support DT property for ignoring aborts when probing
From: Rafał Miłecki @ 2016-10-29  6:14 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1b58db80-c9ff-d4d6-0df1-d80d1c03bc45@broadcom.com>

On 28 October 2016 at 18:58, Ray Jui <ray.jui@broadcom.com> wrote:
> On 10/28/2016 8:31 AM, Rafa? Mi?ecki wrote:
>> On 20 April 2016 at 20:18, Ray Jui <ray.jui@broadcom.com> wrote:
>>> Hi Rafal/Florian/Arnd,
>>>
>>> After a couple days of email exchange with the ASIC team, I think I've
>>> figured out the behavior on all of the Broadcom SoCs that use this iProc
>>> PCIe controller.
>>>
>>> On NSP, Cygnus, and NS2:
>>> - There's an APB error enable register at offset 0xf40 from the iProc PCIe
>>> controller's base address. If one clears bit 0 (enabled by default after
>>> chip POR) of that register, one can stop this from being forwarded to "iProc
>>> host" as an APB error/external imprecise abort
>>> - I will submit a patch to the iProc PCIe driver to disable this error
>>> forwarding
>>>
>>> On NS:
>>> - Unfortunately, there's no such control register in NS. In other words, we
>>> cannot disable this error at the PCIe controller level
>>> - FSR code corresponds to external (bit[12] = '1'), read (bit[11] = '0'),
>>> imprecise abort (bits[10][3:0] = '1''0110'), i.e., external imprecise abort
>>> triggered by read access. Our ASIC team believes a read access to a
>>> non-exist APB register can also trigger an abort with the same FSR code.
>>> Note this is the tricky part, by registering an abort hook that skips this
>>> particular FSR, one has a chance of skipping other aborts triggered by
>>> accessing invalid APB registers. But given that this cannot be disabled for
>>> the PCIe controller NS, I'm not sure what approach we should take. Any
>>> thoughts?
>>
>> It's really late reply but I wanted to finally handle this problem.
>>
>> From Ray's e-mail it seems Northstar is the only platform requiring
>> this workaround. So we don't have to worry about arm64.
>
> Yes, Northstar is the only platform that requires this workaround. Even
> the arm32 platforms like NSP and Cygnus can disable unsupported request
> being forwarded as APB error. I've recently sent out a patch series to
> fix this for all other platforms, and sorry I should have included you
> in the email but I did not. I'll include you when revision 2 is sent out.
>
>>
>> We have two options then:
>> 1) Add workaround in arch/arm/mach-bcm/bcm_5301x.c
>> 2) Add workaround into built-in drivers/pci/host/pcie-iproc-fault.c
>
> How do you plan to implement pcie-iproc-fault.c? If it's similar to what
> you have now, then I think it fits more to bcm_5301x.c

Yes, I just wanted to have a simple file with 2 functions there: one
adding a hook and second being a callback.

-- 
Rafa?

^ permalink raw reply

* [PATCH v2 2/6] net: phy: broadcom: Add BCM54810 PHY entry
From: Andrew Lunn @ 2016-10-29  8:18 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1477688219-3871-3-git-send-email-jon.mason@broadcom.com>

On Fri, Oct 28, 2016 at 04:56:55PM -0400, Jon Mason wrote:
> The BCM54810 PHY requires some semi-unique configuration, which results
> in some additional configuration in addition to the standard config.
> Also, some users of the BCM54810 require the PHY lanes to be swapped.
> Since there is no way to detect this, add a device tree query to see if
> it is applicable.
> 
> Inspired-by: Vikas Soni <vsoni@broadcom.com>
> Signed-off-by: Jon Mason <jon.mason@broadcom.com>
> ---
>  drivers/net/phy/Kconfig    |  2 +-
>  drivers/net/phy/broadcom.c | 58 +++++++++++++++++++++++++++++++++++++++++++++-
>  include/linux/brcmphy.h    | 10 ++++++++

Hi Jon

The binding documentation is missing.

> +	if (of_property_read_bool(np, "brcm,enet-phy-lane-swap")) {
> +		/* Lane Swap - Undocumented register...magic! */
> +		ret = bcm_phy_write_exp(phydev, MII_BCM54XX_EXP_SEL_ER + 0x9,
> +					0x11B);
> +		if (ret < 0)
> +			return ret;
> +	}
> +

I wounder if this property could be made generic? What exactly are you
swapping? Rx and Tx lanes? Maybe we should add it to phy.txt?

	  Andrew

^ permalink raw reply

* [PATCH] video: ARM CLCD: fix Vexpress regression
From: Linus Walleij @ 2016-10-29  8:23 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1476945992-5164-1-git-send-email-linus.walleij@linaro.org>

On Thu, Oct 20, 2016 at 8:46 AM, Linus Walleij <linus.walleij@linaro.org> wrote:

> The CLCD does not come up on Versatile Express as it does not
> (currently) have a syscon node for controlling the block apart
> from the CLCD itself. Make sure the .init() function can bail
> out without an error making it probe again.
>
> Reported-by: Amit Pundir <amit.pundir@linaro.org>
> Signed-off-by: Linus Walleij <linus.walleij@linaro.org>

Tomi can you please pick this up?

Yours,
Linus Walleij

^ permalink raw reply

* pinctrl: mediatek: build failure if CONFIG_IRQ_DOMAIN is not set
From: Linus Walleij @ 2016-10-29  8:36 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1477675233.2167.11.camel@tiscali.nl>

On Fri, Oct 28, 2016 at 7:20 PM, Paul Bolle <pebolle@tiscali.nl> wrote:

> 3) Would you like me to submit a proper (but lightly tested) patch or
> do you prefer to fix this yourself?

Please send a tested patch, I'll apply it.

Thanks for finding this!

Yours,
Linus Walleij

^ permalink raw reply

* [RFC][PATCH] arm64: Add support for CONFIG_DEBUG_VIRTUAL
From: Ard Biesheuvel @ 2016-10-29  8:39 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <6a76edf2-3f80-fb18-4fc1-2e0b1f085d20@redhat.com>

On 28 October 2016 at 23:07, Laura Abbott <labbott@redhat.com> wrote:
>>>> diff --git a/arch/arm64/mm/physaddr.c b/arch/arm64/mm/physaddr.c
>>>> new file mode 100644
>>>> index 0000000..6c271e2
>>>> --- /dev/null
>>>> +++ b/arch/arm64/mm/physaddr.c
>>>> @@ -0,0 +1,17 @@
>>>> +#include <linux/mm.h>
>>>> +
>>>> +#include <asm/memory.h>
>>>> +
>>>> +unsigned long __virt_to_phys(unsigned long x)
>>>> +{
>>>> +    phys_addr_t __x = (phys_addr_t)x;
>>>> +
>>>> +    if (__x & BIT(VA_BITS - 1)) {
>>>> +        /* The bit check ensures this is the right range */
>>>> +        return (__x & ~PAGE_OFFSET) + PHYS_OFFSET;
>>>> +    } else {
>>>> +        VIRTUAL_BUG_ON(x < kimage_vaddr || x > (unsigned long)_end);
>>>
>>>
>>> IIUC, in (3) you were asking if the last check should be '>' or '>='?
>>>
>>> To match high_memory, I suspect the latter, as _end doesn't fall within
>>> the mapped virtual address space.
>>>
>>
>> I was actually concerned about if _end would be correct with KASLR.
>> Ard confirmed that it gets fixed up to be correct. I'll change the
>> check to check for >=.
>>
>
> While testing this, I found two places with __pa(_end) to get bounds,
> one in arm64 code and one in memblock code. x86 gets away with this
> because memblock is actually __pa_symbol and x86 does image placement
> different and can check against the maximum image size. I think
> including _end in __pa_symbol but excluding it from the generic
> __virt_to_phys makes sense. It's a bit nicer than doing _end - 1 +
> 1 everywhere.
>

Could we redefine __pa_symbol() under CONFIG_DEBUG_VIRTUAL to
something that checks (x >= kimage_vaddr + TEXT_OFFSET || x <=
(unsigned long)_end), i.e., reject linear virtual addresses? (Assuming
my understanding of the meaning of __pa_symbol() is correct)

^ permalink raw reply

* [PATCH v3 1/6] pinctrl: sunxi: Deal with configless pins
From: Linus Walleij @ 2016-10-29  9:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <ae812275bfb0f0ded2a73b571bfcecffcd747126.1476971126.git-series.maxime.ripard@free-electrons.com>

On Thu, Oct 20, 2016 at 3:49 PM, Maxime Ripard
<maxime.ripard@free-electrons.com> wrote:

> Even though the our binding had the assumption that the allwinner,pull and
> allwinner,drive properties were optional, the code never took that into
> account.
>
> Fix that.
>
> Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
> Acked-by: Chen-Yu Tsai <wens@csie.org>

Patch applied.

Yours,
Linus Walleij

^ permalink raw reply

* Low network throughput on i.MX28
From: Stefan Wahren @ 2016-10-29  9:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1477696028.31471.3.camel@embedded.rocks>


> J?rg Krause <joerg.krause@embedded.rocks> hat am 29. Oktober 2016 um 01:07
> geschrieben:
> 
> 
> You mentioned [1] an optimization in the Freescale vendor Linux kernel
> [2]. I would really like to see this optimization in the mainline
> kernel.
> 
> Did you ever tried to port this code from Freescale to mainline?

Yes, i tried once but i was frustrated soon because of the lot of required
changes and resulting issues.

> 
> Is it even possible, as the mainline driver uses the DMA engine?

I think the more important part would be analyse why the Mainline driver is
slowlier. I mean to exactly identify the bottleneck.

I don't have enough time and equipment for this. I better concentrate on standby
support.

> 
> [1] http://linux-arm-kernel.infradead.narkive.com/GNkqjvo8/patch-rfc-0-
> 3-mmc-mxs-mmc-implement-ddr-support#post8
> [2] http://git.freescale.com/git/cgit.cgi/imx/linux-2.6-imx.git/commit/
> ?h=imx_2.6.35_maintain&id=b09358887fb4b67f6d497fac8cc48475c8bd292d
> 
> Best regards,
> J?rg Krause

^ permalink raw reply

* [PATCH v12 RESEND 0/4] generic TEE subsystem
From: Jens Wiklander @ 2016-10-29  9:46 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1e532aeb-4944-62e4-c8c4-1e23438b92cd@ti.com>

On Fri, Oct 28, 2016 at 10:43:24AM -0500, Andrew F. Davis wrote:
> On 10/28/2016 05:19 AM, Jens Wiklander wrote:
> > Hi,
> > 
> > This patch set introduces a generic TEE subsystem. The TEE subsystem will
> > contain drivers for various TEE implementations. A TEE (Trusted Execution
> > Environment) is a trusted OS running in some secure environment, for
> > example, TrustZone on ARM CPUs, or a separate secure co-processor etc.
> > 
> > Regarding use cases, TrustZone has traditionally been used for
> > offloading secure tasks to the secure world. Examples include: 
> > - Secure key handling where the OS may or may not have direct access to key
> >   material.
> > - E-commerce and payment technologies. Credentials, credit card numbers etc
> >   could be stored in a more secure environment.
> > - Trusted User Interface (TUI) to ensure that no-one can snoop PIN-codes
> >   etc.
> > - Secure boot to ensure that loaded binaries haven?t been tampered with.
> >   It?s not strictly needed for secure boot, but you could enhance security
> >   by leveraging a TEE during boot.
> > - Digital Rights Management (DRM), the studios provides content with
> >   different resolution depending on the security of the device. Higher
> >   security means higher resolution.
> > 
> > A TEE could also be used in existing and new technologies. For example IMA
> > (Integrity Measurement Architecture) which has been in the kernel for quite
> > a while. Today you can enhance security by using a TPM-chip to sign the IMA
> > measurement list. This is something that you also could do by leveraging a
> > TEE.
> > 
> > Another example could be in 2-factor authentication which is becoming
> > increasingly more important. FIDO (https://fidoalliance.org) for example
> > are using public key cryptography in their 2-factor authentication standard
> > (U2F). With FIDO, a private and public key pair will be generated for every
> > site you visit and the private key should never leave the local device.
> > This is an example where you could use secure storage in a TEE for the
> > private key.
> > 
> > Today you will find a quite a few different out of tree implementations of
> > TEE drivers which tends to fragment the TEE ecosystem and development. We
> > think it would be a good idea to have a generic TEE driver integrated in
> > the kernel which would serve as a base for several different TEE solutions,
> > no matter if they are on-chip like TrustZone or if they are on a separate
> > crypto co-processor.
> > 
> > To develop this TEE subsystem we have been using the open source TEE called
> > OP-TEE (https://github.com/OP-TEE/optee_os) and therefore this would be the
> > first TEE solution supported by this new subsystem. OP-TEE is a
> > GlobalPlatform compliant TEE, however this TEE subsystem is not limited to
> > only GlobalPlatform TEEs, instead we have tried to design it so that it
> > should work with other TEE solutions also.
> > 
> 
> The above is my biggest concern with this whole subsystem, to me it
> still feels very OPTEE specific. As much as I would love to believe
> OPTEE will be the end-all TEE, I'm sure we soon will start to see wider
> use of vendor TEEs (like TI's own legacy Trustzone thing we are hoping
> to depreciate with OPTEE moving forward), possibly Google's Trusty TEE,
> and whatever Intel/AMD are cooking up for x86.

I'd rather say that it's slightly GlobalPlatform specific, but a bit
more flexible.

> 
> As we all know when things are upstreamed we lose the ability to make
> radical changes easily, especially to full subsystems. What happens when
> this framework, built with only one existing TEE, built by the one
> existing TEE's devs, is not as flexible as we need when other TEEs start
> rolling out?

Initially the TEE subsystem was much more flexible and was criticized
for that.

> 
> Do we see this as a chicken and egg situation, or is there any harm
> beyond the pains of supporting an out-of-tree driver for a while, to
> wait until we have at least one other TEE to add to this subsystem
> before merging?

This proposal is the bare minimum to have something useful. On top of
this there's more things we'd like to add, for example an in-kernel API
for accessing the TEE and secure buffer handling. The way we're dealing
with shared memory need to be improved to better support multiple guests
communicating with one TEE.

What we can do now with the subsystem now is somewhat limited by the
fact that we're trying to upstream it and want to do that it in
manageable increments.

Thanks,
Jens

> 
> This may also help with the perceived lack of reviewers for this series.
> 
> Thanks,
> Andrew
> 
> > "tee: generic TEE subsystem" brings in the generic TEE subsystem which
> > helps when writing a driver for a specific TEE, for example, OP-TEE.
> > 
> > "tee: add OP-TEE driver" is an OP-TEE driver which uses the subsystem to do
> > its work.
> > 
> > This patch set has been prepared in cooperation with Javier Gonz?lez who
> > proposed "Generic TrustZone Driver in Linux Kernel" patches 28 Nov 2014,
> > https://lwn.net/Articles/623380/ . We've since then changed the scope to
> > TEE instead of TrustZone.
> > 
> > We have discussed the design on tee-dev at lists.linaro.org (archive at
> > https://lists.linaro.org/pipermail/tee-dev/) with people from other
> > companies, including Valentin Manea <valentin.manea@huawei.com>,
> > Emmanuel MICHEL <emmanuel.michel@st.com>,
> > Jean-michel DELORME <jean-michel.delorme@st.com>,
> > and Joakim Bech <joakim.bech@linaro.org>. Our main concern has been to
> > agree on something that is generic enough to support many different
> > TEEs while still keeping the interface together.
> > 
> > v12-resend:
> > * Rebased on v4.9-rc2
> > 
> > v12:
> > * Rebased on v4.8-rc5
> > * Addressed review comments from Andrew F. Davis
> > * Removed Acked-by: Andreas Dannenberg <dannenberg@ti.com> as the
> >   mail bounces
> > * Bugfix possible null dereference in error cleanup path of
> >   optee_probe().
> > * Bugfix optee_from_msg_param() when calculating offset of memref
> >   into a shared memory object
> > 
> > v11:
> > * Rebased on v4.8-rc3
> > * Addressed review comments from Nishanth Menon
> > * Made the TEE framework available as a loadable module.
> > * Reviewed-by: Javier Gonz?lez <javier@javigon.com>
> > * Zeroes shared memory on allocation to avoid information leakage
> > * Links shared memory objects to context to avoid stealing of shared memory
> >   object from an unrelated process
> > * Allow RPC interruption if supplicant is unavailable
> > 
> > v10:
> > * Rebased on v4.7-rc1
> > * Addressed private review comments from Nishanth Menon
> > * Optee driver only accepts one supplicant process on the privileged device
> > * Optee driver avoids long delayed releases of shm objects
> > * Added more comments on functions and structs
> > 
> > v9:
> > * Rebased on v4.6-rc1
> > * Acked-by: Andreas Dannenberg <dannenberg@ti.com>
> > * Addressed comments from Al Viro how file descriptors are passed to
> >   user space
> > * Addressed comments from Randy Dunlap on documentation
> > * Changed license for include/uapi/linux/tee.h
> > 
> > v8:
> > * Rebased on v4.5-rc3
> > * dt/bindings: add bindings for optee
> >   Acked-by: Rob Herring <robh@kernel.org>
> > * Fixes build error for X86
> > * Fixes spell error in "dt/bindings: add bindings for optee"
> > 
> > v7:
> > * Rebased on v4.5-rc2
> > * Moved the ARM SMC Calling Convention support into a separate patch
> >   set, which is now merged
> > 
> > v6:
> > * Rebased on v4.3-rc7
> > * Changed smccc interface to let the compiler marshal most of the
> >   parameters
> > * Added ARCH64 capability for smccc interface
> > * Changed the PSCI firmware calls (both arm and arm64) to use the new
> >   generic smccc interface instead instead of own assembly functions.
> > * Move optee DT bindings to below arm/firmware
> > * Defines method for OP-TEE driver to call secure world in DT, smc or hvc
> > * Exposes implementation id of a TEE driver in sysfs
> >   to easily spawn corresponding tee-supplicant when device is ready
> > * Update OP-TEE Message Protocol to better cope with fragmented physical
> >   memory
> > * Read time directly from OP-TEE driver instead of forwarding the RPC
> >   request to tee-supplicant
> > 
> > v5:
> > * Replaced kref reference counting for the device with a size_t instead as
> >   the counter is always protected by a mutex
> > 
> > v4:
> > * Rebased on 4.1
> > * Redesigned the synchronization around entry exit of normal SMC
> > * Replaced rwsem on the driver instance with kref and completion since
> >   rwsem wasn't intended to be used in this way
> > * Expanded the TEE_IOCTL_PARAM_ATTR_TYPE_MASK to make room for
> >   future additional parameter types
> > * Documents TEE subsystem and OP-TEE driver
> > * Replaced TEE_IOC_CMD with TEE_IOC_OPEN_SESSION, TEE_IOC_INVOKE,
> >   TEE_IOC_CANCEL and TEE_IOC_CLOSE_SESSION
> > * DT bindings in a separate patch
> > * Assembly parts moved to arch/arm and arch/arm64 respectively, in a
> >   separate patch
> > * Redefined/clarified the meaning of OPTEE_SMC_SHM_CACHED
> > * Removed CMA usage to limit the scope of the patch set
> > 
> > v3:
> > * Rebased on 4.1-rc3 (dma_buf_export() API change)
> > * A couple of small sparse fixes
> > * Documents bindings for OP-TEE driver
> > * Updated MAINTAINERS
> > 
> > v2:
> > * Replaced the stubbed OP-TEE driver with a real OP-TEE driver
> > * Removed most APIs not needed by OP-TEE in current state
> > * Update Documentation/ioctl/ioctl-number.txt with correct path to tee.h
> > * Rename tee_shm_pool_alloc_cma() to tee_shm_pool_alloc()
> > * Moved tee.h into include/uapi/linux/
> > * Redefined tee.h IOCTL macros to be directly based on _IOR and friends
> > * Removed version info on the API to user space, a data blob which
> >   can contain an UUID is left for user space to be able to tell which
> >   protocol to use in TEE_IOC_CMD
> > * Changed user space exposed structures to only have types with __ prefix
> > * Dropped THIS_MODULE from tee_fops
> > * Reworked how the driver is registered and ref counted:
> >   - moved from using an embedded struct miscdevice to an embedded struct
> >     device.
> >   - uses an struct rw_semaphore as synchronization for driver detachment
> >   - uses alloc/register pattern from TPM
> > 
> > Thanks,
> > Jens
> > 
> > Jens Wiklander (4):
> >   dt/bindings: add bindings for optee
> >   tee: generic TEE subsystem
> >   tee: add OP-TEE driver
> >   Documentation: tee subsystem and op-tee driver
> > 
> >  Documentation/00-INDEX                             |   2 +
> >  .../bindings/arm/firmware/linaro,optee-tz.txt      |  31 +
> >  .../devicetree/bindings/vendor-prefixes.txt        |   1 +
> >  Documentation/ioctl/ioctl-number.txt               |   1 +
> >  Documentation/tee.txt                              | 118 +++
> >  MAINTAINERS                                        |  13 +
> >  drivers/Kconfig                                    |   2 +
> >  drivers/Makefile                                   |   1 +
> >  drivers/tee/Kconfig                                |  18 +
> >  drivers/tee/Makefile                               |   5 +
> >  drivers/tee/optee/Kconfig                          |   7 +
> >  drivers/tee/optee/Makefile                         |   5 +
> >  drivers/tee/optee/call.c                           | 435 ++++++++++
> >  drivers/tee/optee/core.c                           | 598 ++++++++++++++
> >  drivers/tee/optee/optee_msg.h                      | 435 ++++++++++
> >  drivers/tee/optee/optee_private.h                  | 185 +++++
> >  drivers/tee/optee/optee_smc.h                      | 446 ++++++++++
> >  drivers/tee/optee/rpc.c                            | 404 +++++++++
> >  drivers/tee/optee/supp.c                           | 273 +++++++
> >  drivers/tee/tee_core.c                             | 901 +++++++++++++++++++++
> >  drivers/tee/tee_private.h                          | 129 +++
> >  drivers/tee/tee_shm.c                              | 357 ++++++++
> >  drivers/tee/tee_shm_pool.c                         | 158 ++++
> >  include/linux/tee_drv.h                            | 278 +++++++
> >  include/uapi/linux/tee.h                           | 403 +++++++++
> >  25 files changed, 5206 insertions(+)
> >  create mode 100644 Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.txt
> >  create mode 100644 Documentation/tee.txt
> >  create mode 100644 drivers/tee/Kconfig
> >  create mode 100644 drivers/tee/Makefile
> >  create mode 100644 drivers/tee/optee/Kconfig
> >  create mode 100644 drivers/tee/optee/Makefile
> >  create mode 100644 drivers/tee/optee/call.c
> >  create mode 100644 drivers/tee/optee/core.c
> >  create mode 100644 drivers/tee/optee/optee_msg.h
> >  create mode 100644 drivers/tee/optee/optee_private.h
> >  create mode 100644 drivers/tee/optee/optee_smc.h
> >  create mode 100644 drivers/tee/optee/rpc.c
> >  create mode 100644 drivers/tee/optee/supp.c
> >  create mode 100644 drivers/tee/tee_core.c
> >  create mode 100644 drivers/tee/tee_private.h
> >  create mode 100644 drivers/tee/tee_shm.c
> >  create mode 100644 drivers/tee/tee_shm_pool.c
> >  create mode 100644 include/linux/tee_drv.h
> >  create mode 100644 include/uapi/linux/tee.h
> > 

^ permalink raw reply

* [PATCH 1/5] ARM: wire up HWCAP2 feature bits to the CPU modalias
From: Ard Biesheuvel @ 2016-10-29 10:08 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1476787939-21889-2-git-send-email-ard.biesheuvel@linaro.org>

On 18 October 2016 at 11:52, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> Wire up the generic support for exposing CPU feature bits via the
> modalias in /sys/device/system/cpu. This allows udev to automatically
> load modules for things like crypto algorithms that are implemented
> using optional instructions.
>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
>  arch/arm/Kconfig                  |  1 +
>  arch/arm/include/asm/cpufeature.h | 32 ++++++++++++++++++++
>  2 files changed, 33 insertions(+)
>

Russell,

do you have any concerns regarding this patch? If not, I will drop it
into the patch system.

Herbert,

I will resend the followup patches in this series to linux-crypto@
once this prerequisite is in place

Thanks,
Ard.


> diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
> index b5d529fdffab..1a0c6a486a9c 100644
> --- a/arch/arm/Kconfig
> +++ b/arch/arm/Kconfig
> @@ -21,6 +21,7 @@ config ARM
>         select GENERIC_ALLOCATOR
>         select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI)
>         select GENERIC_CLOCKEVENTS_BROADCAST if SMP
> +       select GENERIC_CPU_AUTOPROBE
>         select GENERIC_EARLY_IOREMAP
>         select GENERIC_IDLE_POLL_SETUP
>         select GENERIC_IRQ_PROBE
> diff --git a/arch/arm/include/asm/cpufeature.h b/arch/arm/include/asm/cpufeature.h
> new file mode 100644
> index 000000000000..19c3dddd901a
> --- /dev/null
> +++ b/arch/arm/include/asm/cpufeature.h
> @@ -0,0 +1,32 @@
> +/*
> + * Copyright (C) 2016 Linaro Ltd. <ard.biesheuvel@linaro.org>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + */
> +
> +#ifndef __ASM_CPUFEATURE_H
> +#define __ASM_CPUFEATURE_H
> +
> +#include <asm/hwcap.h>
> +
> +/*
> + * Due to the fact that ELF_HWCAP is a 32-bit type on ARM, and given the number
> + * of optional CPU features it defines, ARM's CPU capability bits have been
> + * divided across separate elf_hwcap and elf_hwcap2 variables, each of which
> + * covers a subset of the available CPU features.
> + *
> + * Currently, only a few of those are suitable for automatic module loading
> + * (which is the primary use case of this facility) and those happen to be all
> + * covered by HWCAP2. So let's only expose those via the CPU modalias for now.
> + */
> +#define MAX_CPU_FEATURES       (8 * sizeof(elf_hwcap2))
> +#define cpu_feature(x)         ilog2(HWCAP2_ ## x)
> +
> +static inline bool cpu_have_feature(unsigned int num)
> +{
> +       return elf_hwcap2 & (1UL << num);
> +}
> +
> +#endif
> --
> 2.7.4
>

^ permalink raw reply

* [PATCH 2/2] MAINTAINERS: add ARM and arm64 EFI specific files to EFI subsystem
From: Ard Biesheuvel @ 2016-10-29 10:09 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1474472114-12973-2-git-send-email-ard.biesheuvel@linaro.org>

On 21 September 2016 at 16:35, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> Since I will be co-maintaining the EFI subsystem, it makes sense to
> mention the ARM and arm64 EFI bits in the EFI section in MAINTAINERS
> so that Matt, the list and I get cc'ed on proposed changes.
>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Russell King <rmk+kernel@armlinux.org.uk>
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
>  MAINTAINERS | 6 ++++--
>  1 file changed, 4 insertions(+), 2 deletions(-)
>

Russell,

do you have an objections to this change?

Thanks,
Ard.


> diff --git a/MAINTAINERS b/MAINTAINERS
> index 224518556a84..cc8b36699f94 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -4562,12 +4562,14 @@ L:      linux-efi at vger.kernel.org
>  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/efi/efi.git
>  S:     Maintained
>  F:     Documentation/efi-stub.txt
> -F:     arch/ia64/kernel/efi.c
> +F:     arch/*/kernel/efi.c
>  F:     arch/x86/boot/compressed/eboot.[ch]
> -F:     arch/x86/include/asm/efi.h
> +F:     arch/*/include/asm/efi.h
>  F:     arch/x86/platform/efi/
>  F:     drivers/firmware/efi/
>  F:     include/linux/efi*.h
> +F:     arch/arm/boot/compressed/efi-header.S
> +F:     arch/arm64/kernel/efi-entry.S
>
>  EFI VARIABLE FILESYSTEM
>  M:     Matthew Garrett <matthew.garrett@nebula.com>
> --
> 2.7.4
>

^ permalink raw reply

* [PATCH 1/2] ABI: rtc-ab8500: fix rtc_calibration documentation
From: Mauro Carvalho Chehab @ 2016-10-29 10:10 UTC (permalink / raw)
  To: linux-arm-kernel

The "What:" field at the ABI should describe the location of
the ABI, e. g. the position under a mounted sysfs.

Fix it.

Cc: Mark Godfrey <mark.godfrey@stericsson.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Cc: linux-arm-kernel at lists.infradead.org
Cc: rtc-linux at googlegroups.com
Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>
---
 .../ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration          | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration b/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration
index 4cf1e72222d9..ec950c93e5c6 100644
--- a/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration
+++ b/Documentation/ABI/testing/sysfs-class-rtc-rtc0-device-rtc_calibration
@@ -1,8 +1,9 @@
-What:           Attribute for calibrating ST-Ericsson AB8500 Real Time Clock
+What:           /sys/class/rtc/rtc0/device/rtc_calibration
 Date:           Oct 2011
 KernelVersion:  3.0
 Contact:        Mark Godfrey <mark.godfrey@stericsson.com>
-Description:    The rtc_calibration attribute allows the userspace to
+Description:    Attribute for calibrating ST-Ericsson AB8500 Real Time Clock
+		The rtc_calibration attribute allows the userspace to
                 calibrate the AB8500.s 32KHz Real Time Clock.
                 Every 60 seconds the AB8500 will correct the RTC's value
                 by adding to it the value of this attribute.
-- 
2.7.4

^ permalink raw reply related

* [PATCH 1/2] ABI: rtc-ab8500: fix rtc_calibration documentation
From: Linus Walleij @ 2016-10-29 10:22 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <364a36215c44e0c2785911e9d9259cd866283cb9.1477735797.git.mchehab@s-opensource.com>

On Sat, Oct 29, 2016 at 12:10 PM, Mauro Carvalho Chehab
<mchehab@s-opensource.com> wrote:

> The "What:" field at the ABI should describe the location of
> the ABI, e. g. the position under a mounted sysfs.
>
> Fix it.
>
> Cc: Mark Godfrey <mark.godfrey@stericsson.com>
> Cc: Linus Walleij <linus.walleij@linaro.org>
> Cc: Alessandro Zummo <a.zummo@towertech.it>
> Cc: Alexandre Belloni <alexandre.belloni@free-electrons.com>
> Cc: linux-arm-kernel at lists.infradead.org
> Cc: rtc-linux at googlegroups.com
> Signed-off-by: Mauro Carvalho Chehab <mchehab@s-opensource.com>

Acked-by: Linus Walleij <linus.walleij@linaro.org>

Yours,
Linus Walleij

^ permalink raw reply

* [PATCH] drm/sun4i: Add a few formats
From: Chen-Yu Tsai @ 2016-10-29 10:25 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161027143505.rblgbz5uqjynrtjk@lukather>

On Thu, Oct 27, 2016 at 10:35 PM, Maxime Ripard
<maxime.ripard@free-electrons.com> wrote:
> Hi,
>
> On Tue, Oct 25, 2016 at 08:42:26AM +0800, Chen-Yu Tsai wrote:
>> On Mon, Oct 24, 2016 at 10:40 PM, Maxime Ripard
>> <maxime.ripard@free-electrons.com> wrote:
>> > Hi,
>> >
>> > On Fri, Oct 21, 2016 at 11:15:32AM +0800, Chen-Yu Tsai wrote:
>> >> On Tue, Oct 18, 2016 at 4:46 PM, Maxime Ripard
>> >> <maxime.ripard@free-electrons.com> wrote:
>> >> > The planes can do more than what was previously exposed. Add support for
>> >> > them.
>> >> >
>> >> > Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
>> >> > ---
>> >> >  drivers/gpu/drm/sun4i/sun4i_backend.c | 20 ++++++++++++++++++++
>> >> >  drivers/gpu/drm/sun4i/sun4i_layer.c   |  6 ++++++
>> >> >  2 files changed, 26 insertions(+)
>> >> >
>> >> > diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c
>> >> > index afb7ddf660ef..b184a476a480 100644
>> >> > --- a/drivers/gpu/drm/sun4i/sun4i_backend.c
>> >> > +++ b/drivers/gpu/drm/sun4i/sun4i_backend.c
>> >> > @@ -96,6 +96,22 @@ static int sun4i_backend_drm_format_to_layer(struct drm_plane *plane,
>> >> >                 *mode = SUN4I_BACKEND_LAY_FBFMT_ARGB8888;
>> >> >                 break;
>> >> >
>> >> > +       case DRM_FORMAT_ARGB4444:
>> >> > +               *mode = SUN4I_BACKEND_LAY_FBFMT_ARGB4444;
>> >> > +               break;
>> >> > +
>> >> > +       case DRM_FORMAT_ARGB1555:
>> >> > +               *mode = SUN4I_BACKEND_LAY_FBFMT_ARGB1555;
>> >> > +               break;
>> >> > +
>> >> > +       case DRM_FORMAT_RGBA5551:
>> >> > +               *mode = SUN4I_BACKEND_LAY_FBFMT_RGBA5551;
>> >> > +               break;
>> >> > +
>> >> > +       case DRM_FORMAT_RGBA4444:
>> >> > +               *mode = SUN4I_BACKEND_LAY_FBFMT_RGBA4444;
>> >>
>> >> The A20 manual only lists ARGB4444, not RGBA4444. There might be
>> >> some discrepancy here. We can deal with them
>> >
>> > Hmm, yes, that's weird. But I guess this would be part of porting it
>> > to the A20.
>> >
>> >> Also there are some more formats missing from the list, could you
>> >> add them as well?
>> >
>> > Which one do you refer to?
>>
>> RGB556 and RGB655.
>
> These formats are not supported by Linux yet though.

I see. Sorry for the noise.

Acked-by: Chen-Yu Tsai <wens@csie.org>

>
> Thanks,
> Maxime
>
> --
> Maxime Ripard, Free Electrons
> Embedded Linux and Kernel engineering
> http://free-electrons.com

^ permalink raw reply

* [PATCH v3 0/2] ARM: dts: sun6i: hummingbird-a31: Enable display output through VGA bridge
From: Chen-Yu Tsai @ 2016-10-29 11:06 UTC (permalink / raw)
  To: linux-arm-kernel

Hi,

This is v3 of my sun6i DRM/KMS display enablement series. Most of the
patches in v2 were merged, leaving only the dumb-vga-dac patch and the
hummingbird-a31 VGA output patch.

A few things were changed, mainly the DAC enable method from a GPIO
to a regulator supply.

Changes since v2:

  - Changed the enable-gpio of dumb-vga-dac to vdd-supply regulator.
    This better matches the hardware that I have: the DAC has a fixed
    regulator dropping the voltage from the board-wide 5V to 3.3V the
    DAC uses. The regulator is controlled through a GPIO pin.

  - Renamed the node of the VGA connector from "vga" to "vga-connector".

  - Renamed the node of the VGA DAC from "bridge" to "vga-dac".


Regards
ChenYu


Chen-Yu Tsai (2):
  drm/bridge: dumb-vga-dac: Support a VDD regulator supply
  ARM: dts: sun6i: hummingbird-a31: Enable display output through VGA
    bridge

 .../bindings/display/bridge/dumb-vga-dac.txt       |  2 +
 arch/arm/boot/dts/sun6i-a31-hummingbird.dts        | 76 ++++++++++++++++++++++
 drivers/gpu/drm/bridge/dumb-vga-dac.c              | 35 ++++++++++
 3 files changed, 113 insertions(+)

-- 
2.9.3

^ permalink raw reply

* [PATCH v3 1/2] drm/bridge: dumb-vga-dac: Support a VDD regulator supply
From: Chen-Yu Tsai @ 2016-10-29 11:06 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161029110611.28951-1-wens@csie.org>

Some dumb VGA DACs are active components which require external power.
Add support for specifying a regulator as its power supply.

Signed-off-by: Chen-Yu Tsai <wens@csie.org>
---
 .../bindings/display/bridge/dumb-vga-dac.txt       |  2 ++
 drivers/gpu/drm/bridge/dumb-vga-dac.c              | 35 ++++++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/Documentation/devicetree/bindings/display/bridge/dumb-vga-dac.txt b/Documentation/devicetree/bindings/display/bridge/dumb-vga-dac.txt
index 003bc246a270..164cbb15f04c 100644
--- a/Documentation/devicetree/bindings/display/bridge/dumb-vga-dac.txt
+++ b/Documentation/devicetree/bindings/display/bridge/dumb-vga-dac.txt
@@ -16,6 +16,8 @@ graph bindings specified in Documentation/devicetree/bindings/graph.txt.
 - Video port 0 for RGB input
 - Video port 1 for VGA output
 
+Optional properties:
+- vdd-supply: Power supply for DAC
 
 Example
 -------
diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c
index afec232185a7..59781e031220 100644
--- a/drivers/gpu/drm/bridge/dumb-vga-dac.c
+++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c
@@ -12,6 +12,7 @@
 
 #include <linux/module.h>
 #include <linux/of_graph.h>
+#include <linux/regulator/consumer.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
@@ -23,6 +24,7 @@ struct dumb_vga {
 	struct drm_connector	connector;
 
 	struct i2c_adapter	*ddc;
+	struct regulator	*vdd;
 };
 
 static inline struct dumb_vga *
@@ -124,8 +126,33 @@ static int dumb_vga_attach(struct drm_bridge *bridge)
 	return 0;
 }
 
+static void dumb_vga_enable(struct drm_bridge *bridge)
+{
+	struct dumb_vga *vga = drm_bridge_to_dumb_vga(bridge);
+	int ret;
+
+	if (!IS_ERR(vga->vdd)) {
+		ret = regulator_enable(vga->vdd);
+
+		if (ret) {
+			DRM_ERROR("Failed to enable vdd regulator: %d\n", ret);
+			return;
+		}
+	}
+}
+
+static void dumb_vga_disable(struct drm_bridge *bridge)
+{
+	struct dumb_vga *vga = drm_bridge_to_dumb_vga(bridge);
+
+	if (!IS_ERR(vga->vdd))
+		regulator_disable(vga->vdd);
+}
+
 static const struct drm_bridge_funcs dumb_vga_bridge_funcs = {
 	.attach		= dumb_vga_attach,
+	.enable		= dumb_vga_enable,
+	.disable	= dumb_vga_disable,
 };
 
 static struct i2c_adapter *dumb_vga_retrieve_ddc(struct device *dev)
@@ -169,6 +196,14 @@ static int dumb_vga_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	platform_set_drvdata(pdev, vga);
 
+	vga->vdd = devm_regulator_get_optional(&pdev->dev, "vdd");
+	if (IS_ERR(vga->vdd)) {
+		ret = PTR_ERR(vga->vdd);
+		if (ret == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+		dev_dbg(&pdev->dev, "No vdd regulator found: %d\n", ret);
+	}
+
 	vga->ddc = dumb_vga_retrieve_ddc(&pdev->dev);
 	if (IS_ERR(vga->ddc)) {
 		if (PTR_ERR(vga->ddc) == -ENODEV) {
-- 
2.9.3

^ permalink raw reply related

* [PATCH v3 2/2] ARM: dts: sun6i: hummingbird-a31: Enable display output through VGA bridge
From: Chen-Yu Tsai @ 2016-10-29 11:06 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161029110611.28951-1-wens@csie.org>

The Hummingbird A31 board has a VGA DAC which converts RGB output
from the LCD interface to VGA analog signals.

Add nodes for the VGA DAC, its power supply, and enable this part
of the display pipeline.

Signed-off-by: Chen-Yu Tsai <wens@csie.org>
---
 arch/arm/boot/dts/sun6i-a31-hummingbird.dts | 76 +++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts
index 9a74637f677f..1ab28b2108fe 100644
--- a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts
+++ b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts
@@ -63,6 +63,62 @@
 		stdout-path = "serial0:115200n8";
 	};
 
+	vga-connector {
+		compatible = "vga-connector";
+
+		port {
+			vga_con_in: endpoint {
+				remote-endpoint = <&vga_dac_out>;
+			};
+		};
+	};
+
+	vga-dac {
+		compatible = "dumb-vga-dac";
+		vdd-supply = <&reg_vga_3v3>;
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port at 0 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				reg = <0>;
+
+				vga_dac_in: endpoint at 0 {
+					reg = <0>;
+					remote-endpoint = <&tcon0_out_vga>;
+				};
+			};
+
+			port at 1 {
+				#address-cells = <1>;
+				#size-cells = <0>;
+				reg = <1>;
+
+				vga_dac_out: endpoint at 0 {
+					reg = <0>;
+					remote-endpoint = <&vga_con_in>;
+				};
+			};
+		};
+	};
+
+	reg_vga_3v3: vga_3v3_regulator {
+		compatible = "regulator-fixed";
+		pinctrl-names = "default";
+		pinctrl-0 = <&vga_3v3_enable_pin_hummingbird>;
+		regulator-name = "vga-3v3";
+		regulator-min-microvolt = <3300000>;
+		regulator-max-microvolt = <3300000>;
+		regulator-boot-on;
+		enable-active-high;
+		gpio = <&pio 7 25 GPIO_ACTIVE_HIGH>; /* PH25 */
+	};
+
 	wifi_pwrseq: wifi_pwrseq {
 		compatible = "mmc-pwrseq-simple";
 		reset-gpios = <&pio 6 10 GPIO_ACTIVE_LOW>; /* PG10 */
@@ -166,6 +222,13 @@
 		allwinner,pull = <SUN4I_PINCTRL_PULL_UP>;
 	};
 
+	vga_3v3_enable_pin_hummingbird: vga_3v3_enable_pin {
+		allwinner,pins = "PH25";
+		allwinner,function = "gpio_out";
+		allwinner,drive = <SUN4I_PINCTRL_10_MA>;
+		allwinner,pull = <SUN4I_PINCTRL_NO_PULL>;
+	};
+
 	wifi_reset_pin_hummingbird: wifi_reset_pin at 0 {
 		allwinner,pins = "PG10";
 		allwinner,function = "gpio_out";
@@ -245,6 +308,19 @@
 	status = "okay";
 };
 
+&tcon0 {
+	pinctrl-names = "default";
+	pinctrl-0 = <&lcd0_rgb888_pins>;
+	status = "okay";
+};
+
+&tcon0_out {
+	tcon0_out_vga: endpoint at 0 {
+		reg = <0>;
+		remote-endpoint = <&vga_dac_in>;
+	};
+};
+
 &uart0 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&uart0_pins_a>;
-- 
2.9.3

^ permalink raw reply related

* [PATCH] ARM: BCM5301X: Add back handler ignoring external imprecise aborts
From: Rafał Miłecki @ 2016-10-29 11:12 UTC (permalink / raw)
  To: linux-arm-kernel

From: Rafa? Mi?ecki <rafal@milecki.pl>

Since early BCM5301X days we got abort handler that was removed by
commit 937b12306ea79 ("ARM: BCM5301X: remove workaround imprecise abort
fault handler"). It assumed we need to deal only with pending aborts
left by the bootloader. Unfortunately this isn't true for BCM5301X.

When probing PCI config space (device enumeration) it is expected to
have master aborts on the PCI bus. Most bridges don't forward (or they
allow disabling it) these errors onto the AXI/AMBA bus but not the
Northstar (BCM5301X) one.

iProc PCIe controller on Northstar seems to be some older one, without
a control register for errors forwarding. It means we need to workaround
this at platform level. All newer platforms are not affected by this
issue.

Signed-off-by: Rafa? Mi?ecki <rafal@milecki.pl>
---
 arch/arm/mach-bcm/bcm_5301x.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/arch/arm/mach-bcm/bcm_5301x.c b/arch/arm/mach-bcm/bcm_5301x.c
index c8830a2..fe067f6 100644
--- a/arch/arm/mach-bcm/bcm_5301x.c
+++ b/arch/arm/mach-bcm/bcm_5301x.c
@@ -9,14 +9,42 @@
 #include <asm/hardware/cache-l2x0.h>
 
 #include <asm/mach/arch.h>
+#include <asm/siginfo.h>
+#include <asm/signal.h>
+
+#define FSR_EXTERNAL		(1 << 12)
+#define FSR_READ		(0 << 10)
+#define FSR_IMPRECISE		0x0406
 
 static const char *const bcm5301x_dt_compat[] __initconst = {
 	"brcm,bcm4708",
 	NULL,
 };
 
+static int bcm5301x_abort_handler(unsigned long addr, unsigned int fsr,
+				  struct pt_regs *regs)
+{
+	/*
+	 * We want to ignore aborts forwarded from the PCIe bus that are
+	 * expected and shouldn't really be passed by the PCIe controller.
+	 * The biggest disadvantage is the same FSR code may be reported when
+	 * reading non-existing APB register and we shouldn't ignore that.
+	 */
+	if (fsr == (FSR_EXTERNAL | FSR_READ | FSR_IMPRECISE))
+		return 0;
+
+	return 1;
+}
+
+static void __init bcm5301x_init_early(void)
+{
+	hook_fault_code(16 + 6, bcm5301x_abort_handler, SIGBUS, BUS_OBJERR,
+			"imprecise external abort");
+}
+
 DT_MACHINE_START(BCM5301X, "BCM5301X")
 	.l2c_aux_val	= 0,
 	.l2c_aux_mask	= ~0,
 	.dt_compat	= bcm5301x_dt_compat,
+	.init_early	= bcm5301x_init_early,
 MACHINE_END
-- 
2.9.3

^ permalink raw reply related

* [PATCH] KVM: arm/arm64: vgic: Prevent VGIC_ADDR_TO_INTID from emiting divisions
From: Marc Zyngier @ 2016-10-29 11:19 UTC (permalink / raw)
  To: linux-arm-kernel

Using non-constant number of bits for VGIC_ADDR_TO_INTID() leads
to gcc 6.1 emiting calls to __aeabi_uldivmod, which the kernel
does not implement.

As we really don't want to implement complex division in the kernel,
the only other option is to prove to the compiler that there is only
a few values that are possible for the number of bits per IRQ, and
that they are all power of 2.

We turn the VGIC_ADDR_TO_INTID macro into a switch that looks for
the supported set of values (1, 2, 8, 64), and perform the computation
accordingly. When "bits" is a constant, the compiler optimizes
away the other cases. If not, we end-up with a small number of cases
that GCC optimises reasonably well. Out of range values are detected
both at build time (constants) and at run time (variables).

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
This should be applied *before* Andre's patch fixing out of bound SPIs.

 virt/kvm/arm/vgic/vgic-mmio.h | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
index 4c34d39..a457282 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.h
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -57,10 +57,41 @@ extern struct kvm_io_device_ops kvm_io_gic_ops;
  * multiplication with the inverted fraction, and scale up both the
  * numerator and denominator with 8 to support at most 64 bits per IRQ:
  */
-#define VGIC_ADDR_TO_INTID(addr, bits)  (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
+#define __VGIC_ADDR_INTID(addr, bits)  (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
 					64 / (bits) / 8)
 
 /*
+ * Perform the same computation, but also handle non-constant number
+ * of bits. We only care about the few cases that are required by
+ * GICv2/v3.
+ */
+#define VGIC_ADDR_TO_INTID(addr, bits)				\
+	({							\
+		u32 __v;					\
+		switch((bits)) {				\
+		case 1:						\
+			__v = __VGIC_ADDR_INTID((addr), 1);	\
+			break;					\
+		case 2:						\
+			__v = __VGIC_ADDR_INTID((addr), 2);	\
+			break;					\
+		case 8:						\
+			__v = __VGIC_ADDR_INTID((addr), 8);	\
+			break;					\
+		case 64:					\
+			__v = __VGIC_ADDR_INTID((addr), 64);	\
+			break;					\
+		default:					\
+			if (__builtin_constant_p((bits)))	\
+				BUILD_BUG();			\
+			else					\
+				BUG();				\
+		}						\
+								\
+		__v;						\
+	})
+
+/*
  * Some VGIC registers store per-IRQ information, with a different number
  * of bits per IRQ. For those registers this macro is used.
  * The _WITH_LENGTH version instantiates registers with a fixed length
-- 
2.9.3

^ permalink raw reply related

* [Bug] ARM: mxs: STI: console can't wake up from freeze
From: Stefan Wahren @ 2016-10-29 11:44 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161023133134.GI1041@n2100.armlinux.org.uk>

Hi Russell,

> Russell King - ARM Linux <linux@armlinux.org.uk> hat am 23. Oktober 2016 um
> 15:31 geschrieben:
> 
> 
> On Sun, Oct 23, 2016 at 11:19:26AM +0200, Stefan Wahren wrote:
> > Hi,
> > 
> > i'm faced with the issue that on i.MX28 the console is unable to wake up
> > from
> > freeze ( suspend to idle). I tested it with Linux 4.9-rc1, 4.8 and 3.18 (
> > cmdline has
> > no_console_suspend=1 ) and also with a i.MX23 with the same result. The
> > suspend
> > seems to work, but there is no reaction to the console after the freeze
> > except
> > an hung task warning after some time:
> 
> I bet if you remove "no_console_suspend" (it's not =1) then it'll work.

unfortunately not:

Setting: no_console_suspend not in cmdline, Debug UART wakeup source enabled

echo mem > /sys/power/state

Result: Able to wakeup via Debug UART
Expected result: Able to wakeup via Debug UART

---

Setting: no_console_suspend not in cmdline, Debug UART wakeup source enabled

echo freeze > /sys/power/state

Result: Unable to wakeup via Debug UART (no hung task warning)
Expected result: Able to wakeup via Debug UART

---

Setting: no_console_suspend in cmdline, Debug UART as wakeup source enabled

echo mem > /sys/power/state

Result: Able to wakeup via Debug UART
Expected result: Able to wakeup via Debug UART

---

Setting: no_console_suspend in cmdline, Debug UART as wake source enabled

echo freeze > /sys/power/state

Result: Unable to wakeup via Debug UART (hung task warning after some minutes)
Expected result: Able to wakeup Debug via UART

---

Setting: no_console_suspend in cmdline, Debug UART as wake source disabled

echo mem > /sys/power/state

Result: Able to wakeup via Debug UART
Expected result: Able to wakeup via Debug UART

---

Setting: no_console_suspend in cmdline, Debug UART as wake source disabled

echo freeze > /sys/power/state

Result: Unable to wakeup via UART (hung task warning after some minutes)
Expected result: Able to wakeup via Debug UART

---

Here some more relevant information about the setup:

The console is operating on Debug UART which is a ARM PL011. The following
kernel configs are set:

CONFIG_PM_SLEEP=y
CONFIG_SUSPEND=y
CONFIG_SUSPEND_FREEZER=y
CONFIG_PM=y

^ permalink raw reply

* [PATCH v4 0/5] Cavium ThunderX uncore PMU support
From: Jan Glauber @ 2016-10-29 11:55 UTC (permalink / raw)
  To: linux-arm-kernel

As discussed, changed perf_sw_context -> perf_invalid_context.

Not changed:
- Stick to NUMA node ID to detect the socket a device belongs to but made
  uncore depend on CONFIG_NUMA.
- Stick to initcall for uncore framework because it is easier to do the
  scanning for the same type of PCI devices, also I don't know if the PCI layer
  would allow for several drivers to register for the same device ID.

Patches are against 4.9.0-rc2.

Changes to v3:
- use perf_invalid_context

Changes to v2:
- Embedded struct pmu and killed uncore->type
- Simplified add functions
- Unified functions where possible into a common implementation
- Use arrays to translate non-contiguous counter addresses to event_id's
  visible to the user
- Sorted includes
- Got rid of division for previous counter values
- Removed unneeded WARN_ONs
- Use sizeof(*ptr)
- Use bool for event_valid return
- Fixed HES_STOPPED logic
- Added some design notes and improved (hopefully) comments
- Removed pass1 counter support for now
- Merged EVENT_ATTR and EVENT_PTR defines into one (unreadable) thing
- Use pmu_enable|disable to start|stop the OCX TLK counter set
- Moved cpumask into thunder_uncore struct
- Switched to new cpuhp stuff. I still don't care about the CPU location
  used to access an uncore device, it may cross the CCPI and
  we'll pay a performance penalty. We might optimize this later, for now
  I feel it is not worth the time optimizing it.

--------------------------

Jan Glauber (5):
  arm64: perf: Basic uncore counter support for Cavium ThunderX SOC
  arm64: perf: Cavium ThunderX L2C TAD uncore support
  arm64: perf: Cavium ThunderX L2C CBC uncore support
  arm64: perf: Cavium ThunderX LMC uncore support
  arm64: perf: Cavium ThunderX OCX TLK uncore support

 drivers/perf/Kconfig                        |  13 +
 drivers/perf/Makefile                       |   1 +
 drivers/perf/uncore/Makefile                |   5 +
 drivers/perf/uncore/uncore_cavium.c         | 355 ++++++++++++++++++++++++++
 drivers/perf/uncore/uncore_cavium.h         |  75 ++++++
 drivers/perf/uncore/uncore_cavium_l2c_cbc.c | 148 +++++++++++
 drivers/perf/uncore/uncore_cavium_l2c_tad.c | 379 ++++++++++++++++++++++++++++
 drivers/perf/uncore/uncore_cavium_lmc.c     | 118 +++++++++
 drivers/perf/uncore/uncore_cavium_ocx_tlk.c | 344 +++++++++++++++++++++++++
 include/linux/cpuhotplug.h                  |   1 +
 10 files changed, 1439 insertions(+)
 create mode 100644 drivers/perf/uncore/Makefile
 create mode 100644 drivers/perf/uncore/uncore_cavium.c
 create mode 100644 drivers/perf/uncore/uncore_cavium.h
 create mode 100644 drivers/perf/uncore/uncore_cavium_l2c_cbc.c
 create mode 100644 drivers/perf/uncore/uncore_cavium_l2c_tad.c
 create mode 100644 drivers/perf/uncore/uncore_cavium_lmc.c
 create mode 100644 drivers/perf/uncore/uncore_cavium_ocx_tlk.c

-- 
2.9.0.rc0.21.g7777322

^ permalink raw reply

* [PATCH v4 1/5] arm64: perf: Basic uncore counter support for Cavium ThunderX SOC
From: Jan Glauber @ 2016-10-29 11:55 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <cover.1477741719.git.jglauber@cavium.com>

Provide "uncore" facilities for different non-CPU performance
counter units.

The uncore PMUs can be found under /sys/bus/event_source/devices.
All counters are exported via sysfs in the corresponding events
files under the PMU directory so the perf tool can list the event names.

There are some points that are special in this implementation:

1) The PMU detection relies on PCI device detection. If a
   matching PCI device is found the PMU is created. The code can deal
   with multiple units of the same type, e.g. more than one memory
   controller.

2) Counters are summarized across different units of the same type
   on one NUMA node but not across NUMA nodes.
   For instance L2C TAD 0..7 are presented as a single counter
   (adding the values from TAD 0 to 7). Although losing the ability
   to read a single value the merged values are easier to use.

3) The counters are not CPU related. A random CPU is picked regardless
   of the NUMA node. There is a small performance penalty for accessing
   counters on a remote note but reading a performance counter is a
   slow operation anyway.

Signed-off-by: Jan Glauber <jglauber@cavium.com>
---
 drivers/perf/Kconfig                |  13 ++
 drivers/perf/Makefile               |   1 +
 drivers/perf/uncore/Makefile        |   1 +
 drivers/perf/uncore/uncore_cavium.c | 351 ++++++++++++++++++++++++++++++++++++
 drivers/perf/uncore/uncore_cavium.h |  71 ++++++++
 include/linux/cpuhotplug.h          |   1 +
 6 files changed, 438 insertions(+)
 create mode 100644 drivers/perf/uncore/Makefile
 create mode 100644 drivers/perf/uncore/uncore_cavium.c
 create mode 100644 drivers/perf/uncore/uncore_cavium.h

diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
index 4d5c5f9..3266c87 100644
--- a/drivers/perf/Kconfig
+++ b/drivers/perf/Kconfig
@@ -19,4 +19,17 @@ config XGENE_PMU
         help
           Say y if you want to use APM X-Gene SoC performance monitors.
 
+config UNCORE_PMU
+	bool
+
+config UNCORE_PMU_CAVIUM
+	depends on PERF_EVENTS && NUMA && ARM64
+	bool "Cavium uncore PMU support"
+	select UNCORE_PMU
+	default y
+	help
+	  Say y if you want to access performance counters of subsystems
+	  on a Cavium SOC like cache controller, memory controller or
+	  processor interconnect.
+
 endmenu
diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
index b116e98..d6c02c9 100644
--- a/drivers/perf/Makefile
+++ b/drivers/perf/Makefile
@@ -1,2 +1,3 @@
 obj-$(CONFIG_ARM_PMU) += arm_pmu.o
 obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
+obj-y += uncore/
diff --git a/drivers/perf/uncore/Makefile b/drivers/perf/uncore/Makefile
new file mode 100644
index 0000000..6130e18
--- /dev/null
+++ b/drivers/perf/uncore/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_UNCORE_PMU_CAVIUM) += uncore_cavium.o
diff --git a/drivers/perf/uncore/uncore_cavium.c b/drivers/perf/uncore/uncore_cavium.c
new file mode 100644
index 0000000..a7b4277
--- /dev/null
+++ b/drivers/perf/uncore/uncore_cavium.c
@@ -0,0 +1,351 @@
+/*
+ * Cavium Thunder uncore PMU support.
+ *
+ * Copyright (C) 2015,2016 Cavium Inc.
+ * Author: Jan Glauber <jan.glauber@cavium.com>
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/numa.h>
+#include <linux/slab.h>
+
+#include "uncore_cavium.h"
+
+/*
+ * Some notes about the various counters supported by this "uncore" PMU
+ * and the design:
+ *
+ * All counters are 64 bit long.
+ * There are no overflow interrupts.
+ * Counters are summarized per node/socket.
+ * Most devices appear as separate PCI devices per socket with the exception
+ * of OCX TLK which appears as one PCI device per socket and contains several
+ * units with counters that are merged.
+ * Some counters are selected via a control register (L2C TAD) and read by
+ * a number of counter registers, others (L2C CBC, LMC & OCX TLK) have
+ * one dedicated counter per event.
+ * Some counters are not stoppable (L2C CBC & LMC).
+ * Some counters are read-only (LMC).
+ * All counters belong to PCI devices, the devices may have additional
+ * drivers but we assume we are the only user of the counter registers.
+ * We map the whole PCI BAR so we must be careful to forbid access to
+ * addresses that contain neither counters nor counter control registers.
+ */
+
+void thunder_uncore_read(struct perf_event *event)
+{
+	struct thunder_uncore *uncore = to_uncore(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct thunder_uncore_node *node;
+	struct thunder_uncore_unit *unit;
+	u64 prev, delta, new = 0;
+
+	node = get_node(hwc->config, uncore);
+
+	/* read counter values from all units on the node */
+	list_for_each_entry(unit, &node->unit_list, entry)
+		new += readq(hwc->event_base + unit->map);
+
+	prev = local64_read(&hwc->prev_count);
+	local64_set(&hwc->prev_count, new);
+	delta = new - prev;
+	local64_add(delta, &event->count);
+}
+
+int thunder_uncore_add(struct perf_event *event, int flags, u64 config_base,
+		       u64 event_base)
+{
+	struct thunder_uncore *uncore = to_uncore(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct thunder_uncore_node *node;
+	int id;
+
+	node = get_node(hwc->config, uncore);
+	id = get_id(hwc->config);
+
+	if (!cmpxchg(&node->events[id], NULL, event))
+		hwc->idx = id;
+
+	if (hwc->idx == -1)
+		return -EBUSY;
+
+	hwc->config_base = config_base;
+	hwc->event_base = event_base;
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+
+	if (flags & PERF_EF_START)
+		uncore->pmu.start(event, PERF_EF_RELOAD);
+
+	return 0;
+}
+
+void thunder_uncore_del(struct perf_event *event, int flags)
+{
+	struct thunder_uncore *uncore = to_uncore(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct thunder_uncore_node *node;
+	int i;
+
+	event->pmu->stop(event, PERF_EF_UPDATE);
+
+	/*
+	 * For programmable counters we need to check where we installed it.
+	 * To keep this function generic always test the more complicated
+	 * case (free running counters won't need the loop).
+	 */
+	node = get_node(hwc->config, uncore);
+	for (i = 0; i < node->num_counters; i++) {
+		if (cmpxchg(&node->events[i], event, NULL) == event)
+			break;
+	}
+	hwc->idx = -1;
+}
+
+void thunder_uncore_start(struct perf_event *event, int flags)
+{
+	struct thunder_uncore *uncore = to_uncore(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct thunder_uncore_node *node;
+	struct thunder_uncore_unit *unit;
+	u64 new = 0;
+
+	/* read counter values from all units on the node */
+	node = get_node(hwc->config, uncore);
+	list_for_each_entry(unit, &node->unit_list, entry)
+		new += readq(hwc->event_base + unit->map);
+	local64_set(&hwc->prev_count, new);
+
+	hwc->state = 0;
+	perf_event_update_userpage(event);
+}
+
+void thunder_uncore_stop(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	hwc->state |= PERF_HES_STOPPED;
+
+	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+		thunder_uncore_read(event);
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+}
+
+int thunder_uncore_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	struct thunder_uncore_node *node;
+	struct thunder_uncore *uncore;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* we do not support sampling */
+	if (is_sampling_event(event))
+		return -EINVAL;
+
+	/* counters do not have these bits */
+	if (event->attr.exclude_user	||
+	    event->attr.exclude_kernel	||
+	    event->attr.exclude_host	||
+	    event->attr.exclude_guest	||
+	    event->attr.exclude_hv	||
+	    event->attr.exclude_idle)
+		return -EINVAL;
+
+	uncore = to_uncore(event->pmu);
+	if (!uncore)
+		return -ENODEV;
+	if (!uncore->event_valid(event->attr.config & UNCORE_EVENT_ID_MASK))
+		return -EINVAL;
+
+	/* check NUMA node */
+	node = get_node(event->attr.config, uncore);
+	if (!node) {
+		pr_debug("Invalid NUMA node selected\n");
+		return -EINVAL;
+	}
+
+	hwc->config = event->attr.config;
+	hwc->idx = -1;
+	return 0;
+}
+
+static ssize_t thunder_uncore_attr_show_cpumask(struct device *dev,
+						struct device_attribute *attr,
+						char *buf)
+{
+	struct pmu *pmu = dev_get_drvdata(dev);
+	struct thunder_uncore *uncore =
+		container_of(pmu, struct thunder_uncore, pmu);
+
+	return cpumap_print_to_pagebuf(true, buf, &uncore->active_mask);
+}
+static DEVICE_ATTR(cpumask, S_IRUGO, thunder_uncore_attr_show_cpumask, NULL);
+
+static struct attribute *thunder_uncore_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+struct attribute_group thunder_uncore_attr_group = {
+	.attrs = thunder_uncore_attrs,
+};
+
+ssize_t thunder_events_sysfs_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *page)
+{
+	struct perf_pmu_events_attr *pmu_attr =
+		container_of(attr, struct perf_pmu_events_attr, attr);
+
+	if (pmu_attr->event_str)
+		return sprintf(page, "%s", pmu_attr->event_str);
+
+	return 0;
+}
+
+/* node attribute depending on number of NUMA nodes */
+static ssize_t node_show(struct device *dev, struct device_attribute *attr,
+			 char *page)
+{
+	if (NODES_SHIFT)
+		return sprintf(page, "config:16-%d\n", 16 + NODES_SHIFT - 1);
+	else
+		return sprintf(page, "config:16\n");
+}
+
+struct device_attribute format_attr_node = __ATTR_RO(node);
+
+/*
+ * Thunder uncore events are independent from CPUs. Provide a cpumask
+ * nevertheless to prevent perf from adding the event per-cpu and just
+ * set the mask to one online CPU. Use the same cpumask for all uncore
+ * devices.
+ *
+ * There is a performance penalty for accessing a device from a CPU on
+ * another socket, but we do not care (yet).
+ */
+static int thunder_uncore_offline_cpu(unsigned int old_cpu, struct hlist_node *node)
+{
+	struct thunder_uncore *uncore = hlist_entry_safe(node, struct thunder_uncore, node);
+	int new_cpu;
+
+	if (!cpumask_test_and_clear_cpu(old_cpu, &uncore->active_mask))
+		return 0;
+	new_cpu = cpumask_any_but(cpu_online_mask, old_cpu);
+	if (new_cpu >= nr_cpu_ids)
+		return 0;
+	perf_pmu_migrate_context(&uncore->pmu, old_cpu, new_cpu);
+	cpumask_set_cpu(new_cpu, &uncore->active_mask);
+	return 0;
+}
+
+static struct thunder_uncore_node * __init alloc_node(struct thunder_uncore *uncore,
+						      int node_id, int counters)
+{
+	struct thunder_uncore_node *node;
+
+	node = kzalloc(sizeof(*node), GFP_KERNEL);
+	if (!node)
+		return NULL;
+	node->num_counters = counters;
+	INIT_LIST_HEAD(&node->unit_list);
+	return node;
+}
+
+int __init thunder_uncore_setup(struct thunder_uncore *uncore, int device_id,
+				struct pmu *pmu, int counters)
+{
+	unsigned int vendor_id = PCI_VENDOR_ID_CAVIUM;
+	struct thunder_uncore_unit  *unit, *tmp;
+	struct thunder_uncore_node *node;
+	struct pci_dev *pdev = NULL;
+	int ret, node_id, found = 0;
+
+	/* detect PCI devices */
+	while ((pdev = pci_get_device(vendor_id, device_id, pdev))) {
+		if (!pdev)
+			break;
+
+		node_id = dev_to_node(&pdev->dev);
+
+		/* allocate node if necessary */
+		if (!uncore->nodes[node_id])
+			uncore->nodes[node_id] = alloc_node(uncore, node_id, counters);
+
+		node = uncore->nodes[node_id];
+		if (!node) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		unit = kzalloc(sizeof(*unit), GFP_KERNEL);
+		if (!unit) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+
+		unit->pdev = pdev;
+		unit->map = ioremap(pci_resource_start(pdev, 0),
+				    pci_resource_len(pdev, 0));
+		list_add(&unit->entry, &node->unit_list);
+		node->nr_units++;
+		found++;
+	}
+
+	if (!found)
+		return -ENODEV;
+
+	cpuhp_state_add_instance_nocalls(CPUHP_AP_UNCORE_CAVIUM_ONLINE,
+                                         &uncore->node);
+
+	/*
+	 * perf PMU is CPU dependent in difference to our uncore devices.
+	 * Just pick a CPU and migrate away if it goes offline.
+	 */
+	cpumask_set_cpu(smp_processor_id(), &uncore->active_mask);
+
+	uncore->pmu = *pmu;
+	ret = perf_pmu_register(&uncore->pmu, uncore->pmu.name, -1);
+	if (ret)
+		goto fail;
+
+	return 0;
+
+fail:
+	node_id = 0;
+	while (uncore->nodes[node_id]) {
+		node = uncore->nodes[node_id];
+
+		list_for_each_entry_safe(unit, tmp, &node->unit_list, entry) {
+			if (unit->pdev) {
+				if (unit->map)
+					iounmap(unit->map);
+				pci_dev_put(unit->pdev);
+			}
+			kfree(unit);
+		}
+		kfree(uncore->nodes[node_id]);
+		node_id++;
+	}
+	return ret;
+}
+
+static int __init thunder_uncore_init(void)
+{
+	unsigned long implementor = read_cpuid_implementor();
+	int ret;
+
+	if (implementor != ARM_CPU_IMP_CAVIUM)
+		return -ENODEV;
+
+	ret = cpuhp_setup_state_multi(CPUHP_AP_UNCORE_CAVIUM_ONLINE,
+				      "AP_PERF_UNCORE_CAVIUM_ONLINE", NULL,
+				      thunder_uncore_offline_cpu);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+late_initcall(thunder_uncore_init);
diff --git a/drivers/perf/uncore/uncore_cavium.h b/drivers/perf/uncore/uncore_cavium.h
new file mode 100644
index 0000000..b5d64b5
--- /dev/null
+++ b/drivers/perf/uncore/uncore_cavium.h
@@ -0,0 +1,71 @@
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/perf_event.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt)     "thunderx_uncore: " fmt
+
+#define to_uncore(x) container_of((x), struct thunder_uncore, pmu)
+
+#define UNCORE_EVENT_ID_MASK		0xffff
+#define UNCORE_EVENT_ID_SHIFT		16
+
+/* maximum number of parallel hardware counters for all uncore parts */
+#define MAX_COUNTERS			64
+
+struct thunder_uncore_unit {
+	struct list_head entry;
+	void __iomem *map;
+	struct pci_dev *pdev;
+};
+
+struct thunder_uncore_node {
+	int nr_units;
+	int num_counters;
+	struct list_head unit_list;
+	struct perf_event *events[MAX_COUNTERS];
+};
+
+/* generic uncore struct for different pmu types */
+struct thunder_uncore {
+	struct pmu pmu;
+	bool (*event_valid)(u64);
+	struct hlist_node node;
+	struct thunder_uncore_node *nodes[MAX_NUMNODES];
+	cpumask_t active_mask;
+};
+
+#define UC_EVENT_ENTRY(_name, _id)							\
+	&((struct perf_pmu_events_attr[]) {						\
+		{									\
+			__ATTR(_name, S_IRUGO, thunder_events_sysfs_show, NULL),	\
+			0,								\
+			"event=" __stringify(_id),					\
+		}									\
+	})[0].attr.attr
+
+static inline struct thunder_uncore_node *get_node(u64 config,
+				   struct thunder_uncore *uncore)
+{
+	return uncore->nodes[config >> UNCORE_EVENT_ID_SHIFT];
+}
+
+#define get_id(config) (config & UNCORE_EVENT_ID_MASK)
+
+extern struct attribute_group thunder_uncore_attr_group;
+extern struct device_attribute format_attr_node;
+
+/* Prototypes */
+void thunder_uncore_read(struct perf_event *event);
+int thunder_uncore_add(struct perf_event *event, int flags, u64 config_base,
+		       u64 event_base);
+void thunder_uncore_del(struct perf_event *event, int flags);
+void thunder_uncore_start(struct perf_event *event, int flags);
+void thunder_uncore_stop(struct perf_event *event, int flags);
+int thunder_uncore_event_init(struct perf_event *event);
+int thunder_uncore_setup(struct thunder_uncore *uncore, int id,
+			 struct pmu *pmu, int counters);
+ssize_t thunder_events_sysfs_show(struct device *dev,
+				  struct device_attribute *attr,
+				  char *page);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index afe641c..973f2bb 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -118,6 +118,7 @@ enum cpuhp_state {
 	CPUHP_AP_PERF_ARM_CCI_ONLINE,
 	CPUHP_AP_PERF_ARM_CCN_ONLINE,
 	CPUHP_AP_PERF_ARM_L2X0_ONLINE,
+	CPUHP_AP_UNCORE_CAVIUM_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
 	CPUHP_AP_NOTIFY_ONLINE,
-- 
2.9.0.rc0.21.g7777322

^ permalink raw reply related

* [PATCH v4 2/5] arm64: perf: Cavium ThunderX L2C TAD uncore support
From: Jan Glauber @ 2016-10-29 11:55 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <cover.1477741719.git.jglauber@cavium.com>

Support counters of the L2 Cache tag and data units.

Signed-off-by: Jan Glauber <jglauber@cavium.com>
---
 drivers/perf/uncore/Makefile                |   3 +-
 drivers/perf/uncore/uncore_cavium.c         |   1 +
 drivers/perf/uncore/uncore_cavium.h         |   1 +
 drivers/perf/uncore/uncore_cavium_l2c_tad.c | 379 ++++++++++++++++++++++++++++
 4 files changed, 383 insertions(+), 1 deletion(-)
 create mode 100644 drivers/perf/uncore/uncore_cavium_l2c_tad.c

diff --git a/drivers/perf/uncore/Makefile b/drivers/perf/uncore/Makefile
index 6130e18..90850a2 100644
--- a/drivers/perf/uncore/Makefile
+++ b/drivers/perf/uncore/Makefile
@@ -1 +1,2 @@
-obj-$(CONFIG_UNCORE_PMU_CAVIUM) += uncore_cavium.o
+obj-$(CONFIG_UNCORE_PMU_CAVIUM) += uncore_cavium.o		\
+				   uncore_cavium_l2c_tad.o
diff --git a/drivers/perf/uncore/uncore_cavium.c b/drivers/perf/uncore/uncore_cavium.c
index a7b4277..15e1aec 100644
--- a/drivers/perf/uncore/uncore_cavium.c
+++ b/drivers/perf/uncore/uncore_cavium.c
@@ -346,6 +346,7 @@ static int __init thunder_uncore_init(void)
 	if (ret)
 		return ret;
 
+	thunder_uncore_l2c_tad_setup();
 	return 0;
 }
 late_initcall(thunder_uncore_init);
diff --git a/drivers/perf/uncore/uncore_cavium.h b/drivers/perf/uncore/uncore_cavium.h
index b5d64b5..70a8214 100644
--- a/drivers/perf/uncore/uncore_cavium.h
+++ b/drivers/perf/uncore/uncore_cavium.h
@@ -69,3 +69,4 @@ int thunder_uncore_setup(struct thunder_uncore *uncore, int id,
 ssize_t thunder_events_sysfs_show(struct device *dev,
 				  struct device_attribute *attr,
 				  char *page);
+int thunder_uncore_l2c_tad_setup(void);
diff --git a/drivers/perf/uncore/uncore_cavium_l2c_tad.c b/drivers/perf/uncore/uncore_cavium_l2c_tad.c
new file mode 100644
index 0000000..b97ba33
--- /dev/null
+++ b/drivers/perf/uncore/uncore_cavium_l2c_tad.c
@@ -0,0 +1,379 @@
+/*
+ * Cavium Thunder uncore PMU support,
+ * L2 Cache tag-and-data-units (L2C TAD) counters.
+ *
+ * Copyright 2016 Cavium Inc.
+ * Author: Jan Glauber <jan.glauber@cavium.com>
+ */
+
+#include <linux/perf_event.h>
+#include <linux/slab.h>
+
+#include "uncore_cavium.h"
+
+struct thunder_uncore *thunder_uncore_l2c_tad;
+
+#define L2C_TAD_NR_COUNTERS             4
+#define L2C_TAD_PRF_OFFSET		0x10000
+#define L2C_TAD_PFC_OFFSET		0x10100
+
+/*
+ * Counters are selected via L2C_TAD(x)_PRF:
+ *
+ *   63					    32
+ *   +---------------------------------------+
+ *   |  Reserved			     |
+ *   +---------------------------------------+
+ *   | CNT3SEL | CNT2SEL | CNT1SEL | CNT0SEL |
+ *   +---------------------------------------+
+ *   31       24	16	  8	     0
+ *
+ * config_base contains the offset of the selected CNTxSEL in the mapped BAR.
+ *
+ * Counters are read via L2C_TAD(x)_PFC(0..3).
+ * event_base contains the associated address to read the counter.
+ */
+
+/* L2C TAD event list */
+#define L2C_TAD_EVENTS_DISABLED			0x00
+#define L2C_TAD_EVENT_L2T_HIT			0x01
+#define L2C_TAD_EVENT_L2T_MISS			0x02
+#define L2C_TAD_EVENT_L2T_NOALLOC		0x03
+#define L2C_TAD_EVENT_L2_VIC			0x04
+#define L2C_TAD_EVENT_SC_FAIL			0x05
+#define L2C_TAD_EVENT_SC_PASS			0x06
+#define L2C_TAD_EVENT_LFB_OCC			0x07
+#define L2C_TAD_EVENT_WAIT_LFB			0x08
+#define L2C_TAD_EVENT_WAIT_VAB			0x09
+#define L2C_TAD_EVENT_OPEN_CCPI			0x0a
+#define L2C_TAD_EVENT_LOOKUP			0x40
+#define L2C_TAD_EVENT_LOOKUP_XMC_LCL		0x41
+#define L2C_TAD_EVENT_LOOKUP_XMC_RMT		0x42
+#define L2C_TAD_EVENT_LOOKUP_MIB		0x43
+#define L2C_TAD_EVENT_LOOKUP_ALL		0x44
+#define L2C_TAD_EVENT_TAG_ALC_HIT		0x48
+#define L2C_TAD_EVENT_TAG_ALC_MISS		0x49
+#define L2C_TAD_EVENT_TAG_ALC_NALC		0x4a
+#define L2C_TAD_EVENT_TAG_NALC_HIT		0x4b
+#define L2C_TAD_EVENT_TAG_NALC_MISS		0x4c
+#define L2C_TAD_EVENT_LMC_WR			0x4e
+#define L2C_TAD_EVENT_LMC_SBLKDTY		0x4f
+#define L2C_TAD_EVENT_TAG_ALC_RTG_HIT		0x50
+#define L2C_TAD_EVENT_TAG_ALC_RTG_HITE		0x51
+#define L2C_TAD_EVENT_TAG_ALC_RTG_HITS		0x52
+#define L2C_TAD_EVENT_TAG_ALC_RTG_MISS		0x53
+#define L2C_TAD_EVENT_TAG_NALC_RTG_HIT		0x54
+#define L2C_TAD_EVENT_TAG_NALC_RTG_MISS		0x55
+#define L2C_TAD_EVENT_TAG_NALC_RTG_HITE		0x56
+#define L2C_TAD_EVENT_TAG_NALC_RTG_HITS		0x57
+#define L2C_TAD_EVENT_TAG_ALC_LCL_EVICT		0x58
+#define L2C_TAD_EVENT_TAG_ALC_LCL_CLNVIC	0x59
+#define L2C_TAD_EVENT_TAG_ALC_LCL_DTYVIC	0x5a
+#define L2C_TAD_EVENT_TAG_ALC_RMT_EVICT		0x5b
+#define L2C_TAD_EVENT_TAG_ALC_RMT_VIC		0x5c
+#define L2C_TAD_EVENT_RTG_ALC			0x5d
+#define L2C_TAD_EVENT_RTG_ALC_HIT		0x5e
+#define L2C_TAD_EVENT_RTG_ALC_HITWB		0x5f
+#define L2C_TAD_EVENT_STC_TOTAL			0x60
+#define L2C_TAD_EVENT_STC_TOTAL_FAIL		0x61
+#define L2C_TAD_EVENT_STC_RMT			0x62
+#define L2C_TAD_EVENT_STC_RMT_FAIL		0x63
+#define L2C_TAD_EVENT_STC_LCL			0x64
+#define L2C_TAD_EVENT_STC_LCL_FAIL		0x65
+#define L2C_TAD_EVENT_OCI_RTG_WAIT		0x68
+#define L2C_TAD_EVENT_OCI_FWD_CYC_HIT		0x69
+#define L2C_TAD_EVENT_OCI_FWD_RACE		0x6a
+#define L2C_TAD_EVENT_OCI_HAKS			0x6b
+#define L2C_TAD_EVENT_OCI_FLDX_TAG_E_NODAT	0x6c
+#define L2C_TAD_EVENT_OCI_FLDX_TAG_E_DAT	0x6d
+#define L2C_TAD_EVENT_OCI_RLDD			0x6e
+#define L2C_TAD_EVENT_OCI_RLDD_PEMD		0x6f
+#define L2C_TAD_EVENT_OCI_RRQ_DAT_CNT		0x70
+#define L2C_TAD_EVENT_OCI_RRQ_DAT_DMASK		0x71
+#define L2C_TAD_EVENT_OCI_RSP_DAT_CNT		0x72
+#define L2C_TAD_EVENT_OCI_RSP_DAT_DMASK		0x73
+#define L2C_TAD_EVENT_OCI_RSP_DAT_VICD_CNT	0x74
+#define L2C_TAD_EVENT_OCI_RSP_DAT_VICD_DMASK	0x75
+#define L2C_TAD_EVENT_OCI_RTG_ALC_EVICT		0x76
+#define L2C_TAD_EVENT_OCI_RTG_ALC_VIC		0x77
+#define L2C_TAD_EVENT_QD0_IDX			0x80
+#define L2C_TAD_EVENT_QD0_RDAT			0x81
+#define L2C_TAD_EVENT_QD0_BNKS			0x82
+#define L2C_TAD_EVENT_QD0_WDAT			0x83
+#define L2C_TAD_EVENT_QD1_IDX			0x90
+#define L2C_TAD_EVENT_QD1_RDAT			0x91
+#define L2C_TAD_EVENT_QD1_BNKS			0x92
+#define L2C_TAD_EVENT_QD1_WDAT			0x93
+#define L2C_TAD_EVENT_QD2_IDX			0xa0
+#define L2C_TAD_EVENT_QD2_RDAT			0xa1
+#define L2C_TAD_EVENT_QD2_BNKS			0xa2
+#define L2C_TAD_EVENT_QD2_WDAT			0xa3
+#define L2C_TAD_EVENT_QD3_IDX			0xb0
+#define L2C_TAD_EVENT_QD3_RDAT			0xb1
+#define L2C_TAD_EVENT_QD3_BNKS			0xb2
+#define L2C_TAD_EVENT_QD3_WDAT			0xb3
+#define L2C_TAD_EVENT_QD4_IDX			0xc0
+#define L2C_TAD_EVENT_QD4_RDAT			0xc1
+#define L2C_TAD_EVENT_QD4_BNKS			0xc2
+#define L2C_TAD_EVENT_QD4_WDAT			0xc3
+#define L2C_TAD_EVENT_QD5_IDX			0xd0
+#define L2C_TAD_EVENT_QD5_RDAT			0xd1
+#define L2C_TAD_EVENT_QD5_BNKS			0xd2
+#define L2C_TAD_EVENT_QD5_WDAT			0xd3
+#define L2C_TAD_EVENT_QD6_IDX			0xe0
+#define L2C_TAD_EVENT_QD6_RDAT			0xe1
+#define L2C_TAD_EVENT_QD6_BNKS			0xe2
+#define L2C_TAD_EVENT_QD6_WDAT			0xe3
+#define L2C_TAD_EVENT_QD7_IDX			0xf0
+#define L2C_TAD_EVENT_QD7_RDAT			0xf1
+#define L2C_TAD_EVENT_QD7_BNKS			0xf2
+#define L2C_TAD_EVENT_QD7_WDAT			0xf3
+
+static void thunder_uncore_start_l2c_tad(struct perf_event *event, int flags)
+{
+	struct thunder_uncore *uncore = to_uncore(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct thunder_uncore_node *node;
+	struct thunder_uncore_unit *unit;
+	int id;
+
+	node = get_node(hwc->config, uncore);
+	id = get_id(hwc->config);
+
+	/* reset counter values to zero */
+	if (flags & PERF_EF_RELOAD)
+		list_for_each_entry(unit, &node->unit_list, entry)
+			writeq(0, hwc->event_base + unit->map);
+
+	/* start counters on all units on the node */
+	list_for_each_entry(unit, &node->unit_list, entry)
+		writeb(id, hwc->config_base + unit->map);
+
+	hwc->state = 0;
+	perf_event_update_userpage(event);
+}
+
+static void thunder_uncore_stop_l2c_tad(struct perf_event *event, int flags)
+{
+	struct thunder_uncore *uncore = to_uncore(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct thunder_uncore_node *node;
+	struct thunder_uncore_unit *unit;
+
+	node = get_node(hwc->config, uncore);
+
+	/* disable counters for all units on the node */
+	list_for_each_entry(unit, &node->unit_list, entry)
+		writeb(L2C_TAD_EVENTS_DISABLED, hwc->config_base + unit->map);
+	hwc->state |= PERF_HES_STOPPED;
+
+	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
+		thunder_uncore_read(event);
+		hwc->state |= PERF_HES_UPTODATE;
+	}
+}
+
+static int thunder_uncore_add_l2c_tad(struct perf_event *event, int flags)
+{
+	struct thunder_uncore *uncore = to_uncore(event->pmu);
+	struct hw_perf_event *hwc = &event->hw;
+	struct thunder_uncore_node *node;
+	int i;
+
+	node = get_node(hwc->config, uncore);
+
+	/* take the first available counter */
+	for (i = 0; i < node->num_counters; i++) {
+		if (!cmpxchg(&node->events[i], NULL, event)) {
+			hwc->idx = i;
+			break;
+		}
+	}
+
+	if (hwc->idx == -1)
+		return -EBUSY;
+
+	/* see comment at beginning of file */
+	hwc->config_base = L2C_TAD_PRF_OFFSET + hwc->idx;
+	hwc->event_base = L2C_TAD_PFC_OFFSET + hwc->idx * sizeof(u64);
+
+	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+	if (flags & PERF_EF_START)
+		thunder_uncore_start(event, PERF_EF_RELOAD);
+	return 0;
+}
+
+PMU_FORMAT_ATTR(event, "config:0-7");
+
+static struct attribute *thunder_l2c_tad_format_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_node.attr,
+	NULL,
+};
+
+static struct attribute_group thunder_l2c_tad_format_group = {
+	.name = "format",
+	.attrs = thunder_l2c_tad_format_attr,
+};
+
+static struct attribute *thunder_l2c_tad_events_attr[] = {
+	UC_EVENT_ENTRY(l2t_hit,			L2C_TAD_EVENT_L2T_HIT),
+	UC_EVENT_ENTRY(l2t_miss,		L2C_TAD_EVENT_L2T_MISS),
+	UC_EVENT_ENTRY(l2t_noalloc,		L2C_TAD_EVENT_L2T_NOALLOC),
+	UC_EVENT_ENTRY(l2_vic,			L2C_TAD_EVENT_L2_VIC),
+	UC_EVENT_ENTRY(sc_fail,			L2C_TAD_EVENT_SC_FAIL),
+	UC_EVENT_ENTRY(sc_pass,			L2C_TAD_EVENT_SC_PASS),
+	UC_EVENT_ENTRY(lfb_occ,			L2C_TAD_EVENT_LFB_OCC),
+	UC_EVENT_ENTRY(wait_lfb,		L2C_TAD_EVENT_WAIT_LFB),
+	UC_EVENT_ENTRY(wait_vab,		L2C_TAD_EVENT_WAIT_VAB),
+	UC_EVENT_ENTRY(open_ccpi,		L2C_TAD_EVENT_OPEN_CCPI),
+	UC_EVENT_ENTRY(lookup,			L2C_TAD_EVENT_LOOKUP),
+	UC_EVENT_ENTRY(lookup_xmc_lcl,		L2C_TAD_EVENT_LOOKUP_XMC_LCL),
+	UC_EVENT_ENTRY(lookup_xmc_rmt,		L2C_TAD_EVENT_LOOKUP_XMC_RMT),
+	UC_EVENT_ENTRY(lookup_mib,		L2C_TAD_EVENT_LOOKUP_MIB),
+	UC_EVENT_ENTRY(lookup_all,		L2C_TAD_EVENT_LOOKUP_ALL),
+	UC_EVENT_ENTRY(tag_alc_hit,		L2C_TAD_EVENT_TAG_ALC_HIT),
+	UC_EVENT_ENTRY(tag_alc_miss,		L2C_TAD_EVENT_TAG_ALC_MISS),
+	UC_EVENT_ENTRY(tag_alc_nalc,		L2C_TAD_EVENT_TAG_ALC_NALC),
+	UC_EVENT_ENTRY(tag_nalc_hit,		L2C_TAD_EVENT_TAG_NALC_HIT),
+	UC_EVENT_ENTRY(tag_nalc_miss,		L2C_TAD_EVENT_TAG_NALC_MISS),
+	UC_EVENT_ENTRY(lmc_wr,			L2C_TAD_EVENT_LMC_WR),
+	UC_EVENT_ENTRY(lmc_sblkdty,		L2C_TAD_EVENT_LMC_SBLKDTY),
+	UC_EVENT_ENTRY(tag_alc_rtg_hit,		L2C_TAD_EVENT_TAG_ALC_RTG_HIT),
+	UC_EVENT_ENTRY(tag_alc_rtg_hite,	L2C_TAD_EVENT_TAG_ALC_RTG_HITE),
+	UC_EVENT_ENTRY(tag_alc_rtg_hits,	L2C_TAD_EVENT_TAG_ALC_RTG_HITS),
+	UC_EVENT_ENTRY(tag_alc_rtg_miss,	L2C_TAD_EVENT_TAG_ALC_RTG_MISS),
+	UC_EVENT_ENTRY(tag_alc_nalc_rtg_hit,	L2C_TAD_EVENT_TAG_NALC_RTG_HIT),
+	UC_EVENT_ENTRY(tag_nalc_rtg_miss,	L2C_TAD_EVENT_TAG_NALC_RTG_MISS),
+	UC_EVENT_ENTRY(tag_nalc_rtg_hite,	L2C_TAD_EVENT_TAG_NALC_RTG_HITE),
+	UC_EVENT_ENTRY(tag_nalc_rtg_hits,	L2C_TAD_EVENT_TAG_NALC_RTG_HITS),
+	UC_EVENT_ENTRY(tag_alc_lcl_evict,	L2C_TAD_EVENT_TAG_ALC_LCL_EVICT),
+	UC_EVENT_ENTRY(tag_alc_lcl_clnvic,	L2C_TAD_EVENT_TAG_ALC_LCL_CLNVIC),
+	UC_EVENT_ENTRY(tag_alc_lcl_dtyvic,	L2C_TAD_EVENT_TAG_ALC_LCL_DTYVIC),
+	UC_EVENT_ENTRY(tag_alc_rmt_evict,	L2C_TAD_EVENT_TAG_ALC_RMT_EVICT),
+	UC_EVENT_ENTRY(tag_alc_rmt_vic,		L2C_TAD_EVENT_TAG_ALC_RMT_VIC),
+	UC_EVENT_ENTRY(rtg_alc,			L2C_TAD_EVENT_RTG_ALC),
+	UC_EVENT_ENTRY(rtg_alc_hit,		L2C_TAD_EVENT_RTG_ALC_HIT),
+	UC_EVENT_ENTRY(rtg_alc_hitwb,		L2C_TAD_EVENT_RTG_ALC_HITWB),
+	UC_EVENT_ENTRY(stc_total,		L2C_TAD_EVENT_STC_TOTAL),
+	UC_EVENT_ENTRY(stc_total_fail,		L2C_TAD_EVENT_STC_TOTAL_FAIL),
+	UC_EVENT_ENTRY(stc_rmt,			L2C_TAD_EVENT_STC_RMT),
+	UC_EVENT_ENTRY(stc_rmt_fail,		L2C_TAD_EVENT_STC_RMT_FAIL),
+	UC_EVENT_ENTRY(stc_lcl,			L2C_TAD_EVENT_STC_LCL),
+	UC_EVENT_ENTRY(stc_lcl_fail,		L2C_TAD_EVENT_STC_LCL_FAIL),
+	UC_EVENT_ENTRY(oci_rtg_wait,		L2C_TAD_EVENT_OCI_RTG_WAIT),
+	UC_EVENT_ENTRY(oci_fwd_cyc_hit,		L2C_TAD_EVENT_OCI_FWD_CYC_HIT),
+	UC_EVENT_ENTRY(oci_fwd_race,		L2C_TAD_EVENT_OCI_FWD_RACE),
+	UC_EVENT_ENTRY(oci_haks,		L2C_TAD_EVENT_OCI_HAKS),
+	UC_EVENT_ENTRY(oci_fldx_tag_e_nodat,	L2C_TAD_EVENT_OCI_FLDX_TAG_E_NODAT),
+	UC_EVENT_ENTRY(oci_fldx_tag_e_dat,	L2C_TAD_EVENT_OCI_FLDX_TAG_E_DAT),
+	UC_EVENT_ENTRY(oci_rldd,		L2C_TAD_EVENT_OCI_RLDD),
+	UC_EVENT_ENTRY(oci_rldd_pemd,		L2C_TAD_EVENT_OCI_RLDD_PEMD),
+	UC_EVENT_ENTRY(oci_rrq_dat_cnt,		L2C_TAD_EVENT_OCI_RRQ_DAT_CNT),
+	UC_EVENT_ENTRY(oci_rrq_dat_dmask,	L2C_TAD_EVENT_OCI_RRQ_DAT_DMASK),
+	UC_EVENT_ENTRY(oci_rsp_dat_cnt,		L2C_TAD_EVENT_OCI_RSP_DAT_CNT),
+	UC_EVENT_ENTRY(oci_rsp_dat_dmaks,	L2C_TAD_EVENT_OCI_RSP_DAT_DMASK),
+	UC_EVENT_ENTRY(oci_rsp_dat_vicd_cnt,	L2C_TAD_EVENT_OCI_RSP_DAT_VICD_CNT),
+	UC_EVENT_ENTRY(oci_rsp_dat_vicd_dmask,	L2C_TAD_EVENT_OCI_RSP_DAT_VICD_DMASK),
+	UC_EVENT_ENTRY(oci_rtg_alc_evict,	L2C_TAD_EVENT_OCI_RTG_ALC_EVICT),
+	UC_EVENT_ENTRY(oci_rtg_alc_vic,		L2C_TAD_EVENT_OCI_RTG_ALC_VIC),
+	UC_EVENT_ENTRY(qd0_idx,			L2C_TAD_EVENT_QD0_IDX),
+	UC_EVENT_ENTRY(qd0_rdat,		L2C_TAD_EVENT_QD0_RDAT),
+	UC_EVENT_ENTRY(qd0_bnks,		L2C_TAD_EVENT_QD0_BNKS),
+	UC_EVENT_ENTRY(qd0_wdat,		L2C_TAD_EVENT_QD0_WDAT),
+	UC_EVENT_ENTRY(qd1_idx,			L2C_TAD_EVENT_QD1_IDX),
+	UC_EVENT_ENTRY(qd1_rdat,		L2C_TAD_EVENT_QD1_RDAT),
+	UC_EVENT_ENTRY(qd1_bnks,		L2C_TAD_EVENT_QD1_BNKS),
+	UC_EVENT_ENTRY(qd1_wdat,		L2C_TAD_EVENT_QD1_WDAT),
+	UC_EVENT_ENTRY(qd2_idx,			L2C_TAD_EVENT_QD2_IDX),
+	UC_EVENT_ENTRY(qd2_rdat,		L2C_TAD_EVENT_QD2_RDAT),
+	UC_EVENT_ENTRY(qd2_bnks,		L2C_TAD_EVENT_QD2_BNKS),
+	UC_EVENT_ENTRY(qd2_wdat,		L2C_TAD_EVENT_QD2_WDAT),
+	UC_EVENT_ENTRY(qd3_idx,			L2C_TAD_EVENT_QD3_IDX),
+	UC_EVENT_ENTRY(qd3_rdat,		L2C_TAD_EVENT_QD3_RDAT),
+	UC_EVENT_ENTRY(qd3_bnks,		L2C_TAD_EVENT_QD3_BNKS),
+	UC_EVENT_ENTRY(qd3_wdat,		L2C_TAD_EVENT_QD3_WDAT),
+	UC_EVENT_ENTRY(qd4_idx,			L2C_TAD_EVENT_QD4_IDX),
+	UC_EVENT_ENTRY(qd4_rdat,		L2C_TAD_EVENT_QD4_RDAT),
+	UC_EVENT_ENTRY(qd4_bnks,		L2C_TAD_EVENT_QD4_BNKS),
+	UC_EVENT_ENTRY(qd4_wdat,		L2C_TAD_EVENT_QD4_WDAT),
+	UC_EVENT_ENTRY(qd5_idx,			L2C_TAD_EVENT_QD5_IDX),
+	UC_EVENT_ENTRY(qd5_rdat,		L2C_TAD_EVENT_QD5_RDAT),
+	UC_EVENT_ENTRY(qd5_bnks,		L2C_TAD_EVENT_QD5_BNKS),
+	UC_EVENT_ENTRY(qd5_wdat,		L2C_TAD_EVENT_QD5_WDAT),
+	UC_EVENT_ENTRY(qd6_idx,			L2C_TAD_EVENT_QD6_IDX),
+	UC_EVENT_ENTRY(qd6_rdat,		L2C_TAD_EVENT_QD6_RDAT),
+	UC_EVENT_ENTRY(qd6_bnks,		L2C_TAD_EVENT_QD6_BNKS),
+	UC_EVENT_ENTRY(qd6_wdat,		L2C_TAD_EVENT_QD6_WDAT),
+	UC_EVENT_ENTRY(qd7_idx,			L2C_TAD_EVENT_QD7_IDX),
+	UC_EVENT_ENTRY(qd7_rdat,		L2C_TAD_EVENT_QD7_RDAT),
+	UC_EVENT_ENTRY(qd7_bnks,		L2C_TAD_EVENT_QD7_BNKS),
+	UC_EVENT_ENTRY(qd7_wdat,		L2C_TAD_EVENT_QD7_WDAT),
+	NULL,
+};
+
+static struct attribute_group thunder_l2c_tad_events_group = {
+	.name = "events",
+	.attrs = thunder_l2c_tad_events_attr,
+};
+
+static const struct attribute_group *thunder_l2c_tad_attr_groups[] = {
+	&thunder_uncore_attr_group,
+	&thunder_l2c_tad_format_group,
+	&thunder_l2c_tad_events_group,
+	NULL,
+};
+
+struct pmu thunder_l2c_tad_pmu = {
+	.name		= "thunder_l2c_tad",
+	.task_ctx_nr    = perf_invalid_context,
+	.event_init	= thunder_uncore_event_init,
+	.add		= thunder_uncore_add_l2c_tad,
+	.del		= thunder_uncore_del,
+	.start		= thunder_uncore_start_l2c_tad,
+	.stop		= thunder_uncore_stop_l2c_tad,
+	.read		= thunder_uncore_read,
+	.attr_groups	= thunder_l2c_tad_attr_groups,
+};
+
+static bool event_valid(u64 c)
+{
+	if ((c > 0 &&
+	     c <= L2C_TAD_EVENT_OPEN_CCPI) ||
+	    (c >= L2C_TAD_EVENT_LOOKUP &&
+	     c <= L2C_TAD_EVENT_LOOKUP_ALL) ||
+	    (c >= L2C_TAD_EVENT_TAG_ALC_HIT &&
+	     c <= L2C_TAD_EVENT_TAG_NALC_MISS) ||
+	    (c >= L2C_TAD_EVENT_LMC_WR &&
+	     c <= L2C_TAD_EVENT_STC_LCL_FAIL) ||
+	    (c >= L2C_TAD_EVENT_OCI_RTG_WAIT &&
+	     c <= L2C_TAD_EVENT_OCI_RTG_ALC_VIC) ||
+	    /* L2C_TAD_EVENT_QD[0..7] IDX,RDAT,BNKS,WDAT => 0x80 .. 0xf3 */
+	    ((c & 0x80) && ((c & 0xf) <= 3)))
+		return true;
+
+	return false;
+}
+
+int __init thunder_uncore_l2c_tad_setup(void)
+{
+	int ret = -ENOMEM;
+
+	thunder_uncore_l2c_tad = kzalloc(sizeof(*thunder_uncore_l2c_tad),
+					 GFP_KERNEL);
+	if (!thunder_uncore_l2c_tad)
+		goto fail_nomem;
+
+	ret = thunder_uncore_setup(thunder_uncore_l2c_tad, 0xa02e,
+				   &thunder_l2c_tad_pmu, L2C_TAD_NR_COUNTERS);
+	if (ret)
+		goto fail;
+
+	thunder_uncore_l2c_tad->event_valid = event_valid;
+	return 0;
+
+fail:
+	kfree(thunder_uncore_l2c_tad);
+fail_nomem:
+	return ret;
+}
-- 
2.9.0.rc0.21.g7777322

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox