Linux-ARM-Kernel Archive on lore.kernel.org

Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/1 v7] ARM: imx: Added perf functionality to mmdc driver
From: Zhi Li @ 2016-11-08 19:21 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <CAP=VYLo_p3ZmOFcyM9om_31-=5O3ZiRLCBkJDDtj84JxAA7h8Q@mail.gmail.com>

On Tue, Nov 8, 2016 at 1:00 PM, Paul Gortmaker
<paul.gortmaker@windriver.com> wrote:
> On Mon, Sep 19, 2016 at 1:47 PM, Frank Li <Frank.Li@nxp.com> wrote:
>> From: Zhengyu Shen <zhengyu.shen@nxp.com>
>>
>> MMDC is a multi-mode DDR controller that supports DDR3/DDR3L x16/x32/x64
>> and LPDDR2 two channel x16/x32 memory types. MMDC is configurable, high
>> performance, and optimized. MMDC is present on i.MX6 Quad and i.MX6
>> QuadPlus devices, but this driver only supports i.MX6 Quad at the moment.
>> MMDC provides registers for performance counters which are read via this
>> driver to help debug memory throughput and similar issues.
>>
>> $ perf stat -a -e mmdc/busy-cycles/,mmdc/read-accesses/,mmdc/read-bytes/,mmdc/total-cycles/,mmdc/write-accesses/,mmdc/write-bytes/ dd if=/dev/zero of=/dev/null bs=1M count=5000
>> Performance counter stats for 'dd if=/dev/zero of=/dev/null bs=1M count=5000':
>>
>>          898021787      mmdc/busy-cycles/
>>           14819600      mmdc/read-accesses/
>>             471.30 MB   mmdc/read-bytes/
>>         2815419216      mmdc/total-cycles/
>>           13367354      mmdc/write-accesses/
>>             427.76 MB   mmdc/write-bytes/
>>
>>        5.334757334 seconds time elapsed
>>
>> Signed-off-by: Zhengyu Shen <zhengyu.shen@nxp.com>
>> Signed-off-by: Frank Li <frank.li@nxp.com>
>> ---
>> Changes from v6 to v7
>>     use mmdc_pmu prefix
>>     remove unnecessary check
>>     improve group event check according to mark's feedback.
>>     check pmu_mmdc->mmdc_events[cfg] at event_add
>>     only check == 0 at event_del
>>
>> Changes from v5 to v6
>>     Improve group event error handle
>>
>> Changes from v4 to v5
>>     Remove mmdc_pmu:irq
>>     remove static variable cpuhp_mmdc_pmu
>>     remove spin_lock
>>     check is_sampling_event(event)
>>     remove unnecessary cast
>>     use hw_perf_event::prev_count
>>
>> Changes from v3 to v4:
>>     Tested and fixed crash relating to removing events with perf fuzzer
>>     Adjusted formatting
>>     Moved all perf event code under CONFIG_PERF_EVENTS
>>         Switched cpuhp_setup_state to cpuhp_setup_state_nocalls
>>
>> Changes from v2 to v3:
>>     Use WARN_ONCE instead of returning generic error values
>>     Replace CPU Notifiers with newer state machine hotplug
>>     Added additional checks on event_init for grouping and sampling
>>     Remove useless mmdc_enable_profiling function
>>     Added comments
>>     Moved start index of events from 0x01 to 0x00
>>     Added a counter to pmu_mmdc to only stop hrtimer after all events are finished
>>     Replace readl_relaxed and writel_relaxed with readl and writel
>>     Removed duplicate update function
>>     Used devm_kasprintf when naming mmdcs probed
>>
>> Changes from v1 to v2:
>>     Added cpumask and migration handling support to driver
>>     Validated event during event_init
>>     Added code to properly stop counters
>>     Used perf_invalid_context instead of perf_sw_context
>>     Added hrtimer to poll for overflow
>>     Added better description
>>     Added support for multiple mmdcs
>>
>>  arch/arm/mach-imx/mmdc.c | 459 ++++++++++++++++++++++++++++++++++++++++++++++-
>>  1 file changed, 457 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c
>> index db9621c..1f70376 100644
>> --- a/arch/arm/mach-imx/mmdc.c
>> +++ b/arch/arm/mach-imx/mmdc.c
>> @@ -1,5 +1,5 @@
>>  /*
>> - * Copyright 2011 Freescale Semiconductor, Inc.
>> + * Copyright 2011,2016 Freescale Semiconductor, Inc.
>>   * Copyright 2011 Linaro Ltd.
>>   *
>>   * The code contained herein is licensed under the GNU General Public
>> @@ -10,12 +10,16 @@
>>   * http://www.gnu.org/copyleft/gpl.html
>>   */
>>
>> +#include <linux/hrtimer.h>
>>  #include <linux/init.h>
>> +#include <linux/interrupt.h>
>>  #include <linux/io.h>
>>  #include <linux/module.h>
>>  #include <linux/of.h>
>>  #include <linux/of_address.h>
>>  #include <linux/of_device.h>
>> +#include <linux/perf_event.h>
>> +#include <linux/slab.h>
>>
>>  #include "common.h"
>>
>> @@ -27,8 +31,458 @@
>>  #define BM_MMDC_MDMISC_DDR_TYPE        0x18
>>  #define BP_MMDC_MDMISC_DDR_TYPE        0x3
>>
>> +#define TOTAL_CYCLES           0x0
>> +#define BUSY_CYCLES            0x1
>> +#define READ_ACCESSES          0x2
>> +#define WRITE_ACCESSES         0x3
>> +#define READ_BYTES             0x4
>> +#define WRITE_BYTES            0x5
>> +
>> +/* Enables, resets, freezes, overflow profiling*/
>> +#define DBG_DIS                        0x0
>> +#define DBG_EN                 0x1
>> +#define DBG_RST                        0x2
>> +#define PRF_FRZ                        0x4
>> +#define CYC_OVF                        0x8
>> +
>> +#define MMDC_MADPCR0   0x410
>> +#define MMDC_MADPSR0   0x418
>> +#define MMDC_MADPSR1   0x41C
>> +#define MMDC_MADPSR2   0x420
>> +#define MMDC_MADPSR3   0x424
>> +#define MMDC_MADPSR4   0x428
>> +#define MMDC_MADPSR5   0x42C
>> +
>> +#define MMDC_NUM_COUNTERS      6
>> +
>> +#define to_mmdc_pmu(p) container_of(p, struct mmdc_pmu, pmu)
>> +
>>  static int ddr_type;
>>
>> +#ifdef CONFIG_PERF_EVENTS
>> +
>> +static DEFINE_IDA(mmdc_ida);
>> +
>> +PMU_EVENT_ATTR_STRING(total-cycles, mmdc_pmu_total_cycles, "event=0x00")
>> +PMU_EVENT_ATTR_STRING(busy-cycles, mmdc_pmu_busy_cycles, "event=0x01")
>> +PMU_EVENT_ATTR_STRING(read-accesses, mmdc_pmu_read_accesses, "event=0x02")
>> +PMU_EVENT_ATTR_STRING(write-accesses, mmdc_pmu_write_accesses, "config=0x03")
>> +PMU_EVENT_ATTR_STRING(read-bytes, mmdc_pmu_read_bytes, "event=0x04")
>> +PMU_EVENT_ATTR_STRING(read-bytes.unit, mmdc_pmu_read_bytes_unit, "MB");
>> +PMU_EVENT_ATTR_STRING(read-bytes.scale, mmdc_pmu_read_bytes_scale, "0.000001");
>> +PMU_EVENT_ATTR_STRING(write-bytes, mmdc_pmu_write_bytes, "event=0x05")
>> +PMU_EVENT_ATTR_STRING(write-bytes.unit, mmdc_pmu_write_bytes_unit, "MB");
>> +PMU_EVENT_ATTR_STRING(write-bytes.scale, mmdc_pmu_write_bytes_scale, "0.000001");
>> +
>> +struct mmdc_pmu {
>> +       struct pmu pmu;
>> +       void __iomem *mmdc_base;
>> +       cpumask_t cpu;
>> +       struct hrtimer hrtimer;
>> +       unsigned int active_events;
>> +       struct device *dev;
>> +       struct perf_event *mmdc_events[MMDC_NUM_COUNTERS];
>> +       struct hlist_node node;
>> +};
>> +
>> +/*
>> + * Polling period is set to one second, overflow of total-cycles (the fastest
>> + * increasing counter) takes ten seconds so one second is safe
>> + */
>> +static unsigned int mmdc_pmu_poll_period_us = 1000000;
>> +
>> +module_param_named(pmu_pmu_poll_period_us, mmdc_pmu_poll_period_us, uint,
>> +               S_IRUGO | S_IWUSR);
>
> I just noticed this commit now that linux-next is back after the week off.
>
> This should probably use core_param or setup_param since the Kconfig
> for this is bool and not tristate.  Similarly, that means that the .remove
> function you've added is dead code -- unless you envision a case where
> the user needs to forcibly unbind the driver...
>
> Do you want to redo the existing commit or do you want me to submit
> a follow-up fixup patch?

I will do follow-up fixup patch.

I think pmu_pmu_poll_period_us should be removed because no benefit to
change it.
I can delete .remove

best regards
Frank Li

>
> Thanks
> Paul.
> --
>
>> +
>> +static ktime_t mmdc_pmu_timer_period(void)
>> +{
>> +       return ns_to_ktime((u64)mmdc_pmu_poll_period_us * 1000);
>> +}
>> +

^ permalink raw reply

* [PATCH 2/2] mm: hugetlb: support gigantic surplus pages
From: Gerald Schaefer @ 2016-11-08 19:27 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161108091725.GA18678@sha-win-210.asiapac.arm.com>

On Tue, 8 Nov 2016 17:17:28 +0800
Huang Shijie <shijie.huang@arm.com> wrote:

> > I will look at the lockdep issue.
> I tested the new patch (will be sent out later) on the arm64 platform,
> and I did not meet the lockdep issue when I enabled the lockdep.
> The following is my config:
> 
> 	CONFIG_LOCKD=y
> 	CONFIG_LOCKD_V4=y
> 	CONFIG_LOCKUP_DETECTOR=y
>         # CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC is not set
> 	CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE=0
> 	CONFIG_DEBUG_SPINLOCK=y
> 	CONFIG_DEBUG_LOCK_ALLOC=y
> 	CONFIG_PROVE_LOCKING=y
> 	CONFIG_LOCKDEP=y
> 	CONFIG_LOCK_STAT=y
> 	CONFIG_DEBUG_LOCKDEP=y
> 	CONFIG_DEBUG_LOCKING_API_SELFTESTS=y
> 	
> So do I miss something? 

Those options should be OK. Meanwhile I looked into this a little more,
and the problematic line/lock is spin_lock_irqsave(&z->lock, flags) at
the top of alloc_gigantic_page(). From the lockdep trace we see that
it is triggered by an mmap(), and then hugetlb_acct_memory() ->
__alloc_huge_page() -> alloc_gigantic_page().

However, in between those functions (inside gather_surplus_pages())
a NUMA_NO_NODE node id comes into play. And this finally results in
alloc_gigantic_page() being called with NUMA_NO_NODE as nid (which is
-1), and NODE_DATA(nid)->node_zones will then reach into Nirvana.

So, I guess the problem is a missing NUMA_NO_NODE check in
alloc_gigantic_page(), similar to the one in
__hugetlb_alloc_buddy_huge_page(). And somehow this was not a problem
before the gigantic surplus change.

^ permalink raw reply

* [PATCH 1/1 v7] ARM: imx: Added perf functionality to mmdc driver
From: Peter Zijlstra @ 2016-11-08 20:11 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <CAHrpEqQGRLY7ZB6moP9FZ0w-t63QkyLE1fUb3heRZJfRd2Y9sw@mail.gmail.com>

On Tue, Nov 08, 2016 at 01:21:47PM -0600, Zhi Li wrote:
> On Tue, Nov 8, 2016 at 1:00 PM, Paul Gortmaker
> > I just noticed this commit now that linux-next is back after the week off.
> >
> > This should probably use core_param or setup_param since the Kconfig
> > for this is bool and not tristate. 


> I think pmu_pmu_poll_period_us should be removed because no benefit to
> change it.
> I can delete .remove

Why not make it tristate ?

^ permalink raw reply

* [PATCH 1/1 v7] ARM: imx: Added perf functionality to mmdc driver
From: Zhi Li @ 2016-11-08 20:18 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161108201114.GQ3117@twins.programming.kicks-ass.net>

On Tue, Nov 8, 2016 at 2:11 PM, Peter Zijlstra <peterz@infradead.org> wrote:
> On Tue, Nov 08, 2016 at 01:21:47PM -0600, Zhi Li wrote:
>> On Tue, Nov 8, 2016 at 1:00 PM, Paul Gortmaker
>> > I just noticed this commit now that linux-next is back after the week off.
>> >
>> > This should probably use core_param or setup_param since the Kconfig
>> > for this is bool and not tristate.
>
>
>> I think pmu_pmu_poll_period_us should be removed because no benefit to
>> change it.
>> I can delete .remove
>
> Why not make it tristate ?

The base code provides a function which is needed by suspend and resume.

best regards
Frank Li

^ permalink raw reply

* [GIT PULL 1/4] DaVinci cleanup for v4.10
From: Olof Johansson @ 2016-11-08 20:22 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161104062535.9593-1-nsekhar@ti.com>

On Fri, Nov 04, 2016 at 11:55:32AM +0530, Sekhar Nori wrote:
> The following changes since commit 1001354ca34179f3db924eb66672442a173147dc:
> 
>   Linux 4.9-rc1 (2016-10-15 12:17:50 -0700)
> 
> are available in the git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/nsekhar/linux-davinci.git tags/davinci-for-v4.10/cleanup
> 
> for you to fetch changes up to 766763dbdc1dca11deabdb00077a1c19e2803f0a:
> 
>   ARM: davinci: da8xx: Remove duplicated defines (2016-10-31 16:51:56 +0530)
> 
> ----------------------------------------------------------------
> Clean-up some unnecessary code from mach-davinci.
> 
> - Remove now unneeded dma resources where drivers
>   are already converted to use the dma_slave_map[]
>   structure.
> 
> - Remove some duplicated defines related to USB support.

We don't have a cleanup branch in our tree any more, but thanks for breaking
this out for us anyway! I've merged it into next/soc.


-Olof

^ permalink raw reply

* [GIT PULL 2/4] DaVinci SoC updates for v4.10
From: Olof Johansson @ 2016-11-08 20:23 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161104062535.9593-2-nsekhar@ti.com>

On Fri, Nov 04, 2016 at 11:55:33AM +0530, Sekhar Nori wrote:
> The following changes since commit 766763dbdc1dca11deabdb00077a1c19e2803f0a:
> 
>   ARM: davinci: da8xx: Remove duplicated defines (2016-10-31 16:51:56 +0530)
> 
> are available in the git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/nsekhar/linux-davinci.git tags/davinci-for-v4.10/soc
> 
> for you to fetch changes up to ced95ac0815501f47a6041548d70d8900400912d:
> 
>   ARM: davinci: da8xx: register USB PHY clocks in the DT file (2016-11-01 15:24:24 +0530)
> 
> ----------------------------------------------------------------
> DaVinci SoC support improvements mainly towards an effort to
> get to working USB support.
> 
> - use CFGCHIP syscon device to access common registers
> 
> - define platform data and device tree nodes for newly
>   introduced USB phy driver
> 
> - clock lookup and auxdata lookup for USB phy and also
>   for LCDC (LCD controller)

Merged, thanks.


-Olof

^ permalink raw reply

* [GIT PULL 3/4] DaVinci device-tree updates for v4.10
From: Olof Johansson @ 2016-11-08 20:23 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161104062535.9593-3-nsekhar@ti.com>

On Fri, Nov 04, 2016 at 11:55:34AM +0530, Sekhar Nori wrote:
> The following changes since commit 1001354ca34179f3db924eb66672442a173147dc:
> 
>   Linux 4.9-rc1 (2016-10-15 12:17:50 -0700)
> 
> are available in the git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/nsekhar/linux-davinci.git tags/davinci-for-v4.10/dt
> 
> for you to fetch changes up to 1b499f255589204466d8f8ab26e6b577d7b5c88f:
> 
>   ARM: dts: da850: Add cfgchip syscon node (2016-11-01 15:11:10 +0530)
> 
> ----------------------------------------------------------------
> DaVinci device-tree source additions for
> LCD, SPI and cfgchip syscon.

Merged, thanks!


-Olof

^ permalink raw reply

* [GIT PULL 4/4] DaVinci defconfig updates for v4.10
From: Olof Johansson @ 2016-11-08 20:24 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161104062535.9593-4-nsekhar@ti.com>

On Fri, Nov 04, 2016 at 11:55:35AM +0530, Sekhar Nori wrote:
> The following changes since commit 1001354ca34179f3db924eb66672442a173147dc:
> 
>   Linux 4.9-rc1 (2016-10-15 12:17:50 -0700)
> 
> are available in the git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/nsekhar/linux-davinci.git tags/davinci-for-v4.10/defconfig
> 
> for you to fetch changes up to 6e9be8608771192851a3adf59f0ba9240e3f802c:
> 
>   ARM: davinci_all_defconfig: enable LED default-on trigger (2016-11-01 11:42:54 +0530)
> 
> ----------------------------------------------------------------
> Some updates to davinci_all_defconfig for MMC,
> LCDC and GPIO/LEDs.
> 
> ----------------------------------------------------------------
> Bartosz Golaszewski (1):
>       ARM: davinci_all_defconfig: enable LCDC DRM driver
> 
> David Lechner (3):
>       ARM: davinci_all_defconfig: enable gpio poweroff driver
>       ARM: davinci_all_defconfig: build MMC into kernel
>       ARM: davinci_all_defconfig: enable LED default-on trigger

Merged, thanks.


-Olof

^ permalink raw reply

* Summary of LPC guest MSI discussion in Santa Fe (was: Re: [RFC 0/8] KVM PCIe/MSI passthrough on ARM/ARM64 (Alt II))
From: Christoffer Dall @ 2016-11-08 20:29 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161108024559.GA20591@arm.com>

Hi Will,

On Tue, Nov 08, 2016 at 02:45:59AM +0000, Will Deacon wrote:
> Hi all,
> 
> I figured this was a reasonable post to piggy-back on for the LPC minutes
> relating to guest MSIs on arm64.
> 
> On Thu, Nov 03, 2016 at 10:02:05PM -0600, Alex Williamson wrote:
> > We can always have QEMU reject hot-adding the device if the reserved
> > region overlaps existing guest RAM, but I don't even really see how we
> > advise users to give them a reasonable chance of avoiding that
> > possibility.  Apparently there are also ARM platforms where MSI pages
> > cannot be remapped to support the previous programmable user/VM
> > address, is it even worthwhile to support those platforms?  Does that
> > decision influence whether user programmable MSI reserved regions are
> > really a second class citizen to fixed reserved regions?  I expect
> > we'll be talking about this tomorrow morning, but I certainly haven't
> > come up with any viable solutions to this.  Thanks,
> 
> At LPC last week, we discussed guest MSIs on arm64 as part of the PCI
> microconference. I presented some slides to illustrate some of the issues
> we're trying to solve:
> 
>   http://www.willdeacon.ukfsn.org/bitbucket/lpc-16/msi-in-guest-arm64.pdf
> 
> Punit took some notes (thanks!) on the etherpad here:
> 
>   https://etherpad.openstack.org/p/LPC2016_PCI
> 
> although the discussion was pretty lively and jumped about, so I've had
> to go from memory where the notes didn't capture everything that was
> said.
> 
> To summarise, arm64 platforms differ in their handling of MSIs when compared
> to x86:
> 
>   1. The physical memory map is not standardised (Jon pointed out that
>      this is something that was realised late on)
>   2. MSIs are usually treated the same as DMA writes, in that they must be
>      mapped by the SMMU page tables so that they target a physical MSI
>      doorbell
>   3. On some platforms, MSIs bypass the SMMU entirely (e.g. due to an MSI
>      doorbell built into the PCI RC)
>   4. Platforms typically have some set of addresses that abort before
>      reaching the SMMU (e.g. because the PCI identifies them as P2P).
> 
> All of this means that userspace (QEMU) needs to identify the memory
> regions corresponding to points (3) and (4) and ensure that they are
> not allocated in the guest physical (IPA) space. For platforms that can
> remap the MSI doorbell as in (2), then some space also needs to be
> allocated for that.
> 
> Rather than treat these as separate problems, a better interface is to
> tell userspace about a set of reserved regions, and have this include
> the MSI doorbell, irrespective of whether or not it can be remapped.

Is my understanding correct, that you need to tell userspace about the
location of the doorbell (in the IOVA space) in case (2), because even
though the configuration of the device is handled by the (host) kernel
through trapping of the BARs, we have to avoid the VFIO user programming
the device to create other DMA transactions to this particular address,
since that will obviously conflict and either not produce the desired
DMA transactions or result in unintended weird interrupts?

Thanks,
Christoffer

^ permalink raw reply

* [PATCH 3/3] ARM: dts: da850: add usb device node
From: kbuild test robot @ 2016-11-08 20:34 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161108185831.17683-4-ahaslam@baylibre.com>

Hi Axel,

[auto build test ERROR on usb/usb-testing]
[also build test ERROR on next-20161108]
[cannot apply to robh/for-next v4.9-rc4]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Axel-Haslam/USB-ohci-da8xx-Add-devicetree-bindings/20161109-031338
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb.git usb-testing
config: arm-at91_dt_defconfig (attached as .config)
compiler: arm-linux-gnueabi-gcc (Debian 6.1.1-9) 6.1.1 20160705
reproduce:
        wget https://git.kernel.org/cgit/linux/kernel/git/wfg/lkp-tests.git/plain/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=arm 

All errors (new ones prefixed by >>):

>> Error: arch/arm/boot/dts/da850-lcdk.dts:89.1-9 Label or path usb_phy not found
   FATAL ERROR: Syntax error parsing input tree

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
-------------- next part --------------
A non-text attachment was scrubbed...
Name: .config.gz
Type: application/gzip
Size: 21781 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20161109/5e5ea637/attachment-0001.gz>

^ permalink raw reply

* [PATCH] drm/sun4i: Fix error handling
From: Maxime Ripard @ 2016-11-08 20:38 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1d4a8fc9-6b62-4af7-19bc-565b15cdc413@wanadoo.fr>

Salut,

On Sat, Nov 05, 2016 at 07:15:45AM +0100, Christophe JAILLET wrote:
> Le 02/11/2016 à 19:14, Maxime Ripard a écrit :
> > Hi,
> > 
> > On Sun, Oct 30, 2016 at 12:53:02PM +0100, Christophe JAILLET wrote:
> > > BTW, memory allocation in 'sun4i_layers_init()' looks spurious, especially
> > > the use of 'layer' in the for loop.
> > > Just my 2 cents.
> > What do you mean by it's spurious?
> Hi Maxime,
> 
> what I mean is:
> 
> > struct sun4i_layer **sun4i_layers_init(struct drm_device *drm)
> > {
> >     struct sun4i_drv *drv = drm->dev_private;
> >     struct sun4i_layer **layers;
> >     int i;
> >
> >     layers = devm_kcalloc(drm->dev, ARRAY_SIZE(sun4i_backend_planes),
> >                   sizeof(**layers), GFP_KERNEL);
> Here, we allocate some memory for ARRAY_SIZE(sun4i_backend_planes) (i.e. 2)
> 'struct sun4i_layer'.
> We do not allocate some space for some pointers but for some structure.
> 
> Also, these structures are zeroed and seem to never be initialized by
> anything else.
> 
> >     if (!layers)
> >         return ERR_PTR(-ENOMEM);
> >
> >     /*
> >      * The hardware is a bit unusual here.
> >      *
> >      * Even though it supports 4 layers, it does the composition
> >      * in two separate steps.
> >      *
> >      * The first one is assigning a layer to one of its two
> >      * pipes. If more that 1 layer is assigned to the same pipe,
> >      * and if pixels overlaps, the pipe will take the pixel from
> >      * the layer with the highest priority.
> >      *
> >      * The second step is the actual alpha blending, that takes
> >      * the two pipes as input, and uses the eventual alpha
> >      * component to do the transparency between the two.
> >      *
> >      * This two steps scenario makes us unable to guarantee a
> >      * robust alpha blending between the 4 layers in all
> >      * situations. So we just expose two layers, one per pipe. On
> >      * SoCs that support it, sprites could fill the need for more
> >      * layers.
> >      */
> The comment makes me think that this driver (and this function) only handles
> 2 layers ("So we just expose two layers"), which is consistent with
> ARRAY_SIZE(sun4i_backend_planes) (i.e. 2)
> So I would expect only 2 'struct sun4i_layer' to be allocated
> 
> >     for (i = 0; i < ARRAY_SIZE(sun4i_backend_planes); i++) {
> >         const struct sun4i_plane_desc *plane = &sun4i_backend_planes[i];
> >         struct sun4i_layer *layer = layers[i];
> Here, we take the address of one of the 2 structure allocated above.
> This is overridden, just the line after.
> 
> >
> >         layer = sun4i_layer_init_one(drm, plane);
> 'sun4i_layer_init_one()' looks like:
> 
>     struct sun4i_layer *layer;
>     layer = devm_kzalloc(drm->dev, sizeof(*layer), GFP_KERNEL);
>     ...
>     return layer;
> 
> So we once more allocate some 'struct sun4i_layer'
> 
> BUT, the corresponding address is stored into the 'layer' variable, and
> finally seems to get lost and no reference is kept of this. (i.e. 'layers'
> (with an s) is left unchanged)
> 
> >         if (IS_ERR(layer)) {
> >             dev_err(drm->dev, "Couldn't initialize %s plane\n",
> >                 i ? "overlay" : "primary");
> >             return ERR_CAST(layer);
> >         };
> >
> >         DRM_DEBUG_DRIVER("Assigning %s plane to pipe %d\n",
> >                  i ? "overlay" : "primary", plane->pipe);
> >         regmap_update_bits(drv->backend->regs,
> SUN4I_BACKEND_ATTCTL_REG0(i),
> >                    SUN4I_BACKEND_ATTCTL_REG0_LAY_PIPESEL_MASK,
> > SUN4I_BACKEND_ATTCTL_REG0_LAY_PIPESEL(plane->pipe));
> >
> >         layer->id = i;
> >     };
> >
> >     return layers;
> > }
> 
> 
> So, 4 'struct sun4i_layer' have been allocated. 2 in 'sun4i_layers_init()'
> and 2 in 'sun4i_layer_init_one()' (once at a time, but called twice)
> 
> What looks spurious to me is either:
>    - 'struct sun4i_layer *layer = layers[i];' which should just be 'struct
> sun4i_layer *layer;'
> or
>    - 'layers' (with an s) should be an array of pointers and the addresses
> returned by 'sun4i_layer_init_one()' should be saved there.
> 
> 
> I don't know at all this driver, so I'm maybe completely wrong.
> What I would have expected would be something like: (un-tested, just to give
> an idea)
> 
> 
> ==============8<================================================
> 
> @@ -133,9 +133,9 @@ struct sun4i_layer **sun4i_layers_init(struct drm_device
> *drm)
>      struct sun4i_layer **layers;
>      int i;
> 
>      layers = devm_kcalloc(drm->dev, ARRAY_SIZE(sun4i_backend_planes),
> -                  sizeof(**layers), GFP_KERNEL);
> +                  sizeof(*layers), GFP_KERNEL);
>      if (!layers)
>          return ERR_PTR(-ENOMEM);
> 
>      /*
> @@ -160,16 +160,17 @@ struct sun4i_layer **sun4i_layers_init(struct
> drm_device *drm)
>       * layers.
>       */
>      for (i = 0; i < ARRAY_SIZE(sun4i_backend_planes); i++) {
>          const struct sun4i_plane_desc *plane = &sun4i_backend_planes[i];
> -        struct sun4i_layer *layer = layers[i];
> +        struct sun4i_layer *layer;
> 
>          layer = sun4i_layer_init_one(drm, plane);
>          if (IS_ERR(layer)) {
>              dev_err(drm->dev, "Couldn't initialize %s plane\n",
>                  i ? "overlay" : "primary");
>              return ERR_CAST(layer);
>          };
> +        layers[i] = layer;
> 
>          DRM_DEBUG_DRIVER("Assigning %s plane to pipe %d\n",
>                   i ? "overlay" : "primary", plane->pipe);
>          regmap_update_bits(drv->backend->regs,
> SUN4I_BACKEND_ATTCTL_REG0(i),

You're totally right. Can you send this as a formal patch?

Thanks!
Maxime

-- 
Maxime Ripard, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 801 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20161108/2ed78f33/attachment.sig>

^ permalink raw reply

* [PATCH 3/3] ARM: gr8: evb: Add i2s codec
From: Maxime Ripard @ 2016-11-08 20:39 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <CAGb2v6557B5dooERbKc709Hbrzb3vP8eoSj3nh6rKek7UwrgEQ@mail.gmail.com>

On Tue, Nov 08, 2016 at 03:57:48PM +0800, Chen-Yu Tsai wrote:
> On Tue, Nov 8, 2016 at 3:44 PM, Maxime Ripard
> <maxime.ripard@free-electrons.com> wrote:
> > On Mon, Nov 07, 2016 at 10:11:45PM +0800, Chen-Yu Tsai wrote:
> >> On Mon, Nov 7, 2016 at 9:08 PM, Maxime Ripard
> >> <maxime.ripard@free-electrons.com> wrote:
> >> > The GR8-EVB comes with a wm8978 codec connected to the i2s bus.
> >> >
> >> > Add a card in order to have it working
> >> >
> >> > Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
> >> > ---
> >> >  arch/arm/boot/dts/ntc-gr8-evb.dts | 14 ++++++++++++++
> >> >  1 file changed, 14 insertions(+), 0 deletions(-)
> >> >
> >> > diff --git a/arch/arm/boot/dts/ntc-gr8-evb.dts b/arch/arm/boot/dts/ntc-gr8-evb.dts
> >> > index 12b4317a4383..5291e425caf9 100644
> >> > --- a/arch/arm/boot/dts/ntc-gr8-evb.dts
> >> > +++ b/arch/arm/boot/dts/ntc-gr8-evb.dts
> >> > @@ -76,6 +76,20 @@
> >> >                 default-brightness-level = <8>;
> >> >         };
> >> >
> >> > +       i2s {
> >>
> >> "sound" might be a better node name? The I2S controllers are also
> >> named "i2s".
> >
> > I know, but we also had the codec and SPDIF on this board, so sound
> > was too generic to be meaningful I guess. I don't really care about
> > the name though, if you have any suggestion...
> 
> Well people seem to use "sound" for the sound card nodes...
> 
> What about "sound-analog" for this one, and "sound-spdif" for the
> SPDIF simple card? Or "analog-sound" and "spdif-sound" if that looks
> better.

sound-analog works for me. I'll either fix it in the v2, or while
applying.

Thanks!
Maxime

-- 
Maxime Ripard, Free Electrons
Embedded Linux and Kernel engineering
http://free-electrons.com
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 801 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20161108/68ade2b6/attachment.sig>

^ permalink raw reply

* [PATCHv2] ARM: dts: socfpga: add specific compatible strings for boards
From: Dinh Nguyen @ 2016-11-08 20:53 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161101222352.GA6328@localhost>



On 11/01/2016 05:23 PM, Olof Johansson wrote:
> On Tue, Nov 01, 2016 at 03:56:52PM -0500, Dinh Nguyen wrote:
>> Add a more specific board compatible entry for all of the SOCFPGA
>> Cyclone 5 based boards.
>>

[snip]

>>  / {
>>  	model = "Altera SOCFPGA Cyclone V SoC Development Kit";
>> -	compatible = "altr,socfpga-cyclone5", "altr,socfpga";
>> +	compatible = "altr,socdk", "altr,socfpga-cyclone5", "altr,socfpga";
> 
> This looks a little too generic, what if there's another dk with another
> SoC down the road?
> 

Right...I'll change it to "altr,socfpga-cyclone5-socdk",

>>  	chosen {
>>  		bootargs = "earlyprintk";
>> diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts
>> index 02e22f5..c5623a7 100644
>> --- a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts
>> +++ b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts
>> @@ -19,7 +19,7 @@
>>  
>>  / {
>>  	model = "Terasic SoCkit";
>> -	compatible = "altr,socfpga-cyclone5", "altr,socfpga";
>> +	compatible = "terasic,sockit", "altr,socfpga-cyclone5", "altr,socfpga";
> 
> Same thing here, this seems a bit on the generic side.
> 

perhaps "terasic,socfpga-cyclone5-sockit" ?

Thanks for reviewing.

Dinh

^ permalink raw reply

* [alsa-devel] [PATCH 2/9] ALSA: ac97: add an ac97 bus
From: Robert Jarzmik @ 2016-11-08 21:18 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <d57cdfba-da22-652d-3d75-6d1b754cf1d5@metafoo.de>

Lars-Peter Clausen <lars@metafoo.de> writes:

> On 10/26/2016 09:41 PM, Robert Jarzmik wrote:
>> +#define to_ac97_device(d) container_of(d, struct ac97_codec_device, dev)
>> +#define to_ac97_driver(d) container_of(d, struct ac97_codec_driver, driver)
>
> In my opinion these should be inline functions rather than macros as that
> generates much more legible compiler errors e.g. in case there is a type
> mismatch.
Sure, why not.

>> +struct ac97_codec_driver {
>> +	struct device_driver	driver;
>> +	int			(*probe)(struct ac97_codec_device *);
>> +	int			(*remove)(struct ac97_codec_device *);
>> +	int			(*suspend)(struct ac97_codec_device *);
>> +	int			(*resume)(struct ac97_codec_device *);
>> +	void			(*shutdown)(struct ac97_codec_device *);
>
> The suspend, resume and shutdown callbacks are never used. Which is good,
> since all new frameworks should use dev_pm_ops. Just drop them from the struct.
Ok.

>> +/**
>> + * struct ac97_controller - The AC97 controller of the AC-Link
>> + * @ops:		the AC97 operations.
>> + * @controllers:	linked list of all existing controllers.
>> + * @dev:		the device providing the AC97 controller.
>> + * @slots_available:	the mask of accessible/scanable codecs.
>> + * @codecs:		the 4 possible AC97 codecs (NULL if none found).
>> + * @codecs_pdata:	platform_data for each codec (NULL if no pdata).
>> + *
>> + * This structure is internal to AC97 bus, and should not be used by the
>> + * controllers themselves, excepting for using @dev.
>> + */
>> +struct ac97_controller {
>> +	const struct ac97_controller_ops *ops;
>> +	struct list_head controllers;
>> +	struct device *dev;
>
> I'd make the controller itself a struct dev, rather than just having the
> pointer to the parent. This is more idiomatic and matches what other
> subsystems do. It has several advantages, you get proper refcounting of your
> controller structure, the controller gets its own sysfs directory where the
> CODECs appear as children, you don't need the manual sysfs attribute
> creation and removal in ac97_controler_{un,}register().

If you mean having "struct device dev" instead of "struct device *dev", it has
also a drawback : all the legacy platforms have already a probing method, be
that platform data, device-tree or something else.

I'm a bit concerned about the conversion toll. Maybe I've not understood your
point fully, so please feel free to explain, with an actual example even better.

>> +	void (*wait)(struct ac97_controller *adrv, int slot);
>> +	void (*init)(struct ac97_controller *adrv, int slot);
>
> Neither wait nor init are ever used.
This is because I've not yet begun porting sound/pci/ac97_codec.c into
sound/ac97.

>> +	if ((codec_num < 0) || (codec_num >= AC97_BUS_MAX_CODECS))
>> +		return ERR_PTR(-ERANGE);
>
> I'd make this EINVAL.
Ok.

>> +	adev = container_of(dev, struct ac97_codec_device, dev);
>
> to_ac97_device()
Sure.

>> +	codec_name = kasprintf(GFP_KERNEL, "%s:%d", dev_name(ac97_ctrl->dev),
>> +			       idx);
>> +	codec->dev.init_name = codec_name;
>
> init_name is only for statically allocated devices. Use dev_set_name(dev,
> ...). No need for kasprintf() either as dev_set_name() takes a format string.
I'll try again, I seem to remember having tried that and it failed, but I can't
remember why.

> For this you need to split device_register into device_initialize() and
> device_add(). But usually that is what you want anyway.
Let me try again.

>> +	ret = sysfs_create_link(&codec->dev.kobj, &ac97_ctrl->dev->kobj,
>> +				"ac97_controller");
>
> Since the CODEC is a child of the controller this should not be necessary as
> this just points one directory up. It's like `ln -s .. parent`
This creates :
/sys/bus/ac97/devices/pxa2xx-ac97\:0/ac97_controller

Of course as you pointed out, it's a 'ln -s .. parent' like link, but it seems
to me very unfriendly to have :
 - /sys/bus/ac97/devices/pxa2xx-ac97\:0/../ac97_operations
 - while /sys/bus/ac97/devices/ac97_operations doesn't exist (obviously)

Mmm, I don't know for this one, my mind is not set, it's a bit hard to tell if
it's only an unnecessary link bringing "comfort", or something useful.

>
>> +	if (ret)
>> +		goto err_unregister_device;
>> +
>> +	return 0;
>> +err_unregister_device:
>> +	put_device(&codec->dev);
>> +err_free_codec:
>> +	kfree(codec);
>
> Since the struct is reference counted, the freeing is done in the release
> callback and this leads to a double free.
Ah yes, in the "goto err_unregister_device" case, right, while it's necessary in
the "goto err_free_codec" case ... I need to rework that a bit.

>> +int snd_ac97_codec_driver_register(struct ac97_codec_driver *drv)
>> +{
>> +	int ret;
>> +
>> +	drv->driver.bus = &ac97_bus_type;
>> +
>> +	ret = driver_register(&drv->driver);
>> +	if (!ret)
>> +		ac97_rescan_all_controllers();
>
> Rescanning the bus when a new codec driver is registered should not be
> neccessary. The bus is scanned once when the controller is registered, this
> creates the device. The device driver core will take care of binding the
> device to the driver, if the driver is registered after thed evice.
That's because you suppose the initial scanning found all the ac97 codecs.
But that's an incomplete vision as a codec might be powered off at that time and
not seen by the first scanning, while the new scanning will discover it.

>> +static int ac97_ctrl_codecs_unregister(struct ac97_controller *ac97_ctrl)
>> +{
>> +	int i;
>> +
>> +	for (i = 0; i < AC97_BUS_MAX_CODECS; i++)
>> +		if (ac97_ctrl->codecs[i])
>> +			put_device(&ac97_ctrl->codecs[i]->dev);
>
> This should be device_unregister() to match the device_register() in
> ac97_codec_add().
Right.

>> +
>> +	return 0;
>> +}
>> +
>> +static ssize_t cold_reset_store(struct device *dev,
>> +				struct device_attribute *attr, const char *buf,
>> +				size_t len)
>> +{
>> +	struct ac97_controller *ac97_ctrl = ac97_ctrl_find(dev);
>> +
>> +	if (!dev)
>> +		return -ENODEV;
>
> dev is never NULL here.
Ok.

> And for the ac97_ctrl there is a race condition. It could be unregistered and
> freed after ac97_ctrl_find() returned sucessfully, but before ac97_ctrl->ops
> is used.
A good catch, the ac97_controllers_mutex is missing.

> Same here.
Indeed.

>> +
>> +	ac97_ctrl->ops->warm_reset(ac97_ctrl);
>> +	return len;
>> +}
>> +static DEVICE_ATTR_WO(warm_reset);
>> +
>> +static struct attribute *ac97_controller_device_attrs[] = {
>> +	&dev_attr_cold_reset.attr,
>> +	&dev_attr_warm_reset.attr,
>> +	NULL
>> +};
>
> This adds new userspace ABI that is not documented at the moment.
Very true. And like any userspace interface, it needs to be discussed. If nobody
complains, I'll add the documentation for next patch round.
>
>> +int snd_ac97_controller_register(const struct ac97_controller_ops *ops,
>> +				 struct device *dev,
>> +				 unsigned short slots_available,
>> +				 void **codecs_pdata)
>
> In my opinion this should return a handle to a ac97 controller which can
> then be passed to snd_ac97_controller_unregister(). This is in my opinion
> the better approach rather than looking up the controller by parent device.
This is another "legacy drivers" issue.

The legacy driver (the one probed by platform_data or devicetree) doesn't
necessarily have a private structure to store this ac97_controller pointer. This
enables an "easier" porting of existing drivers.

>> +/**
>> + * snd_ac97_controller_unregister - unregister an ac97 controller
>> + * @dev: the device previously provided to ac97_controller_register()
>> + *
>> + * Returns 0 on success, negative upon error
>
> Unregister must not be able to fail. Hotunplug is one of the core concepts
> of the device driver model and there is really nothing that can be done to
> prevent a device from disappearing, so there is no sensible way of handling
> the error (and your pxa driver modifications simply ignore it as well).
>
> This also means the framework needs to cope with the case where the
> controller is removed and the CODEC devices are still present. All future
> operations should return -ENODEV in that case.
Ah ... that will require a serious modification, and I see your point, so I'll
prepare this for next patchset.
>> +static struct bus_type ac97_bus_type = {
>> +	.name		= "ac97",
>> +	.dev_attrs	= ac97_dev_attrs,
>
> dev_attrs is deprecated in favor of dev_groups (See commit 880ffb5c6).
Ok.

>> index 000000000000..a835f03744bf
>> --- /dev/null
>> +++ b/sound/ac97/codec.c
>> @@ -0,0 +1,15 @@
>> +/*
>> + *  Copyright (C) 2016 Robert Jarzmik <robert.jarzmik@free.fr>
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of the GNU General Public License version 2 as
>> + * published by the Free Software Foundation.
>> + */
>> +
>> +#include <sound/ac97_codec.h>
>> +#include <sound/ac97/codec.h>
>> +#include <sound/ac97/controller.h>
>> +#include <linux/device.h>
>> +#include <linux/slab.h>
>> +#include <sound/soc.h>	/* For compat_ac97_* */
>> +
>
> I'm not sure I understand what this file does.
Ah yes, I'll remove it.
It's the future conversion of sound/pci/ac97_codec.c, but it's ... empty so far.

Thanks very much for the very detailed review.

-- 
Robert

^ permalink raw reply

* [PATCHv3] ARM: dts: socfpga: add specific compatible strings for boards
From: Dinh Nguyen @ 2016-11-08 21:50 UTC (permalink / raw)
  To: linux-arm-kernel

Add a more specific board compatible entry for all of the SOCFPGA
Cyclone 5 based boards.

Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
---
v3: Be a bit more specific with the c5 dk and sockit, use
    "altr,socfpga-cyclone5-socdk" and "terasic,socfpga-cyclone5-sockit"
v2: remove extra space and add a comma between compatible entries
---
 arch/arm/boot/dts/socfpga_cyclone5_de0_sockit.dts  | 2 +-
 arch/arm/boot/dts/socfpga_cyclone5_mcvevk.dts      | 2 +-
 arch/arm/boot/dts/socfpga_cyclone5_socdk.dts       | 2 +-
 arch/arm/boot/dts/socfpga_cyclone5_sockit.dts      | 2 +-
 arch/arm/boot/dts/socfpga_cyclone5_sodia.dts       | 2 +-
 arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm/boot/dts/socfpga_cyclone5_de0_sockit.dts b/arch/arm/boot/dts/socfpga_cyclone5_de0_sockit.dts
index afea364..5ecd2ef 100644
--- a/arch/arm/boot/dts/socfpga_cyclone5_de0_sockit.dts
+++ b/arch/arm/boot/dts/socfpga_cyclone5_de0_sockit.dts
@@ -18,7 +18,7 @@
 
 / {
 	model = "Terasic DE-0(Atlas)";
-	compatible = "altr,socfpga-cyclone5", "altr,socfpga";
+	compatible = "terasic,de0-atlas", "altr,socfpga-cyclone5", "altr,socfpga";
 
 	chosen {
 		bootargs = "earlyprintk";
diff --git a/arch/arm/boot/dts/socfpga_cyclone5_mcvevk.dts b/arch/arm/boot/dts/socfpga_cyclone5_mcvevk.dts
index 424523b..e5a98e5 100644
--- a/arch/arm/boot/dts/socfpga_cyclone5_mcvevk.dts
+++ b/arch/arm/boot/dts/socfpga_cyclone5_mcvevk.dts
@@ -19,7 +19,7 @@
 
 / {
 	model = "Aries/DENX MCV EVK";
-	compatible = "altr,socfpga-cyclone5", "altr,socfpga";
+	compatible = "denx,mcvevk", "altr,socfpga-cyclone5", "altr,socfpga";
 
 	aliases {
 		ethernet0 = &gmac0;
diff --git a/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts b/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts
index 15e43f4..7a5f42d 100644
--- a/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts
+++ b/arch/arm/boot/dts/socfpga_cyclone5_socdk.dts
@@ -19,7 +19,7 @@
 
 / {
 	model = "Altera SOCFPGA Cyclone V SoC Development Kit";
-	compatible = "altr,socfpga-cyclone5", "altr,socfpga";
+	compatible = "altr,socfpga-cyclone5-socdk", "altr,socfpga-cyclone5", "altr,socfpga";
 
 	chosen {
 		bootargs = "earlyprintk";
diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts
index 02e22f5..fcacaf7b 100644
--- a/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts
+++ b/arch/arm/boot/dts/socfpga_cyclone5_sockit.dts
@@ -19,7 +19,7 @@
 
 / {
 	model = "Terasic SoCkit";
-	compatible = "altr,socfpga-cyclone5", "altr,socfpga";
+	compatible = "terasic,socfpga-cyclone5-sockit", "altr,socfpga-cyclone5", "altr,socfpga";
 
 	chosen {
 		bootargs = "earlyprintk";
diff --git a/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts b/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts
index 9aaf413..5b7e3c2 100644
--- a/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts
+++ b/arch/arm/boot/dts/socfpga_cyclone5_sodia.dts
@@ -21,7 +21,7 @@
 
 / {
 	model = "Altera SOCFPGA Cyclone V SoC Macnica Sodia board";
-	compatible = "altr,socfpga-cyclone5", "altr,socfpga";
+	compatible = "macnica,sodia", "altr,socfpga-cyclone5", "altr,socfpga";
 
 	chosen {
 		bootargs = "earlyprintk";
diff --git a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts
index b844473..363ee62 100644
--- a/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts
+++ b/arch/arm/boot/dts/socfpga_cyclone5_vining_fpga.dts
@@ -51,7 +51,7 @@
 
 / {
 	model = "samtec VIN|ING FPGA";
-	compatible = "altr,socfpga-cyclone5", "altr,socfpga";
+	compatible = "samtec,vining", "altr,socfpga-cyclone5", "altr,socfpga";
 
 	chosen {
 		bootargs = "console=ttyS0,115200";
-- 
2.8.3

^ permalink raw reply related

* KASAN & the vmalloc area
From: Dmitry Vyukov @ 2016-11-08 22:09 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161108190302.GH15297@leverpostej>

On Tue, Nov 8, 2016 at 11:03 AM, Mark Rutland <mark.rutland@arm.com> wrote:
> Hi,
>
> I see a while back [1] there was a discussion of what to do about KASAN
> and vmapped stacks, but it doesn't look like that was solved, judging by
> the vmapped stacks pull [2] for v4.9.
>
> I wondered whether anyone had looked at that since?
>
> I have an additional reason to want to dynamically allocate the vmalloc
> area shadow: it turns out that KASAN currently interacts rather poorly
> with the arm64 ptdump code.
>
> When KASAN is selected, we allocate shadow for the whole vmalloc area,
> using common zero pte, pmd, pud tables. Walking over these in the ptdump
> code takes a *very* long time (I've seen up to 15 minutes with
> KASAN_OUTLINE enabled). For DEBUG_WX [3], this means boot hangs for that
> long, too.
>
> If I don't allocate vmalloc shadow (and remove the apparently pointlesss
> shadow of the shadow area), and only allocate shadow for the image,
> fixmap, vmemmap and so on, that delay gets cut to a few seconds, which
> is tolerable for a debug configuration...
>
> ... however, things blow up when the kernel touches vmalloc'd memory for
> the first time, as we don't install shadow for that dynamically.

I've seen the same iteration slowness problem on x86 with
CONFIG_DEBUG_RODATA which walks all pages. That takes about 1 minute, but
it is enough to trigger rcu stall warning.

The zero pud and vmalloc-ed stacks look like different problems.
To overcome the slowness we could map zero shadow for vmalloc area lazily.
However for vmalloc-ed stacks we need to map actual memory, because
stack instrumentation will read/write into the shadow. One downside
here is that vmalloc shadow can be as large as 1:1 (if we allocate 1
page in vmalloc area we need to allocate 1 page for shadow).

Re slowness: could we just skip the KASAN zero puds (the top level)
while walking? Can they be interesting for anybody? We can just
pretend that they are not there. Looks like a trivial solution for the
problem at hand.

^ permalink raw reply

* [GIT PULL] pxa for v4.10
From: Robert Jarzmik @ 2016-11-08 22:19 UTC (permalink / raw)
  To: linux-arm-kernel

Hello Arnd, Kevin, Olof,

Please consider this pull request for pxa 4.10 cycle. This pull request strays
from my usual ones as it spans more than just mach-pxa.

The following changes since commit 1001354ca34179f3db924eb66672442a173147dc:

  Linux 4.9-rc1 (2016-10-15 12:17:50 -0700)

are available in the git repository at:

  https://github.com/rjarzmik/linux.git tags/pxa-for-4.10

for you to fetch changes up to e413bd33ac44b6d0bebc0ef2ac19cbe7558a7303:

  ARM: pxa: fix pxa25x interrupt init (2016-11-05 21:48:18 +0100)

----------------------------------------------------------------
This is the pxa changes for v4.10 cycle.

This cycle is covering :
 - some clock fixes common with sa1100 architecture
 - the consequence of the pxa_camera conversion to v4l2
 - a small irq related fix for pxa25x device-tree only

----------------------------------------------------------------
Robert Jarzmik (8):
      ARM: sa11x0/pxa: acquire timer rate from the clock rate
      watchdog: sa11x0/pxa: get rid of get_clock_tick_rate
      ARM: sa11x0/pxa: get rid of get_clock_tick_rate
      ARM: pxa: pxa_cplds: honor probe deferral
      ARM: pxa: mioa701: use the new pxa_camera platform_data
      ARM: pxa: ezx: use the new pxa_camera platform_data
      ARM: pxa: em-x270: use the new pxa_camera platform_data
      ARM: pxa: fix pxa25x interrupt init

Russell King - ARM Linux (1):
      clk: pxa25x: OSTIMER0 clocks from the main oscillator

Wei Yongjun (1):
      ARM: pxa: remove duplicated include from spitz.c

 arch/arm/mach-pxa/corgi.c                    |   1 -
 arch/arm/mach-pxa/em-x270.c                  |  89 +++++---------
 arch/arm/mach-pxa/ezx.c                      | 176 +++++++++++----------------
 arch/arm/mach-pxa/generic.c                  |  18 +--
 arch/arm/mach-pxa/include/mach/hardware.h    |   2 -
 arch/arm/mach-pxa/mioa701.c                  |  13 +-
 arch/arm/mach-pxa/pxa25x.c                   |   2 +-
 arch/arm/mach-pxa/pxa_cplds_irqs.c           |  11 +-
 arch/arm/mach-pxa/spitz.c                    |   1 -
 arch/arm/mach-sa1100/generic.c               |   2 +-
 arch/arm/mach-sa1100/include/mach/hardware.h |   4 -
 drivers/clk/pxa/clk-pxa25x.c                 |   2 +-
 drivers/clocksource/pxa_timer.c              |  11 +-
 drivers/watchdog/sa1100_wdt.c                |  24 +++-
 include/clocksource/pxa.h                    |   3 +-
 15 files changed, 142 insertions(+), 217 deletions(-)


-- 
Robert

^ permalink raw reply

* [GIT PULL] pxa-dt for v4.10
From: Robert Jarzmik @ 2016-11-08 22:27 UTC (permalink / raw)
  To: linux-arm-kernel

Hi Arnd, Kevin, and Olof,

This is the pxa pull request for 4.10 device-tree, can you please consider
pulling ?

The following changes since commit 1001354ca34179f3db924eb66672442a173147dc:

  Linux 4.9-rc1 (2016-10-15 12:17:50 -0700)

are available in the git repository at:

  https://github.com/rjarzmik/linux.git tags/pxa-dt-4.10

for you to fetch changes up to f409d2f134d499354dca0613693f27d8efd75c74:

  ARM: dts: pxa: add pxa27x cpu operating points (2016-11-02 22:52:45 +0100)

----------------------------------------------------------------
This device-tree pxa update brings :
 - pxa25x support
 - cpu operating points in preparation for cpufreq-dt
 - small fixes

----------------------------------------------------------------
Robert Jarzmik (4):
      ARM: dts: add pxa25x .dtsi file
      ARM: dts: pxa: fix gpio0 and gpio1 interrupts
      ARM: dts: pxa: add pxa25x cpu operating points
      ARM: dts: pxa: add pxa27x cpu operating points

Vijay Kumar (1):
      Fix no. of gpio cells in the pxa gpio binding doucmentation

 .../devicetree/bindings/gpio/mrvl-gpio.txt         |   6 +-
 arch/arm/boot/dts/pxa25x.dtsi                      | 117 +++++++++++++++++++++
 arch/arm/boot/dts/pxa27x.dtsi                      |  40 +++++++
 arch/arm/boot/dts/pxa2xx.dtsi                      |   4 +-
 4 files changed, 163 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm/boot/dts/pxa25x.dtsi

-- 
Robert

^ permalink raw reply

* [PATCH V5 1/3] ARM64 LPC: Indirect ISA port IO introduced
From: Arnd Bergmann @ 2016-11-08 22:35 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161108164948.GG20591@arm.com>

On Tuesday, November 8, 2016 4:49:49 PM CET Will Deacon wrote:
> On Tue, Nov 08, 2016 at 04:33:44PM +0000, John Garry wrote:
> > On 08/11/2016 16:12, Will Deacon wrote:
> > >On Tue, Nov 08, 2016 at 11:47:07AM +0800, zhichang.yuan wrote:

> > >Is there no way to make this slightly more generic, so that it can be
> > >re-used elsewhere? For example, if struct extio_ops was common, then
> > >you could have the singleton (which maybe should be an interval tree?),
> > >type definition, setter function and the BUILD_EXTIO invocations
> > >somewhere generic, rather than squirelled away in the arch backend.
> > >
> > The concern would be that some architecture which uses generic higher-level
> > ISA accessor ops, but have IO space, could be affected.
> 
> You're already adding a Kconfig symbol for this stuff, so you can keep
> that if you don't want it on other architectures. I'm just arguing that
> plumbing drivers directly into arch code via arm64_set_extops is not
> something I'm particularly fond of, especially when it looks like it
> could be avoided with a small amount of effort.

Agreed, I initially suggested putting this into arch/arm64/, but there isn't
really a reason why it couldn't just live in lib/ with the header file
bits moved to include/asm-generic/io.h which we already use.

	Arnd

^ permalink raw reply

* [PATCH] ARM: socfpga_defconfig: enable FS configs to support Angstrom filesystem
From: Dinh Nguyen @ 2016-11-08 22:47 UTC (permalink / raw)
  To: linux-arm-kernel

Enables:

CONFIG_AUTOFS4_FS=y
CONFIG_NFSD=y
CONFIG_NFS_V3_ACL=y
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y

Signed-off-by: Matthew Gerlach <mgerlach@opensource.altera.com>
Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
---
 arch/arm/configs/socfpga_defconfig | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/configs/socfpga_defconfig b/arch/arm/configs/socfpga_defconfig
index 18d3ec1..2e1d254 100644
--- a/arch/arm/configs/socfpga_defconfig
+++ b/arch/arm/configs/socfpga_defconfig
@@ -116,13 +116,18 @@ CONFIG_EXT2_FS=y
 CONFIG_EXT2_FS_XATTR=y
 CONFIG_EXT2_FS_POSIX_ACL=y
 CONFIG_EXT3_FS=y
+CONFIG_AUTOFS4_FS=y
 CONFIG_VFAT_FS=y
 CONFIG_NTFS_FS=y
 CONFIG_NTFS_RW=y
 CONFIG_TMPFS=y
 CONFIG_CONFIGFS_FS=y
 CONFIG_NFS_FS=y
+CONFIG_NFS_V3_ACL=y
 CONFIG_ROOT_NFS=y
+CONFIG_NFSD=y
+CONFIG_NFSD_V3_ACL=y
+CONFIG_NFSD_V4=y
 CONFIG_NLS_CODEPAGE_437=y
 CONFIG_NLS_ISO8859_1=y
 CONFIG_PRINTK_TIME=y
-- 
2.8.3

^ permalink raw reply related

* [PATCH] ARM: socfpga_defconfig: enable FS configs to support Angstrom filesystem
From: Arnd Bergmann @ 2016-11-08 22:57 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161108224750.30242-1-dinguyen@kernel.org>

On Tuesday, November 8, 2016 4:47:50 PM CET Dinh Nguyen wrote:
> Enables:
> 
> CONFIG_AUTOFS4_FS=y
> CONFIG_NFSD=y
> CONFIG_NFS_V3_ACL=y
> CONFIG_NFSD_V3_ACL=y
> CONFIG_NFSD_V4=y
> 
> Signed-off-by: Matthew Gerlach <mgerlach@opensource.altera.com>
> Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
> 

No objections to the patch, but the changelog should not state what
the patch clearly shows already but instead it should explain why
you do this.

The subject line mentions "to support Angstrom filesystem", but
it's not clear (at least not to me) what that filesystem is.

	Arnd

^ permalink raw reply

* [PATCH V5 2/3] ARM64 LPC: Add missing range exception for special ISA
From: Benjamin Herrenschmidt @ 2016-11-08 23:12 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161108114953.GB15297@leverpostej>

On Tue, 2016-11-08 at 11:49 +0000, Mark Rutland wrote:
> 
> My understanding of ISA (which may be flawed) is that it's not part of
> the PCI host bridge, but rather on x86 it happens to share the IO space
> with PCI.

Sort-of. On some systems it actually goes through PCI and there's a
PCI->ISA bridge that uses subtractive decoding to the legacy devices.

> So, how about this becomes:
> 
> ? Hisilicon Hip06 SoCs implement a Low Pin Count (LPC) controller, which
> ? provides access to some legacy ISA devices.
> 
> I believe that we could theoretically have multiple independent LPC/ISA
> busses, as is possible with PCI on !x86 systems. If the current ISA code
> assumes a singleton bus, I think that's something that needs to be fixed
> up more generically.
> 
> I don't see why we should need any architecture-specific code here. Why
> can we not fix up the ISA bus code in drivers/of/address.c such that it
> handles multiple ISA bus instances, and translates all sub-device
> addresses relative to the specific bus instance?

What in that code prevents that today ?

Cheers,
Ben.

^ permalink raw reply

* [PATCH V5 1/3] ARM64 LPC: Indirect ISA port IO introduced
From: Benjamin Herrenschmidt @ 2016-11-08 23:16 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161108120323.GC15297@leverpostej>

On Tue, 2016-11-08 at 12:03 +0000, Mark Rutland wrote:
> On Tue, Nov 08, 2016 at 11:47:07AM +0800, zhichang.yuan wrote:
> > 
> > For arm64, there is no I/O space as other architectural platforms, such as
> > X86. Most I/O accesses are achieved based on MMIO. But for some arm64 SoCs,
> > such as Hip06, when accessing some legacy ISA devices connected to LPC, those
> > known port addresses are used to control the corresponding target devices, for
> > example, 0x2f8 is for UART, 0xe4 is for ipmi-bt. It is different from the
> > normal MMIO mode in using.
> 
> This has nothing to do with arm64. Hardware with this kind of indirect
> bus access could be integrated with a variety of CPU architectures. It
> simply hasn't been, yet.

On some ppc's we also use similar indirect access methods for IOs. We
have a generic infrastructure for re-routing some memory or IO regions
to hooks.

On POWER8, our PCIe doesn't do IO at all, but we have an LPC bus behind
firmware calls ;-) We use that infrastructure to plumb in the LPC bus.

> > To drive these devices, this patch introduces a method named indirect-IO.
> > In this method the in/out pair in arch/arm64/include/asm/io.h will be
> > redefined. When upper layer drivers call in/out with those known legacy port
> > addresses to access the peripherals, the hooking functions corrresponding to
> > those target peripherals will be called. Through this way, those upper layer
> > drivers which depend on in/out can run on Hip06 without any changes.
> 
> As above, this has nothing to do with arm64, and as such, should live in
> generic code, exactly as we would do if we had higher-level ISA
> accessor ops.
> 
> Regardless, given the multi-instance case, I don't think this is
> sufficient in general (and I think we need higher-level ISA accessors
> to handle the indirection).

Multi-instance with IO is tricky to do generically because archs already
have all sorts of hacks to deal with the fact that inb/outb don't require
an explicit ioremap, so an IO resource can take all sorts of shapes depending
on the arch.

Overall it boils down to applying some kind of per-instance "offset" to
the IO port number though.

> [...]
> 
> > 
> > diff --git a/arch/arm64/include/asm/extio.h b/arch/arm64/include/asm/extio.h
> > new file mode 100644
> > index 0000000..6ae0787
> > --- /dev/null
> > +++ b/arch/arm64/include/asm/extio.h
> 
> > 
> > +#ifndef __LINUX_EXTIO_H
> > +#define __LINUX_EXTIO_H
> 
> This doesn't match the file naming, __ASM_EXTIO_H would be consistent
> with other arm64 headers.
> 
> > 
> > +
> > +struct extio_ops {
> > > > +	unsigned long start;/* inclusive, sys io addr */
> > > > +	unsigned long end;/* inclusive, sys io addr */
> 
> Please put whitespace before inline comments.
> 
> [...]
> 
> > 
> > > > +type in##bw(unsigned long addr)						\
> > > > +{									\
> > > > > > +	if (!arm64_extio_ops || arm64_extio_ops->start > addr ||	\
> > > > > > +			arm64_extio_ops->end < addr)			\
> > > > > > +		return read##bw(PCI_IOBASE + addr);			\
> > > > > > +	return arm64_extio_ops->pfin ?					\
> > > > > > +		arm64_extio_ops->pfin(arm64_extio_ops->devpara,		\
> > > > > > +			addr, sizeof(type)) : -1;			\
> > > > +}									\
> > > > +									\
> > > > +void out##bw(type value, unsigned long addr)				\
> > > > +{									\
> > > > > > +	if (!arm64_extio_ops || arm64_extio_ops->start > addr ||	\
> > > > > > +			arm64_extio_ops->end < addr)			\
> > > > > > +		write##bw(value, PCI_IOBASE + addr);			\
> > > > > > +	else								\
> > > > > > +		if (arm64_extio_ops->pfout)				\
> > > > +			arm64_extio_ops->pfout(arm64_extio_ops->devpara,\
> > > > > > +				addr, value, sizeof(type));		\
> > > > +}									\
> > > > +									\
> > > > +void ins##bw(unsigned long addr, void *buffer, unsigned int count)	\
> > > > +{									\
> > > > > > +	if (!arm64_extio_ops || arm64_extio_ops->start > addr ||	\
> > > > > > +			arm64_extio_ops->end < addr)			\
> > > > > > +		reads##bw(PCI_IOBASE + addr, buffer, count);		\
> > > > > > +	else								\
> > > > > > +		if (arm64_extio_ops->pfins)				\
> > > > +			arm64_extio_ops->pfins(arm64_extio_ops->devpara,\
> > > > > > +				addr, buffer, sizeof(type), count);	\
> > > > +}									\
> > > > +									\
> > > > +void outs##bw(unsigned long addr, const void *buffer, unsigned int count)	\
> > > > +{									\
> > > > > > +	if (!arm64_extio_ops || arm64_extio_ops->start > addr ||	\
> > > > > > +			arm64_extio_ops->end < addr)			\
> > > > > > +		writes##bw(PCI_IOBASE + addr, buffer, count);		\
> > > > > > +	else								\
> > > > > > +		if (arm64_extio_ops->pfouts)				\
> > > > +			arm64_extio_ops->pfouts(arm64_extio_ops->devpara,\
> > > > > > +				addr, buffer, sizeof(type), count);	\
> > +}
> > +
> 
> So all PCI I/O will be slowed down by irrelevant checks when this is
> enabled?
> 
> [...]
> 
> > 
> > +static inline void arm64_set_extops(struct extio_ops *ops)
> > +{
> > > > +	if (ops)
> > > > +		WRITE_ONCE(arm64_extio_ops, ops);
> > +}
> 
> Why WRITE_ONCE()?
> 
> Is this not protected/propagated by some synchronisation mechanism?
> 
> WRITE_ONCE() is not sufficient to ensure that this is consistently
> observed by readers, and regardless, I don't see READ_ONCE() anywhere in
> this patch.
> 
> This looks very suspicious.
> 
> Thanks,
> Mark.

^ permalink raw reply

* Summary of LPC guest MSI discussion in Santa Fe (was: Re: [RFC 0/8] KVM PCIe/MSI passthrough on ARM/ARM64 (Alt II))
From: Alex Williamson @ 2016-11-08 23:35 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161108202922.GC15676@cbox>

On Tue, 8 Nov 2016 21:29:22 +0100
Christoffer Dall <christoffer.dall@linaro.org> wrote:

> Hi Will,
> 
> On Tue, Nov 08, 2016 at 02:45:59AM +0000, Will Deacon wrote:
> > Hi all,
> > 
> > I figured this was a reasonable post to piggy-back on for the LPC minutes
> > relating to guest MSIs on arm64.
> > 
> > On Thu, Nov 03, 2016 at 10:02:05PM -0600, Alex Williamson wrote:  
> > > We can always have QEMU reject hot-adding the device if the reserved
> > > region overlaps existing guest RAM, but I don't even really see how we
> > > advise users to give them a reasonable chance of avoiding that
> > > possibility.  Apparently there are also ARM platforms where MSI pages
> > > cannot be remapped to support the previous programmable user/VM
> > > address, is it even worthwhile to support those platforms?  Does that
> > > decision influence whether user programmable MSI reserved regions are
> > > really a second class citizen to fixed reserved regions?  I expect
> > > we'll be talking about this tomorrow morning, but I certainly haven't
> > > come up with any viable solutions to this.  Thanks,  
> > 
> > At LPC last week, we discussed guest MSIs on arm64 as part of the PCI
> > microconference. I presented some slides to illustrate some of the issues
> > we're trying to solve:
> > 
> >   http://www.willdeacon.ukfsn.org/bitbucket/lpc-16/msi-in-guest-arm64.pdf
> > 
> > Punit took some notes (thanks!) on the etherpad here:
> > 
> >   https://etherpad.openstack.org/p/LPC2016_PCI
> > 
> > although the discussion was pretty lively and jumped about, so I've had
> > to go from memory where the notes didn't capture everything that was
> > said.
> > 
> > To summarise, arm64 platforms differ in their handling of MSIs when compared
> > to x86:
> > 
> >   1. The physical memory map is not standardised (Jon pointed out that
> >      this is something that was realised late on)
> >   2. MSIs are usually treated the same as DMA writes, in that they must be
> >      mapped by the SMMU page tables so that they target a physical MSI
> >      doorbell
> >   3. On some platforms, MSIs bypass the SMMU entirely (e.g. due to an MSI
> >      doorbell built into the PCI RC)
> >   4. Platforms typically have some set of addresses that abort before
> >      reaching the SMMU (e.g. because the PCI identifies them as P2P).
> > 
> > All of this means that userspace (QEMU) needs to identify the memory
> > regions corresponding to points (3) and (4) and ensure that they are
> > not allocated in the guest physical (IPA) space. For platforms that can
> > remap the MSI doorbell as in (2), then some space also needs to be
> > allocated for that.
> > 
> > Rather than treat these as separate problems, a better interface is to
> > tell userspace about a set of reserved regions, and have this include
> > the MSI doorbell, irrespective of whether or not it can be remapped.  
> 
> Is my understanding correct, that you need to tell userspace about the
> location of the doorbell (in the IOVA space) in case (2), because even
> though the configuration of the device is handled by the (host) kernel
> through trapping of the BARs, we have to avoid the VFIO user programming
> the device to create other DMA transactions to this particular address,
> since that will obviously conflict and either not produce the desired
> DMA transactions or result in unintended weird interrupts?

Correct, if the MSI doorbell IOVA range overlaps RAM in the VM, then
it's potentially a DMA target and we'll get bogus data on DMA read from
the device, and lose data and potentially trigger spurious interrupts on
DMA write from the device.  Thanks,

Alex

^ permalink raw reply

* [PATCH v4 1/5] arm64: perf: Basic uncore counter support for Cavium ThunderX SOC
From: Will Deacon @ 2016-11-08 23:50 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <73173d6ad2430eead5e9da40564a90a60961b6d9.1477741719.git.jglauber@cavium.com>

Hi Jan,

Thanks for posting an updated series. I have a few minor comments, which
we can hopefully address in time for 4.10.

Also, have you run the perf fuzzer with this series applied?

https://github.com/deater/perf_event_tests

(build the tests and look under the fuzzer/ directory for the tool)

On Sat, Oct 29, 2016 at 01:55:29PM +0200, Jan Glauber wrote:
> Provide "uncore" facilities for different non-CPU performance
> counter units.
> 
> The uncore PMUs can be found under /sys/bus/event_source/devices.
> All counters are exported via sysfs in the corresponding events
> files under the PMU directory so the perf tool can list the event names.
> 
> There are some points that are special in this implementation:
> 
> 1) The PMU detection relies on PCI device detection. If a
>    matching PCI device is found the PMU is created. The code can deal
>    with multiple units of the same type, e.g. more than one memory
>    controller.
> 
> 2) Counters are summarized across different units of the same type
>    on one NUMA node but not across NUMA nodes.
>    For instance L2C TAD 0..7 are presented as a single counter
>    (adding the values from TAD 0 to 7). Although losing the ability
>    to read a single value the merged values are easier to use.
> 
> 3) The counters are not CPU related. A random CPU is picked regardless
>    of the NUMA node. There is a small performance penalty for accessing
>    counters on a remote node but reading a performance counter is a
>    slow operation anyway.
> 
> Signed-off-by: Jan Glauber <jglauber@cavium.com>
> ---
>  drivers/perf/Kconfig                |  13 ++
>  drivers/perf/Makefile               |   1 +
>  drivers/perf/uncore/Makefile        |   1 +
>  drivers/perf/uncore/uncore_cavium.c | 351 ++++++++++++++++++++++++++++++++++++
>  drivers/perf/uncore/uncore_cavium.h |  71 ++++++++

We already have "uncore" PMUs under drivers/perf, so I'd prefer that we
renamed this a bit to reflect better what's going on. How about:

  drivers/perf/cavium/

and then

  drivers/perf/cavium/uncore_thunder.[ch]

?

>  include/linux/cpuhotplug.h          |   1 +
>  6 files changed, 438 insertions(+)
>  create mode 100644 drivers/perf/uncore/Makefile
>  create mode 100644 drivers/perf/uncore/uncore_cavium.c
>  create mode 100644 drivers/perf/uncore/uncore_cavium.h
> 
> diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
> index 4d5c5f9..3266c87 100644
> --- a/drivers/perf/Kconfig
> +++ b/drivers/perf/Kconfig
> @@ -19,4 +19,17 @@ config XGENE_PMU
>          help
>            Say y if you want to use APM X-Gene SoC performance monitors.
>  
> +config UNCORE_PMU
> +	bool

This isn't needed.

> +
> +config UNCORE_PMU_CAVIUM
> +	depends on PERF_EVENTS && NUMA && ARM64
> +	bool "Cavium uncore PMU support"

Please mention Thunder somewhere, since that's the SoC being supported.

> +	select UNCORE_PMU
> +	default y
> +	help
> +	  Say y if you want to access performance counters of subsystems
> +	  on a Cavium SOC like cache controller, memory controller or
> +	  processor interconnect.
> +
>  endmenu
> diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
> index b116e98..d6c02c9 100644
> --- a/drivers/perf/Makefile
> +++ b/drivers/perf/Makefile
> @@ -1,2 +1,3 @@
>  obj-$(CONFIG_ARM_PMU) += arm_pmu.o
>  obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
> +obj-y += uncore/
> diff --git a/drivers/perf/uncore/Makefile b/drivers/perf/uncore/Makefile
> new file mode 100644
> index 0000000..6130e18
> --- /dev/null
> +++ b/drivers/perf/uncore/Makefile
> @@ -0,0 +1 @@
> +obj-$(CONFIG_UNCORE_PMU_CAVIUM) += uncore_cavium.o
> diff --git a/drivers/perf/uncore/uncore_cavium.c b/drivers/perf/uncore/uncore_cavium.c
> new file mode 100644
> index 0000000..a7b4277
> --- /dev/null
> +++ b/drivers/perf/uncore/uncore_cavium.c
> @@ -0,0 +1,351 @@
> +/*
> + * Cavium Thunder uncore PMU support.
> + *
> + * Copyright (C) 2015,2016 Cavium Inc.
> + * Author: Jan Glauber <jan.glauber@cavium.com>
> + */
> +
> +#include <linux/cpufeature.h>
> +#include <linux/numa.h>
> +#include <linux/slab.h>
> +
> +#include "uncore_cavium.h"
> +
> +/*
> + * Some notes about the various counters supported by this "uncore" PMU
> + * and the design:
> + *
> + * All counters are 64 bit long.
> + * There are no overflow interrupts.
> + * Counters are summarized per node/socket.
> + * Most devices appear as separate PCI devices per socket with the exception
> + * of OCX TLK which appears as one PCI device per socket and contains several
> + * units with counters that are merged.
> + * Some counters are selected via a control register (L2C TAD) and read by
> + * a number of counter registers, others (L2C CBC, LMC & OCX TLK) have
> + * one dedicated counter per event.
> + * Some counters are not stoppable (L2C CBC & LMC).
> + * Some counters are read-only (LMC).
> + * All counters belong to PCI devices, the devices may have additional
> + * drivers but we assume we are the only user of the counter registers.
> + * We map the whole PCI BAR so we must be careful to forbid access to
> + * addresses that contain neither counters nor counter control registers.
> + */
> +
> +void thunder_uncore_read(struct perf_event *event)
> +{

Rather than have a bunch of global symbols that are called from the
individual drivers, why don't you pass a struct of function pointers to
their respective init functions and keep the internals private?

> +	struct thunder_uncore *uncore = to_uncore(event->pmu);
> +	struct hw_perf_event *hwc = &event->hw;
> +	struct thunder_uncore_node *node;
> +	struct thunder_uncore_unit *unit;
> +	u64 prev, delta, new = 0;
> +
> +	node = get_node(hwc->config, uncore);
> +
> +	/* read counter values from all units on the node */
> +	list_for_each_entry(unit, &node->unit_list, entry)
> +		new += readq(hwc->event_base + unit->map);
> +
> +	prev = local64_read(&hwc->prev_count);
> +	local64_set(&hwc->prev_count, new);
> +	delta = new - prev;
> +	local64_add(delta, &event->count);
> +}
> +
> +int thunder_uncore_add(struct perf_event *event, int flags, u64 config_base,
> +		       u64 event_base)
> +{
> +	struct thunder_uncore *uncore = to_uncore(event->pmu);
> +	struct hw_perf_event *hwc = &event->hw;
> +	struct thunder_uncore_node *node;
> +	int id;
> +
> +	node = get_node(hwc->config, uncore);
> +	id = get_id(hwc->config);
> +
> +	if (!cmpxchg(&node->events[id], NULL, event))
> +		hwc->idx = id;

Does this need to be a full-fat cmpxchg? Who are you racing with?

> +
> +	if (hwc->idx == -1)
> +		return -EBUSY;

This would be much clearer as an else statement after the cmpxchg.

> +
> +	hwc->config_base = config_base;
> +	hwc->event_base = event_base;
> +	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
> +
> +	if (flags & PERF_EF_START)
> +		uncore->pmu.start(event, PERF_EF_RELOAD);
> +
> +	return 0;
> +}
> +
> +void thunder_uncore_del(struct perf_event *event, int flags)
> +{
> +	struct thunder_uncore *uncore = to_uncore(event->pmu);
> +	struct hw_perf_event *hwc = &event->hw;
> +	struct thunder_uncore_node *node;
> +	int i;
> +
> +	event->pmu->stop(event, PERF_EF_UPDATE);
> +
> +	/*
> +	 * For programmable counters we need to check where we installed it.
> +	 * To keep this function generic always test the more complicated
> +	 * case (free running counters won't need the loop).
> +	 */
> +	node = get_node(hwc->config, uncore);
> +	for (i = 0; i < node->num_counters; i++) {
> +		if (cmpxchg(&node->events[i], event, NULL) == event)
> +			break;
> +	}
> +	hwc->idx = -1;
> +}
> +
> +void thunder_uncore_start(struct perf_event *event, int flags)
> +{
> +	struct thunder_uncore *uncore = to_uncore(event->pmu);
> +	struct hw_perf_event *hwc = &event->hw;
> +	struct thunder_uncore_node *node;
> +	struct thunder_uncore_unit *unit;
> +	u64 new = 0;
> +
> +	/* read counter values from all units on the node */
> +	node = get_node(hwc->config, uncore);
> +	list_for_each_entry(unit, &node->unit_list, entry)
> +		new += readq(hwc->event_base + unit->map);
> +	local64_set(&hwc->prev_count, new);
> +
> +	hwc->state = 0;
> +	perf_event_update_userpage(event);
> +}
> +
> +void thunder_uncore_stop(struct perf_event *event, int flags)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +
> +	hwc->state |= PERF_HES_STOPPED;
> +
> +	if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
> +		thunder_uncore_read(event);
> +		hwc->state |= PERF_HES_UPTODATE;
> +	}
> +}
> +
> +int thunder_uncore_event_init(struct perf_event *event)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	struct thunder_uncore_node *node;
> +	struct thunder_uncore *uncore;
> +
> +	if (event->attr.type != event->pmu->type)
> +		return -ENOENT;
> +
> +	/* we do not support sampling */
> +	if (is_sampling_event(event))
> +		return -EINVAL;
> +
> +	/* counters do not have these bits */
> +	if (event->attr.exclude_user	||
> +	    event->attr.exclude_kernel	||
> +	    event->attr.exclude_host	||
> +	    event->attr.exclude_guest	||
> +	    event->attr.exclude_hv	||
> +	    event->attr.exclude_idle)
> +		return -EINVAL;
> +
> +	uncore = to_uncore(event->pmu);
> +	if (!uncore)
> +		return -ENODEV;
> +	if (!uncore->event_valid(event->attr.config & UNCORE_EVENT_ID_MASK))
> +		return -EINVAL;
> +
> +	/* check NUMA node */
> +	node = get_node(event->attr.config, uncore);
> +	if (!node) {
> +		pr_debug("Invalid NUMA node selected\n");
> +		return -EINVAL;
> +	}
> +
> +	hwc->config = event->attr.config;
> +	hwc->idx = -1;
> +	return 0;
> +}
> +
> +static ssize_t thunder_uncore_attr_show_cpumask(struct device *dev,
> +						struct device_attribute *attr,
> +						char *buf)
> +{
> +	struct pmu *pmu = dev_get_drvdata(dev);
> +	struct thunder_uncore *uncore =
> +		container_of(pmu, struct thunder_uncore, pmu);
> +
> +	return cpumap_print_to_pagebuf(true, buf, &uncore->active_mask);
> +}
> +static DEVICE_ATTR(cpumask, S_IRUGO, thunder_uncore_attr_show_cpumask, NULL);
> +
> +static struct attribute *thunder_uncore_attrs[] = {
> +	&dev_attr_cpumask.attr,
> +	NULL,
> +};
> +
> +struct attribute_group thunder_uncore_attr_group = {
> +	.attrs = thunder_uncore_attrs,
> +};
> +
> +ssize_t thunder_events_sysfs_show(struct device *dev,
> +				  struct device_attribute *attr,
> +				  char *page)
> +{
> +	struct perf_pmu_events_attr *pmu_attr =
> +		container_of(attr, struct perf_pmu_events_attr, attr);
> +
> +	if (pmu_attr->event_str)
> +		return sprintf(page, "%s", pmu_attr->event_str);
> +
> +	return 0;
> +}
> +
> +/* node attribute depending on number of NUMA nodes */
> +static ssize_t node_show(struct device *dev, struct device_attribute *attr,
> +			 char *page)
> +{
> +	if (NODES_SHIFT)
> +		return sprintf(page, "config:16-%d\n", 16 + NODES_SHIFT - 1);

If NODES_SHIFT is 1, you'll end up with "config:16-16", which might confuse
userspace.

> +	else
> +		return sprintf(page, "config:16\n");
> +}
> +
> +struct device_attribute format_attr_node = __ATTR_RO(node);
> +
> +/*
> + * Thunder uncore events are independent from CPUs. Provide a cpumask
> + * nevertheless to prevent perf from adding the event per-cpu and just
> + * set the mask to one online CPU. Use the same cpumask for all uncore
> + * devices.
> + *
> + * There is a performance penalty for accessing a device from a CPU on
> + * another socket, but we do not care (yet).
> + */
> +static int thunder_uncore_offline_cpu(unsigned int old_cpu, struct hlist_node *node)
> +{
> +	struct thunder_uncore *uncore = hlist_entry_safe(node, struct thunder_uncore, node);

Why _safe?

> +	int new_cpu;
> +
> +	if (!cpumask_test_and_clear_cpu(old_cpu, &uncore->active_mask))
> +		return 0;
> +	new_cpu = cpumask_any_but(cpu_online_mask, old_cpu);
> +	if (new_cpu >= nr_cpu_ids)
> +		return 0;
> +	perf_pmu_migrate_context(&uncore->pmu, old_cpu, new_cpu);
> +	cpumask_set_cpu(new_cpu, &uncore->active_mask);
> +	return 0;
> +}
> +
> +static struct thunder_uncore_node * __init alloc_node(struct thunder_uncore *uncore,
> +						      int node_id, int counters)
> +{
> +	struct thunder_uncore_node *node;
> +
> +	node = kzalloc(sizeof(*node), GFP_KERNEL);
> +	if (!node)
> +		return NULL;
> +	node->num_counters = counters;
> +	INIT_LIST_HEAD(&node->unit_list);
> +	return node;
> +}
> +
> +int __init thunder_uncore_setup(struct thunder_uncore *uncore, int device_id,
> +				struct pmu *pmu, int counters)
> +{
> +	unsigned int vendor_id = PCI_VENDOR_ID_CAVIUM;
> +	struct thunder_uncore_unit  *unit, *tmp;
> +	struct thunder_uncore_node *node;
> +	struct pci_dev *pdev = NULL;
> +	int ret, node_id, found = 0;
> +
> +	/* detect PCI devices */
> +	while ((pdev = pci_get_device(vendor_id, device_id, pdev))) {

Redundant brackets?

> +		if (!pdev)
> +			break;

Redundant check?

> +		node_id = dev_to_node(&pdev->dev);
> +
> +		/* allocate node if necessary */
> +		if (!uncore->nodes[node_id])
> +			uncore->nodes[node_id] = alloc_node(uncore, node_id, counters);
> +
> +		node = uncore->nodes[node_id];
> +		if (!node) {
> +			ret = -ENOMEM;
> +			goto fail;
> +		}
> +
> +		unit = kzalloc(sizeof(*unit), GFP_KERNEL);
> +		if (!unit) {
> +			ret = -ENOMEM;
> +			goto fail;
> +		}
> +
> +		unit->pdev = pdev;
> +		unit->map = ioremap(pci_resource_start(pdev, 0),
> +				    pci_resource_len(pdev, 0));
> +		list_add(&unit->entry, &node->unit_list);
> +		node->nr_units++;
> +		found++;
> +	}
> +
> +	if (!found)
> +		return -ENODEV;
> +
> +	cpuhp_state_add_instance_nocalls(CPUHP_AP_UNCORE_CAVIUM_ONLINE,
> +                                         &uncore->node);
> +
> +	/*
> +	 * perf PMU is CPU dependent, in contrast to our uncore devices.
> +	 * Just pick a CPU and migrate away if it goes offline.
> +	 */
> +	cpumask_set_cpu(smp_processor_id(), &uncore->active_mask);
> +
> +	uncore->pmu = *pmu;
> +	ret = perf_pmu_register(&uncore->pmu, uncore->pmu.name, -1);
> +	if (ret)
> +		goto fail;
> +
> +	return 0;
> +
> +fail:
> +	node_id = 0;
> +	while (uncore->nodes[node_id]) {
> +		node = uncore->nodes[node_id];
> +
> +		list_for_each_entry_safe(unit, tmp, &node->unit_list, entry) {

Why do you need the _safe variant?

Will

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox