Linux-ARM-Kernel Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 10/18] arm64: ilp32: introduce binfmt_ilp32.c
From: Catalin Marinas @ 2016-12-05 15:38 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1477081997-4770-11-git-send-email-ynorov@caviumnetworks.com>

On Fri, Oct 21, 2016 at 11:33:09PM +0300, Yury Norov wrote:
> binfmt_ilp32.c is needed to handle ILP32 binaries
> 
> Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>
> Signed-off-by: Bamvor Zhang Jian <bamvor.zhangjian@linaro.org>
> ---
>  arch/arm64/include/asm/elf.h     |  6 +++
>  arch/arm64/kernel/Makefile       |  1 +
>  arch/arm64/kernel/binfmt_ilp32.c | 97 ++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 104 insertions(+)
>  create mode 100644 arch/arm64/kernel/binfmt_ilp32.c
> 
> diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
> index f259fe8..be29dde 100644
> --- a/arch/arm64/include/asm/elf.h
> +++ b/arch/arm64/include/asm/elf.h
> @@ -175,10 +175,16 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm,
>  
>  #define COMPAT_ELF_ET_DYN_BASE		(2 * TASK_SIZE_32 / 3)
>  
> +#ifndef USE_AARCH64_GREG
>  /* AArch32 registers. */
>  #define COMPAT_ELF_NGREG		18
>  typedef unsigned int			compat_elf_greg_t;
>  typedef compat_elf_greg_t		compat_elf_gregset_t[COMPAT_ELF_NGREG];
> +#else /* AArch64 registers for AARCH64/ILP32 */
> +#define COMPAT_ELF_NGREG	ELF_NGREG
> +#define compat_elf_greg_t	elf_greg_t
> +#define compat_elf_gregset_t	elf_gregset_t
> +#endif

I think you only need compat_elf_gregset_t definition here and leave the
other two undefined.

-- 
Catalin

^ permalink raw reply

* [PATCH] ARM: dts: orion5x: fix number of sata port for linkstation ls-gl
From: Roger Shimizu @ 2016-12-05 15:39 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161201151112.930-1-rogershimizu@gmail.com>

[CC Arnd Bergmann]

On Fri, Dec 2, 2016 at 12:11 AM, Roger Shimizu <rogershimizu@gmail.com> wrote:
> Bug report from Debian [0] shows there's minor changed model of
> Linkstation LS-GL that uses the 2nd SATA port of the SoC.
> So it's necessary to enable two SATA ports, though for that specific
> model only the 2nd one is used.
>
> [0] https://bugs.debian.org/845611
>
> Fixes: b1742ffa9ddb ("ARM: dts: orion5x: add device tree for buffalo linkstation ls-gl")
> Reported-by: Ryan Tandy <ryan@nardis.ca>
> Tested-by: Ryan Tandy <ryan@nardis.ca>
> Signed-off-by: Roger Shimizu <rogershimizu@gmail.com>
> ---
>  arch/arm/boot/dts/orion5x-linkstation-lsgl.dts | 4 ++++
>  1 file changed, 4 insertions(+)
>
> diff --git a/arch/arm/boot/dts/orion5x-linkstation-lsgl.dts b/arch/arm/boot/dts/orion5x-linkstation-lsgl.dts
> index 1cf644b..51dc734 100644
> --- a/arch/arm/boot/dts/orion5x-linkstation-lsgl.dts
> +++ b/arch/arm/boot/dts/orion5x-linkstation-lsgl.dts
> @@ -82,6 +82,10 @@
>         gpios = <&gpio0 9 GPIO_ACTIVE_HIGH>;
>  };
>
> +&sata {
> +       nr-ports = <2>;
> +};
> +
>  &ehci1 {
>         status = "okay";
>  };
> --
> 2.10.2
>

Is there any chance to get this simple fix into v4.9 or next v4.10?
Thank you!
-- 
Roger Shimizu, GMT +9 Tokyo
PGP/GPG: 4096R/6C6ACD6417B3ACB1

^ permalink raw reply

* XHCI controller does not detect USB key insertion
From: Mason @ 2016-12-05 15:39 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <2b371baa-f5ed-df9b-0da8-552853b6cd79@baylibre.com>

On 05/12/2016 09:26, Neil Armstrong wrote:

> On 12/02/2016 07:00 PM, Mason wrote:
>
>> On 02/12/2016 14:46, Neil Armstrong wrote:
>>
>>> On 12/02/2016 11:24 AM, Mason wrote:
>>>
>>>> (Sad face) All the documentation I have is in front of me, and nothing
>>>> is ringing a bell. This is a Sigma Designs SoC, with a Pravega XHCI
>>>> controller + Synopsys PHY.
>>>>
>>>> The documentation I have:
>>>>
>>>> Pravega_Dual_Mode_Datasheet_v10c.pdf (documents IP signals)
>>>> Pravega_Dual_Mode_Controller_Programmers_Reference_manual_v1.pdf (documents IP registers)
>>>> PHY databook (very low-level stuff)
>>>> SoC register mapping (for how the SoC maps the IP signals to registers)
>>>
>>> You should have all the necessary bits to enable and configure the Embedded Synopsys PHY !
>>>
>>> You should have some register mapping of the PHY signals, or at least a way to write those registers.
>>>
>>> You should have a reset, clock gate and eventually a power regulator to enable in order to have the PHY running.
>>
>> I'll dump all the non-0 non-standard registers. Maybe someone
>> more experienced than me will spot an obvious mistake.
>>
>> host_usb30_0_config: 0x2e800
>> 	- host_usb30_0_fladj                                 0x20
>> 	- host_usb30_0_usb30_controller_cg_disable           0x0
>> 	- host_usb30_0_mode_select                           0x1
>> 	- host_usb30_0_device_reset_mode                     0x0
>>
>> host_usb30_0_control: 0x2e804
>> 	- host_usb30_0_app_lfps_u3_wp                        0x0
>> 	- host_usb30_0_link_up                               0x1
>> 	- host_usb30_0_msi_msg_sent                          0x0
>> 	- host_usb30_0_usb3_p0_over_current                  0x0
>> 	- host_usb30_0_usb2_p0_over_current                  0x0
>>
>> host_usb30_0_test: 0x2e808
>> 	- host_usb30_0_test_powerdown_hsp                    0x0
>> 	- host_usb30_0_test_powerdown_ssp                    0x0
>> 	- host_usb30_0_test_burnin                           0x0
>> 	- host_usb30_0_acjt_level                            0x14
>> 	- host_usb30_0_lane0_tx2rx_loopbk                    0x0
>> 	- host_usb30_0_rtune_req                             0x0
>>
>> host_usb30_0_status: 0x2e80c
>> 	- host_usb30_0_phystatus                             0x0
>> 	- host_usb30_0_usb2_p0_pp                            0x1
>> 	- host_usb30_0_usb3_p0_pp                            0x1
>> 	- host_usb30_0_usb3_sleep                            0x0
>> 	- host_usb30_0_rtune_ack                             0x0
>>
>> host_usb30_0_clk_rst_0: 0x2e810
>> 	- host_usb30_0_commononn                             0x1
>> 	- host_usb30_0_portreset                             0x0
>> 	- host_usb30_0_refclksel                             0x2
>> 	- host_usb30_0_teneable                              0x1
>> 	- host_usb30_0_fsel                                  0x27
>> 	- host_usb30_0_mpll_multiplier                       0x19
>> 	- host_usb30_0_ref_clkdiv2                           0x0
>> 	- host_usb30_0_ref_ssp_en                            0x1
>> 	- host_usb30_0_ref_use_pad                           0x0
>> 	- host_usb30_0_ssc_en                                0x1
>> 	- host_usb30_0_ssc_range                             0x0
>>
>> host_usb30_0_clk_rst_1: 0x2e814
>> 	- host_usb30_0_ssc_ref_clk_sel                       0x88
>> 	- host_usb30_0_sleepm                                0x1
>> 	- host_usb30_0_vbusvldext                            0x1
>>
>> host_usb30_0_param_0: 0x2e818
>> 	- host_usb30_0_compdistune                           0x4
>> 	- host_usb30_0_otgtune                               0x4
>> 	- host_usb30_0_sqrxtune                              0x3
>> 	- host_usb30_0_txfsltune                             0x3
>> 	- host_usb30_0_txhsxvtune                            0x3
>> 	- host_usb30_0_txpreempltune                         0x0
>> 	- host_usb30_0_txpreemppulsetune                     0x0
>> 	- host_usb30_0_txrestune                             0x1
>> 	- host_usb30_0_txrisetune                            0x2
>> 	- host_usb30_0_txvreftune                            0x4
>>
>> host_usb30_0_param_1: 0x2e81c
>> 	- host_usb30_0_los_bias                              0x5
>> 	- host_usb30_0_los_level                             0xc
>> 	- host_usb30_0_pcs_rx_los_mask_val                   0xf0
>> 	- host_usb30_0_pcs_tx_deemph_3p5db                   0x18
>> 	- host_usb30_0_pcs_tx_deemph_6db                     0x21
>>
>> host_usb30_0_param_2: 0x2e820
>> 	- host_usb30_0_pcs_tx_swing_full                     0x73
>> 	- host_usb30_0_lane0_tx_term_offset                  0x0
>> 	- host_usb30_0_tx_vboost_lvl                         0x4
>>
>> host_usb30_0_SNPS_CR_ADD: 0x2e880
>> 	- host_usb30_0_snps_cr_add                           0xe03c
> 
> This is obviously the PHY registers.
> 
> Commonly, the PHY from Synopsys does not have a register interface given by Synopsys, but it's in charge
> of the SoC integrator to add a register set to program all the PHY signals.

Apparently, it was decided to map all Synopsys registers through
a single address/data register pair.

> Typically, those signal will contain some Clock selection, Enable Reset, Tunings and VBUS mode selection.

The Synopsys datasheet mentions two register blocks:

SS Function Control Registers (SS for SuperSpeed i.e. USB3, I guess)
HS Function Control Registers (HS for HighSpeed  i.e. USB2, I guess)

The registers in the first block are:

SUP.IDCODE_LO
SUP.IDCODE_HI
SUP.DEBUG
RTUNE_DEBUG
RTUNE_STAT
SS_PHASE
SS_FREQ
ATEOVRD
MPLL_OVRD_IN_LO
MPLL_OVRD_IN_HI
SSC_OVRD_IN
BS_OVRD_IN
LEVEL_OVRD_IN
SUP_OVRD_OUT
MPLL_ASIC_IN
BS_ASIC_IN
LEVEL_ASIC_IN
SSC_ASIC_IN
SUP_ASIC_OUT
ATEOVRD_STATUS
SCOPE_ENABLES
SCOPE_SAMPLES
SCOPE_COUNT
SCOPE_CTL
SCOPE_MASK_000
SCOPE_MASK_001
SCOPE_MASK_010
SCOPE_MASK_011
SCOPE_MASK_100
SCOPE_MASK_101
SCOPE_MASK_110
SCOPE_MASK_111
MPLL_LOOP_CTL
MPLL_ATB_MEAS1
MPLL_ATB_MEAS2
MPLL_OVRD
RTUNE_RTUNE_CTRL
ATB_SWITCHYARD_CTRL
SSC_CLK_CNTRL
LANE0.TX_OVRD_IN_LO
LANE0.TX_OVRD_IN_HI
LANE0.TX_OVRD_DRV_LO
LANE0.TX_OVRD_DRV_HI
LANE0.TX_OVRD_OUT
LANE0.RX_OVRD_IN_LO
LANE0.RX_OVRD_IN_HI
LANE0.RX_OVRD_OUT
LANE0.TX_ASIC_IN
LANE0.TX_ASIC_DRV_LO
LANE0.TX_ASIC_DRV_HI
TX_ASIC_OUT
RX_ASIC_IN
RX_ASIC_OUT
LANE0.TX_DEBUG
LANE0.TX_CM_WAIT_TIME_OVRD
LANE0.TX_VMD_FSM_TX_VCM_0
LANE0.TX_VMD_FSM_TX_VCM_1
LANE0.TX_LBERT_CTL
LANE0.RX_LBERT_CTL
LANE0.RX_LBERT_ERR
LANE0.RX_SCOPE_CTL
LANE0.RX_SCOPE_PHASE
LANE0.RX_DPLL_FREQ
LANE0.RX_CDR_CTL
LANE0.RX_CDR_CDR_FSM_DEBUG
LANE0.RX_CDR_LOCK_VEC_OVRD
LANE0.RX_CDR_LOCK_VEC
LANE0.RX_CDR_ADAP_FSM


The one register with clk in the name is SSC_CLK_CNTRL.
It has a 7-bit sub-field called SSC_CLK_DIV125.
"Sets SSC reference clock to 20 MHz." Default val = 7
/me blank stare ...

> commononn seems to be an enable, but active low
> portreset seems to be used to reset the port
> refclksel seems to select the clock source (you should have either an external Xtal, SoC Xtal or a PLL output)

These are PHY signals, which are mapped to registers in
my SoC, AFAICT.

COMMONONN
Common Block Power-Down Control
Function: Controls the power-down signals in the HS Bias and PLL blocks
when the USB 3.0 PHY is in Suspend or Sleep mode.
- 1: In Suspend or Sleep mode, the HS Bias and PLL blocks are powered down.
- 0: In Suspend or Sleep mode, the HS Bias and PLL blocks remain powered and continue to draw current.

PORTRESET<#>
Per-Port Reset
Function: When asserted, this customer-specific signal resets the
corresponding port's USB2.0 transmit and receive logic without disabling the
clocks within the USB 3.0 PHY.
- 1: The transmit and receive finite state machines (FSMs) are reset, and the
line_state logic combinatorially reflects the state of the single-ended
receivers.
- 0: The transmit and receive FSMs are operational, and the line_state logic
becomes sequential after 11 PHYCLOCK<#> cycles.
Asserting PORTRESET<#> does not override any USB 3.0 PHY inputs that
normally control the USB 2.0 state, nor does it cause any transient, illegal USB
states.
Within 100 ns of asserting PORTRESET<#>, the controller must set the inputs
that control the USB 2.0 to values that cause a safe state.
A safe state for Host and Device modes is defined as follows:
- Host mode: Non-driving (OPMODE<#>[1:0] = 2'b01) with the 15-kΩ pull-
down resistors enabled (DPPULLDOWN<#> and DMPULLDOWN<#> = 1'b1)
- Device mode: Non-driving (OPMODE<#>[1:0] = 2'b01) with the 1.5-kΩ pull-
up resistor disabled

REFCLKSEL[1:0]
Reference Clock Select for PLL Block
Function: Selects reference clock source for the HS PLL block.
- 11: HS PLL uses EXTREFCLK as reference.
- 10: HS PLL uses either ref_pad_clk{p,m} or ref_alt_clk_{p,m} as reference.
- 0x: Reserved
This bus is a strapping option that must be set prior to a power-on reset and
remain static during normal operation. Strapping options are not critical for STA,
and any other timings or loading limits for the pin are specified in the .lib timing
model included in the product deliverables.


> Please look at your "PHY databook" how these signals should be configured.
> Be aware that some "tune" register should have been calibrated in fab somehow, so you should make sure the reset values are correct.

Hmmm... Taking a closer look at the 280-page PHY documentation...

3.2 Clocks and Resets
The USB 3.0 PHY supports a wide range of input reference clocks from both
external and on-chip clock sources.
To support both SuperSpeed and high-speed operations, one of the following must be provided:
- A compatible, shared reference clock frequency
- Separate clock sources to support SuperSpeed operation and high-speed operation

I checked that specific configuration requirement.
(Table 3-3 Reference Clock Frequency Selection for SuperSpeed Operation)
and it does look like one of the arbitrary values was not correctly set:
ssc_ref_clk_sel was 0x88 instead of 0x0 (for 100 MHz input clock).
But "fixing" that didn't make the controller detect my USB2 key (or a USB3 hub).


pipeP_phystatus is an output signal documented as:
PIPE PHY Status
Function: Communicates completion of several PHY functions including power
management state transitions, rate change, and receiver detection. When this
signal transitions during entry and exit from P3 states and PCLK is not running,
the signaling is asynchronous. In error situations (where the PHY fails to assert
PhyStatus), the MAC can take MAC-specific error recovery actions.

I think it's a bad sign that it remains at 0.
(Assuming host_usb30_0_phystatus and pipeP_phystatus are the same)


Hmmm, I think it's time to punt this task to a HW engineer, and let them
figure out what is required for basic functionality. Only then can I try
to make the Linux driver play nice with the HW block.

Regards.

^ permalink raw reply

* [PATCH] ARM: dts: orion5x: fix number of sata port for linkstation ls-gl
From: Gregory CLEMENT @ 2016-12-05 15:41 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <CAEQ9gEkp+bS10ZQ_0UJqvWMNgTkmtWGqcZviRZk0+pQaZFw_vA@mail.gmail.com>

Hi Roger,
 
 On lun., déc. 05 2016, Roger Shimizu <rogershimizu@gmail.com> wrote:

> [CC Arnd Bergmann]
>
> On Fri, Dec 2, 2016 at 12:11 AM, Roger Shimizu <rogershimizu@gmail.com> wrote:
>> Bug report from Debian [0] shows there's minor changed model of
>> Linkstation LS-GL that uses the 2nd SATA port of the SoC.
>> So it's necessary to enable two SATA ports, though for that specific
>> model only the 2nd one is used.
>>
>> [0] https://bugs.debian.org/845611
>>
>> Fixes: b1742ffa9ddb ("ARM: dts: orion5x: add device tree for buffalo linkstation ls-gl")
>> Reported-by: Ryan Tandy <ryan@nardis.ca>
>> Tested-by: Ryan Tandy <ryan@nardis.ca>
>> Signed-off-by: Roger Shimizu <rogershimizu@gmail.com>
>> ---
>>  arch/arm/boot/dts/orion5x-linkstation-lsgl.dts | 4 ++++
>>  1 file changed, 4 insertions(+)
>>
>> diff --git a/arch/arm/boot/dts/orion5x-linkstation-lsgl.dts b/arch/arm/boot/dts/orion5x-linkstation-lsgl.dts
>> index 1cf644b..51dc734 100644
>> --- a/arch/arm/boot/dts/orion5x-linkstation-lsgl.dts
>> +++ b/arch/arm/boot/dts/orion5x-linkstation-lsgl.dts
>> @@ -82,6 +82,10 @@
>>         gpios = <&gpio0 9 GPIO_ACTIVE_HIGH>;
>>  };
>>
>> +&sata {
>> +       nr-ports = <2>;
>> +};
>> +
>>  &ehci1 {
>>         status = "okay";
>>  };
>> --
>> 2.10.2
>>
>
> Is there any chance to get this simple fix into v4.9 or next v4.10?
> Thank you!

I can apply it on mvebu/fixes and make a pull request. But I don't know
if Arnd or Olof will get it.

Let's see how it goes.

Gregory

> -- 
> Roger Shimizu, GMT +9 Tokyo
> PGP/GPG: 4096R/6C6ACD6417B3ACB1

-- 
Gregory Clement, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com

^ permalink raw reply

* [PATCH 0/2] Hibernate fixes for 'Fix memmap to be initialized for the entire section'
From: Ard Biesheuvel @ 2016-12-05 15:42 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161202144909.18405-1-james.morse@arm.com>

On 2 December 2016 at 14:49, James Morse <james.morse@arm.com> wrote:
> Patch "arm64: mm: Fix memmap to be initialized for the entire section"
> changes pfn_valid() in a way that breaks hibernate. These patches fix
> hibernate, and provided struct page's are allocated for nomap pages,
> can be applied before [0].
>
> Hibernate core code believes 'valid' to mean "I can access this". It
> uses pfn_valid() to test whether the page is 'valid'.
>
> pfn_valid() needs to be changed so that all struct pages in a numa
> node have the same node-id. Currently 'nomap' pages are skipped, and
> retain their pre-numa node-ids, which leads to a later BUG_ON().
>
> These patches make hibernate's savable_page() take its escape route
> via 'if (PageReserved(page) && pfn_is_nosave(pfn))'.
>

This makes me feel slightly uneasy. Robert makes a convincing point,
but I wonder if we can expect more fallout from the ambiguity of
pfn_valid(). Now we are not only forced to assign non-existing (as far
as the OS is concerned) pages to the correct NUMA node, we also need
to set certain page flags.

^ permalink raw reply

* [PATCH] ARM: dts: orion5x: fix number of sata port for linkstation ls-gl
From: Gregory CLEMENT @ 2016-12-05 15:44 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161201151112.930-1-rogershimizu@gmail.com>

Hi Roger,
 
 On jeu., déc. 01 2016, Roger Shimizu <rogershimizu@gmail.com> wrote:

> Bug report from Debian [0] shows there's minor changed model of
> Linkstation LS-GL that uses the 2nd SATA port of the SoC.
> So it's necessary to enable two SATA ports, though for that specific
> model only the 2nd one is used.
>
> [0] https://bugs.debian.org/845611
>
> Fixes: b1742ffa9ddb ("ARM: dts: orion5x: add device tree for buffalo linkstation ls-gl")
> Reported-by: Ryan Tandy <ryan@nardis.ca>
> Tested-by: Ryan Tandy <ryan@nardis.ca>
> Signed-off-by: Roger Shimizu <rogershimizu@gmail.com>

Applied on mvebu/fixes

Thanks,

Gregory


> ---
>  arch/arm/boot/dts/orion5x-linkstation-lsgl.dts | 4 ++++
>  1 file changed, 4 insertions(+)
>
> diff --git a/arch/arm/boot/dts/orion5x-linkstation-lsgl.dts b/arch/arm/boot/dts/orion5x-linkstation-lsgl.dts
> index 1cf644b..51dc734 100644
> --- a/arch/arm/boot/dts/orion5x-linkstation-lsgl.dts
> +++ b/arch/arm/boot/dts/orion5x-linkstation-lsgl.dts
> @@ -82,6 +82,10 @@
>  	gpios = <&gpio0 9 GPIO_ACTIVE_HIGH>;
>  };
>  
> +&sata {
> +	nr-ports = <2>;
> +};
> +
>  &ehci1 {
>  	status = "okay";
>  };
> -- 
> 2.10.2
>

-- 
Gregory Clement, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com

^ permalink raw reply

* next-20161205 build: 3 failures 4 warnings (next-20161205)
From: Marc Zyngier @ 2016-12-05 15:44 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161205112043.pyzcbhtu7bklioas@sirena.org.uk>

On 05/12/16 11:20, Mark Brown wrote:
> On Mon, Dec 05, 2016 at 07:56:06AM +0000, Build bot for Mark Brown wrote:
> 
> Today's -next fails to build an arm64 allnoconfig and allmodconfig
> with:
> 
>> 	arm64-allnoconfig
>> ../arch/arm64/lib/clear_user.S:33: Error: bad or irreducible absolute expression
>> ../arch/arm64/lib/clear_user.S:53: Error: bad or irreducible absolute expression
>> ../arch/arm64/lib/clear_user.S:33: Error: attempt to move .org backwards
>> ../arch/arm64/lib/clear_user.S:53: Error: attempt to move .org backwards
>> ../arch/arm64/lib/copy_from_user.S:67: Error: bad or irreducible absolute expression
>> ../arch/arm64/lib/copy_from_user.S:70: Error: bad or irreducible absolute expression
>> ../arch/arm64/lib/copy_from_user.S:67: Error: attempt to move .org backwards
>> ../arch/arm64/lib/copy_from_user.S:70: Error: attempt to move .org backwards
>> ../arch/arm64/lib/copy_in_user.S:68: Error: bad or irreducible absolute expression
>> ../arch/arm64/lib/copy_in_user.S:71: Error: bad or irreducible absolute expression
>> ../arch/arm64/lib/copy_in_user.S:68: Error: attempt to move .org backwards
>> ../arch/arm64/lib/copy_in_user.S:71: Error: attempt to move .org backwards
>> ../arch/arm64/lib/copy_to_user.S:66: Error: bad or irreducible absolute expression
>> ../arch/arm64/lib/copy_to_user.S:69: Error: bad or irreducible absolute expression
>> ../arch/arm64/lib/copy_to_user.S:66: Error: attempt to move .org backwards
>> ../arch/arm64/lib/copy_to_user.S:69: Error: attempt to move .org backwards
> 
> This was triggered somehow by bca8f17f57bd7 (arm64: Get rid of
> asm/opcodes.h) though I didn't figure out how.

Old and broken gas. I have a workaround stashed there:

http://git.kernel.org/cgit/linux/kernel/git/maz/arm-platforms.git/commit/?h=arm64/standalone.h&id=559f97365362ed9e96f594200020379df46630d8

At least binutils 2.24 and 2.25 are affected, while 2.27 is not.

Thanks,

	M.
-- 
Jazz is not dead. It just smells funny...

^ permalink raw reply

* [PATCH v3 7/7] ARM: dts: stm32: add stm32 general purpose timer driver in DT
From: Alexandre Torgue @ 2016-12-05 16:09 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161202132251.GL2683@dell>

Hi,

On 12/02/2016 02:22 PM, Lee Jones wrote:
> On Fri, 02 Dec 2016, Benjamin Gaignard wrote:
>
>> Add general purpose timers and its sub-nodes into DT for stm32f4.
>> Define and enable pwm1 and pwm3 for stm32f469 discovery board
>>
>> version 3:
>> - use "st,stm32-timer-trigger" in DT
>>
>> version 2:
>> - use parameters to describe hardware capabilities
>> - do not use references for pwm and iio timer subnodes
>>
>> Signed-off-by: Benjamin Gaignard <benjamin.gaignard@st.com>
>> ---
>>  arch/arm/boot/dts/stm32f429.dtsi      | 333 +++++++++++++++++++++++++++++++++-
>>  arch/arm/boot/dts/stm32f469-disco.dts |  28 +++
>>  2 files changed, 360 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/arm/boot/dts/stm32f429.dtsi b/arch/arm/boot/dts/stm32f429.dtsi
>> index bca491d..8c50d03 100644
>> --- a/arch/arm/boot/dts/stm32f429.dtsi
>> +++ b/arch/arm/boot/dts/stm32f429.dtsi
>> @@ -48,7 +48,7 @@
>>  #include "skeleton.dtsi"
>>  #include "armv7-m.dtsi"
>>  #include <dt-bindings/pinctrl/stm32f429-pinfunc.h>
>> -
>> +#include <dt-bindings/iio/timer/st,stm32-timer-triggers.h>
>>  / {
>>  	clocks {
>>  		clk_hse: clk-hse {
>> @@ -355,6 +355,21 @@
>>  					slew-rate = <2>;
>>  				};
>>  			};
>> +
>> +			pwm1_pins: pwm at 1 {
>> +				pins {
>> +					pinmux = <STM32F429_PA8_FUNC_TIM1_CH1>,
>> +						 <STM32F429_PB13_FUNC_TIM1_CH1N>,
>> +						 <STM32F429_PB12_FUNC_TIM1_BKIN>;
>> +				};
>> +			};
>> +
>> +			pwm3_pins: pwm at 3 {
>> +				pins {
>> +					pinmux = <STM32F429_PB4_FUNC_TIM3_CH1>,
>> +						 <STM32F429_PB5_FUNC_TIM3_CH2>;
>> +				};
>> +			};
>>  		};
>>
>>  		rcc: rcc at 40023810 {
>> @@ -426,6 +441,322 @@
>>  			interrupts = <80>;
>>  			clocks = <&rcc 0 38>;
>>  		};
>> +
>> +		gptimer1: gptimer1 at 40010000 {
>
> timer at xxxxxxx
>
> Node names should be generic and not numbered.
>
> I suggest that this isn't actually a timer either.  It contains a
> timer (and a PWM), but in its completeness it is not a timer per
> se.
>
>> +			compatible = "st,stm32-gptimer";
>> +			reg = <0x40010000 0x400>;
>> +			clocks = <&rcc 0 160>;
>> +			clock-names = "clk_int";
>> +			status = "disabled";
>> +
>> +			pwm1 at 0 {
>> +				compatible = "st,stm32-pwm";
>> +				st,pwm-num-chan = <4>;
>> +				st,breakinput;
>> +				st,complementary;
>> +				status = "disabled";
>> +			};
>> +
>> +			timer1 at 0 {
>> +				compatible = "st,stm32-timer-trigger";
>> +				interrupts = <27>;
>> +				st,input-triggers-names = TIM5_TRGO,
>> +							  TIM2_TRGO,
>> +							  TIM4_TRGO,
>> +							  TIM3_TRGO;
>
> I'm still dubious with matching by strings.
>
> I'll take a look at the C code to see what the alternatives could be.
>
>> +				st,output-triggers-names = TIM1_TRGO,
>> +							   TIM1_CH1,
>> +							   TIM1_CH2,
>> +							   TIM1_CH3,
>> +							   TIM1_CH4;
>> +				status = "disabled";
>> +			};
>> +		};
>> +
>> +		gptimer2: gptimer2 at 40000000 {
>> +			compatible = "st,stm32-gptimer";
>> +			reg = <0x40000000 0x400>;
>> +			clocks = <&rcc 0 128>;
>> +			clock-names = "clk_int";
>> +			status = "disabled";
>> +
>> +			pwm2 at 0 {
>> +				compatible = "st,stm32-pwm";
>> +				st,pwm-num-chan = <4>;
>> +				st,32bits-counter;
>> +				status = "disabled";
>> +			};
>> +
>> +			timer2 at 0 {
>> +				compatible = "st,stm32-timer-trigger";
>> +				interrupts = <28>;
>> +				st,input-triggers-names = TIM1_TRGO,
>> +							  TIM8_TRGO,
>> +							  TIM3_TRGO,
>> +							  TIM4_TRGO;
>> +				st,output-triggers-names = TIM2_TRGO,
>> +							   TIM2_CH1,
>> +							   TIM2_CH2,
>> +							   TIM2_CH3,
>> +							   TIM2_CH4;
>> +				status = "disabled";
>> +			};
>> +		};
>> +
>> +		gptimer3: gptimer3 at 40000400 {
>> +			compatible = "st,stm32-gptimer";
>> +			reg = <0x40000400 0x400>;
>> +			clocks = <&rcc 0 129>;
>> +			clock-names = "clk_int";
>> +			status = "disabled";
>> +
>> +			pwm3 at 0 {
>> +				compatible = "st,stm32-pwm";
>> +				st,pwm-num-chan = <4>;
>> +				status = "disabled";
>> +			};
>> +
>> +			timer3 at 0 {
>> +				compatible = "st,stm32-timer-trigger";
>> +				interrupts = <29>;
>> +				st,input-triggers-names = TIM1_TRGO,
>> +							  TIM8_TRGO,
>> +							  TIM5_TRGO,
>> +							  TIM4_TRGO;
>> +				st,output-triggers-names = TIM3_TRGO,
>> +							   TIM3_CH1,
>> +							   TIM3_CH2,
>> +							   TIM3_CH3,
>> +							   TIM3_CH4;
>> +				status = "disabled";
>> +			};
>> +		};
>> +
>> +		gptimer4: gptimer4 at 40000800 {
>> +			compatible = "st,stm32-gptimer";
>> +			reg = <0x40000800 0x400>;
>> +			clocks = <&rcc 0 130>;
>> +			clock-names = "clk_int";
>> +			status = "disabled";
>> +
>> +			pwm4 at 0 {
>> +				compatible = "st,stm32-pwm";
>> +				st,pwm-num-chan = <4>;
>> +				status = "disabled";
>> +			};
>> +
>> +			timer4 at 0 {
>> +				compatible = "st,stm32-timer-trigger";
>> +				interrupts = <30>;
>> +				st,input-triggers-names = TIM1_TRGO,
>> +							  TIM2_TRGO,
>> +							  TIM3_TRGO,
>> +							  TIM8_TRGO;
>> +				st,output-triggers-names = TIM4_TRGO,
>> +							   TIM4_CH1,
>> +							   TIM4_CH2,
>> +							   TIM4_CH3,
>> +							   TIM4_CH4;
>> +				status = "disabled";
>> +			};
>> +		};
>> +
>> +		gptimer5: gptimer5 at 40000C00 {
>> +			compatible = "st,stm32-gptimer";
>> +			reg = <0x40000C00 0x400>;
>> +			clocks = <&rcc 0 131>;
>> +			clock-names = "clk_int";
>> +			status = "disabled";
>> +
>> +			pwm5 at 0 {
>> +				compatible = "st,stm32-pwm";
>> +				st,pwm-num-chan = <4>;
>> +				st,32bits-counter;
>> +				status = "disabled";
>> +			};
>> +
>> +			timer5 at 0 {
>> +				compatible = "st,stm32-timer-trigger";
>> +				interrupts = <50>;
>> +				st,input-triggers-names = TIM2_TRGO,
>> +							  TIM3_TRGO,
>> +							  TIM4_TRGO,
>> +							  TIM8_TRGO;
>> +				st,output-triggers-names = TIM5_TRGO,
>> +							   TIM5_CH1,
>> +							   TIM5_CH2,
>> +							   TIM5_CH3,
>> +							   TIM5_CH4;
>> +				status = "disabled";
>> +			};
>> +		};
>> +
>> +		gptimer6: gptimer6 at 40001000 {
>> +			compatible = "st,stm32-gptimer";
>> +			reg = <0x40001000 0x400>;
>> +			clocks = <&rcc 0 132>;
>> +			clock-names = "clk_int";
>> +			status = "disabled";
>> +
>> +			timer6 at 0 {
>> +				compatible = "st,stm32-timer-trigger";
>> +				interrupts = <54>;
>> +				st,output-triggers-names = TIM6_TRGO;
>> +				status = "disabled";
>> +			};
>> +		};
>> +
>> +		gptimer7: gptimer7 at 40001400 {
>> +			compatible = "st,stm32-gptimer";
>> +			reg = <0x40001400 0x400>;
>> +			clocks = <&rcc 0 133>;
>> +			clock-names = "clk_int";
>> +			status = "disabled";
>> +
>> +			timer7 at 0 {
>> +				compatible = "st,stm32-timer-trigger";
>> +				interrupts = <55>;
>> +				st,output-triggers-names = TIM7_TRGO;
>> +				status = "disabled";
>> +			};
>> +		};
>> +
>> +		gptimer8: gptimer8 at 40010400 {
>> +			compatible = "st,stm32-gptimer";
>> +			reg = <0x40010400 0x400>;
>> +			clocks = <&rcc 0 161>;
>> +			clock-names = "clk_int";
>> +			status = "disabled";
>> +
>> +			pwm8 at 0 {
>> +				compatible = "st,stm32-pwm";
>> +				st,pwm-num-chan = <4>;
>> +				st,complementary;
>> +				st,breakinput;
>> +				status = "disabled";
>> +			};
>> +
>> +			timer8 at 0 {
>> +				compatible = "st,stm32-timer-trigger";
>> +				interrupts = <46>;
>> +				st,input-triggers-names = TIM1_TRGO,
>> +							  TIM2_TRGO,
>> +							  TIM4_TRGO,
>> +							  TIM5_TRGO;
>> +				st,output-triggers-names = TIM8_TRGO,
>> +							   TIM8_CH1,
>> +							   TIM8_CH2,
>> +							   TIM8_CH3,
>> +							   TIM8_CH4;
>> +				status = "disabled";
>> +			};
>> +		};
>> +
>> +		gptimer9: gptimer9 at 40014000 {
>> +			compatible = "st,stm32-gptimer";
>> +			reg = <0x40014000 0x400>;
>> +			clocks = <&rcc 0 176>;
>> +			clock-names = "clk_int";
>> +			status = "disabled";
>> +
>> +			pwm9 at 0 {
>> +				compatible = "st,stm32-pwm";
>> +				st,pwm-num-chan = <2>;
>> +				status = "disabled";
>> +			};
>> +
>> +			timer9 at 0 {
>> +				compatible = "st,stm32-timer-trigger";
>> +				interrupts = <24>;
>> +				st,input-triggers-names = TIM2_TRGO,
>> +							  TIM3_TRGO;
>> +				st,output-triggers-names = TIM9_TRGO,
>> +							   TIM9_CH1,
>> +							   TIM9_CH2;
>> +				status = "disabled";
>> +			};
>> +		};
>> +
>> +		gptimer10: gptimer10 at 40014400 {
>> +			compatible = "st,stm32-gptimer";
>> +			reg = <0x40014400 0x400>;
>> +			clocks = <&rcc 0 177>;
>> +			clock-names = "clk_int";
>> +			status = "disabled";
>> +
>> +			pwm10 at 0 {
>> +				compatible = "st,stm32-pwm";
>> +				st,pwm-num-chan = <1>;
>> +				status = "disabled";
>> +			};
>> +		};
>> +
>> +		gptimer11: gptimer11 at 40014800 {
>> +			compatible = "st,stm32-gptimer";
>> +			reg = <0x40014800 0x400>;
>> +			clocks = <&rcc 0 178>;
>> +			clock-names = "clk_int";
>> +			status = "disabled";
>> +
>> +			pwm11 at 0 {
>> +				compatible = "st,stm32-pwm";
>> +				st,pwm-num-chan = <1>;
>> +				status = "disabled";
>> +			};
>> +		};
>> +
>> +		gptimer12: gptimer12 at 40001800 {
>> +			compatible = "st,stm32-gptimer";
>> +			reg = <0x40001800 0x400>;
>> +			clocks = <&rcc 0 134>;
>> +			clock-names = "clk_int";
>> +			status = "disabled";
>> +
>> +			pwm12 at 0 {
>> +				compatible = "st,stm32-pwm";
>> +				st,pwm-num-chan = <2>;
>> +				status = "disabled";
>> +			};
>> +
>> +			timer12 at 0 {
>> +				compatible = "st,stm32-timer-trigger";
>> +				interrupts = <43>;
>> +				st,input-triggers-names = TIM4_TRGO,
>> +							  TIM5_TRGO;
>> +				st,output-triggers-names = TIM12_TRGO,
>> +							   TIM12_CH1,
>> +							   TIM12_CH2;
>> +				status = "disabled";
>> +			};
>> +		};
>> +
>> +		gptimer13: gptimer13 at 40001C00 {
>> +			compatible = "st,stm32-gptimer";
>> +			reg = <0x40001C00 0x400>;
>> +			clocks = <&rcc 0 135>;
>> +			clock-names = "clk_int";
>> +			status = "disabled";
>> +
>> +			pwm13 at 0 {
>> +				compatible = "st,stm32-pwm";
>> +				st,pwm-num-chan = <1>;
>> +				status = "disabled";
>> +			};
>> +		};
>> +
>> +		gptimer14: gptimer14 at 40002000 {
>> +			compatible = "st,stm32-gptimer";
>> +			reg = <0x40002000 0x400>;
>> +			clocks = <&rcc 0 136>;
>> +			clock-names = "clk_int";
>> +			status = "disabled";
>> +
>> +			pwm14 at 0 {
>> +				compatible = "st,stm32-pwm";
>> +				st,pwm-num-chan = <1>;
>> +				status = "disabled";
>> +			};
>> +		};
>>  	};
>>  };
>>
>> diff --git a/arch/arm/boot/dts/stm32f469-disco.dts b/arch/arm/boot/dts/stm32f469-disco.dts
>> index 8a163d7..df4ca7e 100644
>> --- a/arch/arm/boot/dts/stm32f469-disco.dts
>> +++ b/arch/arm/boot/dts/stm32f469-disco.dts
>> @@ -81,3 +81,31 @@
>>  &usart3 {
>>  	status = "okay";
>>  };
>> +
>> +&gptimer1 {
>> +	status = "okay";
>> +
>> +	pwm1 at 0 {
>> +		pinctrl-0	= <&pwm1_pins>;
>> +		pinctrl-names	= "default";
>> +		status = "okay";
>> +	};
>> +
>> +	timer1 at 0 {
>> +		status = "okay";
>> +	};
>> +};
>
> This is a much *better* format than before.
>
> I still don't like the '&' syntax though.

Please keep "&" format to match with existing nodes.

>
>> +&gptimer3 {
>> +	status = "okay";
>> +
>> +	pwm3 at 0 {
>> +		pinctrl-0	= <&pwm3_pins>;
>> +		pinctrl-names	= "default";
>> +		status = "okay";
>> +	};
>> +
>> +	timer3 at 0 {
>> +		status = "okay";
>> +	};
>> +};
>

^ permalink raw reply

* Adding a .platform_init callback to sdhci_arasan_ops
From: Doug Anderson @ 2016-12-05 16:13 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <305f6531-3102-d0f5-cb22-bdd965e39519@laposte.net>

Hi,

On Mon, Dec 5, 2016 at 4:28 AM, Sebastian Frias <sf84@laposte.net> wrote:
> Hi Doug,
>
> On 28/11/16 18:46, Doug Anderson wrote:
>> Hi,
>>
>> On Mon, Nov 28, 2016 at 6:39 AM, Sebastian Frias <sf84@laposte.net> wrote:
>>>> I will try to send another patch with a different approach
>>>>
>>>
>>> Here's a different approach (I just tested that it built, because I don't have the
>>> rk3399 platform):
>>>
>>> diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
>>> index 410a55b..5be6e67 100644
>>> --- a/drivers/mmc/host/sdhci-of-arasan.c
>>> +++ b/drivers/mmc/host/sdhci-of-arasan.c
>>> @@ -382,22 +382,6 @@ static int sdhci_arasan_resume(struct device *dev)
>>>  static SIMPLE_DEV_PM_OPS(sdhci_arasan_dev_pm_ops, sdhci_arasan_suspend,
>>>                          sdhci_arasan_resume);
>>>
>>> -static const struct of_device_id sdhci_arasan_of_match[] = {
>>> -       /* SoC-specific compatible strings w/ soc_ctl_map */
>>> -       {
>>> -               .compatible = "rockchip,rk3399-sdhci-5.1",
>>> -               .data = &rk3399_soc_ctl_map,
>>> -       },
>>> -
>>> -       /* Generic compatible below here */
>>> -       { .compatible = "arasan,sdhci-8.9a" },
>>> -       { .compatible = "arasan,sdhci-5.1" },
>>> -       { .compatible = "arasan,sdhci-4.9a" },
>>> -
>>> -       { /* sentinel */ }
>>> -};
>>> -MODULE_DEVICE_TABLE(of, sdhci_arasan_of_match);
>>> -
>>>  /**
>>>   * sdhci_arasan_sdcardclk_recalc_rate - Return the card clock rate
>>>   *
>>> @@ -578,28 +562,18 @@ static void sdhci_arasan_unregister_sdclk(struct device *dev)
>>>         of_clk_del_provider(dev->of_node);
>>>  }
>>>
>>> -static int sdhci_arasan_probe(struct platform_device *pdev)
>>> +static int sdhci_rockchip_platform_init(struct sdhci_host *host,
>>> +                                       struct platform_device *pdev)
>>>  {
>>>         int ret;
>>> -       const struct of_device_id *match;
>>>         struct device_node *node;
>>> -       struct clk *clk_xin;
>>> -       struct sdhci_host *host;
>>>         struct sdhci_pltfm_host *pltfm_host;
>>>         struct sdhci_arasan_data *sdhci_arasan;
>>> -       struct device_node *np = pdev->dev.of_node;
>>> -
>>> -       host = sdhci_pltfm_init(pdev, &sdhci_arasan_pdata,
>>> -                               sizeof(*sdhci_arasan));
>>> -       if (IS_ERR(host))
>>> -               return PTR_ERR(host);
>>>
>>>         pltfm_host = sdhci_priv(host);
>>>         sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
>>> -       sdhci_arasan->host = host;
>>>
>>> -       match = of_match_node(sdhci_arasan_of_match, pdev->dev.of_node);
>>> -       sdhci_arasan->soc_ctl_map = match->data;
>>> +       sdhci_arasan->soc_ctl_map = &rk3399_soc_ctl_map;
>>>
>>>         node = of_parse_phandle(pdev->dev.of_node, "arasan,soc-ctl-syscon", 0);
>>>         if (node) {
>>> @@ -611,10 +585,107 @@ static int sdhci_arasan_probe(struct platform_device *pdev)
>>>                         if (ret != -EPROBE_DEFER)
>>>                                 dev_err(&pdev->dev, "Can't get syscon: %d\n",
>>>                                         ret);
>>> -                       goto err_pltfm_free;
>>> +                       return -1;
>>>                 }
>>>         }
>>>
>>> +       if (of_property_read_bool(pdev->dev.of_node, "xlnx,fails-without-test-cd"))
>>> +               sdhci_arasan->quirks |= SDHCI_ARASAN_QUIRK_FORCE_CDTEST;
>>> +
>>> +       return 0;
>>> +}
>>> +
>>> +static int sdhci_rockchip_clock_init(struct sdhci_host *host,
>>> +                                       struct platform_device *pdev)
>>> +{
>>> +       struct sdhci_pltfm_host *pltfm_host;
>>> +       struct sdhci_arasan_data *sdhci_arasan;
>>> +
>>> +       pltfm_host = sdhci_priv(host);
>>> +       sdhci_arasan = sdhci_pltfm_priv(pltfm_host);
>>> +
>>> +       if (of_device_is_compatible(pdev->dev.of_node,
>>> +                                   "rockchip,rk3399-sdhci-5.1"))
>>> +               sdhci_arasan_update_clockmultiplier(host, 0x0);
>>
>> This _does_ belong in a Rockchip-specific init function, for now.
>
> I'm not sure I understood. Are you saying that you agree to put this
> into a specific function? Essentially agreeing with the concept the
> patch is putting forward?
>
> Note, I'm more interested in the concept (i.e.: init functions) and less
> in knowing if my patch (which was a quick and dirty thing) properly moved
> the functions into the said init functions. For example, I did not move
> the code dealing with "arasan,sdhci-5.1", but it could go into another
> callback.
>
> Right now there are no "chip-specific" functions.
> Just a code in sdhci_arasan_probe() that by checking various compatible
> strings and the presence of other specific properties, acts as a way of
> "chip-specific" initialisation, because it calls or not some functions.
> (or the functions do nothing if some DT properties are not there).
>
> The proposed patch is an attempt to clean up sdhci_arasan_probe() from
> all those checks and move them into separate functions, for clarity and
> maintainability reasons.
>
> What are your thoughts in that regard?
>
> From what I've seen in other drivers, for example drivers/net/ethernet/aurora/nb8800.c
> One matches the compatible string to get a (likely) chip-specific set of
> callbacks to use in the 'probe' function.

I have no objections to chip-specific functions if they are needed.
It's really just a cleaner way to avoid doing "if this chip then, else
if this other chip then, else if this third chip then".

The thing I worry about is that too much stuff will be jammed into
chip-specific functions and that we'll end up solving the same thing
more than one way.


> Then, adding support for other chips is just a matter of replacing some
> of those callbacks with others adapted to a given platform.
>
>> Other platforms might want different values for
>> corecfg_clockmultiplier, I think.
>>
>> If it becomes common to need to set this, it wouldn't be hard to make
>> it generic by putting it in the data matched by the device tree, then
>> generically call sdhci_arasan_update_clockmultiplier() in cases where
>> it is needed.  sdhci_arasan_update_clockmultiplier() itself should be
>> generic enough to handle it.
>>
>>
>>> +
>>> +       sdhci_arasan_update_baseclkfreq(host);
>>
>> If you make soc_ctl_map always part of "struct sdhci_arasan_cs_ops"
>> then other platforms will be able to use it.
>
> I thought that soc_ctl_map was specific and for a given platform.

I believe the soc_ctl_map will be used by more than one chip, mostly
because I saw these same fields referenced in the generic
(non-Rockchip) Arasan docs.  Obviously I have a very small view of the
SDHCI-Arasan world though.


> For what is worth, I don't know which of these calls are or can be made
> generic or not.
>
> Indeed, I'm not an expert in this code; However, I think that given the
> amount of checks for specific properties, probably part of this is chip-
> specific, and as such, it would benefit from some re-factoring so that
> the chip-specific parts are clearly separated from the rest, instead of
> being mixed together inside the probe function.

I believe the only chip-specific stuff was the part that is currently
guarded by the "if rk3399" check.  Everything else seems like it ought
to be applicable to other platforms using Arasan's SDHCI IP.


>> As argued in my original patch the field "corecfg_baseclkfreq" is
>> documented in the generic Arasan document
>> <https://arasan.com/wp-content/media/eMMC-5-1-Total-Solution_Rev-1-3.pdf>
>> and thus is unlikely to be Rockchip specific.  It is entirely possible
>> that not everyone who integrates this IP block will need this register
>> set, but in that case they can set an offset as "-1" and they'll be
>> fine.
>>
>> Said another way: the concept of whether or not to set "baseclkfreq"
>> doesn't need to be tied to whether or not we're on Rockchip.
>>
>
> I see.
> For what is worth, the documentation for 'sdhci_arasan_update_baseclkfreq()'
> says something like:
>
>  *   Many existing devices don't seem to do this and work fine.  To keep
>  *   compatibility for old hardware where the device tree doesn't provide a
>  *   register map, this function is a noop if a soc_ctl_map hasn't been provided
>  *   for this platform.

Yup.  I wrote that.  See "git blame".

^ permalink raw reply

* [PATCH 14/18] arm64: signal32: move ilp32 and aarch32 common code to separated file
From: Catalin Marinas @ 2016-12-05 16:18 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1477081997-4770-15-git-send-email-ynorov@caviumnetworks.com>

On Fri, Oct 21, 2016 at 11:33:13PM +0300, Yury Norov wrote:
> Signed-off-by: Yury Norov <ynorov@caviumnetworks.com>

Please add some description, even if it means copying the subject.

> ---
>  arch/arm64/include/asm/signal32.h        |   3 +
>  arch/arm64/include/asm/signal32_common.h |  27 +++++++
>  arch/arm64/kernel/Makefile               |   2 +-
>  arch/arm64/kernel/signal32.c             | 107 ------------------------
>  arch/arm64/kernel/signal32_common.c      | 135 +++++++++++++++++++++++++++++++
>  5 files changed, 166 insertions(+), 108 deletions(-)
>  create mode 100644 arch/arm64/include/asm/signal32_common.h
>  create mode 100644 arch/arm64/kernel/signal32_common.c

I wonder whether you can make such patches more readable by setting
"diff.renames" to "copy" in your gitconfig (unless it's set already and
Git cannot detect partial file code moving/copying).

-- 
Catalin

^ permalink raw reply

* [PATCH v3] arm: dts: zynq: Add MicroZed board support
From: Jagan Teki @ 2016-12-05 16:28 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <f0c33080-0811-a515-f897-e7f851b13b5e@xilinx.com>

On Mon, Sep 26, 2016 at 8:22 AM, Michal Simek <michal.simek@xilinx.com> wrote:
> On 23.9.2016 11:48, Jagan Teki wrote:
>> From: Jagan Teki <jteki@openedev.com>
>>
>> Added basic dts support for MicroZed board.
>>
>> - UART
>> - SDHCI
>> - Ethernet
>>
>> Cc: Soren Brinkmann <soren.brinkmann@xilinx.com>
>> Cc: Michal Simek <michal.simek@xilinx.com>
>> Signed-off-by: Jagan Teki <jteki@openedev.com>
>> ---
>> Changes for v3:
>>       - Add Xilinx copyright
>> Changes for v2:
>>       - Add SDHCI
>>       - Add Ethernet
>>
>>  arch/arm/boot/dts/Makefile          |  1 +
>>  arch/arm/boot/dts/zynq-microzed.dts | 96 +++++++++++++++++++++++++++++++++++++
>>  2 files changed, 97 insertions(+)
>>  create mode 100644 arch/arm/boot/dts/zynq-microzed.dts
>>
>> diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
>> index faacd52..4d7b858 100644
>> --- a/arch/arm/boot/dts/Makefile
>> +++ b/arch/arm/boot/dts/Makefile
>> @@ -862,6 +862,7 @@ dtb-$(CONFIG_ARCH_VT8500) += \
>>       wm8750-apc8750.dtb \
>>       wm8850-w70v2.dtb
>>  dtb-$(CONFIG_ARCH_ZYNQ) += \
>> +     zynq-microzed.dtb \
>>       zynq-parallella.dtb \
>>       zynq-zc702.dtb \
>>       zynq-zc706.dtb \
>> diff --git a/arch/arm/boot/dts/zynq-microzed.dts b/arch/arm/boot/dts/zynq-microzed.dts
>> new file mode 100644
>> index 0000000..b9376a4
>> --- /dev/null
>> +++ b/arch/arm/boot/dts/zynq-microzed.dts
>> @@ -0,0 +1,96 @@
>> +/*
>> + * Copyright (C) 2011 - 2014 Xilinx
>> + * Copyright (C) 2016 Jagan Teki <jteki@openedev.com>
>> + *
>> + * This software is licensed under the terms of the GNU General Public
>> + * License version 2, as published by the Free Software Foundation, and
>> + * may be copied, distributed, and modified under those terms.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + */
>> +/dts-v1/;
>> +/include/ "zynq-7000.dtsi"
>> +
>> +/ {
>> +     model = "Zynq MicroZED Development Board";
>> +     compatible = "xlnx,zynq-microzed", "xlnx,zynq-7000";
>> +
>> +     aliases {
>> +             ethernet0 = &gem0;
>> +             serial0 = &uart1;
>> +     };
>> +
>> +     memory {
>> +             device_type = "memory";
>> +             reg = <0x0 0x40000000>;
>> +     };
>> +
>> +     chosen {
>> +             bootargs = "earlycon";
>> +             stdout-path = "serial0:115200n8";
>> +     };
>> +
>> +     usb_phy0: phy0 {
>> +             compatible = "usb-nop-xceiv";
>> +             #phy-cells = <0>;
>> +     };
>> +};
>> +
>> +&clkc {
>> +     ps-clk-frequency = <33333333>;
>> +};
>> +
>> +&gem0 {
>> +     status = "okay";
>> +     phy-mode = "rgmii-id";
>> +     phy-handle = <&ethernet_phy>;
>> +
>> +     ethernet_phy: ethernet-phy at 0 {
>> +             reg = <0>;
>> +     };
>> +};
>> +
>> +&sdhci0 {
>> +     status = "okay";
>> +};
>> +
>> +&uart1 {
>> +     status = "okay";
>> +};
>> +
>> +&usb0 {
>> +     status = "okay";
>> +     dr_mode = "host";
>> +     usb-phy = <&usb_phy0>;
>> +     pinctrl-names = "default";
>> +     pinctrl-0 = <&pinctrl_usb0_default>;
>> +};
>> +
>> +&pinctrl0 {
>> +     pinctrl_usb0_default: usb0-default {
>> +             mux {
>> +                     groups = "usb0_0_grp";
>> +                     function = "usb0";
>> +             };
>> +
>> +             conf {
>> +                     groups = "usb0_0_grp";
>> +                     slew-rate = <0>;
>> +                     io-standard = <1>;
>> +             };
>> +
>> +             conf-rx {
>> +                     pins = "MIO29", "MIO31", "MIO36";
>> +                     bias-high-impedance;
>> +             };
>> +
>> +             conf-tx {
>> +                     pins = "MIO28", "MIO30", "MIO32", "MIO33", "MIO34",
>> +                            "MIO35", "MIO37", "MIO38", "MIO39";
>> +                     bias-disable;
>> +             };
>> +     };
>> +};
>>
>
> Applied.

Was this missed? I still can't see it in the source tree after quite a while. Any help?

thanks!
-- 
Jagan Teki
Free Software Engineer | www.openedev.com
U-Boot, Linux | Upstream Maintainer
Hyderabad, India.

^ permalink raw reply

* Adding a .platform_init callback to sdhci_arasan_ops
From: Doug Anderson @ 2016-12-05 16:30 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <9d63ea01-a5d1-603d-1ad4-6c6320022de8@laposte.net>

Hi,

On Mon, Dec 5, 2016 at 4:29 AM, Sebastian Frias <sf84@laposte.net> wrote:
> Hi Doug,
>
> On 28/11/16 19:02, Doug Anderson wrote:
>> Hi,
>>
>> On Mon, Nov 28, 2016 at 5:28 AM, Sebastian Frias <sf84@laposte.net> wrote:
>>> +static void sdhci_tango4_platform_init(struct sdhci_host *host)
>>> +{
>>> +       printk("%s\n", __func__);
>>> +
>>> +       /*
>>> +         pad_mode[2:0]=0    must be 0
>>> +         sel_sdio[3]=1      must be 1 for SDIO
>>> +         inv_sdwp_pol[4]=0  if set inverts the SD write protect polarity
>>> +         inv_sdcd_pol[5]=0  if set inverts the SD card present polarity
>>> +       */
>>> +       sdhci_writel(host, 0x00000008, 0x100 + 0x0);
>>
>> If I were doing this, I'd probably actually add these fields to the
>> "sdhci_arasan_soc_ctl_map", then add a 3rd option to
>> sdhci_arasan_syscon_write().  Right now it has 2 modes: "hiword
>> update" and "non-hiword update".  You could add a 3rd indicating that
>> you set config registers by just writing at some offset of the main
>> SDHCI registers space.
>>
>> So you'd add 4 fields:
>>
>> .tango_pad_mode = { .reg = 0x0000, .width = 3, .shift = 0 },
>> .sel_sdio = { .reg = 0x0000, .width = 1, .shift = 3},
>> .inv_sdwp_pol = { .reg = 0x0000, .width = 1, .shift = 4},
>> .inv_sdcd_pol = { .reg = 0x0000, .width = 1, .shift = 5},
>
> Right now I'm using something like:

So taking a very quick gander at
<https://arasan.com/wp-content/media/eMMC-5-1-Total-Solution_Rev-1-3.pdf>
and comparing the "corecfg" things to what you're setting, I find many
matches.  I didn't look very hard, so probably could find matches for
the rest?


> +       val = 0;
> +       val |= PAD_MODE(0); /* must be 0 */
> +       val |= SEL_SDIO;    /* enable SDIO */
> +       sdhci_writel(host, val, 0x100 + 0x0);
> +
> +       val = 0;
> +       val |= TIMEOUT_CLK_UNIT_MHZ;         /* unit: MHz */
> +       val |= TIMEOUT_CLK_FREQ(52);         /* timeout clock: 52MHz */

corecfg_timeoutclkfreq[5:0] ?

> +       val |= BUS_CLK_FREQ_FOR_SD_CLK(200); /* SDIO clock: 200MHz (TODO: get from DT) */
> +       val |= MAX_BLOCK_LENGTH(3);          /* max block size: 4096 bytes */
> +       val |= EXTENDED_MEDIA_BUS_SUPPORTED; /* DT? */
> +       val |= ADMA2_SUPPORTED;              /* DT? */

corecfg_adma2support

> +       val |= HIGH_SPEED_SUPPORTED;         /* DT? */

corecfg_highspeedsupport

> +       val |= SDMA_SUPPORTED;               /* DT? */

corecfg_sdmasupport

> +       val |= SUSPEND_RESUME_SUPPORTED;     /* DT? */

corecfg_suspressupport

> +       val |= VOLTAGE_3_3_V_SUPPORTED;      /* DT? */

corecfg_3p3voltsupport

> +#if 0
> +       val |= VOLTAGE_1_8_V_SUPPORTED;      /* DT? */

corecfg_1p8voltsupport

> +#endif
> +       val |= ASYNCHRONOUS_IRQ_SUPPORTED;   /* DT? */

corecfg_asyncintrsupport

> +       val |= SLOT_TYPE_REMOVABLE;          /* DT? */

corecfg_slottype[1:0]

> +       val |= SDR50_SUPPORTED;              /* DT? */

corecfg_sdr50support

> +       sdhci_writel(host, val, 0x100 + 0x40);
> +
> +       val = 0;
> +       val |= TIMER_COUNT_FOR_RETUNING(1);  /* DT? */

corecfg_retuningtimercnt[3:0]

> +       val |= CLOCK_MULTIPLIER(0);          /* DT? */
> +       val |= SPI_MODE_SUPPORTED;           /* DT? */

corecfg_spisupport

> +       val |= SPI_BLOCK_MODE_SUPPORTED;     /* DT? */

corecfg_spiblkmode

> +       sdhci_writel(host, val, 0x100 + 0x44);
> +
> +       sdhci_writel(host, 0x0004022c, 0x100 + 0x28);
> +       sdhci_writel(host, 0x00000002, 0x100 + 0x2c);
> +
> +       sdhci_writel(host, 0x00600000, 0x100 + 0x50);

AKA: you are setting up various "corecfg" stuff that's documented in
the generic Arasan docs.  Other SDHCI-Arasan implementations might
want to set the same things, but those values may be stored elsewhere
for them.

So if _all_ Arasan implementations need these same values or need the
same logic to figure out these values, then you should do something
that's not chip-specific but something generic.

If you've got a specific weird quirk that's specific to your platform,
then you could do that in a chip-specific init.

Presumably many of the above could just be hardcoded on some
implementations, so they might not be available in a memory-mapped
implementation...


> which seems much easier to handle (and portable).
>
> At any rate, one thing to note from this is that many of these
> bits should probably be initialised based on DT, right?

Probably, or by probing the voltage value of regulators.  Note that I
think DT already gets parsed and sets up caps.  I'm not really an
expert here and I'd let someone who actually knows / maintains SDMMC
comment.  I know for sure that dw_mmc (which I'm way more familiar
with) does things very differently than sdhci (which I'm barely
familiar with).


> For example, the DT has a "non-removable" property, which I think
> should be used to setup SLOT_TYPE_EMBEDDED (if the property is
> present) or SLOT_TYPE_REMOVABLE (if the property is not present)
>
> Looking at Documentation/devicetree/bindings/mmc/mmc.txt we can see
> more related properties:
>
> Optional properties:
> - bus-width: Number of data lines, can be <1>, <4>, or <8>.  The default
>   will be <1> if the property is absent.
> - wp-gpios: Specify GPIOs for write protection, see gpio binding
> - cd-inverted: when present, polarity on the CD line is inverted. See the note
>   below for the case, when a GPIO is used for the CD line
> - wp-inverted: when present, polarity on the WP line is inverted. See the note
>   below for the case, when a GPIO is used for the WP line
> - disable-wp: When set no physical WP line is present. This property should
>   only be specified when the controller has a dedicated write-protect
>   detection logic. If a GPIO is always used for the write-protect detection
>   logic it is sufficient to not specify wp-gpios property in the absence of a WP
>   line.
> - max-frequency: maximum operating clock frequency
> - no-1-8-v: when present, denotes that 1.8v card voltage is not supported on
>   this system, even if the controller claims it is.
> - cap-sd-highspeed: SD high-speed timing is supported
> - cap-mmc-highspeed: MMC high-speed timing is supported
> - sd-uhs-sdr12: SD UHS SDR12 speed is supported
> - sd-uhs-sdr25: SD UHS SDR25 speed is supported
> - sd-uhs-sdr50: SD UHS SDR50 speed is supported
> - sd-uhs-sdr104: SD UHS SDR104 speed is supported
> - sd-uhs-ddr50: SD UHS DDR50 speed is supported
> ...
>
> which makes me wonder, what is the recommended way of dealing with this?
> - Should I use properties on the DT? If so, then I need to add code to set
> up the register properly.
> - Should I hardcode these values use a minimal DT? If so, then I need an
> init function to put all this.
> - Should I hardcode stuff at u-Boot level? If so, nothing is required and
> should work without any modifications to the Arasan Linux driver.
>
> It appears that the Linux driver is expecting most of these fields to be
> hardcoded and "pre-set" before (maybe by the bootloader) it starts, hence
> the lack of any "init" function so far.
>
>>
>> In your platform-specific init you're proposing you could set
>> tango_pad_mode to 0.  That seems tango-specific.
>>
>> You'd want to hook into "set_ios" for setting sel_sdio or not.  That's
>> important if anyone ever wants to plug in an external SDIO card to
>> your slot.  This one good argument for putting this in
>> sdhci_arasan_soc_ctl_map, since you wouldn't want to do a
>> compatibility matching every time set_ios is called.
>
> Thanks for the advice, I will look into that.
>
>>
>> I'd have to look more into the whole SD/WP polarity issue.  There are
>> already so many levels of inversion for these signals in Linux that
>> it's confusing.  It seems like you might just want to hardcode them to
>> "0" and let users use all the existing ways to invert things...  You
>> could either put that hardcoding into your platform init code or (if
>> you're using sdhci_arasan_soc_ctl_map) put it in the main init code so
>> that if anyone else needs to init similar signals then they can take
>> advantage of it.
>
> Yes, I think I will leave them to 0.
>
>>
>> --
>>
>> One random side note is that as currently documented you need to
>> specify a "shift" of -1 for any sdhci_arasan_soc_ctl_map fields you
>> aren't using.  That seems stupid--not sure why I did that.  It seems
>> better to clue off "width = 0" so that you could just freely not init
>> any fields you don't need.
>
> I see.
> So far I'm not really convinced about using "soc_ctl_map" because what I
> have so far is more portable, and can easily be put as is somewhere else
> (i.e.: in different flavors of bootloaders)

Well, most of your parameters are generic corecfg parameters for
Arasan.  Seems like they would fit into the map nicely...

-Doug

^ permalink raw reply

* [PATCH 3/8] rtc: add STM32 RTC driver
From: Mathieu Poirier @ 2016-12-05 16:32 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <15bea9e9-adcc-edb0-1bd1-33d395c72eec@st.com>

On Mon, Dec 05, 2016 at 10:43:14AM +0100, Amelie DELAUNAY wrote:
> Hi Mathieu,
> 
> Thanks for reviewing
> 
> On 12/02/2016 06:56 PM, Mathieu Poirier wrote:
> > On Fri, Dec 02, 2016 at 03:09:56PM +0100, Amelie Delaunay wrote:
> >> This patch adds support for the STM32 RTC.
> >
> > Hello Amelie,
> >
> >>
> >> Signed-off-by: Amelie Delaunay <amelie.delaunay@st.com>
> >> ---
> >>  drivers/rtc/Kconfig     |  10 +
> >>  drivers/rtc/Makefile    |   1 +
> >>  drivers/rtc/rtc-stm32.c | 777 ++++++++++++++++++++++++++++++++++++++++++++++++
> >>  3 files changed, 788 insertions(+)
> >>  create mode 100644 drivers/rtc/rtc-stm32.c
> >>
> >> diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
> >> index e859d14..dd8b218 100644
> >> --- a/drivers/rtc/Kconfig
> >> +++ b/drivers/rtc/Kconfig
> >> @@ -1706,6 +1706,16 @@ config RTC_DRV_PIC32
> >>         This driver can also be built as a module. If so, the module
> >>         will be called rtc-pic32
> >>
> >> +config RTC_DRV_STM32
> >> +    tristate "STM32 On-Chip RTC"
> >> +    depends on ARCH_STM32
> >> +    help
> >> +       If you say yes here you get support for the STM32 On-Chip
> >> +       Real Time Clock.
> >> +
> >> +       This driver can also be built as a module, if so, the module
> >> +       will be called "rtc-stm32".
> >> +
> >>  comment "HID Sensor RTC drivers"
> >>
> >>  config RTC_DRV_HID_SENSOR_TIME
> >> diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
> >> index 1ac694a..87bd9cc 100644
> >> --- a/drivers/rtc/Makefile
> >> +++ b/drivers/rtc/Makefile
> >> @@ -144,6 +144,7 @@ obj-$(CONFIG_RTC_DRV_SNVS)    += rtc-snvs.o
> >>  obj-$(CONFIG_RTC_DRV_SPEAR)    += rtc-spear.o
> >>  obj-$(CONFIG_RTC_DRV_STARFIRE)    += rtc-starfire.o
> >>  obj-$(CONFIG_RTC_DRV_STK17TA8)    += rtc-stk17ta8.o
> >> +obj-$(CONFIG_RTC_DRV_STM32)     += rtc-stm32.o
> >>  obj-$(CONFIG_RTC_DRV_STMP)    += rtc-stmp3xxx.o
> >>  obj-$(CONFIG_RTC_DRV_ST_LPC)    += rtc-st-lpc.o
> >>  obj-$(CONFIG_RTC_DRV_SUN4V)    += rtc-sun4v.o
> >> diff --git a/drivers/rtc/rtc-stm32.c b/drivers/rtc/rtc-stm32.c
> >> new file mode 100644
> >> index 0000000..9e710ff
> >> --- /dev/null
> >> +++ b/drivers/rtc/rtc-stm32.c
> >> @@ -0,0 +1,777 @@
> >> +/*
> >> + * Copyright (C) Amelie Delaunay 2015
> >> + * Author:  Amelie Delaunay <adelaunay.stm32@gmail.com>
> >> + * License terms:  GNU General Public License (GPL), version 2
> >> + */
> >> +
> >> +#include <linux/bcd.h>
> >> +#include <linux/clk.h>
> >> +#include <linux/init.h>
> >> +#include <linux/io.h>
> >> +#include <linux/iopoll.h>
> >> +#include <linux/ioport.h>
> >> +#include <linux/kernel.h>
> >> +#include <linux/mfd/syscon.h>
> >> +#include <linux/module.h>
> >> +#include <linux/of.h>
> >> +#include <linux/of_device.h>
> >> +#include <linux/platform_device.h>
> >> +#include <linux/regmap.h>
> >> +#include <linux/rtc.h>
> >> +#include <linux/spinlock.h>
> >> +
> >> +#define DRIVER_NAME "stm32_rtc"
> >> +
> >> +/* STM32 RTC registers */
> >> +#define STM32_RTC_TR        0x00
> >> +#define STM32_RTC_DR        0x04
> >> +#define STM32_RTC_CR        0x08
> >> +#define STM32_RTC_ISR        0x0C
> >> +#define STM32_RTC_PRER        0x10
> >> +#define STM32_RTC_ALRMAR    0x1C
> >> +#define STM32_RTC_WPR        0x24
> >> +
> >> +/* STM32_RTC_TR bit fields  */
> >> +#define STM32_RTC_TR_SEC_SHIFT        0
> >> +#define STM32_RTC_TR_SEC        GENMASK(6, 0)
> >> +#define STM32_RTC_TR_MIN_SHIFT        8
> >> +#define STM32_RTC_TR_MIN        GENMASK(14, 8)
> >> +#define STM32_RTC_TR_HOUR_SHIFT        16
> >> +#define STM32_RTC_TR_HOUR        GENMASK(21, 16)
> >> +
> >> +/* STM32_RTC_DR bit fields */
> >> +#define STM32_RTC_DR_DATE_SHIFT        0
> >> +#define STM32_RTC_DR_DATE        GENMASK(5, 0)
> >> +#define STM32_RTC_DR_MONTH_SHIFT    8
> >> +#define STM32_RTC_DR_MONTH        GENMASK(11, 8)
> >> +#define STM32_RTC_DR_WDAY_SHIFT        13
> >> +#define STM32_RTC_DR_WDAY        GENMASK(15, 13)
> >> +#define STM32_RTC_DR_YEAR_SHIFT        16
> >> +#define STM32_RTC_DR_YEAR        GENMASK(23, 16)
> >> +
> >> +/* STM32_RTC_CR bit fields */
> >> +#define STM32_RTC_CR_FMT        BIT(6)
> >> +#define STM32_RTC_CR_ALRAE        BIT(8)
> >> +#define STM32_RTC_CR_ALRAIE        BIT(12)
> >> +
> >> +/* STM32_RTC_ISR bit fields */
> >> +#define STM32_RTC_ISR_ALRAWF        BIT(0)
> >> +#define STM32_RTC_ISR_INITS        BIT(4)
> >> +#define STM32_RTC_ISR_RSF        BIT(5)
> >> +#define STM32_RTC_ISR_INITF        BIT(6)
> >> +#define STM32_RTC_ISR_INIT        BIT(7)
> >> +#define STM32_RTC_ISR_ALRAF        BIT(8)
> >> +
> >> +/* STM32_RTC_PRER bit fields */
> >> +#define STM32_RTC_PRER_PRED_S_SHIFT    0
> >> +#define STM32_RTC_PRER_PRED_S        GENMASK(14, 0)
> >> +#define STM32_RTC_PRER_PRED_A_SHIFT    16
> >> +#define STM32_RTC_PRER_PRED_A        GENMASK(22, 16)
> >> +
> >> +/* STM32_RTC_ALRMAR and STM32_RTC_ALRMBR bit fields */
> >> +#define STM32_RTC_ALRMXR_SEC_SHIFT    0
> >> +#define STM32_RTC_ALRMXR_SEC        GENMASK(6, 0)
> >> +#define STM32_RTC_ALRMXR_SEC_MASK    BIT(7)
> >> +#define STM32_RTC_ALRMXR_MIN_SHIFT    8
> >> +#define STM32_RTC_ALRMXR_MIN        GENMASK(14, 8)
> >> +#define STM32_RTC_ALRMXR_MIN_MASK    BIT(15)
> >> +#define STM32_RTC_ALRMXR_HOUR_SHIFT    16
> >> +#define STM32_RTC_ALRMXR_HOUR        GENMASK(21, 16)
> >> +#define STM32_RTC_ALRMXR_PM        BIT(22)
> >> +#define STM32_RTC_ALRMXR_HOUR_MASK    BIT(23)
> >> +#define STM32_RTC_ALRMXR_DATE_SHIFT    24
> >> +#define STM32_RTC_ALRMXR_DATE        GENMASK(29, 24)
> >> +#define STM32_RTC_ALRMXR_WDSEL        BIT(30)
> >> +#define STM32_RTC_ALRMXR_WDAY_SHIFT    24
> >> +#define STM32_RTC_ALRMXR_WDAY        GENMASK(27, 24)
> >> +#define STM32_RTC_ALRMXR_DATE_MASK    BIT(31)
> >> +
> >> +/* STM32_RTC_WPR key constants */
> >> +#define RTC_WPR_1ST_KEY            0xCA
> >> +#define RTC_WPR_2ND_KEY            0x53
> >> +#define RTC_WPR_WRONG_KEY        0xFF
> >> +
> >> +/*
> >> + * RTC registers are protected agains parasitic write access.
> >> + * PWR_CR_DBP bit must be set to enable write access to RTC registers.
> >> + */
> >> +/* STM32_PWR_CR */
> >> +#define PWR_CR                0x00
> >> +/* STM32_PWR_CR bit field */
> >> +#define PWR_CR_DBP            BIT(8)
> >> +
> >> +static struct regmap *dbp;
> >> +
> >> +struct stm32_rtc {
> >> +    struct rtc_device *rtc_dev;
> >> +    void __iomem *base;
> >> +    struct clk *pclk;
> >> +    struct clk *ck_rtc;
> >> +    unsigned int clksrc;
> >> +    spinlock_t lock; /* Protects registers accesses */
> >> +    int irq_alarm;
> >> +    struct regmap *pwrcr;
> >> +};
> >> +
> >> +static inline unsigned int stm32_rtc_readl(struct stm32_rtc *rtc,
> >> +                       unsigned int offset)
> >> +{
> >> +    return readl_relaxed(rtc->base + offset);
> >> +}
> >> +
> >> +static inline void stm32_rtc_writel(struct stm32_rtc *rtc,
> >> +                    unsigned int offset, unsigned int value)
> >> +{
> >> +    writel_relaxed(value, rtc->base + offset);
> >> +}
> >
> > I'm not sure wrapping the readl/writel_relaxed function does anything
> special
> > other than simply redirecting the reader to another section of the code.
> During development phase, it is useful to add debug traces but you're right,
> this can be remove.
> >
> >> +
> >> +static void stm32_rtc_wpr_unlock(struct stm32_rtc *rtc)
> >> +{
> >> +//    if (dbp)
> >> +//        regmap_update_bits(dbp, PWR_CR, PWR_CR_DBP, PWR_CR_DBP);
> >
> > Did checkpatch let you get away with this?  What did you intend to do
> here?
> Hum, as surprising as it may seem, checkpatch didn't complained about these
> comments! But anyway, this has to be removed, it was a tentative to
> enable/disable backup domain write protection any time we have to write in a
> protected RTC register, but it is not functionnal. I have commented this
> just to keep it in mind and forget to remove it before sending.
> >
> >> +
> >> +    stm32_rtc_writel(rtc, STM32_RTC_WPR, RTC_WPR_1ST_KEY);
> >> +    stm32_rtc_writel(rtc, STM32_RTC_WPR, RTC_WPR_2ND_KEY);
> >> +}
> >> +
> >> +static void stm32_rtc_wpr_lock(struct stm32_rtc *rtc)
> >> +{
> >> +    stm32_rtc_writel(rtc, STM32_RTC_WPR, RTC_WPR_WRONG_KEY);
> >> +
> >> +//    if (dbp)
> >> +//        regmap_update_bits(dbp, PWR_CR, PWR_CR_DBP, ~PWR_CR_DBP);
> >> +}
> >> +
> >> +static int stm32_rtc_enter_init_mode(struct stm32_rtc *rtc)
> >> +{
> >> +    unsigned int isr = stm32_rtc_readl(rtc, STM32_RTC_ISR);
> >> +
> >> +    if (!(isr & STM32_RTC_ISR_INITF)) {
> >> +        isr |= STM32_RTC_ISR_INIT;
> >> +        stm32_rtc_writel(rtc, STM32_RTC_ISR, isr);
> >> +
> >> +        return readl_relaxed_poll_timeout_atomic(
> >> +                    rtc->base + STM32_RTC_ISR,
> >> +                    isr, (isr & STM32_RTC_ISR_INITF),
> >> +                    10, 100000);
> >
> > When using hard coded numerics please add comments that explains the
> reason
> > behind the selected values.
> Sure. It takes around 2 RTCCLK clock cycles to enter in initialization phase
> mode. So it depends on the frequency of the ck_rtc parent clock.
> Either I keep parent clock frequency and compute the exact timeout, or I use
> the "best and worst cases": slowest RTCCLK frequency is 32kHz, so it can
> take up to 62us, highest RTCCLK frequency should be 1MHz, so it can take
> only 2us. Polling every 10us with a timeout of 100ms seemed reasonable and
> a good compromise.

I think this is a reasonable approach - please add that explanation as a comment
in the code.

> >
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static void stm32_rtc_exit_init_mode(struct stm32_rtc *rtc)
> >> +{
> >> +    unsigned int isr = stm32_rtc_readl(rtc, STM32_RTC_ISR);
> >> +
> >> +    isr &= ~STM32_RTC_ISR_INIT;
> >> +    stm32_rtc_writel(rtc, STM32_RTC_ISR, isr);
> >> +}
> >> +
> >> +static int stm32_rtc_wait_sync(struct stm32_rtc *rtc)
> >> +{
> >> +    unsigned int isr;
> >> +
> >> +    isr = stm32_rtc_readl(rtc, STM32_RTC_ISR);
> >> +
> >> +    isr &= ~STM32_RTC_ISR_RSF;
> >> +    stm32_rtc_writel(rtc, STM32_RTC_ISR, isr);
> >> +
> >> +    /* Wait the registers to be synchronised */
> >> +    return readl_relaxed_poll_timeout_atomic(rtc->base + STM32_RTC_ISR,
> >> +                         isr,
> >> +                         (isr & STM32_RTC_ISR_RSF),
> >> +                         10, 100000);
> >
> > Shouldn't the break condition be !((isr & STM32_RTC_ISR_RSF) ?  If not
> this
> > probably deserve a better comment.
> RSF bit is set by hardware each time the calendar registers are synchronized
> (it takes up to 2 RTCCLK). So the break condition is correct: we poll until
> RSF flag is set or timeout is reached.
> >
> >> +}
> >> +
> >> +static irqreturn_t stm32_rtc_alarm_irq(int irq, void *dev_id)
> >> +{
> >> +    struct stm32_rtc *rtc = (struct stm32_rtc *)dev_id;
> >> +    unsigned long irqflags, events = 0;
> >> +    unsigned int isr, cr;
> >> +
> >> +    spin_lock_irqsave(&rtc->lock, irqflags);
> >> +
> >> +    isr = stm32_rtc_readl(rtc, STM32_RTC_ISR);
> >> +    cr = stm32_rtc_readl(rtc, STM32_RTC_CR);
> >> +
> >> +    if ((isr & STM32_RTC_ISR_ALRAF) &&
> >> +        (cr & STM32_RTC_CR_ALRAIE)) {
> >> +        /* Alarm A flag - Alarm interrupt */
> >> +        events |= RTC_IRQF | RTC_AF;
> >> +        isr &= ~STM32_RTC_ISR_ALRAF;
> >> +    }
> >> +
> >> +    /* Clear event irqflags, otherwise new events won't be received */
> >> +    stm32_rtc_writel(rtc, STM32_RTC_ISR, isr);
> >> +
> >> +    spin_unlock_irqrestore(&rtc->lock, irqflags);
> >> +
> >> +    if (events) {
> >> +        dev_info(&rtc->rtc_dev->dev, "Alarm occurred\n");
> >> +
> >> +        /* Pass event to the kernel */
> >> +        rtc_update_irq(rtc->rtc_dev, 1, events);
> >> +        return IRQ_HANDLED;
> >> +    } else {
> >> +        return IRQ_NONE;
> >> +    }
> >> +}
> >> +
> >> +/* Convert rtc_time structure from bin to bcd format */
> >> +static void tm2bcd(struct rtc_time *tm)
> >> +{
> >> +    tm->tm_sec = bin2bcd(tm->tm_sec);
> >> +    tm->tm_min = bin2bcd(tm->tm_min);
> >> +    tm->tm_hour = bin2bcd(tm->tm_hour);
> >> +
> >> +    tm->tm_mday = bin2bcd(tm->tm_mday);
> >> +    tm->tm_mon = bin2bcd(tm->tm_mon + 1);
> >> +    tm->tm_year = bin2bcd(tm->tm_year - 100);
> >> +    /*
> >> +     * Number of days since Sunday
> >> +     * - on kernel side, 0=Sunday...6=Saturday
> >> +     * - on rtc side, 0=invalid,1=Monday...7=Sunday
> >> +     */
> >> +    tm->tm_wday = (!tm->tm_wday) ? 7 : tm->tm_wday;
> >> +}
> >> +
> >> +/* Convert rtc_time structure from bcd to bin format */
> >> +static void bcd2tm(struct rtc_time *tm)
> >> +{
> >> +    tm->tm_sec = bcd2bin(tm->tm_sec);
> >> +    tm->tm_min = bcd2bin(tm->tm_min);
> >> +    tm->tm_hour = bcd2bin(tm->tm_hour);
> >> +
> >> +    tm->tm_mday = bcd2bin(tm->tm_mday);
> >> +    tm->tm_mon = bcd2bin(tm->tm_mon) - 1;
> >> +    tm->tm_year = bcd2bin(tm->tm_year) + 100;
> >> +    /*
> >> +     * Number of days since Sunday
> >> +     * - on kernel side, 0=Sunday...6=Saturday
> >> +     * - on rtc side, 0=invalid,1=Monday...7=Sunday
> >> +     */
> >> +    tm->tm_wday %= 7;
> >> +}
> >> +
> >> +static int stm32_rtc_read_time(struct device *dev, struct rtc_time *tm)
> >> +{
> >> +    struct stm32_rtc *rtc = dev_get_drvdata(dev);
> >> +    unsigned int tr, dr;
> >> +    unsigned long irqflags;
> >> +
> >> +    spin_lock_irqsave(&rtc->lock, irqflags);
> >> +
> >> +    /* Time and Date in BCD format */
> >> +    tr = stm32_rtc_readl(rtc, STM32_RTC_TR);
> >> +    dr = stm32_rtc_readl(rtc, STM32_RTC_DR);
> >> +
> >> +    spin_unlock_irqrestore(&rtc->lock, irqflags);
> >> +
> >> +    tm->tm_sec = (tr & STM32_RTC_TR_SEC) >> STM32_RTC_TR_SEC_SHIFT;
> >> +    tm->tm_min = (tr & STM32_RTC_TR_MIN) >> STM32_RTC_TR_MIN_SHIFT;
> >> +    tm->tm_hour = (tr & STM32_RTC_TR_HOUR) >> STM32_RTC_TR_HOUR_SHIFT;
> >> +
> >> +    tm->tm_mday = (dr & STM32_RTC_DR_DATE) >> STM32_RTC_DR_DATE_SHIFT;
> >> +    tm->tm_mon = (dr & STM32_RTC_DR_MONTH) >> STM32_RTC_DR_MONTH_SHIFT;
> >> +    tm->tm_year = (dr & STM32_RTC_DR_YEAR) >> STM32_RTC_DR_YEAR_SHIFT;
> >> +    tm->tm_wday = (dr & STM32_RTC_DR_WDAY) >> STM32_RTC_DR_WDAY_SHIFT;
> >> +
> >> +    /* We don't report tm_yday and tm_isdst */
> >> +
> >> +    bcd2tm(tm);
> >> +
> >> +    if (rtc_valid_tm(tm) < 0) {
> >> +        dev_err(dev, "%s: rtc_time is not valid.\n", __func__);
> >> +        return -EINVAL;
> >> +    }
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static int stm32_rtc_set_time(struct device *dev, struct rtc_time *tm)
> >> +{
> >> +    struct stm32_rtc *rtc = dev_get_drvdata(dev);
> >> +    unsigned int tr, dr;
> >> +    unsigned long irqflags;
> >> +    int ret = 0;
> >> +
> >> +    if (rtc_valid_tm(tm) < 0) {
> >> +        dev_err(dev, "%s: rtc_time is not valid.\n", __func__);
> >> +        return -EINVAL;
> >> +    }
> >> +
> >> +    tm2bcd(tm);
> >> +
> >> +    /* Time in BCD format */
> >> +    tr = ((tm->tm_sec << STM32_RTC_TR_SEC_SHIFT) & STM32_RTC_TR_SEC) |
> >> +         ((tm->tm_min << STM32_RTC_TR_MIN_SHIFT) & STM32_RTC_TR_MIN) |
> >> +         ((tm->tm_hour << STM32_RTC_TR_HOUR_SHIFT) & STM32_RTC_TR_HOUR);
> >> +
> >> +    /* Date in BCD format */
> >> +    dr = ((tm->tm_mday << STM32_RTC_DR_DATE_SHIFT) & STM32_RTC_DR_DATE)
> |
> >> +         ((tm->tm_mon << STM32_RTC_DR_MONTH_SHIFT) & STM32_RTC_DR_MONTH)
> |
> >> +         ((tm->tm_year << STM32_RTC_DR_YEAR_SHIFT) & STM32_RTC_DR_YEAR)
> |
> >> +         ((tm->tm_wday << STM32_RTC_DR_WDAY_SHIFT) & STM32_RTC_DR_WDAY);
> >> +
> >> +    spin_lock_irqsave(&rtc->lock, irqflags);
> >> +
> >> +    stm32_rtc_wpr_unlock(rtc);
> >> +
> >> +    ret = stm32_rtc_enter_init_mode(rtc);
> >> +    if (ret) {
> >> +        dev_err(dev, "Can't enter in init mode. Set time aborted.\n");
> >> +        goto end;
> >> +    }
> >> +
> >> +    stm32_rtc_writel(rtc, STM32_RTC_TR, tr);
> >> +    stm32_rtc_writel(rtc, STM32_RTC_DR, dr);
> >> +
> >> +    stm32_rtc_exit_init_mode(rtc);
> >> +
> >> +    ret = stm32_rtc_wait_sync(rtc);
> >> +end:
> >> +    stm32_rtc_wpr_lock(rtc);
> >> +
> >> +    spin_unlock_irqrestore(&rtc->lock, irqflags);
> >> +
> >> +    return ret;
> >> +}
> >> +
> >> +static int stm32_rtc_read_alarm(struct device *dev, struct rtc_wkalrm
> *alrm)
> >> +{
> >> +    struct stm32_rtc *rtc = dev_get_drvdata(dev);
> >> +    struct rtc_time *tm = &alrm->time;
> >> +    unsigned int alrmar, cr, isr;
> >> +    unsigned long irqflags;
> >> +
> >> +    spin_lock_irqsave(&rtc->lock, irqflags);
> >> +
> >> +    alrmar = stm32_rtc_readl(rtc, STM32_RTC_ALRMAR);
> >> +    cr = stm32_rtc_readl(rtc, STM32_RTC_CR);
> >> +    isr = stm32_rtc_readl(rtc, STM32_RTC_ISR);
> >> +
> >> +    spin_unlock_irqrestore(&rtc->lock, irqflags);
> >> +
> >> +    if (alrmar & STM32_RTC_ALRMXR_DATE_MASK) {
> >> +        /*
> >> +         * Date/day don't care in Alarm comparison so alarm triggers
> >> +         * every day
> >> +         */
> >> +        tm->tm_mday = -1;
> >> +        tm->tm_wday = -1;
> >> +    } else {
> >> +        if (alrmar & STM32_RTC_ALRMXR_WDSEL) {
> >> +            /* Alarm is set to a day of week */
> >> +            tm->tm_mday = -1;
> >> +            tm->tm_wday = (alrmar & STM32_RTC_ALRMXR_WDAY) >>
> >> +                      STM32_RTC_ALRMXR_WDAY_SHIFT;
> >> +            tm->tm_wday %= 7;
> >> +        } else {
> >> +            /* Alarm is set to a day of month */
> >> +            tm->tm_wday = -1;
> >> +            tm->tm_mday = (alrmar & STM32_RTC_ALRMXR_DATE) >>
> >> +                       STM32_RTC_ALRMXR_DATE_SHIFT;
> >> +        }
> >> +    }
> >> +
> >> +    if (alrmar & STM32_RTC_ALRMXR_HOUR_MASK) {
> >> +        /* Hours don't care in Alarm comparison */
> >> +        tm->tm_hour = -1;
> >> +    } else {
> >> +        tm->tm_hour = (alrmar & STM32_RTC_ALRMXR_HOUR) >>
> >> +                   STM32_RTC_ALRMXR_HOUR_SHIFT;
> >> +        if (alrmar & STM32_RTC_ALRMXR_PM)
> >> +            tm->tm_hour += 12;
> >> +    }
> >> +
> >> +    if (alrmar & STM32_RTC_ALRMXR_MIN_MASK) {
> >> +        /* Minutes don't care in Alarm comparison */
> >> +        tm->tm_min = -1;
> >> +    } else {
> >> +        tm->tm_min = (alrmar & STM32_RTC_ALRMXR_MIN) >>
> >> +                  STM32_RTC_ALRMXR_MIN_SHIFT;
> >> +    }
> >> +
> >> +    if (alrmar & STM32_RTC_ALRMXR_SEC_MASK) {
> >> +        /* Seconds don't care in Alarm comparison */
> >> +        tm->tm_sec = -1;
> >> +    } else {
> >> +        tm->tm_sec = (alrmar & STM32_RTC_ALRMXR_SEC) >>
> >> +                  STM32_RTC_ALRMXR_SEC_SHIFT;
> >> +    }
> >> +
> >> +    bcd2tm(tm);
> >> +
> >> +    alrm->enabled = (cr & STM32_RTC_CR_ALRAE) ? 1 : 0;
> >> +    alrm->pending = (isr & STM32_RTC_ISR_ALRAF) ? 1 : 0;
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static int stm32_rtc_alarm_irq_enable(struct device *dev, unsigned int
> enabled)
> >> +{
> >> +    struct stm32_rtc *rtc = dev_get_drvdata(dev);
> >> +    unsigned long irqflags;
> >> +    unsigned int isr, cr;
> >> +
> >> +    cr = stm32_rtc_readl(rtc, STM32_RTC_CR);
> >
> > Is the STM32_RTC_CR garanteed to be valid, i.e updated atomically? If not
> this
> > should probably be below the spinlock.
> You're right.
> >
> >> +
> >> +    spin_lock_irqsave(&rtc->lock, irqflags);
> >> +
> >> +    stm32_rtc_wpr_unlock(rtc);
> >> +
> >> +    /* We expose Alarm A to the kernel */
> >> +    if (enabled)
> >> +        cr |= (STM32_RTC_CR_ALRAIE | STM32_RTC_CR_ALRAE);
> >> +    else
> >> +        cr &= ~(STM32_RTC_CR_ALRAIE | STM32_RTC_CR_ALRAE);
> >> +    stm32_rtc_writel(rtc, STM32_RTC_CR, cr);
> >> +
> >> +    /* Clear event irqflags, otherwise new events won't be received */
> >> +    isr = stm32_rtc_readl(rtc, STM32_RTC_ISR);
> >> +    isr &= ~STM32_RTC_ISR_ALRAF;
> >> +    stm32_rtc_writel(rtc, STM32_RTC_ISR, isr);
> >> +
> >> +    stm32_rtc_wpr_lock(rtc);
> >> +
> >> +    spin_unlock_irqrestore(&rtc->lock, irqflags);
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static int stm32_rtc_set_alarm(struct device *dev, struct rtc_wkalrm
> *alrm)
> >> +{
> >> +    struct stm32_rtc *rtc = dev_get_drvdata(dev);
> >> +    struct rtc_time *tm = &alrm->time;
> >> +    unsigned long irqflags;
> >> +    unsigned int cr, isr, alrmar;
> >> +    int ret = 0;
> >> +
> >> +    if (rtc_valid_tm(tm)) {
> >> +        dev_err(dev, "Alarm time not valid.\n");
> >> +        return -EINVAL;
> >> +    }
> >> +
> >> +    tm2bcd(tm);
> >> +
> >> +    spin_lock_irqsave(&rtc->lock, irqflags);
> >> +
> >> +    stm32_rtc_wpr_unlock(rtc);
> >> +
> >> +    /* Disable Alarm */
> >> +    cr = stm32_rtc_readl(rtc, STM32_RTC_CR);
> >> +    cr &= ~STM32_RTC_CR_ALRAE;
> >> +    stm32_rtc_writel(rtc, STM32_RTC_CR, cr);
> >> +
> >> +    /* Poll Alarm write flag to be sure that Alarm update is allowed */
> >> +    ret = readl_relaxed_poll_timeout_atomic(rtc->base + STM32_RTC_ISR,
> >> +                        isr,
> >> +                        (isr & STM32_RTC_ISR_ALRAWF),
> >> +                        10, 100);
> >> +
> >> +    if (ret) {
> >> +        dev_err(dev, "Alarm update not allowed\n");
> >> +        goto end;
> >> +    }
> >> +
> >> +    alrmar = 0;
> >> +
> >> +    if (tm->tm_mday < 0 && tm->tm_wday < 0) {
> >> +        /*
> >> +         * Date/day don't care in Alarm comparison so alarm triggers
> >> +         * every day
> >> +         */
> >> +        alrmar |= STM32_RTC_ALRMXR_DATE_MASK;
> >> +    } else {
> >> +        if (tm->tm_mday > 0) {
> >> +            /* Date is selected (ignoring wday) */
> >> +            alrmar |= (tm->tm_mday << STM32_RTC_ALRMXR_DATE_SHIFT) &
> >> +                  STM32_RTC_ALRMXR_DATE;
> >> +        } else {
> >> +            /* Day of week is selected */
> >> +            int wday = (tm->tm_wday == 0) ? 7 : tm->tm_wday;
> >> +
> >> +            alrmar |= STM32_RTC_ALRMXR_WDSEL;
> >> +            alrmar |= (wday << STM32_RTC_ALRMXR_WDAY_SHIFT) &
> >> +                  STM32_RTC_ALRMXR_WDAY;
> >> +        }
> >> +    }
> >> +
> >> +    if (tm->tm_hour < 0) {
> >> +        /* Hours don't care in Alarm comparison */
> >> +        alrmar |= STM32_RTC_ALRMXR_HOUR_MASK;
> >> +    } else {
> >> +        /* 24-hour format */
> >> +        alrmar &= ~STM32_RTC_ALRMXR_PM;
> >> +        alrmar |= (tm->tm_hour << STM32_RTC_ALRMXR_HOUR_SHIFT) &
> >> +              STM32_RTC_ALRMXR_HOUR;
> >> +    }
> >> +
> >> +    if (tm->tm_min < 0) {
> >> +        /* Minutes don't care in Alarm comparison */
> >> +        alrmar |= STM32_RTC_ALRMXR_MIN_MASK;
> >> +    } else {
> >> +        alrmar |= (tm->tm_min << STM32_RTC_ALRMXR_MIN_SHIFT) &
> >> +              STM32_RTC_ALRMXR_MIN;
> >> +    }
> >> +
> >> +    if (tm->tm_sec < 0) {
> >> +        /* Seconds don't care in Alarm comparison */
> >> +        alrmar |= STM32_RTC_ALRMXR_SEC_MASK;
> >> +    } else {
> >> +        alrmar |= (tm->tm_sec << STM32_RTC_ALRMXR_SEC_SHIFT) &
> >> +              STM32_RTC_ALRMXR_SEC;
> >> +    }
> >> +
> >> +    /* Write to Alarm register */
> >> +    stm32_rtc_writel(rtc, STM32_RTC_ALRMAR, alrmar);
> >> +
> >> +    if (alrm->enabled)
> >> +        stm32_rtc_alarm_irq_enable(dev, 1);
> >> +    else
> >> +        stm32_rtc_alarm_irq_enable(dev, 0);
> >> +
> >> +end:
> >> +    stm32_rtc_wpr_lock(rtc);
> >> +
> >> +    spin_unlock_irqrestore(&rtc->lock, irqflags);
> >> +
> >> +    return ret;
> >> +}
> >> +
> >> +static const struct rtc_class_ops stm32_rtc_ops = {
> >> +    .read_time    = stm32_rtc_read_time,
> >> +    .set_time    = stm32_rtc_set_time,
> >> +    .read_alarm    = stm32_rtc_read_alarm,
> >> +    .set_alarm    = stm32_rtc_set_alarm,
> >> +    .alarm_irq_enable = stm32_rtc_alarm_irq_enable,
> >> +};
> >> +
> >> +#ifdef CONFIG_OF
> >> +static const struct of_device_id stm32_rtc_of_match[] = {
> >> +    { .compatible = "st,stm32-rtc" },
> >> +    {}
> >> +};
> >> +MODULE_DEVICE_TABLE(of, stm32_rtc_of_match);
> >> +#endif
> >> +
> >> +static int stm32_rtc_init(struct platform_device *pdev,
> >> +              struct stm32_rtc *rtc)
> >> +{
> >> +    unsigned int prer, pred_a, pred_s, pred_a_max, pred_s_max, cr;
> >> +    unsigned int rate;
> >> +    unsigned long irqflags;
> >> +    int ret = 0;
> >> +
> >> +    rate = clk_get_rate(rtc->ck_rtc);
> >> +
> >> +    /* Find prediv_a and prediv_s to obtain the 1Hz calendar clock */
> >> +    pred_a_max = STM32_RTC_PRER_PRED_A >> STM32_RTC_PRER_PRED_A_SHIFT;
> >> +    pred_s_max = STM32_RTC_PRER_PRED_S >> STM32_RTC_PRER_PRED_S_SHIFT;
> >> +
> >> +    for (pred_a = pred_a_max; pred_a >= 0; pred_a--) {
> >> +        pred_s = (rate / (pred_a + 1)) - 1;
> >> +
> >> +        if (((pred_s + 1) * (pred_a + 1)) == rate)
> >> +            break;
> >> +    }
> >> +
> >> +    /*
> >> +     * Can't find a 1Hz, so give priority to RTC power consumption
> >> +     * by choosing the higher possible value for prediv_a
> >> +     */
> >> +    if ((pred_s > pred_s_max) || (pred_a > pred_a_max)) {
> >> +        pred_a = pred_a_max;
> >> +        pred_s = (rate / (pred_a + 1)) - 1;
> >> +
> >> +        dev_warn(&pdev->dev, "ck_rtc is %s\n",
> >> +             (rate - ((pred_a + 1) * (pred_s + 1)) < 0) ?
> >> +             "fast" : "slow");
> >> +    }
> >> +
> >> +    spin_lock_irqsave(&rtc->lock, irqflags);
> >> +
> >> +    stm32_rtc_wpr_unlock(rtc);
> >> +
> >> +    ret = stm32_rtc_enter_init_mode(rtc);
> >> +    if (ret) {
> >> +        dev_err(&pdev->dev,
> >> +            "Can't enter in init mode. Prescaler config failed.\n");
> >> +        goto end;
> >> +    }
> >> +
> >> +    prer = (pred_s << STM32_RTC_PRER_PRED_S_SHIFT) &
> STM32_RTC_PRER_PRED_S;
> >> +    stm32_rtc_writel(rtc, STM32_RTC_PRER, prer);
> >> +    prer |= (pred_a << STM32_RTC_PRER_PRED_A_SHIFT) &
> STM32_RTC_PRER_PRED_A;
> >> +    stm32_rtc_writel(rtc, STM32_RTC_PRER, prer);
> >> +
> >> +    /* Force 24h time format */
> >> +    cr = stm32_rtc_readl(rtc, STM32_RTC_CR);
> >> +    cr &= ~STM32_RTC_CR_FMT;
> >> +    stm32_rtc_writel(rtc, STM32_RTC_CR, cr);
> >> +
> >> +    stm32_rtc_exit_init_mode(rtc);
> >> +
> >> +    ret = stm32_rtc_wait_sync(rtc);
> >> +
> >> +    if (stm32_rtc_readl(rtc, STM32_RTC_ISR) & STM32_RTC_ISR_INITS)
> >> +        dev_warn(&pdev->dev, "Date/Time must be initialized\n");
> >> +end:
> >> +    stm32_rtc_wpr_lock(rtc);
> >> +
> >> +    spin_unlock_irqrestore(&rtc->lock, irqflags);
> >> +
> >> +    return ret;
> >> +}
> >> +
> >> +static int stm32_rtc_probe(struct platform_device *pdev)
> >> +{
> >> +    struct stm32_rtc *rtc;
> >> +    struct resource *res;
> >> +    int ret;
> >> +
> >> +    rtc = devm_kzalloc(&pdev->dev, sizeof(*rtc), GFP_KERNEL);
> >> +    if (!rtc)
> >> +        return -ENOMEM;
> >> +
> >> +    res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
> >
> > The value of 'res' should be checked before using it.
> res is checked in devm_ioremap_resource just below :
>     if (!res || resource_type(res) != IORESOURCE_MEM) {
>         dev_err(dev, "invalid resource\n");
>         return IOMEM_ERR_PTR(-EINVAL);
>     }
> That's why it is not checked here.
> >
> >> +    rtc->base = devm_ioremap_resource(&pdev->dev, res);
> >> +    if (IS_ERR(rtc->base))
> >> +        return PTR_ERR(rtc->base);
> >> +
> >> +    dbp = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
> "st,syscfg");
> >> +    if (IS_ERR(dbp)) {
> >> +        dev_err(&pdev->dev, "no st,syscfg\n");
> >> +        return PTR_ERR(dbp);
> >> +    }
> >> +
> >> +    spin_lock_init(&rtc->lock);
> >> +
> >> +    rtc->ck_rtc = devm_clk_get(&pdev->dev, "ck_rtc");
> >> +    if (IS_ERR(rtc->ck_rtc)) {
> >> +        dev_err(&pdev->dev, "no ck_rtc clock");
> >> +        return PTR_ERR(rtc->ck_rtc);
> >> +    }
> >> +
> >> +    ret = clk_prepare_enable(rtc->ck_rtc);
> >> +    if (ret)
> >> +        return ret;
> >> +
> >> +    if (dbp)
> >> +        regmap_update_bits(dbp, PWR_CR, PWR_CR_DBP, PWR_CR_DBP);
> >
> > The code above exits if there is a problem with the dbp, there is no point
> in
> > checking again.
> You're right.
> >
> >> +
> >> +    ret = stm32_rtc_init(pdev, rtc);
> >> +    if (ret)
> >> +        goto err;
> >> +
> >> +    rtc->irq_alarm = platform_get_irq_byname(pdev, "alarm");
> >> +    if (rtc->irq_alarm <= 0) {
> >> +        dev_err(&pdev->dev, "no alarm irq\n");
> >> +        ret = -ENOENT;
> >> +        goto err;
> >> +    }
> >> +
> >> +    platform_set_drvdata(pdev, rtc);
> >> +
> >> +    device_init_wakeup(&pdev->dev, true);
> >
> > What happens if device_init_wakeup() returns an error?
> It means that RTC won't be able to wake up the board with RTC alarm. I can
> add a warning for the user in this case ?

Not really sure - it really depends on the kind of system that will use this.
For some, not being able to wake up the board might be a minor problem, while
for others it is a reason to fail the probing.

Do we need a new binding for this, i.e. one that would indicate this RTC can (and
should) be able to wake up the board and fail driver probing if this can't be
done?

I'll let Alessandro and Alexander be the judge of that.

Thanks,
Mathieu

> >
> >> +
> >> +    rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, pdev->name,
> >> +            &stm32_rtc_ops, THIS_MODULE);
> >> +    if (IS_ERR(rtc->rtc_dev)) {
> >> +        ret = PTR_ERR(rtc->rtc_dev);
> >> +        dev_err(&pdev->dev, "rtc device registration failed, err=%d\n",
> >> +            ret);
> >> +        goto err;
> >> +    }
> >> +
> >> +    /* Handle RTC alarm interrupts */
> >> +    ret = devm_request_irq(&pdev->dev, rtc->irq_alarm,
> >> +                   stm32_rtc_alarm_irq, IRQF_TRIGGER_RISING,
> >> +                   dev_name(&rtc->rtc_dev->dev), rtc);
> >> +    if (ret) {
> >> +        dev_err(&pdev->dev, "IRQ%d (alarm interrupt) already claimed\n",
> >> +            rtc->irq_alarm);
> >> +        goto err;
> >> +    }
> >> +
> >> +    return 0;
> >> +err:
> >> +    clk_disable_unprepare(rtc->ck_rtc);
> >> +
> >> +    if (dbp)
> >> +        regmap_update_bits(dbp, PWR_CR, PWR_CR_DBP, ~PWR_CR_DBP);
> >
> > Same comment as above.
> OK.
> >
> >> +
> >> +    device_init_wakeup(&pdev->dev, false);
> >> +
> >> +    return ret;
> >> +}
> >> +
> >> +static int __exit stm32_rtc_remove(struct platform_device *pdev)
> >> +{
> >> +    struct stm32_rtc *rtc = platform_get_drvdata(pdev);
> >> +    unsigned int cr;
> >> +
> >> +    /* Disable interrupts */
> >> +    stm32_rtc_wpr_unlock(rtc);
> >> +    cr = stm32_rtc_readl(rtc, STM32_RTC_CR);
> >> +    cr &= ~STM32_RTC_CR_ALRAIE;
> >> +    stm32_rtc_writel(rtc, STM32_RTC_CR, cr);
> >> +    stm32_rtc_wpr_lock(rtc);
> >> +
> >> +    clk_disable_unprepare(rtc->ck_rtc);
> >> +
> >> +    /* Enable backup domain write protection */
> >> +    if (dbp)
> >> +        regmap_update_bits(dbp, PWR_CR, PWR_CR_DBP, ~PWR_CR_DBP);
> >> +
> >> +    device_init_wakeup(&pdev->dev, false);
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +#ifdef CONFIG_PM_SLEEP
> >> +static int stm32_rtc_suspend(struct device *dev)
> >> +{
> >> +    struct stm32_rtc *rtc = dev_get_drvdata(dev);
> >> +
> >> +    if (device_may_wakeup(dev))
> >> +        return enable_irq_wake(rtc->irq_alarm);
> >> +
> >> +    return 0;
> >> +}
> >> +
> >> +static int stm32_rtc_resume(struct device *dev)
> >> +{
> >> +    struct stm32_rtc *rtc = dev_get_drvdata(dev);
> >> +    int ret = 0;
> >> +
> >> +    ret = stm32_rtc_wait_sync(rtc);
> >> +    if (ret < 0)
> >> +        return ret;
> >> +
> >> +    if (device_may_wakeup(dev))
> >> +        return disable_irq_wake(rtc->irq_alarm);
> >> +
> >> +    return ret;
> >> +}
> >> +#endif
> >> +
> >> +static SIMPLE_DEV_PM_OPS(stm32_rtc_pm_ops,
> >> +             stm32_rtc_suspend, stm32_rtc_resume);
> >> +
> >> +static struct platform_driver stm32_rtc_driver = {
> >> +    .probe        = stm32_rtc_probe,
> >> +    .remove        = stm32_rtc_remove,
> >> +    .driver        = {
> >> +        .name    = DRIVER_NAME,
> >> +        .pm    = &stm32_rtc_pm_ops,
> >> +        .of_match_table = stm32_rtc_of_match,
> >> +    },
> >> +};
> >> +
> >> +module_platform_driver(stm32_rtc_driver);
> >> +
> >> +MODULE_ALIAS("platform:" DRIVER_NAME);
> >> +MODULE_AUTHOR("Amelie Delaunay <amelie.delaunay@st.com>");
> >> +MODULE_DESCRIPTION("STMicroelectronics STM32 Real Time Clock driver");
> >> +MODULE_LICENSE("GPL v2");
> >> --
> >> 1.9.1
> >>
> 
> Best regards,
> Amelie
> 

^ permalink raw reply

* [PATCH 07/12] usb: sunxi: Uses the resource-managed extcon API when registering extcon notifier
From: Bin Liu @ 2016-12-05 16:32 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161130084503.v5klzl6silp6323s@lukather>

On Wed, Nov 30, 2016 at 09:45:03AM +0100, Maxime Ripard wrote:
> On Wed, Nov 30, 2016 at 02:57:35PM +0900, Chanwoo Choi wrote:
> > This patch just uses the resource-managed extcon API when registering
> > the extcon notifier.
> > 
> > Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
> 
> Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com>

It would be ideal if the subject was "usb: musb: sunxi: ...".

Acked-by: Bin Liu <b-liu@ti.com>

Regards,
-Bin.

^ permalink raw reply

* [PATCH 16/18] arm64: ptrace: handle ptrace_request differently for aarch32 and ilp32
From: Catalin Marinas @ 2016-12-05 16:34 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1477081997-4770-17-git-send-email-ynorov@caviumnetworks.com>

On Fri, Oct 21, 2016 at 11:33:15PM +0300, Yury Norov wrote:
> New aarch32 ptrace syscall handler is introduced to avoid run-time
> detection of the task type.

What's wrong with the run-time detection? If it's just to avoid a
negligible overhead, I would rather keep the code simpler by avoiding
duplicating the generic compat_sys_ptrace().

-- 
Catalin

^ permalink raw reply

* [PATCH 11/18] arm64: ilp32: share aarch32 syscall handlers
From: Catalin Marinas @ 2016-12-05 17:12 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1477081997-4770-12-git-send-email-ynorov@caviumnetworks.com>

On Fri, Oct 21, 2016 at 11:33:10PM +0300, Yury Norov wrote:
> off_t is  passed in register pair just like in aarch32.
> In this patch corresponding aarch32 handlers are shared to
> ilp32 code.
[...]
> +/*
> + * Note: off_4k (w5) is always in units of 4K. If we can't do the
> + * requested offset because it is not page-aligned, we return -EINVAL.
> + */
> +ENTRY(compat_sys_mmap2_wrapper)
> +#if PAGE_SHIFT > 12
> +	tst	w5, #~PAGE_MASK >> 12
> +	b.ne	1f
> +	lsr	w5, w5, #PAGE_SHIFT - 12
> +#endif
> +	b	sys_mmap_pgoff
> +1:	mov	x0, #-EINVAL
> +	ret
> +ENDPROC(compat_sys_mmap2_wrapper)

For compat sys_mmap2, the pgoff argument is in multiples of 4K. This was
traditionally used for architectures where off_t is 32-bit to allow
mapping files to 2^44.

Since off_t is 64-bit with AArch64/ILP32, should we just pass the off_t
as a 64-bit value in two different registers (w5 and w6)?

-- 
Catalin

^ permalink raw reply

* [PATCH] usb: gadget: udc: atmel: used managed kasprintf
From: David Laight @ 2016-12-05 17:17 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161202161921.ace5rolitlxjhr6i@piout.net>

From: Alexandre Belloni
> Sent: 02 December 2016 16:19
> On 02/12/2016 at 15:59:57 +0000, David Laight wrote :
> > From: Alexandre Belloni
> > > Sent: 01 December 2016 10:27
> > > Use devm_kasprintf instead of simple kasprintf to free the allocated memory
> > > when needed.
> >
> > s/when needed/when the device is freed/
> >
> > > Suggested-by: Peter Rosin <peda@axentia.se>
> > > Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
> > > ---
> > >  drivers/usb/gadget/udc/atmel_usba_udc.c | 3 ++-
> > >  1 file changed, 2 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
> > > index 45bc997d0711..aec72fe8273c 100644
> > > --- a/drivers/usb/gadget/udc/atmel_usba_udc.c
> > > +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
> > > @@ -1978,7 +1978,8 @@ static struct usba_ep * atmel_udc_of_init(struct platform_device *pdev,
> > >  			dev_err(&pdev->dev, "of_probe: name error(%d)\n", ret);
> > >  			goto err;
> > >  		}
> > > -		ep->ep.name = kasprintf(GFP_KERNEL, "ep%d", ep->index);
> > > +		ep->ep.name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "ep%d",
> > > +					     ep->index);
> >
> > Actually, why bother mallocing such a small string at all?
> > The maximum length is 12 bytes even if 'index' are unrestricted.
> >
> 
> IIRC, using a statically allocated string fails somewhere in the USB
> core but I don't remember all the details.

I can't imagine that changing ep->ep.name from 'char *' to 'char [12]' would
make any difference.

	David

^ permalink raw reply

* next-20161205 build: 3 failures 4 warnings (next-20161205)
From: Mark Brown @ 2016-12-05 17:20 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <7b3daf13-dbab-5acf-cd5f-2414141500b3@arm.com>

On Mon, Dec 05, 2016 at 03:44:30PM +0000, Marc Zyngier wrote:

> Old and broken gas. I have a workaround stashed there:

> http://git.kernel.org/cgit/linux/kernel/git/maz/arm-platforms.git/commit/?h=arm64/standalone.h&id=559f97365362ed9e96f594200020379df46630d8

> At least binutils 2.24 and 2.25 are affected, while 2.27 is not.

Not that old - this is the Linaro 2015.10 toolchain which wasn't super
new but not exactly from the mists of time either.  Anyway, I updated to
2016.11 (GCC 6.2.1, GAS 2.27)...  let's see what the fallout is like.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 488 bytes
Desc: not available
URL: <http://lists.infradead.org/pipermail/linux-arm-kernel/attachments/20161205/1f2213ac/attachment.sig>

^ permalink raw reply

* [PATCH] arm64: Add CMDLINE_EXTEND
From: Geoff Levand @ 2016-12-05 17:41 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161205120842.GA14429@e104818-lin.cambridge.arm.com>

Hi Catalin,

On 12/05/2016 04:08 AM, Catalin Marinas wrote:
> On Fri, Dec 02, 2016 at 02:17:02PM -0800, Geoff Levand wrote:
>> The device tree code already supports CMDLINE_EXTEND,
>> so add the config option to make it available on arm64.
> 
> What's your use-case for this patch? Note that both CMDLINE_FORCE and
> CMDLINE_EXTEND (if we introduce it) are ignored by the EFI stub.
> However, we don't seem to have stated this anywhere.

I use this in CoreOS, where we need to set "acpi=force" for
arm64.  CoreOS uses a proper UEFI + grub.

See: 

  https://github.com/coreos/scripts/pull/610
  https://github.com/coreos/coreos-overlay/pull/2298

I can add a comment for EFI stub, either in the Kconfig
options, and/or in Documentation/efi-stub.txt.  Just let
me know.

-Geoff
 

^ permalink raw reply

* [PATCH] arm: kprobe: replace patch_lock to raw lock
From: Shi, Yang @ 2016-12-05 18:16 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <20161201141345.4xlceuchfkxyel75@linutronix.de>

On 12/1/2016 6:13 AM, Sebastian Andrzej Siewior wrote:
> On 2016-11-10 16:17:55 [-0800], Yang Shi wrote:
>>
>> Since patch_text_stop_machine() is called in stop_machine() which disables IRQ,
>> a sleepable lock should not be used in this atomic context, so replace patch_lock
>> with a raw lock.
>>
>> Signed-off-by: Yang Shi <yang.shi@linaro.org>
>
> This can also go upstream.
> Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>

Yes, thanks for acking.

Russell,

Could you please consider this patch?

Thanks,
Yang

>
> Sebastian
>

^ permalink raw reply

* [PATCH v3 0/6] crypto: ARM/arm64 CRC-T10DIF/CRC32/CRC32C roundup
From: Ard Biesheuvel @ 2016-12-05 18:42 UTC (permalink / raw)
  To: linux-arm-kernel

This v3 combines the CRC-T10DIF and CRC32 implementations for both ARM and
arm64 that I sent out a couple of weeks ago, and adds support to the latter
for CRC32C.

Changes since v2:
- fix a couple of big-endian bugs in CRC32/CRC32C
- add back handling to the CRC-T10DIF routines of buffers that are not a
  multiple of 16 bytes (but they still must be 16 byte aligned)

Ard Biesheuvel (6):
  crypto: testmgr - avoid overlap in chunked tests
  crypto: testmgr - add/enhance test cases for CRC-T10DIF
  crypto: arm64/crct10dif - port x86 SSE implementation to arm64
  crypto: arm/crct10dif - port x86 SSE implementation to ARM
  crypto: arm64/crc32 - accelerated support based on x86 SSE
    implementation
  crypto: arm/crc32 - accelerated support based on x86 SSE
    implementation

 arch/arm/crypto/Kconfig               |  10 +
 arch/arm/crypto/Makefile              |   4 +
 arch/arm/crypto/crc32-ce-core.S       | 306 ++++++++++++++
 arch/arm/crypto/crc32-ce-glue.c       | 242 +++++++++++
 arch/arm/crypto/crct10dif-ce-core.S   | 427 ++++++++++++++++++++
 arch/arm/crypto/crct10dif-ce-glue.c   | 101 +++++
 arch/arm64/crypto/Kconfig             |  11 +
 arch/arm64/crypto/Makefile            |   6 +
 arch/arm64/crypto/crc32-ce-core.S     | 266 ++++++++++++
 arch/arm64/crypto/crc32-ce-glue.c     | 212 ++++++++++
 arch/arm64/crypto/crct10dif-ce-core.S | 392 ++++++++++++++++++
 arch/arm64/crypto/crct10dif-ce-glue.c |  95 +++++
 crypto/testmgr.c                      |   2 +-
 crypto/testmgr.h                      |  70 ++--
 14 files changed, 2115 insertions(+), 29 deletions(-)
 create mode 100644 arch/arm/crypto/crc32-ce-core.S
 create mode 100644 arch/arm/crypto/crc32-ce-glue.c
 create mode 100644 arch/arm/crypto/crct10dif-ce-core.S
 create mode 100644 arch/arm/crypto/crct10dif-ce-glue.c
 create mode 100644 arch/arm64/crypto/crc32-ce-core.S
 create mode 100644 arch/arm64/crypto/crc32-ce-glue.c
 create mode 100644 arch/arm64/crypto/crct10dif-ce-core.S
 create mode 100644 arch/arm64/crypto/crct10dif-ce-glue.c

-- 
2.7.4

^ permalink raw reply

* [PATCH v3 1/6] crypto: testmgr - avoid overlap in chunked tests
From: Ard Biesheuvel @ 2016-12-05 18:42 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1480963348-24203-1-git-send-email-ard.biesheuvel@linaro.org>

The IDXn offsets are chosen such that tap values (which may go up to
255) end up overlapping in the xbuf allocation. In particular, IDX1
and IDX3 are too close together, so update IDX3 to avoid this issue.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 crypto/testmgr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index ded50b67c757..670893bcf361 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -63,7 +63,7 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
  */
 #define IDX1		32
 #define IDX2		32400
-#define IDX3		1
+#define IDX3		511
 #define IDX4		8193
 #define IDX5		22222
 #define IDX6		17101
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 2/6] crypto: testmgr - add/enhance test cases for CRC-T10DIF
From: Ard Biesheuvel @ 2016-12-05 18:42 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1480963348-24203-1-git-send-email-ard.biesheuvel@linaro.org>

The existing test cases only exercise a small slice of the various
possible code paths through the x86 SSE/PCLMULQDQ implementation,
and the upcoming ports of it for arm64. So add one that exceeds 256
bytes in size, and convert another to a chunked test.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 crypto/testmgr.h | 70 ++++++++++++--------
 1 file changed, 42 insertions(+), 28 deletions(-)

diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index e64a4ef9d8ca..9b656be7f52f 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -1334,36 +1334,50 @@ static struct hash_testvec rmd320_tv_template[] = {
 	}
 };
 
-#define CRCT10DIF_TEST_VECTORS	3
+#define CRCT10DIF_TEST_VECTORS	ARRAY_SIZE(crct10dif_tv_template)
 static struct hash_testvec crct10dif_tv_template[] = {
 	{
-		.plaintext = "abc",
-		.psize  = 3,
-#ifdef __LITTLE_ENDIAN
-		.digest = "\x3b\x44",
-#else
-		.digest = "\x44\x3b",
-#endif
-	}, {
-		.plaintext = "1234567890123456789012345678901234567890"
-			     "123456789012345678901234567890123456789",
-		.psize	= 79,
-#ifdef __LITTLE_ENDIAN
-		.digest	= "\x70\x4b",
-#else
-		.digest	= "\x4b\x70",
-#endif
-	}, {
-		.plaintext =
-		"abcddddddddddddddddddddddddddddddddddddddddddddddddddddd",
-		.psize  = 56,
-#ifdef __LITTLE_ENDIAN
-		.digest = "\xe3\x9c",
-#else
-		.digest = "\x9c\xe3",
-#endif
-		.np     = 2,
-		.tap    = { 28, 28 }
+		.plaintext	= "abc",
+		.psize		= 3,
+		.digest		= (u8 *)(u16 []){ 0x443b },
+	}, {
+		.plaintext 	= "1234567890123456789012345678901234567890"
+				  "123456789012345678901234567890123456789",
+		.psize		= 79,
+		.digest 	= (u8 *)(u16 []){ 0x4b70 },
+		.np		= 2,
+		.tap		= { 63, 16 },
+	}, {
+		.plaintext	= "abcdddddddddddddddddddddddddddddddddddddddd"
+				  "ddddddddddddd",
+		.psize		= 56,
+		.digest		= (u8 *)(u16 []){ 0x9ce3 },
+		.np		= 8,
+		.tap		= { 1, 2, 28, 7, 6, 5, 4, 3 },
+	}, {
+		.plaintext 	= "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "123456789012345678901234567890123456789",
+		.psize		= 319,
+		.digest		= (u8 *)(u16 []){ 0x44c6 },
+	}, {
+		.plaintext 	= "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "1234567890123456789012345678901234567890"
+				  "123456789012345678901234567890123456789",
+		.psize		= 319,
+		.digest		= (u8 *)(u16 []){ 0x44c6 },
+		.np		= 4,
+		.tap		= { 1, 255, 57, 6 },
 	}
 };
 
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 3/6] crypto: arm64/crct10dif - port x86 SSE implementation to arm64
From: Ard Biesheuvel @ 2016-12-05 18:42 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1480963348-24203-1-git-send-email-ard.biesheuvel@linaro.org>

This is a transliteration of the Intel algorithm implemented
using SSE and PCLMULQDQ instructions that resides in the file
arch/x86/crypto/crct10dif-pcl-asm_64.S, but simplified to only
operate on buffers that are 16-byte aligned (but of any size).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/crypto/Kconfig             |   5 +
 arch/arm64/crypto/Makefile            |   3 +
 arch/arm64/crypto/crct10dif-ce-core.S | 392 ++++++++++++++++++++
 arch/arm64/crypto/crct10dif-ce-glue.c |  95 +++++
 4 files changed, 495 insertions(+)

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 2cf32e9887e1..d773c0659202 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -23,6 +23,11 @@ config CRYPTO_GHASH_ARM64_CE
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_HASH
 
+config CRYPTO_CRCT10DIF_ARM64_CE
+	tristate "CRCT10DIF digest algorithm using PMULL instructions"
+	depends on KERNEL_MODE_NEON && CRC_T10DIF
+	select CRYPTO_HASH
+
 config CRYPTO_AES_ARM64_CE
 	tristate "AES core cipher using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index abb79b3cfcfe..36fd3eb4201b 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -17,6 +17,9 @@ sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o
 obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
 ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
 
+obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o
+crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
+
 obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
 CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
 
diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S
new file mode 100644
index 000000000000..d5b5a8c038c8
--- /dev/null
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -0,0 +1,392 @@
+//
+// Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
+//
+// Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License version 2 as
+// published by the Free Software Foundation.
+//
+
+//
+// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
+//
+// Copyright (c) 2013, Intel Corporation
+//
+// Authors:
+//     Erdinc Ozturk <erdinc.ozturk@intel.com>
+//     Vinodh Gopal <vinodh.gopal@intel.com>
+//     James Guilford <james.guilford@intel.com>
+//     Tim Chen <tim.c.chen@linux.intel.com>
+//
+// This software is available to you under a choice of one of two
+// licenses.  You may choose to be licensed under the terms of the GNU
+// General Public License (GPL) Version 2, available from the file
+// COPYING in the main directory of this source tree, or the
+// OpenIB.org BSD license below:
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the
+//   distribution.
+//
+// * Neither the name of the Intel Corporation nor the names of its
+//   contributors may be used to endorse or promote products derived from
+//   this software without specific prior written permission.
+//
+//
+// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//       Function API:
+//       UINT16 crc_t10dif_pcl(
+//               UINT16 init_crc, //initial CRC value, 16 bits
+//               const unsigned char *buf, //buffer pointer to calculate CRC on
+//               UINT64 len //buffer length in bytes (64-bit data)
+//       );
+//
+//       Reference paper titled "Fast CRC Computation for Generic
+//	Polynomials Using PCLMULQDQ Instruction"
+//       URL: http://www.intel.com/content/dam/www/public/us/en/documents
+//  /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+//
+//
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+	.text
+	.cpu		generic+crypto
+
+	arg1_low32	.req	w0
+	arg2		.req	x1
+	arg3		.req	x2
+
+	vzr		.req	v13
+
+ENTRY(crc_t10dif_pmull)
+	movi		vzr.16b, #0		// init zero register
+
+	// adjust the 16-bit initial_crc value, scale it to 32 bits
+	lsl		arg1_low32, arg1_low32, #16
+
+	// check if smaller than 256
+	cmp		arg3, #256
+
+	// for sizes less than 256, we can't fold 128B at a time...
+	b.lt		_less_than_128
+
+	// load the initial crc value
+	// crc value does not need to be byte-reflected, but it needs
+	// to be moved to the high part of the register.
+	// because data will be byte-reflected and will align with
+	// initial crc at correct place.
+	movi		v10.16b, #0
+	mov		v10.s[3], arg1_low32		// initial crc
+
+	// load the initial 128B of data, xor the initial crc value
+	ldp		q0, q1, [arg2]
+	ldp		q2, q3, [arg2, #0x20]
+	ldp		q4, q5, [arg2, #0x40]
+	ldp		q6, q7, [arg2, #0x60]
+	add		arg2, arg2, #0x80
+
+CPU_LE(	rev64		v0.16b, v0.16b			)
+CPU_LE(	rev64		v1.16b, v1.16b			)
+CPU_LE(	rev64		v2.16b, v2.16b			)
+CPU_LE(	rev64		v3.16b, v3.16b			)
+CPU_LE(	rev64		v4.16b, v4.16b			)
+CPU_LE(	rev64		v5.16b, v5.16b			)
+CPU_LE(	rev64		v6.16b, v6.16b			)
+CPU_LE(	rev64		v7.16b, v7.16b			)
+
+CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
+CPU_LE(	ext		v1.16b, v1.16b, v1.16b, #8	)
+CPU_LE(	ext		v2.16b, v2.16b, v2.16b, #8	)
+CPU_LE(	ext		v3.16b, v3.16b, v3.16b, #8	)
+CPU_LE(	ext		v4.16b, v4.16b, v4.16b, #8	)
+CPU_LE(	ext		v5.16b, v5.16b, v5.16b, #8	)
+CPU_LE(	ext		v6.16b, v6.16b, v6.16b, #8	)
+CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
+
+	// XOR the initial_crc value
+	eor		v0.16b, v0.16b, v10.16b
+
+	ldr		q10, rk3	// xmm10 has rk3 and rk4
+					// type of pmull instruction
+					// will determine which constant to use
+
+	//
+	// we subtract 256 instead of 128 to save one instruction from the loop
+	//
+	sub		arg3, arg3, #256
+
+	// at this section of the code, there is 128*x+y (0<=y<128) bytes of
+	// buffer. The _fold_64_B_loop will fold 128B at a time
+	// until we have 128+y Bytes of buffer
+
+
+	// fold 128B at a time. This section of the code folds 8 vector
+	// registers in parallel
+_fold_64_B_loop:
+
+	.macro		fold64, reg1, reg2
+	ldp		q11, q12, [arg2], #0x20
+
+	pmull2		v8.1q, \reg1\().2d, v10.2d
+	pmull		\reg1\().1q, \reg1\().1d, v10.1d
+
+CPU_LE(	rev64		v11.16b, v11.16b		)
+CPU_LE(	rev64		v12.16b, v12.16b		)
+
+	pmull2		v9.1q, \reg2\().2d, v10.2d
+	pmull		\reg2\().1q, \reg2\().1d, v10.1d
+
+CPU_LE(	ext		v11.16b, v11.16b, v11.16b, #8	)
+CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)
+
+	eor		\reg1\().16b, \reg1\().16b, v8.16b
+	eor		\reg2\().16b, \reg2\().16b, v9.16b
+	eor		\reg1\().16b, \reg1\().16b, v11.16b
+	eor		\reg2\().16b, \reg2\().16b, v12.16b
+	.endm
+
+	fold64		v0, v1
+	fold64		v2, v3
+	fold64		v4, v5
+	fold64		v6, v7
+
+	subs		arg3, arg3, #128
+
+	// check if there is another 64B in the buffer to be able to fold
+	b.ge		_fold_64_B_loop
+
+	// at this point, the buffer pointer is pointing at the last y Bytes
+	// of the buffer; the 128B of folded data is in 8 of the vector
+	// registers: v0 through v7
+
+	// fold the 8 vector registers to 1 vector register with different
+	// constants
+
+	ldr		q10, rk9
+
+	.macro		fold16, reg, rk
+	pmull		v8.1q, \reg\().1d, v10.1d
+	pmull2		\reg\().1q, \reg\().2d, v10.2d
+	.ifnb		\rk
+	ldr		q10, \rk
+	.endif
+	eor		v7.16b, v7.16b, v8.16b
+	eor		v7.16b, v7.16b, \reg\().16b
+	.endm
+
+	fold16		v0, rk11
+	fold16		v1, rk13
+	fold16		v2, rk15
+	fold16		v3, rk17
+	fold16		v4, rk19
+	fold16		v5, rk1
+	fold16		v6
+
+	// instead of 128, we add 112 (128-16) to the loop counter to save 1
+	// instruction from the loop; instead of a cmp instruction, we use the
+	// negative flag with the b.lt instruction
+	adds		arg3, arg3, #(128-16)
+	b.lt		_final_reduction_for_128
+
+	// now we have 16+y bytes left to reduce. 16 Bytes is in register v7
+	// and the rest is in memory. We can fold 16 bytes at a time if y>=16
+	// continue folding 16B at a time
+
+_16B_reduction_loop:
+	pmull		v8.1q, v7.1d, v10.1d
+	pmull2		v7.1q, v7.2d, v10.2d
+	eor		v7.16b, v7.16b, v8.16b
+
+	ldr		q0, [arg2], #16
+CPU_LE(	rev64		v0.16b, v0.16b			)
+CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
+	eor		v7.16b, v7.16b, v0.16b
+	subs		arg3, arg3, #16
+
+	// instead of a cmp instruction, we utilize the flags with the
+	// jge instruction equivalent of: cmp arg3, 16-16
+	// check if there is any more 16B in the buffer to be able to fold
+	b.ge		_16B_reduction_loop
+
+	// now we have 16+z bytes left to reduce, where 0<= z < 16.
+	// first, we reduce the data in the xmm7 register
+
+_final_reduction_for_128:
+	// check if any more data to fold. If not, compute the CRC of
+	// the final 128 bits
+	adds		arg3, arg3, #16
+	b.eq		_128_done
+
+	// here we are getting data that is less than 16 bytes.
+	// since we know that there was data before the pointer, we can
+	// offset the input pointer before the actual point, to receive
+	// exactly 16 bytes. after that the registers need to be adjusted.
+_get_last_two_regs:
+	add		arg2, arg2, arg3
+	ldr		q1, [arg2, #-16]
+CPU_LE(	rev64		v1.16b, v1.16b			)
+CPU_LE(	ext		v1.16b, v1.16b, v1.16b, #8	)
+
+	// get rid of the extra data that was loaded before
+	// load the shift constant
+	adr		x4, tbl_shf_table + 16
+	sub		x4, x4, arg3
+	ld1		{v0.16b}, [x4]
+
+	// shift v2 to the left by arg3 bytes
+	tbl		v2.16b, {v7.16b}, v0.16b
+
+	// shift v7 to the right by 16-arg3 bytes
+	movi		v9.16b, #0x80
+	eor		v0.16b, v0.16b, v9.16b
+	tbl		v7.16b, {v7.16b}, v0.16b
+
+	// blend
+	sshr		v0.16b, v0.16b, #7	// convert to 8-bit mask
+	bsl		v0.16b, v2.16b, v1.16b
+
+	// fold 16 Bytes
+	pmull		v8.1q, v7.1d, v10.1d
+	pmull2		v7.1q, v7.2d, v10.2d
+	eor		v7.16b, v7.16b, v8.16b
+	eor		v7.16b, v7.16b, v0.16b
+
+_128_done:
+	// compute crc of a 128-bit value
+	ldr		q10, rk5		// rk5 and rk6 in xmm10
+
+	// 64b fold
+	ext		v0.16b, vzr.16b, v7.16b, #8
+	mov		v7.d[0], v7.d[1]
+	pmull		v7.1q, v7.1d, v10.1d
+	eor		v7.16b, v7.16b, v0.16b
+
+	// 32b fold
+	ext		v0.16b, v7.16b, vzr.16b, #4
+	mov		v7.s[3], vzr.s[0]
+	pmull2		v0.1q, v0.2d, v10.2d
+	eor		v7.16b, v7.16b, v0.16b
+
+	// barrett reduction
+_barrett:
+	ldr		q10, rk7
+	mov		v0.d[0], v7.d[1]
+
+	pmull		v0.1q, v0.1d, v10.1d
+	ext		v0.16b, vzr.16b, v0.16b, #12
+	pmull2		v0.1q, v0.2d, v10.2d
+	ext		v0.16b, vzr.16b, v0.16b, #12
+	eor		v7.16b, v7.16b, v0.16b
+	mov		w0, v7.s[1]
+
+_cleanup:
+	// scale the result back to 16 bits
+	lsr		x0, x0, #16
+	ret
+
+_less_than_128:
+	cbz		arg3, _cleanup
+
+	movi		v0.16b, #0
+	mov		v0.s[3], arg1_low32	// get the initial crc value
+
+	ldr		q7, [arg2], #0x10
+CPU_LE(	rev64		v7.16b, v7.16b			)
+CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
+	eor		v7.16b, v7.16b, v0.16b	// xor the initial crc value
+
+	cmp		arg3, #16
+	b.eq		_128_done		// exactly 16 left
+	b.lt		_less_than_16_left
+
+	ldr		q10, rk1		// rk1 and rk2 in xmm10
+
+	// update the counter. subtract 32 instead of 16 to save one
+	// instruction from the loop
+	subs		arg3, arg3, #32
+	b.ge		_16B_reduction_loop
+
+	add		arg3, arg3, #16
+	b		_get_last_two_regs
+
+_less_than_16_left:
+	// shl r9, 4
+	adr		x0, tbl_shf_table + 16
+	sub		x0, x0, arg3
+	ld1		{v0.16b}, [x0]
+	movi		v9.16b, #0x80
+	eor		v0.16b, v0.16b, v9.16b
+	tbl		v7.16b, {v7.16b}, v0.16b
+	b		_128_done
+ENDPROC(crc_t10dif_pmull)
+
+// precomputed constants
+// these constants are precomputed from the poly:
+// 0x8bb70000 (0x8bb7 scaled to 32 bits)
+	.align		4
+// Q = 0x18BB70000
+// rk1 = 2^(32*3) mod Q << 32
+// rk2 = 2^(32*5) mod Q << 32
+// rk3 = 2^(32*15) mod Q << 32
+// rk4 = 2^(32*17) mod Q << 32
+// rk5 = 2^(32*3) mod Q << 32
+// rk6 = 2^(32*2) mod Q << 32
+// rk7 = floor(2^64/Q)
+// rk8 = Q
+
+rk1:	.octa		0x06df0000000000002d56000000000000
+rk3:	.octa		0x7cf50000000000009d9d000000000000
+rk5:	.octa		0x13680000000000002d56000000000000
+rk7:	.octa		0x000000018bb7000000000001f65a57f8
+rk9:	.octa		0xbfd6000000000000ceae000000000000
+rk11:	.octa		0x713c0000000000001e16000000000000
+rk13:	.octa		0x80a6000000000000f7f9000000000000
+rk15:	.octa		0xe658000000000000044c000000000000
+rk17:	.octa		0xa497000000000000ad18000000000000
+rk19:	.octa		0xe7b50000000000006ee3000000000000
+
+tbl_shf_table:
+// use these values for shift constants for the tbl/tbx instruction
+// different alignments result in values as shown:
+//	DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
+//	DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-2) / shr2
+//	DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-3) / shr3
+//	DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
+//	DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
+//	DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
+//	DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9  (16-7) / shr7
+//	DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8  (16-8) / shr8
+//	DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7  (16-9) / shr9
+//	DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6  (16-10) / shr10
+//	DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5  (16-11) / shr11
+//	DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4  (16-12) / shr12
+//	DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3  (16-13) / shr13
+//	DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2  (16-14) / shr14
+//	DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1  (16-15) / shr15
+
+	.byte		 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
+	.byte		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
+	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe , 0x0
diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c
new file mode 100644
index 000000000000..60cb590c2590
--- /dev/null
+++ b/arch/arm64/crypto/crct10dif-ce-glue.c
@@ -0,0 +1,95 @@
+/*
+ * Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/cpufeature.h>
+#include <linux/crc-t10dif.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <crypto/internal/hash.h>
+
+#include <asm/neon.h>
+
+#define CRC_T10DIF_PMULL_CHUNK_SIZE	16U
+
+asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u64 len);
+
+static int crct10dif_init(struct shash_desc *desc)
+{
+	u16 *crc = shash_desc_ctx(desc);
+
+	*crc = 0;
+	return 0;
+}
+
+static int crct10dif_update(struct shash_desc *desc, const u8 *data,
+			    unsigned int length)
+{
+	u16 *crc = shash_desc_ctx(desc);
+	unsigned int l;
+
+	if (unlikely((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) {
+		l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE -
+			  ((u64)data % CRC_T10DIF_PMULL_CHUNK_SIZE));
+
+		*crc = crc_t10dif_generic(*crc, data, l);
+
+		length -= l;
+		data += l;
+	}
+
+	if (length > 0) {
+		kernel_neon_begin_partial(14);
+		*crc = crc_t10dif_pmull(*crc, data, length);
+		kernel_neon_end();
+	}
+
+	return 0;
+}
+
+static int crct10dif_final(struct shash_desc *desc, u8 *out)
+{
+	u16 *crc = shash_desc_ctx(desc);
+
+	*(u16 *)out = *crc;
+	return 0;
+}
+
+static struct shash_alg crc_t10dif_alg = {
+	.digestsize		= CRC_T10DIF_DIGEST_SIZE,
+	.init			= crct10dif_init,
+	.update			= crct10dif_update,
+	.final			= crct10dif_final,
+	.descsize		= CRC_T10DIF_DIGEST_SIZE,
+
+	.base.cra_name		= "crct10dif",
+	.base.cra_driver_name	= "crct10dif-arm64-ce",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= CRC_T10DIF_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+};
+
+static int __init crc_t10dif_mod_init(void)
+{
+	return crypto_register_shash(&crc_t10dif_alg);
+}
+
+static void __exit crc_t10dif_mod_exit(void)
+{
+	crypto_unregister_shash(&crc_t10dif_alg);
+}
+
+module_cpu_feature_match(PMULL, crc_t10dif_mod_init);
+module_exit(crc_t10dif_mod_exit);
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
-- 
2.7.4

^ permalink raw reply related

* [PATCH v3 4/6] crypto: arm/crct10dif - port x86 SSE implementation to ARM
From: Ard Biesheuvel @ 2016-12-05 18:42 UTC (permalink / raw)
  To: linux-arm-kernel
In-Reply-To: <1480963348-24203-1-git-send-email-ard.biesheuvel@linaro.org>

This is a transliteration of the Intel algorithm implemented
using SSE and PCLMULQDQ instructions that resides in the file
arch/x86/crypto/crct10dif-pcl-asm_64.S, but simplified to only
operate on buffers that are 16-byte aligned (but of any size).

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm/crypto/Kconfig             |   5 +
 arch/arm/crypto/Makefile            |   2 +
 arch/arm/crypto/crct10dif-ce-core.S | 427 ++++++++++++++++++++
 arch/arm/crypto/crct10dif-ce-glue.c | 101 +++++
 4 files changed, 535 insertions(+)

diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 27ed1b1cd1d7..fce801fa52a1 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -120,4 +120,9 @@ config CRYPTO_GHASH_ARM_CE
 	  that uses the 64x64 to 128 bit polynomial multiplication (vmull.p64)
 	  that is part of the ARMv8 Crypto Extensions
 
+config CRYPTO_CRCT10DIF_ARM_CE
+	tristate "CRCT10DIF digest algorithm using PMULL instructions"
+	depends on KERNEL_MODE_NEON && CRC_T10DIF
+	select CRYPTO_HASH
+
 endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index fc5150702b64..fc77265014b7 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -13,6 +13,7 @@ ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
 ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
+ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o
 
 ifneq ($(ce-obj-y)$(ce-obj-m),)
 ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y)
@@ -36,6 +37,7 @@ sha1-arm-ce-y	:= sha1-ce-core.o sha1-ce-glue.o
 sha2-arm-ce-y	:= sha2-ce-core.o sha2-ce-glue.o
 aes-arm-ce-y	:= aes-ce-core.o aes-ce-glue.o
 ghash-arm-ce-y	:= ghash-ce-core.o ghash-ce-glue.o
+crct10dif-arm-ce-y	:= crct10dif-ce-core.o crct10dif-ce-glue.o
 
 quiet_cmd_perl = PERL    $@
       cmd_perl = $(PERL) $(<) > $(@)
diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S
new file mode 100644
index 000000000000..ce45ba0c0687
--- /dev/null
+++ b/arch/arm/crypto/crct10dif-ce-core.S
@@ -0,0 +1,427 @@
+//
+// Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions
+//
+// Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License version 2 as
+// published by the Free Software Foundation.
+//
+
+//
+// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
+//
+// Copyright (c) 2013, Intel Corporation
+//
+// Authors:
+//     Erdinc Ozturk <erdinc.ozturk@intel.com>
+//     Vinodh Gopal <vinodh.gopal@intel.com>
+//     James Guilford <james.guilford@intel.com>
+//     Tim Chen <tim.c.chen@linux.intel.com>
+//
+// This software is available to you under a choice of one of two
+// licenses.  You may choose to be licensed under the terms of the GNU
+// General Public License (GPL) Version 2, available from the file
+// COPYING in the main directory of this source tree, or the
+// OpenIB.org BSD license below:
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+//   notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+//   notice, this list of conditions and the following disclaimer in the
+//   documentation and/or other materials provided with the
+//   distribution.
+//
+// * Neither the name of the Intel Corporation nor the names of its
+//   contributors may be used to endorse or promote products derived from
+//   this software without specific prior written permission.
+//
+//
+// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+//       Function API:
+//       UINT16 crc_t10dif_pcl(
+//               UINT16 init_crc, //initial CRC value, 16 bits
+//               const unsigned char *buf, //buffer pointer to calculate CRC on
+//               UINT64 len //buffer length in bytes (64-bit data)
+//       );
+//
+//       Reference paper titled "Fast CRC Computation for Generic
+//	Polynomials Using PCLMULQDQ Instruction"
+//       URL: http://www.intel.com/content/dam/www/public/us/en/documents
+//  /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
+//
+//
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+#ifdef CONFIG_CPU_ENDIAN_BE8
+#define CPU_LE(code...)
+#else
+#define CPU_LE(code...)		code
+#endif
+
+	.text
+	.fpu		crypto-neon-fp-armv8
+
+	arg1_low32	.req	r0
+	arg2		.req	r1
+	arg3		.req	r2
+
+	qzr		.req	q13
+
+	q0l		.req	d0
+	q0h		.req	d1
+	q1l		.req	d2
+	q1h		.req	d3
+	q2l		.req	d4
+	q2h		.req	d5
+	q3l		.req	d6
+	q3h		.req	d7
+	q4l		.req	d8
+	q4h		.req	d9
+	q5l		.req	d10
+	q5h		.req	d11
+	q6l		.req	d12
+	q6h		.req	d13
+	q7l		.req	d14
+	q7h		.req	d15
+
+ENTRY(crc_t10dif_pmull)
+	vmov.i8		qzr, #0			// init zero register
+
+	// adjust the 16-bit initial_crc value, scale it to 32 bits
+	lsl		arg1_low32, arg1_low32, #16
+
+	// check if smaller than 256
+	cmp		arg3, #256
+
+	// for sizes less than 256, we can't fold 128B at a time...
+	blt		_less_than_128
+
+	// load the initial crc value
+	// crc value does not need to be byte-reflected, but it needs
+	// to be moved to the high part of the register.
+	// because data will be byte-reflected and will align with
+	// initial crc at correct place.
+	vmov		s0, arg1_low32		// initial crc
+	vext.8		q10, qzr, q0, #4
+
+	// load the initial 128B of data, xor the initial crc value
+	vld1.64		{q0-q1}, [arg2, :128]!
+	vld1.64		{q2-q3}, [arg2, :128]!
+	vld1.64		{q4-q5}, [arg2, :128]!
+	vld1.64		{q6-q7}, [arg2, :128]!
+CPU_LE(	vrev64.8	q0, q0			)
+CPU_LE(	vrev64.8	q1, q1			)
+CPU_LE(	vrev64.8	q2, q2			)
+CPU_LE(	vrev64.8	q3, q3			)
+CPU_LE(	vrev64.8	q4, q4			)
+CPU_LE(	vrev64.8	q5, q5			)
+CPU_LE(	vrev64.8	q6, q6			)
+CPU_LE(	vrev64.8	q7, q7			)
+
+	vswp		d0, d1
+	vswp		d2, d3
+	vswp		d4, d5
+	vswp		d6, d7
+	vswp		d8, d9
+	vswp		d10, d11
+	vswp		d12, d13
+	vswp		d14, d15
+
+	// XOR the initial_crc value
+	veor.8		q0, q0, q10
+
+	adr		ip, rk3
+	vld1.64		{q10}, [ip, :128]	// xmm10 has rk3 and rk4
+
+	//
+	// we subtract 256 instead of 128 to save one instruction from the loop
+	//
+	sub		arg3, arg3, #256
+
+	// at this section of the code, there is 128*x+y (0<=y<128) bytes of
+	// buffer. The _fold_64_B_loop will fold 128B at a time
+	// until we have 128+y Bytes of buffer
+
+
+	// fold 128B at a time. This section of the code folds 8 vector
+	// registers in parallel
+_fold_64_B_loop:
+
+	.macro		fold64, reg1, reg2
+	vld1.64		{q11-q12}, [arg2, :128]!
+
+	vmull.p64	q8, \reg1\()h, d21
+	vmull.p64	\reg1, \reg1\()l, d20
+	vmull.p64	q9, \reg2\()h, d21
+	vmull.p64	\reg2, \reg2\()l, d20
+
+CPU_LE(	vrev64.8	q11, q11		)
+CPU_LE(	vrev64.8	q12, q12		)
+	vswp		d22, d23
+	vswp		d24, d25
+
+	veor.8		\reg1, \reg1, q8
+	veor.8		\reg2, \reg2, q9
+	veor.8		\reg1, \reg1, q11
+	veor.8		\reg2, \reg2, q12
+	.endm
+
+	fold64		q0, q1
+	fold64		q2, q3
+	fold64		q4, q5
+	fold64		q6, q7
+
+	subs		arg3, arg3, #128
+
+	// check if there is another 64B in the buffer to be able to fold
+	bge		_fold_64_B_loop
+
+	// at this point, the buffer pointer is pointing at the last y Bytes
+	// of the buffer; the 128B of folded data is in 8 of the vector
+	// registers: q0 through q7
+
+	// fold the 8 vector registers to 1 vector register with different
+	// constants
+
+	adr		ip, rk9
+	vld1.64		{q10}, [ip, :128]!
+
+	.macro		fold16, reg, rk
+	vmull.p64	q8, \reg\()l, d20
+	vmull.p64	\reg, \reg\()h, d21
+	.ifnb		\rk
+	vld1.64		{q10}, [ip, :128]!
+	.endif
+	veor.8		q7, q7, q8
+	veor.8		q7, q7, \reg
+	.endm
+
+	fold16		q0, rk11
+	fold16		q1, rk13
+	fold16		q2, rk15
+	fold16		q3, rk17
+	fold16		q4, rk19
+	fold16		q5, rk1
+	fold16		q6
+
+	// instead of 128, we add 112 (128-16) to the loop counter to save 1
+	// instruction from the loop; instead of a cmp instruction, we use the
+	// negative flag with the blt instruction
+	adds		arg3, arg3, #(128-16)
+	blt		_final_reduction_for_128
+
+	// now we have 16+y bytes left to reduce. 16 Bytes is in register v7
+	// and the rest is in memory. We can fold 16 bytes at a time if y>=16
+	// continue folding 16B at a time
+
+_16B_reduction_loop:
+	vmull.p64	q8, d14, d20
+	vmull.p64	q7, d15, d21
+	veor.8		q7, q7, q8
+
+	vld1.64		{q0}, [arg2, :128]!
+CPU_LE(	vrev64.8	q0, q0		)
+	vswp		d0, d1
+	veor.8		q7, q7, q0
+	subs		arg3, arg3, #16
+
+	// instead of a cmp instruction, we utilize the flags with the
+	// jge instruction equivalent of: cmp arg3, 16-16
+	// check if there is any more 16B in the buffer to be able to fold
+	bge		_16B_reduction_loop
+
+	// now we have 16+z bytes left to reduce, where 0<= z < 16.
+	// first, we reduce the data in the xmm7 register
+
+_final_reduction_for_128:
+	// check if any more data to fold. If not, compute the CRC of
+	// the final 128 bits
+	adds		arg3, arg3, #16
+	beq		_128_done
+
+	// here we are getting data that is less than 16 bytes.
+	// since we know that there was data before the pointer, we can
+	// offset the input pointer before the actual point, to receive
+	// exactly 16 bytes. after that the registers need to be adjusted.
+_get_last_two_regs:
+	add		arg2, arg2, arg3
+	sub		arg2, arg2, #16
+	vld1.64		{q1}, [arg2]
+CPU_LE(	vrev64.8	q1, q1			)
+	vswp		d2, d3
+
+	// get rid of the extra data that was loaded before
+	// load the shift constant
+	adr		ip, tbl_shf_table + 16
+	sub		ip, ip, arg3
+	vld1.8		{q0}, [ip]
+
+	// shift v2 to the left by arg3 bytes
+	vtbl.8		d4, {d14-d15}, d0
+	vtbl.8		d5, {d14-d15}, d1
+
+	// shift v7 to the right by 16-arg3 bytes
+	vmov.i8		q9, #0x80
+	veor.8		q0, q0, q9
+	vtbl.8		d18, {d14-d15}, d0
+	vtbl.8		d19, {d14-d15}, d1
+
+	// blend
+	vshr.s8		q0, q0, #7		// convert to 8-bit mask
+	vbsl.8		q0, q2, q1
+
+	// fold 16 Bytes
+	vmull.p64	q8, d18, d20
+	vmull.p64	q7, d19, d21
+	veor.8		q7, q7, q8
+	veor.8		q7, q7, q0
+
+_128_done:
+	// compute crc of a 128-bit value
+	vldr		d20, rk5
+	vldr		d21, rk6		// rk5 and rk6 in xmm10
+
+	// 64b fold
+	vext.8		q0, qzr, q7, #8
+	vmull.p64	q7, d15, d20
+	veor.8		q7, q7, q0
+
+	// 32b fold
+	vext.8		q0, q7, qzr, #12
+	vmov		s31, s3
+	vmull.p64	q0, d0, d21
+	veor.8		q7, q0, q7
+
+	// barrett reduction
+_barrett:
+	vldr		d20, rk7
+	vldr		d21, rk8
+
+	vmull.p64	q0, d15, d20
+	vext.8		q0, qzr, q0, #12
+	vmull.p64	q0, d1, d21
+	vext.8		q0, qzr, q0, #12
+	veor.8		q7, q7, q0
+	vmov		r0, s29
+
+_cleanup:
+	// scale the result back to 16 bits
+	lsr		r0, r0, #16
+	bx		lr
+
+_less_than_128:
+	teq		arg3, #0
+	beq		_cleanup
+
+	vmov.i8		q0, #0
+	vmov		s3, arg1_low32		// get the initial crc value
+
+	vld1.64		{q7}, [arg2, :128]!
+CPU_LE(	vrev64.8	q7, q7		)
+	vswp		d14, d15
+	veor.8		q7, q7, q0
+
+	cmp		arg3, #16
+	beq		_128_done		// exactly 16 left
+	blt		_less_than_16_left
+
+	// now if there is, load the constants
+	vldr		d20, rk1
+	vldr		d21, rk2		// rk1 and rk2 in xmm10
+
+	// check if there is enough buffer to be able to fold 16B at a time
+	subs		arg3, arg3, #32
+	addlt		arg3, arg3, #16
+	blt		_get_last_two_regs
+	b		_16B_reduction_loop
+
+_less_than_16_left:
+	// shl r9, 4
+	adr		ip, tbl_shf_table + 16
+	sub		ip, ip, arg3
+	vld1.8		{q0}, [ip]
+	vmov.i8		q9, #0x80
+	veor.8		q0, q0, q9
+	vtbl.8		d18, {d14-d15}, d0
+	vtbl.8		d15, {d14-d15}, d1
+	vmov		d14, d18
+	b		_128_done
+ENDPROC(crc_t10dif_pmull)
+
+// precomputed constants
+// these constants are precomputed from the poly:
+// 0x8bb70000 (0x8bb7 scaled to 32 bits)
+	.align		4
+// Q = 0x18BB70000
+// rk1 = 2^(32*3) mod Q << 32
+// rk2 = 2^(32*5) mod Q << 32
+// rk3 = 2^(32*15) mod Q << 32
+// rk4 = 2^(32*17) mod Q << 32
+// rk5 = 2^(32*3) mod Q << 32
+// rk6 = 2^(32*2) mod Q << 32
+// rk7 = floor(2^64/Q)
+// rk8 = Q
+
+rk3:	.quad		0x9d9d000000000000
+rk4:	.quad		0x7cf5000000000000
+rk5:	.quad		0x2d56000000000000
+rk6:	.quad		0x1368000000000000
+rk7:	.quad		0x00000001f65a57f8
+rk8:	.quad		0x000000018bb70000
+rk9:	.quad		0xceae000000000000
+rk10:	.quad		0xbfd6000000000000
+rk11:	.quad		0x1e16000000000000
+rk12:	.quad		0x713c000000000000
+rk13:	.quad		0xf7f9000000000000
+rk14:	.quad		0x80a6000000000000
+rk15:	.quad		0x044c000000000000
+rk16:	.quad		0xe658000000000000
+rk17:	.quad		0xad18000000000000
+rk18:	.quad		0xa497000000000000
+rk19:	.quad		0x6ee3000000000000
+rk20:	.quad		0xe7b5000000000000
+rk1:	.quad		0x2d56000000000000
+rk2:	.quad		0x06df000000000000
+
+tbl_shf_table:
+// use these values for shift constants for the tbl/tbx instruction
+// different alignments result in values as shown:
+//	DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
+//	DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-2) / shr2
+//	DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-3) / shr3
+//	DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
+//	DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
+//	DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
+//	DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9  (16-7) / shr7
+//	DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8  (16-8) / shr8
+//	DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7  (16-9) / shr9
+//	DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6  (16-10) / shr10
+//	DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5  (16-11) / shr11
+//	DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4  (16-12) / shr12
+//	DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3  (16-13) / shr13
+//	DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2  (16-14) / shr14
+//	DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1  (16-15) / shr15
+
+	.byte		 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
+	.byte		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
+	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe , 0x0
diff --git a/arch/arm/crypto/crct10dif-ce-glue.c b/arch/arm/crypto/crct10dif-ce-glue.c
new file mode 100644
index 000000000000..d428355cf38d
--- /dev/null
+++ b/arch/arm/crypto/crct10dif-ce-glue.c
@@ -0,0 +1,101 @@
+/*
+ * Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions instructions
+ *
+ * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/crc-t10dif.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+
+#include <crypto/internal/hash.h>
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+#define CRC_T10DIF_PMULL_CHUNK_SIZE	16U
+
+asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u32 len);
+
+static int crct10dif_init(struct shash_desc *desc)
+{
+	u16 *crc = shash_desc_ctx(desc);
+
+	*crc = 0;
+	return 0;
+}
+
+static int crct10dif_update(struct shash_desc *desc, const u8 *data,
+			    unsigned int length)
+{
+	u16 *crc = shash_desc_ctx(desc);
+	unsigned int l;
+
+	if (!may_use_simd()) {
+		*crc = crc_t10dif_generic(*crc, data, length);
+	} else {
+		if (unlikely((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE)) {
+			l = min_t(u32, length, CRC_T10DIF_PMULL_CHUNK_SIZE -
+				  ((u32)data % CRC_T10DIF_PMULL_CHUNK_SIZE));
+
+			*crc = crc_t10dif_generic(*crc, data, l);
+
+			length -= l;
+			data += l;
+		}
+		if (length > 0) {
+			kernel_neon_begin();
+			*crc = crc_t10dif_pmull(*crc, data, length);
+			kernel_neon_end();
+		}
+	}
+	return 0;
+}
+
+static int crct10dif_final(struct shash_desc *desc, u8 *out)
+{
+	u16 *crc = shash_desc_ctx(desc);
+
+	*(u16 *)out = *crc;
+	return 0;
+}
+
+static struct shash_alg crc_t10dif_alg = {
+	.digestsize		= CRC_T10DIF_DIGEST_SIZE,
+	.init			= crct10dif_init,
+	.update			= crct10dif_update,
+	.final			= crct10dif_final,
+	.descsize		= CRC_T10DIF_DIGEST_SIZE,
+
+	.base.cra_name		= "crct10dif",
+	.base.cra_driver_name	= "crct10dif-arm-ce",
+	.base.cra_priority	= 200,
+	.base.cra_blocksize	= CRC_T10DIF_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+};
+
+static int __init crc_t10dif_mod_init(void)
+{
+	if (!(elf_hwcap2 & HWCAP2_PMULL))
+		return -ENODEV;
+
+	return crypto_register_shash(&crc_t10dif_alg);
+}
+
+static void __exit crc_t10dif_mod_exit(void)
+{
+	crypto_unregister_shash(&crc_t10dif_alg);
+}
+
+module_init(crc_t10dif_mod_init);
+module_exit(crc_t10dif_mod_exit);
+
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("crct10dif");
-- 
2.7.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox