* [PATCH v2 net-next 1/2] sock: Add sock_owned_by_user_nocheck
From: Tom Herbert @ 2017-12-23 17:17 UTC (permalink / raw)
To: davem; +Cc: netdev, dvyukov, rohit, Tom Herbert
In-Reply-To: <20171223171716.16130-1-tom@quantonium.net>
This allows checking socket lock ownership with producing lockdep
warnings.
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
include/net/sock.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/include/net/sock.h b/include/net/sock.h
index 6c1db823f8b9..66fd3951e6f3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1515,6 +1515,11 @@ static inline bool sock_owned_by_user(const struct sock *sk)
return sk->sk_lock.owned;
}
+static inline bool sock_owned_by_user_nocheck(const struct sock *sk)
+{
+ return sk->sk_lock.owned;
+}
+
/* no reclassification while locks are held */
static inline bool sock_allow_reclassification(const struct sock *csk)
{
--
2.11.0
^ permalink raw reply related
* [PATCH v2 net-next 2/2] strparser: Call sock_owned_by_user_nocheck
From: Tom Herbert @ 2017-12-23 17:17 UTC (permalink / raw)
To: davem; +Cc: netdev, dvyukov, rohit, Tom Herbert
In-Reply-To: <20171223171716.16130-1-tom@quantonium.net>
strparser wants to check socket ownership without producing any
warnings. As indicated by the comment in the code, it is permissible
for owned_by_user to return true.
Fixes: 43a0c6751a322847 ("strparser: Stream parser for messages")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Tom Herbert <tom@quantonium.net>
---
net/strparser/strparser.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index c5fda15ba319..1fdab5c4eda8 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -401,7 +401,7 @@ void strp_data_ready(struct strparser *strp)
* allows a thread in BH context to safely check if the process
* lock is held. In this case, if the lock is held, queue work.
*/
- if (sock_owned_by_user(strp->sk)) {
+ if (sock_owned_by_user_nocheck(strp->sk)) {
queue_work(strp_wq, &strp->work);
return;
}
--
2.11.0
^ permalink raw reply related
* [PATCH bpf-next] bpf: fix stacksafe exploration when comparing states
From: Gianluca Borello @ 2017-12-23 10:09 UTC (permalink / raw)
To: netdev; +Cc: ast, daniel, Gianluca Borello
Commit cc2b14d51053 ("bpf: teach verifier to recognize zero initialized
stack") introduced a very relaxed check when comparing stacks of different
states, effectively returning a positive result in many cases where it
shouldn't.
This can create problems in cases such as this following C pseudocode:
long var;
long *x = bpf_map_lookup(...);
if (!x)
return;
if (*x != 0xbeef)
var = 0;
else
var = 1;
/* This is the key part, calling a helper causes an explored state
* to be saved with the information that "var" is on the stack as
* STACK_ZERO, since the helper is first met by the verifier after
* the "var = 0" assignment. This state will however be wrongly used
* also for the "var = 1" case, so the verifier assumes "var" is always
* 0 and will replace the NULL assignment with nops, because the
* search pruning prevents it from exploring the faulty branch.
*/
bpf_ktime_get_ns();
if (var)
*(long *)0 = 0xbeef;
Fix the issue by making sure that the stack is fully explored before
returning a positive comparison result.
Also attach a couple tests that highlight the bad behavior. In the first
test, without this fix instructions 16 and 17 are replaced with nops
instead of being rejected by the verifier.
The second test, instead, allows a program to make a potentially illegal
read from the stack.
Fixes: cc2b14d51053 ("bpf: teach verifier to recognize zero initialized stack")
Signed-off-by: Gianluca Borello <g.borello@gmail.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
kernel/bpf/verifier.c | 2 +-
tools/testing/selftests/bpf/test_verifier.c | 51 +++++++++++++++++++++++++++++
2 files changed, 52 insertions(+), 1 deletion(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8b442ae125d0..93e1c77dae1d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4107,7 +4107,7 @@ static bool stacksafe(struct bpf_func_state *old,
if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ))
/* explored state didn't use this */
- return true;
+ continue;
if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
continue;
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3bacff0d6f91..5e79515d10c5 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -9715,6 +9715,57 @@ static struct bpf_test tests[] = {
.result = REJECT,
.prog_type = BPF_PROG_TYPE_XDP,
},
+ {
+ "search pruning: all branches should be verified (nop operation)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_A(1),
+ BPF_MOV64_IMM(BPF_REG_4, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16),
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R6 invalid mem access 'inv'",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "search pruning: all branches should be verified (invalid stack access)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16),
+ BPF_JMP_A(1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -24),
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "invalid read from stack off -16+0 size 8",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
};
static int probe_filter_length(const struct bpf_insn *fp)
--
2.14.1
^ permalink raw reply related
* Re: [RFT net-next 1/2] net: stmmac: dwmac-meson8b: fix setting the PHY clock on Meson8b
From: Jerome Brunet @ 2017-12-23 17:32 UTC (permalink / raw)
To: Martin Blumenstingl, netdev, ingrassia
Cc: linus.luessing, khilman, linux-amlogic, narmstrong,
peppe.cavallaro, alexandre.torgue
In-Reply-To: <20171223170433.8150-2-martin.blumenstingl@googlemail.com>
On Sat, 2017-12-23 at 18:04 +0100, Martin Blumenstingl wrote:
> Meson8b only supports MPLL2 as clock input. The rate of the MPLL2 clock
> set by Odroid-C1's u-boot is close to 500MHz. The exact rate is
> 500002394Hz, which is calculated in drivers/clk/meson/clk-mpll.c
> using the following formula:
> DIV_ROUND_UP_ULL((u64)parent_rate * SDM_DEN, (SDM_DEN * n2) + sdm)
> Odroid-C1's u-boot configures MPLL2 with the following values:
> - SDM_DEN = 16384
> - SDM = 1638
> - N2 = 5
>
> The 250MHz and 25MHz clocks inside dwmac-meson8b driver are derived
> from the MPLL2 clock. Due to MPLL2 running slightly faster than 500MHz
> the common clock framework chooses dividers which are too big to
> generate the 250MHz and 25MHz clocks. Emiliano Ingrassia observed that
> the divider for the 250MHz clock was set to 0x5 which results in a clock
> rate of close to 100MHz instead of 250MHz. The divider for the 25MHz
> clock is set to 0x0 (which means "divide by 5") so the resulting RGMII
> clock is running at 20MHz (plus a few additional Hz). The RTL8211F PHY
> on Odroid-C1 however fails to operate with a 20MHz RGMII clock.
>
> Round the divider's clock rates to prevent this issue on Meson8b. This
> means we'll now end up with a clock rate of 25000120Hz (= 25MHz plus
> 120Hz).
> This has no effect on the Meson GX SoCs since there fclk_div2 is used as
> input clock, which has a rate of 1000MHz (and thus is divisible cleanly
> to 250MHz and 25MHz).
>
> Fixes: 566e8251625304 ("net: stmmac: add a glue driver for the Amlogic Meson 8b / GXBB DWMAC")
> Reported-by: Emiliano Ingrassia <ingrassia@epigenesys.com>
> Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Makes sense to add ROUND_CLOSEST (no risk if the rate is slightly over the
requested one)
Reviewed-by: Jerome Brunet <jbrunet@baylibre.com>
^ permalink raw reply
* Re: [RFT net-next 2/2] net: stmmac: dwmac-meson8b: don't try to change m250_div parent's rate
From: Jerome Brunet @ 2017-12-23 17:40 UTC (permalink / raw)
To: Martin Blumenstingl, netdev, ingrassia
Cc: linus.luessing, khilman, linux-amlogic, narmstrong,
peppe.cavallaro, alexandre.torgue
In-Reply-To: <20171223170433.8150-3-martin.blumenstingl@googlemail.com>
On Sat, 2017-12-23 at 18:04 +0100, Martin Blumenstingl wrote:
> Trying to set the rate of m250_div's parent clock makes no sense since
> it's a mux which has neither CLK_MUX_ROUND_CLOSEST nor
> CLK_SET_RATE_PARENT set.
> It even does harm on Meson8b SoCs where the input clock for the mux
> cannot be divided down to 250MHz evenly (the parent rate is 500002394Hz)
So your problem is more with the mux actually, not the divider. Instead of
removing CLK_SET_RATE_PARENT from the divider, may I suggest to put
CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT on the parent mux, and keep
CLK_SET_RATE_PARENT (with CLK_DIVIDER_ROUND_CLOSEST) on the divS.
I suppose it would a accomplish the same thing with one added benefits for
meson8b :
If the bootloader did not set the mpll2 to the correct rate, it will now be done
thanks to rate propagation.
If I missed anything, feel free to point it out.
Cheers
> which is why we need to use CLK_DIVIDER_ROUND_CLOSEST for the m250_div
> clock. The clk-divider driver however ignores the
> CLK_DIVIDER_ROUND_CLOSEST flag if CLK_SET_RATE_PARENT is set (because
> it simply tries to set the best possible clock rate for the parent,
> which does nothing in our case since the parent is a mux which doesn't
> allow rate changes as explained above).
>
> This fixes setting the RGMII clock on Meson8 SoCs which ended up with a
> ~20MHz clock instead of the expected ~25MHz.
> The dwmac-meson8b driver requests a 25MHz clock rate for the m25_div
> (which only supports "divide by 5" and "divide by 10") clock which is
> derived from the m250_div clock. Due to clk-divider ignoring the
> CLK_DIVIDER_ROUND_CLOSEST flag the resulting m250_div clock was set to
> ~100MHz (divider = 5) and the m25_div clock was set to ~20MHz (divider =
> 5) by the common clock framework (as this value is closest to 25MHz if
> we would not have set CLK_DIVIDER_ROUND_CLOSEST). What we actually need
> however is a rate of ~250MHz on the m250_div clock (divider = 2) and
> ~25MHz on the m25_div clock (divider = 10) - these are also the values
> chosen by the out-of-tree vendor driver.
> With this we end up with a RGMII clock of 25000120Hz (which is as close
> to 25MHz we can get with an input clock of 500002394Hz).
>
> SoCs from the Meson GX series are not affected by this change because
> the input clock is FCLK_DIV2 whose rate cannot be changed. Additionally
> the GX SoCs don't need to use the "closest" divider since the parent
> clock is a multiple of 250MHz.
>
> Fixes: 566e8251625304 ("net: stmmac: add a glue driver for the Amlogic Meson 8b / GXBB DWMAC")
> Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
> ---
> drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
> index c71966332387..26f41c117d63 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
> @@ -135,7 +135,7 @@ static int meson8b_init_clk(struct meson8b_dwmac *dwmac)
> snprintf(clk_name, sizeof(clk_name), "%s#m250_div", dev_name(dev));
> init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL);
> init.ops = &clk_divider_ops;
> - init.flags = CLK_SET_RATE_PARENT;
> + init.flags = 0;
> clk_div_parents[0] = __clk_get_name(dwmac->m250_mux_clk);
> init.parent_names = clk_div_parents;
> init.num_parents = ARRAY_SIZE(clk_div_parents);
^ permalink raw reply
* Re: [RFT net-next 2/2] net: stmmac: dwmac-meson8b: don't try to change m250_div parent's rate
From: Jerome Brunet @ 2017-12-23 17:43 UTC (permalink / raw)
To: Martin Blumenstingl, netdev, ingrassia
Cc: linus.luessing, khilman, linux-amlogic, narmstrong,
peppe.cavallaro, alexandre.torgue
In-Reply-To: <1514050857.29566.53.camel@baylibre.com>
On Sat, 2017-12-23 at 18:40 +0100, Jerome Brunet wrote:
> > Trying to set the rate of m250_div's parent clock makes no sense since
> > it's a mux which has neither CLK_MUX_ROUND_CLOSEST nor
> > CLK_SET_RATE_PARENT set.
> > It even does harm on Meson8b SoCs where the input clock for the mux
> > cannot be divided down to 250MHz evenly (the parent rate is 500002394Hz)
>
> So your problem is more with the mux actually, not the divider. Instead of
> removing CLK_SET_RATE_PARENT from the divider, may I suggest to put
>
> CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT on the parent mux, and keep
Thinking about it, you don't even need CLK_SET_RATE_NO_REPARENT. Just let rate
propagation figure out the best combination
> CLK_SET_RATE_PARENT (with CLK_DIVIDER_ROUND_CLOSEST) on the divS.
>
> I suppose it would a accomplish the same thing with one added benefits for
> meson8b :
>
> If the bootloader did not set the mpll2 to the correct rate, it will now be done
> thanks to rate propagation.
>
> If I missed anything, feel free to point it out.
>
> Cheers
^ permalink raw reply
* Re: [PATCH 2/2] Revert "xfrm: Fix stack-out-of-bounds read in xfrm_state_find."
From: robsonde @ 2017-12-23 18:10 UTC (permalink / raw)
To: Steffen Klassert; +Cc: David Miller, herbert, netdev
In-Reply-To: <20171223160938.rmfwopbmeyepndh5@gauss3.secunet.de>
> On 24/12/2017, at 5:09 AM, Steffen Klassert <steffen.klassert@secunet.com> wrote:
>
>> On Sat, Dec 23, 2017 at 10:56:12AM -0500, David Miller wrote:
>> From: Steffen Klassert <steffen.klassert@secunet.com>
>> Date: Sat, 23 Dec 2017 10:22:17 +0100
>>
>>>> On Thu, Nov 16, 2017 at 11:00:40AM +0100, Steffen Klassert wrote:
>>>> This reverts commit c9f3f813d462c72dbe412cee6a5cbacf13c4ad5e.
>>>>
>>>> This commit breaks transport mode when the policy template
>>>> has widlcard addresses configured, so revert it.
>>>>
>>>> Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
>>>
>>> David, can you please queue this one up for v4.14-stable?
>>> Commit ID is 94802151894d482e82c324edf2c658f8e6b96508
>>>
>>> v4.14 is unusable for some people without this revert.
>>
>> Yes, but it adds back the stack out-of-bounds bug.
>>
>> If I queue up the revert, I would also need to queue up whatever
>> follow-on you used to fix the out-of-bounds bug properly. Which
>> commit is that?
>
> This is commit ddc47e4404b58f03e98345398fb12d38fe291512
> ("xfrm: Fix stack-out-of-bounds read on socket policy lookup.")
>
> It is included in the pull request for the net tree that
> I sent yesterday. The patch looks save, but not so sure
> if it should go directly to stable. These bugs reported by
> the syzbot are usually quite subtile and I already broke
> something when I tried to fix the original stack out-of-bounds
> bug. So maybe we should wait until the v4.15 release before
> backporting...
>
At this time I cant build an IPSec VPN on a 4.14 kernel. 4.14 is LTS and so has been used as kernel choice for a prod system. This is a production fault.
My only choice is going back to a 4.13 or waiting for 4.15 or a custom build, which I am not keen on.
Unless the memory bug effects security and can be exploited, then I ask that the revert be back ported to 4.14
Thanks
^ permalink raw reply
* Re: [PATCH bpf-next] bpf: fix stacksafe exploration when comparing states
From: Alexei Starovoitov @ 2017-12-23 19:08 UTC (permalink / raw)
To: Gianluca Borello; +Cc: netdev, ast, daniel
In-Reply-To: <20171223100955.7208-1-g.borello@gmail.com>
On Sat, Dec 23, 2017 at 10:09:55AM +0000, Gianluca Borello wrote:
...
> Fixes: cc2b14d51053 ("bpf: teach verifier to recognize zero initialized stack")
> Signed-off-by: Gianluca Borello <g.borello@gmail.com>
> Acked-by: Alexei Starovoitov <ast@kernel.org>
> ---
> kernel/bpf/verifier.c | 2 +-
> tools/testing/selftests/bpf/test_verifier.c | 51 +++++++++++++++++++++++++++++
> 2 files changed, 52 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 8b442ae125d0..93e1c77dae1d 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -4107,7 +4107,7 @@ static bool stacksafe(struct bpf_func_state *old,
>
> if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ))
> /* explored state didn't use this */
> - return true;
> + continue;
argh. Not sure what I was thinking.
Applied, Thanks a lot Gianluca!
^ permalink raw reply
* Re: [RFT net-next 2/2] net: stmmac: dwmac-meson8b: don't try to change m250_div parent's rate
From: Martin Blumenstingl @ 2017-12-23 20:00 UTC (permalink / raw)
To: Jerome Brunet
Cc: netdev, ingrassia, linus.luessing, khilman, linux-amlogic,
Neil Armstrong, peppe.cavallaro, alexandre.torgue
In-Reply-To: <1514050857.29566.53.camel@baylibre.com>
Hi Jerome,
On Sat, Dec 23, 2017 at 6:40 PM, Jerome Brunet <jbrunet@baylibre.com> wrote:
> On Sat, 2017-12-23 at 18:04 +0100, Martin Blumenstingl wrote:
>> Trying to set the rate of m250_div's parent clock makes no sense since
>> it's a mux which has neither CLK_MUX_ROUND_CLOSEST nor
>> CLK_SET_RATE_PARENT set.
>> It even does harm on Meson8b SoCs where the input clock for the mux
>> cannot be divided down to 250MHz evenly (the parent rate is 500002394Hz)
>
> So your problem is more with the mux actually, not the divider. Instead of
> removing CLK_SET_RATE_PARENT from the divider, may I suggest to put
>
> CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT on the parent mux, and keep
> CLK_SET_RATE_PARENT (with CLK_DIVIDER_ROUND_CLOSEST) on the divS.
I just gave this a try, here's the result:
mpll2 1 1 127488329
0 0
c9410000.ethernet#m250_sel 1 1
127488329 0 0
c9410000.ethernet#m250_div 1 1
127488329 0 0
c9410000.ethernet#m25_div 1 1
25497666 0 0
> I suppose it would a accomplish the same thing with one added benefits for
> meson8b :
>
> If the bootloader did not set the mpll2 to the correct rate, it will now be done
> thanks to rate propagation.
indeed, mpll2 is set to 0 by the bootloader on my board
with that change we'll now also set the rate "correctly" (see below)
> If I missed anything, feel free to point it out.
it seems however that clk-mpll incorrectly calculates the register values
with the mpll2 register values from Emiliano we can get to 25000120Hz
however, I guess we need to have a look at the clk-mpll driver because:
- by letting the common clock framework set the rate of mpll2 we get
25497666Hz (which means we're off by by ~500kHz, instead of 120Hz)
- according to the datasheet the allowed range of the mpll2 clock is
250..637MHz - 127488329Hz is what we get from the common clock
framework
Jerome: any idea why that is (more theoretical number games below though :))?
just a reminder from the other patch - these are the values used for
the mpll2 clock:
- parent rate = 2550MHz
- SDM_DEN = 16384
- SDM = 1638
- N2 = 5
= (parent rate 2550000000 * SDM_DEN 16384) / ((SDM_DEN 16384 * N2 5) +
1638) = 500002394Hz
using an SDM of 1639 gives us a 499996410Hz mpll2 clock
with all the dividers we get down to a RGMII clock of 24999821Hz which
is 179Hz off the desired 25MHz
in other words: the mpll2 values set by Odroid-C1's u-boot are "best",
so if we try to set mpll2's rate through the common clock framework we
should try to get the same results (else we would just make the result
worse)
> Cheers
>
>> which is why we need to use CLK_DIVIDER_ROUND_CLOSEST for the m250_div
>> clock. The clk-divider driver however ignores the
>> CLK_DIVIDER_ROUND_CLOSEST flag if CLK_SET_RATE_PARENT is set (because
>> it simply tries to set the best possible clock rate for the parent,
>> which does nothing in our case since the parent is a mux which doesn't
>> allow rate changes as explained above).
>>
>> This fixes setting the RGMII clock on Meson8 SoCs which ended up with a
>> ~20MHz clock instead of the expected ~25MHz.
>> The dwmac-meson8b driver requests a 25MHz clock rate for the m25_div
>> (which only supports "divide by 5" and "divide by 10") clock which is
>> derived from the m250_div clock. Due to clk-divider ignoring the
>> CLK_DIVIDER_ROUND_CLOSEST flag the resulting m250_div clock was set to
>> ~100MHz (divider = 5) and the m25_div clock was set to ~20MHz (divider =
>> 5) by the common clock framework (as this value is closest to 25MHz if
>> we would not have set CLK_DIVIDER_ROUND_CLOSEST). What we actually need
>> however is a rate of ~250MHz on the m250_div clock (divider = 2) and
>> ~25MHz on the m25_div clock (divider = 10) - these are also the values
>> chosen by the out-of-tree vendor driver.
>> With this we end up with a RGMII clock of 25000120Hz (which is as close
>> to 25MHz we can get with an input clock of 500002394Hz).
>>
>> SoCs from the Meson GX series are not affected by this change because
>> the input clock is FCLK_DIV2 whose rate cannot be changed. Additionally
>> the GX SoCs don't need to use the "closest" divider since the parent
>> clock is a multiple of 250MHz.
>>
>> Fixes: 566e8251625304 ("net: stmmac: add a glue driver for the Amlogic Meson 8b / GXBB DWMAC")
>> Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
>> ---
>> drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
>> index c71966332387..26f41c117d63 100644
>> --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
>> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
>> @@ -135,7 +135,7 @@ static int meson8b_init_clk(struct meson8b_dwmac *dwmac)
>> snprintf(clk_name, sizeof(clk_name), "%s#m250_div", dev_name(dev));
>> init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL);
>> init.ops = &clk_divider_ops;
>> - init.flags = CLK_SET_RATE_PARENT;
>> + init.flags = 0;
>> clk_div_parents[0] = __clk_get_name(dwmac->m250_mux_clk);
>> init.parent_names = clk_div_parents;
>> init.num_parents = ARRAY_SIZE(clk_div_parents);
>
Regards
Martin
^ permalink raw reply
* Re: [RFT net-next 2/2] net: stmmac: dwmac-meson8b: don't try to change m250_div parent's rate
From: Jerome Brunet @ 2017-12-23 20:40 UTC (permalink / raw)
To: Martin Blumenstingl
Cc: netdev, ingrassia, linus.luessing, khilman, linux-amlogic,
Neil Armstrong, peppe.cavallaro, alexandre.torgue
In-Reply-To: <CAFBinCBu-mswBeZudwcKVE921NFCNDLJ+ed1QUqFssDy+ar5vA@mail.gmail.com>
On Sat, 2017-12-23 at 21:00 +0100, Martin Blumenstingl wrote:
> Hi Jerome,
>
> On Sat, Dec 23, 2017 at 6:40 PM, Jerome Brunet <jbrunet@baylibre.com> wrote:
> > On Sat, 2017-12-23 at 18:04 +0100, Martin Blumenstingl wrote:
> > > Trying to set the rate of m250_div's parent clock makes no sense since
> > > it's a mux which has neither CLK_MUX_ROUND_CLOSEST nor
> > > CLK_SET_RATE_PARENT set.
> > > It even does harm on Meson8b SoCs where the input clock for the mux
> > > cannot be divided down to 250MHz evenly (the parent rate is 500002394Hz)
> >
> > So your problem is more with the mux actually, not the divider. Instead of
> > removing CLK_SET_RATE_PARENT from the divider, may I suggest to put
> >
> > CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT on the parent mux, and keep
> > CLK_SET_RATE_PARENT (with CLK_DIVIDER_ROUND_CLOSEST) on the divS.
>
> I just gave this a try, here's the result:
> mpll2 1 1 127488329
> 0 0
> c9410000.ethernet#m250_sel 1 1
> 127488329 0 0
> c9410000.ethernet#m250_div 1 1
> 127488329 0 0
> c9410000.ethernet#m25_div 1 1
> 25497666 0 0
>
> > I suppose it would a accomplish the same thing with one added benefits for
> > meson8b :
> >
> > If the bootloader did not set the mpll2 to the correct rate, it will now be done
> > thanks to rate propagation.
>
> indeed, mpll2 is set to 0 by the bootloader on my board
> with that change we'll now also set the rate "correctly" (see below)
>
> > If I missed anything, feel free to point it out.
>
> it seems however that clk-mpll incorrectly calculates the register values
> with the mpll2 register values from Emiliano we can get to 25000120Hz
> however, I guess we need to have a look at the clk-mpll
Huuuu ! not again ! ;)
> driver because:
> - by letting the common clock framework set the rate of mpll2 we get
> 25497666Hz (which means we're off by by ~500kHz, instead of 120Hz)
Could you dump the SDM and N2 values (devmem) that have been applied by CCF ?
to compare. If a better solution is available, I'm a bit surprised we don't find
it. You may have a found something worth checking ...
> - according to the datasheet the allowed range of the mpll2 clock is
> 250..637MHz - 127488329Hz is what we get from the common clock
> framework
Yes, I've seen that but we are able compute out of that range and, so far, the
actual rate of clock seems coherent with the calculation. At least, when I
tested with the audio, it was the case.
>
> Jerome: any idea why that is (more theoretical number games below though :))?
>
> just a reminder from the other patch - these are the values used for
> the mpll2 clock:
> - parent rate = 2550MHz
> - SDM_DEN = 16384
> - SDM = 1638
> - N2 = 5
> = (parent rate 2550000000 * SDM_DEN 16384) / ((SDM_DEN 16384 * N2 5) +
> 1638) = 500002394Hz
>
> using an SDM of 1639 gives us a 499996410Hz mpll2 clock
> with all the dividers we get down to a RGMII clock of 24999821Hz which
> is 179Hz off the desired 25MHz
> in other words: the mpll2 values set by Odroid-C1's u-boot are "best",
> so if we try to set mpll2's rate through the common clock framework we
> should try to get the same results (else we would just make the result
> worse)
Indeed, we should be able to do a lot better than 500kHz error. We should get to
the bottom of this. When testing on the S905 with audio, I got very values so I
did not question the mpll driver too much. Maybe there is something there.
Purely for debugging, from the ethernet driver, Could you try the following:
- Remove CLK_SET_RATE_PARENT from div250 to break the propagation there
- call set_rate on the mux with 500Mhz (we'll see far off we really are compared
to u-boot values and we will be able to compare)
- call set_rate on div25 as usual to get your ethernet running.
>
> > Cheers
> >
> > > which is why we need to use CLK_DIVIDER_ROUND_CLOSEST for the m250_div
> > > clock. The clk-divider driver however ignores the
> > > CLK_DIVIDER_ROUND_CLOSEST flag if CLK_SET_RATE_PARENT is set (because
> > > it simply tries to set the best possible clock rate for the parent,
> > > which does nothing in our case since the parent is a mux which doesn't
> > > allow rate changes as explained above).
> > >
> > > This fixes setting the RGMII clock on Meson8 SoCs which ended up with a
> > > ~20MHz clock instead of the expected ~25MHz.
> > > The dwmac-meson8b driver requests a 25MHz clock rate for the m25_div
> > > (which only supports "divide by 5" and "divide by 10") clock which is
> > > derived from the m250_div clock. Due to clk-divider ignoring the
> > > CLK_DIVIDER_ROUND_CLOSEST flag the resulting m250_div clock was set to
> > > ~100MHz (divider = 5) and the m25_div clock was set to ~20MHz (divider =
> > > 5) by the common clock framework (as this value is closest to 25MHz if
> > > we would not have set CLK_DIVIDER_ROUND_CLOSEST). What we actually need
> > > however is a rate of ~250MHz on the m250_div clock (divider = 2) and
> > > ~25MHz on the m25_div clock (divider = 10) - these are also the values
> > > chosen by the out-of-tree vendor driver.
> > > With this we end up with a RGMII clock of 25000120Hz (which is as close
> > > to 25MHz we can get with an input clock of 500002394Hz).
> > >
> > > SoCs from the Meson GX series are not affected by this change because
> > > the input clock is FCLK_DIV2 whose rate cannot be changed. Additionally
> > > the GX SoCs don't need to use the "closest" divider since the parent
> > > clock is a multiple of 250MHz.
> > >
> > > Fixes: 566e8251625304 ("net: stmmac: add a glue driver for the Amlogic Meson 8b / GXBB DWMAC")
> > > Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
> > > ---
> > > drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 2 +-
> > > 1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
> > > index c71966332387..26f41c117d63 100644
> > > --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
> > > +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
> > > @@ -135,7 +135,7 @@ static int meson8b_init_clk(struct meson8b_dwmac *dwmac)
> > > snprintf(clk_name, sizeof(clk_name), "%s#m250_div", dev_name(dev));
> > > init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL);
> > > init.ops = &clk_divider_ops;
> > > - init.flags = CLK_SET_RATE_PARENT;
> > > + init.flags = 0;
> > > clk_div_parents[0] = __clk_get_name(dwmac->m250_mux_clk);
> > > init.parent_names = clk_div_parents;
> > > init.num_parents = ARRAY_SIZE(clk_div_parents);
>
> Regards
> Martin
^ permalink raw reply
* Re: [RFT net-next 2/2] net: stmmac: dwmac-meson8b: don't try to change m250_div parent's rate
From: Martin Blumenstingl @ 2017-12-23 21:49 UTC (permalink / raw)
To: Jerome Brunet
Cc: netdev, ingrassia, linus.luessing, khilman, linux-amlogic,
Neil Armstrong, peppe.cavallaro, alexandre.torgue
In-Reply-To: <1514061655.2373.13.camel@baylibre.com>
Hi Jerome,
On Sat, Dec 23, 2017 at 9:40 PM, Jerome Brunet <jbrunet@baylibre.com> wrote:
> On Sat, 2017-12-23 at 21:00 +0100, Martin Blumenstingl wrote:
>> Hi Jerome,
>>
>> On Sat, Dec 23, 2017 at 6:40 PM, Jerome Brunet <jbrunet@baylibre.com> wrote:
>> > On Sat, 2017-12-23 at 18:04 +0100, Martin Blumenstingl wrote:
>> > > Trying to set the rate of m250_div's parent clock makes no sense since
>> > > it's a mux which has neither CLK_MUX_ROUND_CLOSEST nor
>> > > CLK_SET_RATE_PARENT set.
>> > > It even does harm on Meson8b SoCs where the input clock for the mux
>> > > cannot be divided down to 250MHz evenly (the parent rate is 500002394Hz)
>> >
>> > So your problem is more with the mux actually, not the divider. Instead of
>> > removing CLK_SET_RATE_PARENT from the divider, may I suggest to put
>> >
>> > CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT on the parent mux, and keep
>> > CLK_SET_RATE_PARENT (with CLK_DIVIDER_ROUND_CLOSEST) on the divS.
>>
>> I just gave this a try, here's the result:
>> mpll2 1 1 127488329
>> 0 0
>> c9410000.ethernet#m250_sel 1 1
>> 127488329 0 0
>> c9410000.ethernet#m250_div 1 1
>> 127488329 0 0
>> c9410000.ethernet#m25_div 1 1
>> 25497666 0 0
>>
>> > I suppose it would a accomplish the same thing with one added benefits for
>> > meson8b :
>> >
>> > If the bootloader did not set the mpll2 to the correct rate, it will now be done
>> > thanks to rate propagation.
>>
>> indeed, mpll2 is set to 0 by the bootloader on my board
>> with that change we'll now also set the rate "correctly" (see below)
>>
>> > If I missed anything, feel free to point it out.
>>
>> it seems however that clk-mpll incorrectly calculates the register values
>> with the mpll2 register values from Emiliano we can get to 25000120Hz
>> however, I guess we need to have a look at the clk-mpll
>
> Huuuu ! not again ! ;)
>
>> driver because:
>> - by letting the common clock framework set the rate of mpll2 we get
>> 25497666Hz (which means we're off by by ~500kHz, instead of 120Hz)
>
> Could you dump the SDM and N2 values (devmem) that have been applied by CCF ?
> to compare. If a better solution is available, I'm a bit surprised we don't find
> it. You may have a found something worth checking ...
while calculating this with a target frequency of 500MHz manually
again I saw that there's a remainder of 10Mhz after the initial
division.
remainder * SDM_DEN = 163840000000 - this value overflows 32-bit,
things will go downhill from here... :)
long story short: here's a patch for that issue: [0]
the results with "CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT" on the mux:
fixed_pll 3 3 2550000000
0 0
mpll2 1 1 124999851
0 0
c9410000.ethernet#m250_sel 1 1
124999851 0 0
c9410000.ethernet#m250_div 1 1
124999851 0 0
c9410000.ethernet#m25_div 1 1
24999971 0 0
this is even closer to 25MHz than the values in the vendor driver
(120Hz vs 29Hz)
I'll re-spin this series, then Emiliano "just" has to confirm that
it's also working for him (despite the dividers in the dwmac-meson8b
driver are set up different compared to the vendor driver)
>> - according to the datasheet the allowed range of the mpll2 clock is
>> 250..637MHz - 127488329Hz is what we get from the common clock
>> framework
>
> Yes, I've seen that but we are able compute out of that range and, so far, the
> actual rate of clock seems coherent with the calculation. At least, when I
> tested with the audio, it was the case.
I'm curious: ...on a GX SoCs probably?
>>
>> Jerome: any idea why that is (more theoretical number games below though :))?
>>
>> just a reminder from the other patch - these are the values used for
>> the mpll2 clock:
>> - parent rate = 2550MHz
>> - SDM_DEN = 16384
>> - SDM = 1638
>> - N2 = 5
>> = (parent rate 2550000000 * SDM_DEN 16384) / ((SDM_DEN 16384 * N2 5) +
>> 1638) = 500002394Hz
>>
>> using an SDM of 1639 gives us a 499996410Hz mpll2 clock
>> with all the dividers we get down to a RGMII clock of 24999821Hz which
>> is 179Hz off the desired 25MHz
>> in other words: the mpll2 values set by Odroid-C1's u-boot are "best",
>> so if we try to set mpll2's rate through the common clock framework we
>> should try to get the same results (else we would just make the result
>> worse)
>
> Indeed, we should be able to do a lot better than 500kHz error. We should get to
> the bottom of this. When testing on the S905 with audio, I got very values so I
> did not question the mpll driver too much. Maybe there is something there.
>
> Purely for debugging, from the ethernet driver, Could you try the following:
> - Remove CLK_SET_RATE_PARENT from div250 to break the propagation there
> - call set_rate on the mux with 500Mhz (we'll see far off we really are compared
> to u-boot values and we will be able to compare)
> - call set_rate on div25 as usual to get your ethernet running.
>
>>
>> > Cheers
>> >
>> > > which is why we need to use CLK_DIVIDER_ROUND_CLOSEST for the m250_div
>> > > clock. The clk-divider driver however ignores the
>> > > CLK_DIVIDER_ROUND_CLOSEST flag if CLK_SET_RATE_PARENT is set (because
>> > > it simply tries to set the best possible clock rate for the parent,
>> > > which does nothing in our case since the parent is a mux which doesn't
>> > > allow rate changes as explained above).
>> > >
>> > > This fixes setting the RGMII clock on Meson8 SoCs which ended up with a
>> > > ~20MHz clock instead of the expected ~25MHz.
>> > > The dwmac-meson8b driver requests a 25MHz clock rate for the m25_div
>> > > (which only supports "divide by 5" and "divide by 10") clock which is
>> > > derived from the m250_div clock. Due to clk-divider ignoring the
>> > > CLK_DIVIDER_ROUND_CLOSEST flag the resulting m250_div clock was set to
>> > > ~100MHz (divider = 5) and the m25_div clock was set to ~20MHz (divider =
>> > > 5) by the common clock framework (as this value is closest to 25MHz if
>> > > we would not have set CLK_DIVIDER_ROUND_CLOSEST). What we actually need
>> > > however is a rate of ~250MHz on the m250_div clock (divider = 2) and
>> > > ~25MHz on the m25_div clock (divider = 10) - these are also the values
>> > > chosen by the out-of-tree vendor driver.
>> > > With this we end up with a RGMII clock of 25000120Hz (which is as close
>> > > to 25MHz we can get with an input clock of 500002394Hz).
>> > >
>> > > SoCs from the Meson GX series are not affected by this change because
>> > > the input clock is FCLK_DIV2 whose rate cannot be changed. Additionally
>> > > the GX SoCs don't need to use the "closest" divider since the parent
>> > > clock is a multiple of 250MHz.
>> > >
>> > > Fixes: 566e8251625304 ("net: stmmac: add a glue driver for the Amlogic Meson 8b / GXBB DWMAC")
>> > > Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
>> > > ---
>> > > drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 2 +-
>> > > 1 file changed, 1 insertion(+), 1 deletion(-)
>> > >
>> > > diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
>> > > index c71966332387..26f41c117d63 100644
>> > > --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
>> > > +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
>> > > @@ -135,7 +135,7 @@ static int meson8b_init_clk(struct meson8b_dwmac *dwmac)
>> > > snprintf(clk_name, sizeof(clk_name), "%s#m250_div", dev_name(dev));
>> > > init.name = devm_kstrdup(dev, clk_name, GFP_KERNEL);
>> > > init.ops = &clk_divider_ops;
>> > > - init.flags = CLK_SET_RATE_PARENT;
>> > > + init.flags = 0;
>> > > clk_div_parents[0] = __clk_get_name(dwmac->m250_mux_clk);
>> > > init.parent_names = clk_div_parents;
>> > > init.num_parents = ARRAY_SIZE(clk_div_parents);
>>
>> Regards
>> Martin
>
Regards
Martin
[0] http://lists.infradead.org/pipermail/linux-amlogic/2017-December/005856.html
^ permalink raw reply
* Re: [PATCH net v2] netns, rtnetlink: fix struct net reference leak
From: Nicolas Dichtel @ 2017-12-23 22:12 UTC (permalink / raw)
To: Craig Gallek, David Miller, Jiri Benc; +Cc: netdev, Jason A . Donenfeld
In-Reply-To: <20171222203626.113363-1-kraigatgoog@gmail.com>
Le 22/12/2017 à 21:36, Craig Gallek a écrit :
> From: Craig Gallek <kraig@google.com>
>
> netns ids were added in commit 0c7aecd4bde4 and defined as signed
> integers in both the kernel datastructures and the netlink interface.
> However, the semantics of the implementation assume that the ids
> are always greater than or equal to zero, except for an internal
> sentinal value NETNSA_NSID_NOT_ASSIGNED.
>
> Several subsequent patches carried this pattern forward. This patch
> updates all of the netlink input paths of this value to enforce the
> 'greater than or equal to zero' constraint.
>
> This issue was discovered by syskaller. It would set a negative
> value for a netns id and then repeatedly call the RTM_GETLINK.
> The put_net call in that path would not trigger for negative netns ids,
> caused a reference count leak, and eventually overflowed. There are
> probably additional error paths that do not handle this situation
> correctly, but this was the only one I was able to trigger a real
> issue through.
>
> Fixes: 0c7aecd4bde4 ("netns: add rtnl cmd to add and get peer netns ids")
> Fixes: 317f4810e45e ("rtnl: allow to create device with IFLA_LINK_NETNSID set")
> Fixes: 79e1ad148c84 ("rtnetlink: use netnsid to query interface")
> CC: Jiri Benc <jbenc@redhat.com>
> CC: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> CC: Jason A. Donenfeld <Jason@zx2c4.com>
> Signed-off-by: Craig Gallek <kraig@google.com>
> ---
> net/core/net_namespace.c | 2 ++
> net/core/rtnetlink.c | 17 +++++++++++++----
> 2 files changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index 60a71be75aea..4b7ea33f5705 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -627,6 +627,8 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
> return -EINVAL;
> }
> nsid = nla_get_s32(tb[NETNSA_NSID]);
> + if (nsid < 0)
> + return -EINVAL;
No, this breaks the current behavior.
Look at alloc_netid(). If reqid is < 0, the kernel allocates an nsid with no
constraint. If reqid is >= 0, it tries to alloc the specified nsid.
>
> if (tb[NETNSA_PID]) {
> peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index dabba2a91fc8..a928b8f081b8 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -1733,10 +1733,12 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
> ifla_policy, NULL) >= 0) {
> if (tb[IFLA_IF_NETNSID]) {
> netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
> - tgt_net = get_target_net(skb, netnsid);
> - if (IS_ERR(tgt_net)) {
> - tgt_net = net;
> - netnsid = -1;
> + if (netnsid >= 0) {
> + tgt_net = get_target_net(skb, netnsid);
I would prefer to put this test in get_target_net.
> + if (IS_ERR(tgt_net)) {
> + tgt_net = net;
> + netnsid = -1;
Maybe using NETNSA_NSID_NOT_ASSIGNED is better? Same for the initialization of
this variable.
> + }
> }
> }
>
> @@ -2792,6 +2794,11 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
> if (tb[IFLA_LINK_NETNSID]) {
> int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
>
> + if (id < 0) {
> + err = -EINVAL;
> + goto out;
> + }
> +
This is not needed. get_net_ns_by_id() returns NULL if id is < 0.
> link_net = get_net_ns_by_id(dest_net, id);
> if (!link_net) {
> err = -EINVAL;
> @@ -2883,6 +2890,8 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh,
>
> if (tb[IFLA_IF_NETNSID]) {
> netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]);
> + if (netnsid < 0)
> + return -EINVAL;
> tgt_net = get_target_net(skb, netnsid);
> if (IS_ERR(tgt_net))
> return PTR_ERR(tgt_net);
>
^ permalink raw reply
* Re: [RFT net-next 2/2] net: stmmac: dwmac-meson8b: don't try to change m250_div parent's rate
From: Jerome Brunet @ 2017-12-23 22:41 UTC (permalink / raw)
To: Martin Blumenstingl
Cc: netdev, ingrassia, linus.luessing, khilman, linux-amlogic,
Neil Armstrong, peppe.cavallaro, alexandre.torgue
In-Reply-To: <CAFBinCBuyNwu9L-OhsPJ72S9xNjyAWovgjFjPXGAfri-+tDxWg@mail.gmail.com>
On Sat, 2017-12-23 at 22:49 +0100, Martin Blumenstingl wrote:
> while calculating this with a target frequency of 500MHz manually
> again I saw that there's a remainder of 10Mhz after the initial
> division.
> remainder * SDM_DEN = 163840000000 - this value overflows 32-bit,
> things will go downhill from here... :)
> long story short: here's a patch for that issue: [0]
Thanks for the fix !
>
> the results with "CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT" on the mux:
Actually, I don't think you need the CLK_SET_RATE_NO_REPARENT
> fixed_pll 3 3 2550000000
> 0 0
> mpll2 1 1 124999851
> 0 0
> c9410000.ethernet#m250_sel 1 1
> 124999851 0 0
> c9410000.ethernet#m250_div 1 1
> 124999851 0 0
> c9410000.ethernet#m25_div 1 1
> 24999971 0 0
>
> this is even closer to 25MHz than the values in the vendor driver
> (120Hz vs 29Hz)
> I'll re-spin this series, then Emiliano "just" has to confirm that
> it's also working for him (despite the dividers in the dwmac-meson8b
> driver are set up different compared to the vendor driver)
if the rate of the parent clock of mux may change, I think following part should
be changed as well
case PHY_INTERFACE_MODE_RMII:
/* Use the rate of the mux clock for the internal RMII PHY */
clk_rate = clk_get_rate(dwmac->m250_mux_clk);
I think it is a bit weak anyway, as it depends on the initial settings. Do you
remember why you wrote this way ?
>
> > > - according to the datasheet the allowed range of the mpll2 clock is
> > > 250..637MHz - 127488329Hz is what we get from the common clock
> > > framework
> >
> > Yes, I've seen that but we are able compute out of that range and, so far, the
> > actual rate of clock seems coherent with the calculation. At least, when I
> > tested with the audio, it was the case.
>
> I'm curious: ...on a GX SoCs probably?
Indeed, you got me !
^ permalink raw reply
* Re: [RFT net-next 2/2] net: stmmac: dwmac-meson8b: don't try to change m250_div parent's rate
From: Martin Blumenstingl @ 2017-12-23 23:12 UTC (permalink / raw)
To: Jerome Brunet
Cc: netdev, ingrassia, linus.luessing, khilman, linux-amlogic,
Neil Armstrong, peppe.cavallaro, alexandre.torgue
In-Reply-To: <1514068861.4333.10.camel@baylibre.com>
On Sat, Dec 23, 2017 at 11:41 PM, Jerome Brunet <jbrunet@baylibre.com> wrote:
> On Sat, 2017-12-23 at 22:49 +0100, Martin Blumenstingl wrote:
>> while calculating this with a target frequency of 500MHz manually
>> again I saw that there's a remainder of 10Mhz after the initial
>> division.
>> remainder * SDM_DEN = 163840000000 - this value overflows 32-bit,
>> things will go downhill from here... :)
>> long story short: here's a patch for that issue: [0]
>
> Thanks for the fix !
>
>>
>> the results with "CLK_SET_RATE_PARENT | CLK_SET_RATE_NO_REPARENT" on the mux:
>
> Actually, I don't think you need the CLK_SET_RATE_NO_REPARENT
as I have to re-test this on the GX SoCs anyways I'll give it a go
without CLK_SET_RATE_NO_REPARENT. stay tuned...
>> fixed_pll 3 3 2550000000
>> 0 0
>> mpll2 1 1 124999851
>> 0 0
>> c9410000.ethernet#m250_sel 1 1
>> 124999851 0 0
>> c9410000.ethernet#m250_div 1 1
>> 124999851 0 0
>> c9410000.ethernet#m25_div 1 1
>> 24999971 0 0
>>
>> this is even closer to 25MHz than the values in the vendor driver
>> (120Hz vs 29Hz)
>> I'll re-spin this series, then Emiliano "just" has to confirm that
>> it's also working for him (despite the dividers in the dwmac-meson8b
>> driver are set up different compared to the vendor driver)
>
> if the rate of the parent clock of mux may change, I think following part should
> be changed as well
>
> case PHY_INTERFACE_MODE_RMII:
> /* Use the rate of the mux clock for the internal RMII PHY */
> clk_rate = clk_get_rate(dwmac->m250_mux_clk);
>
> I think it is a bit weak anyway, as it depends on the initial settings. Do you
> remember why you wrote this way ?
actually this is confusing and a no-op: I'll have to fix it with a
re-spin of this series anyways.
while trying to make sense why Ethernet wouldn't work on Emiliano's
Odroid-C1 I stumbled across this header: [0]
it explains why it doesn't matter which values are set for the
mux/dividers in RMII mode: the clocks are used to generate the "RGMII
clock"
the currrent code is a no-op since the common clock framework sets all
dividers in PRG_ETH0 to 0 - which results in a register value of
0x1800 (which used by the vendor driver for RMII)
I will add a patch which ignores the clocks in RMII mode: this is also
needed because we don't want to configure MPLL2 on Meson8b in RMII
mode
>>
>> > > - according to the datasheet the allowed range of the mpll2 clock is
>> > > 250..637MHz - 127488329Hz is what we get from the common clock
>> > > framework
>> >
>> > Yes, I've seen that but we are able compute out of that range and, so far, the
>> > actual rate of clock seems coherent with the calculation. At least, when I
>> > tested with the audio, it was the case.
>>
>> I'm curious: ...on a GX SoCs probably?
>
> Indeed, you got me !
good to know - I'll try to keep the 32-bit overflow issues in mind
when things don't work on Meson8/Meson8b even if they do work on the
GX SoCs
Regards
Martin
[0] https://github.com/endlessm/u-boot-meson/blob/345ee7eb02903f5ecb1173ffb2cd36666e44ebed/arch/arm/include/asm/arch-m8b/aml_eth_reg.h#L489
^ permalink raw reply
* [RFT net-next v2 0/3] dwmac-meson8b: RGMII clock fixes for Meson8b
From: Martin Blumenstingl @ 2017-12-23 23:40 UTC (permalink / raw)
To: netdev, ingrassia
Cc: linus.luessing, khilman, linux-amlogic, jbrunet, narmstrong,
peppe.cavallaro, alexandre.torgue, Martin Blumenstingl
Hi Dave,
please do not apply this series until it got a Tested-by from Emiliano.
Hi Emiliano,
you reported [0] that you couldn't get dwmac-meson8b to work on your
Odroid-C1. With your findings (register dumps, clk_summary output, etc.)
I think I was able to find a fix: it consists of two patches (which you
find in this series)
Unfortunately I don't have any Meson8b boards with RGMII PHY so I could
only partially test this (I could only check if the clocks were
calculated correctly when using a dummy 500002394Hz input clock instead
of MPLL2).
Could you please give this series a try and let me know about the
results?
You obviously still need your two "ARM: dts: meson8b" patches which
- add the amlogic,meson8b-dwmac" compatible to meson8b.dtsi
- enable Ethernet on the Odroid-C1
I have tested this myself on a Khadas VIM (GXL SoC, internal RMII PHY)
and a Khadas VIM2 (GXM SoC, external RGMII PHY). Both are still working
fine (so let's hope that this also fixes your Meson8b issue :)).
changes since v1 at [1]:
- changed the subject of the cover-letter to indicate that this is all
about the RGMII clock
- added PATCH #1 which ensures that we don't unnecessarily change the
parent clocks in RMII mode (and also makes the code easier to
understand)
- changed subject of PATCH #2 (formerly PATCH #1) to state that this
is about the RGMII clock
- added Jerome's Reviewed-by to PATCH #2 (formerly PATCH #1)
- replaced PATCH #3 (formerly PATCH #2) with one that sets
CLK_SET_RATE_PARENT on the mux and thus re-configures the MPLL2 clock
on Meson8b correctly
[0] http://lists.infradead.org/pipermail/linux-amlogic/2017-December/005596.html
[1] http://lists.infradead.org/pipermail/linux-amlogic/2017-December/005848.html
Martin Blumenstingl (3):
net: stmmac: dwmac-meson8b: only configure the clocks in RGMII mode
net: stmmac: dwmac-meson8b: fix setting the RGMII clock on Meson8b
net: stmmac: dwmac-meson8b: propagate rate changes to the parent clock
.../net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 55 +++++++++++-----------
1 file changed, 27 insertions(+), 28 deletions(-)
--
2.15.1
^ permalink raw reply
* [RFT net-next v2 1/3] net: stmmac: dwmac-meson8b: only configure the clocks in RGMII mode
From: Martin Blumenstingl @ 2017-12-23 23:40 UTC (permalink / raw)
To: netdev, ingrassia
Cc: linus.luessing, khilman, linux-amlogic, jbrunet, narmstrong,
peppe.cavallaro, alexandre.torgue, Martin Blumenstingl
In-Reply-To: <20171223234100.11814-1-martin.blumenstingl@googlemail.com>
Neither the m25_div_clk nor the m250_div_clk or m250_mux_clk are used in
RMII mode. The m25_div_clk output is routed to the RGMII PHY's "RGMII
clock".
This means that we don't need to configure the clocks in RMII mode. The
driver however did this - with no effect since the clocks are not routed
to the PHY in RMII mode.
While here also rename meson8b_init_clk to meson8b_init_rgmii_clk to
make it easier to understand the code.
Fixes: 566e8251625304 ("net: stmmac: add a glue driver for the Amlogic Meson 8b / GXBB DWMAC")
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
---
.../net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 46 ++++++++++------------
1 file changed, 21 insertions(+), 25 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 4404650b32c5..e1d5907e481c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -81,7 +81,7 @@ static void meson8b_dwmac_mask_bits(struct meson8b_dwmac *dwmac, u32 reg,
writel(data, dwmac->regs + reg);
}
-static int meson8b_init_clk(struct meson8b_dwmac *dwmac)
+static int meson8b_init_rgmii_clk(struct meson8b_dwmac *dwmac)
{
struct clk_init_data init;
int i, ret;
@@ -176,7 +176,6 @@ static int meson8b_init_clk(struct meson8b_dwmac *dwmac)
static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
{
int ret;
- unsigned long clk_rate;
u8 tx_dly_val = 0;
switch (dwmac->phy_mode) {
@@ -191,9 +190,6 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
case PHY_INTERFACE_MODE_RGMII_ID:
case PHY_INTERFACE_MODE_RGMII_TXID:
- /* Generate a 25MHz clock for the PHY */
- clk_rate = 25 * 1000 * 1000;
-
/* enable RGMII mode */
meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_RGMII_MODE,
PRG_ETH0_RGMII_MODE);
@@ -204,12 +200,24 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_TXDLY_MASK,
tx_dly_val << PRG_ETH0_TXDLY_SHIFT);
+
+ ret = clk_prepare_enable(dwmac->m25_div_clk);
+ if (ret) {
+ dev_err(&dwmac->pdev->dev, "failed to enable the PHY clock\n");
+ return ret;
+ }
+
+ /* Generate the 25MHz RGMII clock for the PHY */
+ ret = clk_set_rate(dwmac->m25_div_clk, 25 * 1000 * 1000);
+ if (ret) {
+ clk_disable_unprepare(dwmac->m25_div_clk);
+
+ dev_err(&dwmac->pdev->dev, "failed to set PHY clock\n");
+ return ret;
+ }
break;
case PHY_INTERFACE_MODE_RMII:
- /* Use the rate of the mux clock for the internal RMII PHY */
- clk_rate = clk_get_rate(dwmac->m250_mux_clk);
-
/* disable RGMII mode -> enables RMII mode */
meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_RGMII_MODE,
0);
@@ -231,20 +239,6 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
return -EINVAL;
}
- ret = clk_prepare_enable(dwmac->m25_div_clk);
- if (ret) {
- dev_err(&dwmac->pdev->dev, "failed to enable the PHY clock\n");
- return ret;
- }
-
- ret = clk_set_rate(dwmac->m25_div_clk, clk_rate);
- if (ret) {
- clk_disable_unprepare(dwmac->m25_div_clk);
-
- dev_err(&dwmac->pdev->dev, "failed to set PHY clock\n");
- return ret;
- }
-
/* enable TX_CLK and PHY_REF_CLK generator */
meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_TX_AND_PHY_REF_CLK,
PRG_ETH0_TX_AND_PHY_REF_CLK);
@@ -294,7 +288,7 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
&dwmac->tx_delay_ns))
dwmac->tx_delay_ns = 2;
- ret = meson8b_init_clk(dwmac);
+ ret = meson8b_init_rgmii_clk(dwmac);
if (ret)
goto err_remove_config_dt;
@@ -311,7 +305,8 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
return 0;
err_clk_disable:
- clk_disable_unprepare(dwmac->m25_div_clk);
+ if (phy_interface_mode_is_rgmii(dwmac->phy_mode))
+ clk_disable_unprepare(dwmac->m25_div_clk);
err_remove_config_dt:
stmmac_remove_config_dt(pdev, plat_dat);
@@ -322,7 +317,8 @@ static int meson8b_dwmac_remove(struct platform_device *pdev)
{
struct meson8b_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
- clk_disable_unprepare(dwmac->m25_div_clk);
+ if (phy_interface_mode_is_rgmii(dwmac->phy_mode))
+ clk_disable_unprepare(dwmac->m25_div_clk);
return stmmac_pltfr_remove(pdev);
}
--
2.15.1
^ permalink raw reply related
* [RFT net-next v2 2/3] net: stmmac: dwmac-meson8b: fix setting the RGMII clock on Meson8b
From: Martin Blumenstingl @ 2017-12-23 23:40 UTC (permalink / raw)
To: netdev, ingrassia
Cc: linus.luessing, khilman, linux-amlogic, jbrunet, narmstrong,
peppe.cavallaro, alexandre.torgue, Martin Blumenstingl
In-Reply-To: <20171223234100.11814-1-martin.blumenstingl@googlemail.com>
Meson8b only supports MPLL2 as clock input. The rate of the MPLL2 clock
set by Odroid-C1's u-boot is close to 500MHz. The exact rate is
500002394Hz, which is calculated in drivers/clk/meson/clk-mpll.c
using the following formula:
DIV_ROUND_UP_ULL((u64)parent_rate * SDM_DEN, (SDM_DEN * n2) + sdm)
Odroid-C1's u-boot configures MPLL2 with the following values:
- SDM_DEN = 16384
- SDM = 1638
- N2 = 5
The 250MHz and 25MHz clocks inside dwmac-meson8b driver are derived
from the MPLL2 clock. Due to MPLL2 running slightly faster than 500MHz
the common clock framework chooses dividers which are too big to
generate the 250MHz and 25MHz clocks. Emiliano Ingrassia observed that
the divider for the 250MHz clock was set to 0x5 which results in a clock
rate of close to 100MHz instead of 250MHz. The divider for the 25MHz
clock is set to 0x0 (which means "divide by 5") so the resulting RGMII
clock is running at 20MHz (plus a few additional Hz). The RTL8211F PHY
on Odroid-C1 however fails to operate with a 20MHz RGMII clock.
Round the divider's clock rates to prevent this issue on Meson8b. This
means we'll now end up with a clock rate of 25000120Hz (= 25MHz plus
120Hz).
This has no effect on the Meson GX SoCs since there fclk_div2 is used as
input clock, which has a rate of 1000MHz (and thus is divisible cleanly
to 250MHz and 25MHz).
Fixes: 566e8251625304 ("net: stmmac: add a glue driver for the Amlogic Meson 8b / GXBB DWMAC")
Reported-by: Emiliano Ingrassia <ingrassia@epigenesys.com>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Reviewed-by: Jerome Brunet <jbrunet@baylibre.com>
---
drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index e1d5907e481c..0da551c84fe8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -144,7 +144,9 @@ static int meson8b_init_rgmii_clk(struct meson8b_dwmac *dwmac)
dwmac->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
dwmac->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
dwmac->m250_div.hw.init = &init;
- dwmac->m250_div.flags = CLK_DIVIDER_ONE_BASED | CLK_DIVIDER_ALLOW_ZERO;
+ dwmac->m250_div.flags = CLK_DIVIDER_ONE_BASED |
+ CLK_DIVIDER_ALLOW_ZERO |
+ CLK_DIVIDER_ROUND_CLOSEST;
dwmac->m250_div_clk = devm_clk_register(dev, &dwmac->m250_div.hw);
if (WARN_ON(IS_ERR(dwmac->m250_div_clk)))
@@ -164,7 +166,8 @@ static int meson8b_init_rgmii_clk(struct meson8b_dwmac *dwmac)
dwmac->m25_div.width = PRG_ETH0_CLK_M25_DIV_WIDTH;
dwmac->m25_div.table = clk_25m_div_table;
dwmac->m25_div.hw.init = &init;
- dwmac->m25_div.flags = CLK_DIVIDER_ALLOW_ZERO;
+ dwmac->m25_div.flags = CLK_DIVIDER_ALLOW_ZERO |
+ CLK_DIVIDER_ROUND_CLOSEST;
dwmac->m25_div_clk = devm_clk_register(dev, &dwmac->m25_div.hw);
if (WARN_ON(IS_ERR(dwmac->m25_div_clk)))
--
2.15.1
^ permalink raw reply related
* [RFT net-next v2 3/3] net: stmmac: dwmac-meson8b: propagate rate changes to the parent clock
From: Martin Blumenstingl @ 2017-12-23 23:41 UTC (permalink / raw)
To: netdev, ingrassia
Cc: linus.luessing, khilman, linux-amlogic, jbrunet, narmstrong,
peppe.cavallaro, alexandre.torgue, Martin Blumenstingl
In-Reply-To: <20171223234100.11814-1-martin.blumenstingl@googlemail.com>
On Meson8b the only valid input clock is MPLL2. The bootloader
configures that to run at 500002394Hz which cannot be divided evenly
down to 25MHz using the m250_div and m25_div clocks. Currently the
common clock framework chooses a m250_div of 2 and a m25_div of 10,
which results in a RGMII clock of 25000120Hz (120Hz above the requested
25MHz).
Letting the common clock framework propagate the rate changes from the
m25_div clock to m250_div clock to m250_mux to it's parent allows us to
get the best possible parent clock rate. With this patch the common clock
framework calculates a rate of close-to-125MHz (124999851Hz to be exact)
for the MPLL2 clock (which is the mux input). Dividing that by 5 (using
only the m25_div) gives us a RGMII clock of 24999971Hz (which is only
29Hz off the requested 25MHz, compared to 120Hz from u-boot and the
vendor driver).
For now we only want to propagate rate changes to make sure that the mux
parent is not changed automatically by the common clock framework. This
is just to keep the number of side-effects from this patch at a minimum.
SoCs from the Meson GX series are not affected by this change because
the input clock is FCLK_DIV2 whose rate cannot be changed. Additionally
the GX SoCs don't need to use the "closest" divider since the parent
clock is a multiple of 250MHz.
Fixes: 566e8251625304 ("net: stmmac: add a glue driver for the Amlogic Meson 8b / GXBB DWMAC")
Suggested-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
---
drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 0da551c84fe8..e542c8a14f2a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -116,7 +116,7 @@ static int meson8b_init_rgmii_clk(struct meson8b_dwmac *dwmac)
snprintf(clk_name, sizeof(clk_name), "%s#m250_sel", dev_name(dev));
init.name = clk_name;
init.ops = &clk_mux_ops;
- init.flags = 0;
+ init.flags = CLK_SET_RATE_PARENT;
init.parent_names = mux_parent_names;
init.num_parents = MUX_CLK_NUM_PARENTS;
--
2.15.1
^ permalink raw reply related
* Re: [patch net-next v4 00/10] net: sched: allow qdiscs to share filter block instances
From: David Ahern @ 2017-12-24 1:54 UTC (permalink / raw)
To: Jiri Pirko, netdev
Cc: davem, jhs, xiyou.wangcong, mlxsw, andrew, vivien.didelot,
f.fainelli, michael.chan, ganeshgr, saeedm, matanb, leonro,
idosch, jakub.kicinski, simon.horman, pieter.jansenvanvuuren,
john.hurley, alexander.h.duyck, ogerlitz, john.fastabend, daniel
In-Reply-To: <20171223155436.9014-1-jiri@resnulli.us>
On 12/23/17 9:54 AM, Jiri Pirko wrote:
> So back to the example. First, we create 2 qdiscs. Both will share
> block number 22. "22" is just an identification. If we don't pass any
> block number, a new one will be generated by kernel:
>
> $ tc qdisc add dev ens7 ingress block 22
> ^^^^^^^^
> $ tc qdisc add dev ens8 ingress block 22
> ^^^^^^^^
>
> Now if we list the qdiscs, we will see the block index in the output:
>
> $ tc qdisc
> qdisc ingress ffff: dev ens7 parent ffff:fff1 block 22
> qdisc ingress ffff: dev ens8 parent ffff:fff1 block 22
>
> To make is more visual, the situation looks like this:
>
> ens7 ingress qdisc ens7 ingress qdisc
> | |
> | |
> +----------> block 22 <----------+
>
> Unlimited number of qdiscs may share the same block.
>
> Now we can add filter to any of qdiscs sharing the same block:
>
> $ tc filter add dev ens7 ingress protocol ip pref 25 flower dst_ip 192.168.0.0/16 action drop
Allowing config of a shared block through any qdisc that references it
is akin to me allowing nexthop objects to be manipulated by any route
that references it -- sure, it could be done but causes a lot surprises
to the user.
You are adding a new tc object -- a shared block. Why the resistance to
creating a proper API for managing it?
^ permalink raw reply
* Re: [patch net-next v4 05/10] net: sched: keep track of offloaded filters and check tc offload feature
From: Jakub Kicinski @ 2017-12-24 2:20 UTC (permalink / raw)
To: Jiri Pirko
Cc: netdev, davem, jhs, xiyou.wangcong, mlxsw, andrew, vivien.didelot,
f.fainelli, michael.chan, ganeshgr, saeedm, matanb, leonro,
idosch, simon.horman, pieter.jansenvanvuuren, john.hurley,
alexander.h.duyck, ogerlitz, john.fastabend, daniel, dsahern
In-Reply-To: <20171223155436.9014-6-jiri@resnulli.us>
On Sat, 23 Dec 2017 16:54:31 +0100, Jiri Pirko wrote:
> -static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
> - struct tcf_block_ext_info *ei)
> +static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
> + struct tcf_block_ext_info *ei)
> {
> - tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
> + struct net_device *dev = q->dev_queue->dev;
> + int err;
> +
> + if (!dev->netdev_ops->ndo_setup_tc)
> + return 0;
> +
> + /* If tc offload feature is disabled and the block we try to bind
> + * to already has some offloaded filters, forbid to bind.
> + */
> + if (!tc_can_offload(dev) && tcf_block_offload_in_use(block))
> + return -EOPNOTSUPP;
> +
> + err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
> + if (err == -EOPNOTSUPP)
> + /* Driver does not support binding. */
> + return 0;
> + return err;
> }
Would you mind explaining why those return 0s are safe?
Say I have 2 netdevs, one dumb NIC without ndo_setup_tc (dnic) and one
NIC that can offload everything (enic). I can share a block between
them (before or after adding any filters) and adding filters with
skip_sw will succeed, even though dnic will not see them ever. There
is only one callback for enic, "all callbacks" != "all devices".
It's fine to share the block in such case, but that block can never
accept a skip_sw filter. Don't we need something like (reverse) patch 3
for keeping track of netdevs sharing the block which are not OK with
offloads?
Am I misunderstanding how this is supposed to work? Or simply too nit
picky about providing predictable behaviour?
Quick hack to illustrate the idea (untested):
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 22a3a1d22ffa..e61e59161243 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -289,6 +289,7 @@ struct tcf_block {
struct list_head cb_list;
struct list_head owner_list;
bool keep_dst;
+ bool nonoffload_taint;
unsigned int offloadcnt;
};
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 37eea70d1d72..4e017cbbf356 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -290,7 +290,7 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
int err;
if (!dev->netdev_ops->ndo_setup_tc)
- return 0;
+ goto mark_no_offload;
/* If tc offload feature is disabled and the block we try to bind
* to already has some offloaded filters, forbid to bind.
@@ -300,9 +300,14 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
if (err == -EOPNOTSUPP)
- /* Driver does not support binding. */
- return 0;
+ goto mark_no_offload;
return err;
+
+mark_no_offload:
+ if (tcf_block_offload_in_use(block))
+ return -EOPNOTSUPP;
+ block->nonoffload_taint = true;
+ return 0;
}
static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
@@ -1492,6 +1497,10 @@ int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
int ok_count;
int ret;
+ /* Make sure all netdevs sharing this block are offload-capable */
+ if (block->nonoffload_taint && err_stop)
+ return -EOPNOTSUPP;
+
ret = tcf_block_cb_call(block, type, type_data, err_stop);
if (ret < 0)
return ret;
Here a block once tainted with a bad netdev will never be offloadable
again, so tracking a'la patch 3 would be nicer..
^ permalink raw reply related
* [PATCH] sky2: Replace mdelay with msleep in sky2_vpd_wait
From: Jia-Ju Bai @ 2017-12-24 3:54 UTC (permalink / raw)
To: mlindner, stephen, shemminger; +Cc: netdev, linux-kernel, Jia-Ju Bai
sky2_vpd_wait is not called in an interrupt handler nor holding a spinlock.
The function mdelay in it can be replaced with msleep, to reduce busy wait.
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
drivers/net/ethernet/marvell/sky2.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 9efe177..9fe8530 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -4287,7 +4287,7 @@ static int sky2_vpd_wait(const struct sky2_hw *hw, int cap, u16 busy)
dev_err(&hw->pdev->dev, "VPD cycle timed out\n");
return -ETIMEDOUT;
}
- mdelay(1);
+ msleep(1);
}
return 0;
--
1.7.9.5
^ permalink raw reply related
* [PATCH] b43: Replace mdelay with msleep in b43_radio_2057_init_post
From: Jia-Ju Bai @ 2017-12-24 4:03 UTC (permalink / raw)
To: kvalo, colin.king, johannes.berg, tiwai, kstewart, gregkh,
andrew.zaborowski
Cc: linux-wireless, b43-dev, netdev, linux-kernel, Jia-Ju Bai
b43_radio_2057_init_post is not called in an interrupt handler
nor holding a spinlock.
The function mdelay in it can be replaced with msleep, to reduce busy wait.
Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
drivers/net/wireless/broadcom/b43/phy_n.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/wireless/broadcom/b43/phy_n.c b/drivers/net/wireless/broadcom/b43/phy_n.c
index a5557d7..5bc838e 100644
--- a/drivers/net/wireless/broadcom/b43/phy_n.c
+++ b/drivers/net/wireless/broadcom/b43/phy_n.c
@@ -1031,7 +1031,7 @@ static void b43_radio_2057_init_post(struct b43_wldev *dev)
b43_radio_set(dev, R2057_RFPLL_MISC_CAL_RESETN, 0x78);
b43_radio_set(dev, R2057_XTAL_CONFIG2, 0x80);
- mdelay(2);
+ msleep(2);
b43_radio_mask(dev, R2057_RFPLL_MISC_CAL_RESETN, ~0x78);
b43_radio_mask(dev, R2057_XTAL_CONFIG2, ~0x80);
--
1.7.9.5
^ permalink raw reply related
* Re: [Patch net-next] net_sched: remove the unsafe __skb_array_empty()
From: John Fastabend @ 2017-12-24 6:57 UTC (permalink / raw)
To: Cong Wang; +Cc: Linux Kernel Network Developers, Jakub Kicinski
In-Reply-To: <CAM_iQpXm6ASPmUh0L_KodWZds==JJukBMa+MzOvM2t9qKkDu1Q@mail.gmail.com>
On 12/22/2017 12:31 PM, Cong Wang wrote:
> On Thu, Dec 21, 2017 at 7:06 PM, John Fastabend
> <john.fastabend@gmail.com> wrote:
>> On 12/21/2017 04:03 PM, Cong Wang wrote:
>>> __skb_array_empty() is only safe if array is never resized.
>>> pfifo_fast_dequeue() is called in TX BH context and without
>>> qdisc lock, so even after we disable BH on ->reset() path
>>> we can still race with other CPU's.
>>>
>>> Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array")
>>> Reported-by: Jakub Kicinski <jakub.kicinski@netronome.com>
>>> Cc: John Fastabend <john.fastabend@gmail.com>
>>> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
>>> ---
>>> net/sched/sch_generic.c | 3 ---
>>> 1 file changed, 3 deletions(-)
>>>
>>> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
>>> index 00ddb5f8f430..9279258ce060 100644
>>> --- a/net/sched/sch_generic.c
>>> +++ b/net/sched/sch_generic.c
>>> @@ -622,9 +622,6 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
>>> for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
>>> struct skb_array *q = band2list(priv, band);
>>>
>>> - if (__skb_array_empty(q))
>>> - continue;
>>> -
>>> skb = skb_array_consume_bh(q);
>>> }
>>> if (likely(skb)) {
>>>
>>
>>
>> So this is a performance thing we don't want to grab the consumer lock on
>> empty bands. Which can be fairly common depending on traffic patterns.
>
>
> I understand why you had it, but it is just not safe. You don't want
> to achieve performance gain by crashing system, right?
huh? So my point is the patch you submit here is not a
real fix but a work around. To peek the head of a consumer/producer ring
without a lock, _should_ be fine. This _should_ work as well with
consumer or producer operations happening at the same time. After some
digging the issue is in the ptr_ring code.
The peek code (what empty check calls) is the following,
static inline void *__ptr_ring_peek(struct ptr_ring *r)
{
if (likely(r->size))
return r->queue[r->consumer_head];
return NULL;
}
So what the splat is detecting is consumer head being 'out of bounds'.
This happens because ptr_ring_discard_one increments the consumer_head
and then checks to see if it overran the array size. If above peek
happens after the increment, but before the size check we get the
splat. There are two ways, as far as I can see, to fix this. First
do the check before incrementing the consumer head. Or the easier
fix,
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -438,7 +438,7 @@ static inline int ptr_ring_consume_batched_bh(struct
ptr_ring *r,
static inline void **__ptr_ring_init_queue_alloc(unsigned int size,
gfp_t gfp)
{
- return kcalloc(size, sizeof(void *), gfp);
+ return kcalloc(size + 1, sizeof(void *), gfp);
}
With Jakub's help (Thanks!) I was able to reproduce the original splat
and also verify the above removes it.
To be clear "resizing" a skb_array only refers to changing the actual
array size not adding/removing elements.
>
>>
>> Although its not logical IMO to have both reset and dequeue running at
>> the same time. Some skbs would get through others would get sent, sort
>> of a mess. I don't see how it can be an issue. The never resized bit
>> in the documentation is referring to resizing the ring size _not_ popping
>> off elements of the ring. array_empty just reads the consumer head.
>> The only ring resizing in pfifo fast should be at init and destroy where
>> enqueue/dequeue should be disconnected by then. Although based on the
>> trace I missed a case.
>
>
> Both pfifo_fast_reset() and pfifo_fast_dequeue() call
> skb_array_consume_bh(), so there is no difference w.r.t. resizing.
>
Sorry not following.
> And ->reset() is called in qdisc_graft() too. Let's say we have htb+pfifo_fast,
> htb_graft() calls qdisc_replace() which calls qdisc_reset() on pfifo_fast,
> so clearly pfifo_fast_reset() can run with pfifo_fast_dequeue()
> concurrently.
Yes and this _should_ be perfectly fine for pfifo_fast. I'm wondering
though if this API can be cleaned up. What are the paths that do a reset
without a destroy.. Do we really need to have this pattern where reset
is called then later destroy. Seems destroy could do the entire cleanup
and this would simplify things. None of this has to do with the splat
though.
>
>
>>
>> I think the right fix is to only call reset/destroy patterns after
>> waiting a grace period and for all tx_action calls in-flight to
>> complete. This is also better going forward for more complex qdiscs.
>
> But we don't even have rcu read lock in TX BH, do we?
>
> Also, people certainly don't like yet another synchronize_net()...
>
This needs a fix and is a _real_ bug, but removing __skb_array_empty()
doesn't help solve this at all. Will work on a fix after the holiday
break. The fix here is to ensure the destroy is not going to happen
while tx_action is in-flight. Can be done with qdisc_run and checking
correct bits in lockless case.
.John
^ permalink raw reply
* Re: [patch net-next v4 00/10] net: sched: allow qdiscs to share filter block instances
From: Jiri Pirko @ 2017-12-24 7:19 UTC (permalink / raw)
To: David Ahern
Cc: netdev, davem, jhs, xiyou.wangcong, mlxsw, andrew, vivien.didelot,
f.fainelli, michael.chan, ganeshgr, saeedm, matanb, leonro,
idosch, jakub.kicinski, simon.horman, pieter.jansenvanvuuren,
john.hurley, alexander.h.duyck, ogerlitz, john.fastabend, daniel
In-Reply-To: <ad7dbefc-515a-2cdd-a55f-86e0d8cc587b@gmail.com>
Sun, Dec 24, 2017 at 02:54:47AM CET, dsahern@gmail.com wrote:
>On 12/23/17 9:54 AM, Jiri Pirko wrote:
>> So back to the example. First, we create 2 qdiscs. Both will share
>> block number 22. "22" is just an identification. If we don't pass any
>> block number, a new one will be generated by kernel:
>>
>> $ tc qdisc add dev ens7 ingress block 22
>> ^^^^^^^^
>> $ tc qdisc add dev ens8 ingress block 22
>> ^^^^^^^^
>>
>> Now if we list the qdiscs, we will see the block index in the output:
>>
>> $ tc qdisc
>> qdisc ingress ffff: dev ens7 parent ffff:fff1 block 22
>> qdisc ingress ffff: dev ens8 parent ffff:fff1 block 22
>>
>> To make is more visual, the situation looks like this:
>>
>> ens7 ingress qdisc ens7 ingress qdisc
>> | |
>> | |
>> +----------> block 22 <----------+
>>
>> Unlimited number of qdiscs may share the same block.
>>
>> Now we can add filter to any of qdiscs sharing the same block:
>>
>> $ tc filter add dev ens7 ingress protocol ip pref 25 flower dst_ip 192.168.0.0/16 action drop
>
>
>Allowing config of a shared block through any qdisc that references it
>is akin to me allowing nexthop objects to be manipulated by any route
>that references it -- sure, it could be done but causes a lot surprises
>to the user.
>
>You are adding a new tc object -- a shared block. Why the resistance to
>creating a proper API for managing it?
Again, no resistance, I said many times it would be done as a follow-up.
But as an api already exists, it has to continue to work. Or do you
suggest it should stop working? That, I don't agree with.
^ permalink raw reply
* Re: [patch net-next v4 05/10] net: sched: keep track of offloaded filters and check tc offload feature
From: Jiri Pirko @ 2017-12-24 7:52 UTC (permalink / raw)
To: Jakub Kicinski
Cc: netdev, davem, jhs, xiyou.wangcong, mlxsw, andrew, vivien.didelot,
f.fainelli, michael.chan, ganeshgr, saeedm, matanb, leonro,
idosch, simon.horman, pieter.jansenvanvuuren, john.hurley,
alexander.h.duyck, ogerlitz, john.fastabend, daniel, dsahern
In-Reply-To: <20171223182045.610b04e4@cakuba.netronome.com>
Sun, Dec 24, 2017 at 03:20:45AM CET, kubakici@wp.pl wrote:
>On Sat, 23 Dec 2017 16:54:31 +0100, Jiri Pirko wrote:
>> -static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
>> - struct tcf_block_ext_info *ei)
>> +static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
>> + struct tcf_block_ext_info *ei)
>> {
>> - tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND);
>> + struct net_device *dev = q->dev_queue->dev;
>> + int err;
>> +
>> + if (!dev->netdev_ops->ndo_setup_tc)
>> + return 0;
>> +
>> + /* If tc offload feature is disabled and the block we try to bind
>> + * to already has some offloaded filters, forbid to bind.
>> + */
>> + if (!tc_can_offload(dev) && tcf_block_offload_in_use(block))
>> + return -EOPNOTSUPP;
>> +
>> + err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
>> + if (err == -EOPNOTSUPP)
>> + /* Driver does not support binding. */
>> + return 0;
>> + return err;
>> }
>
>Would you mind explaining why those return 0s are safe?
>
>Say I have 2 netdevs, one dumb NIC without ndo_setup_tc (dnic) and one
>NIC that can offload everything (enic). I can share a block between
>them (before or after adding any filters) and adding filters with
>skip_sw will succeed, even though dnic will not see them ever. There
>is only one callback for enic, "all callbacks" != "all devices".
>It's fine to share the block in such case, but that block can never
>accept a skip_sw filter. Don't we need something like (reverse) patch 3
>for keeping track of netdevs sharing the block which are not OK with
>offloads?
>
>Am I misunderstanding how this is supposed to work? Or simply too nit
>picky about providing predictable behaviour?
You undestand it correctly. Original plan was to ignore thore devices
that does not support offloading. But thinking about it a bit more,
you are probably right that they should be taken into consideration
when user explicitly says "skip_sw".
Will include the accounting you suggest below. Thanks!
>
>Quick hack to illustrate the idea (untested):
>
>diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
>index 22a3a1d22ffa..e61e59161243 100644
>--- a/include/net/sch_generic.h
>+++ b/include/net/sch_generic.h
>@@ -289,6 +289,7 @@ struct tcf_block {
> struct list_head cb_list;
> struct list_head owner_list;
> bool keep_dst;
>+ bool nonoffload_taint;
> unsigned int offloadcnt;
> };
>
>diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
>index 37eea70d1d72..4e017cbbf356 100644
>--- a/net/sched/cls_api.c
>+++ b/net/sched/cls_api.c
>@@ -290,7 +290,7 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
> int err;
>
> if (!dev->netdev_ops->ndo_setup_tc)
>- return 0;
>+ goto mark_no_offload;
>
> /* If tc offload feature is disabled and the block we try to bind
> * to already has some offloaded filters, forbid to bind.
>@@ -300,9 +300,14 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
>
> err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND);
> if (err == -EOPNOTSUPP)
>- /* Driver does not support binding. */
>- return 0;
>+ goto mark_no_offload;
> return err;
>+
>+mark_no_offload:
>+ if (tcf_block_offload_in_use(block))
>+ return -EOPNOTSUPP;
>+ block->nonoffload_taint = true;
>+ return 0;
> }
>
> static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
>@@ -1492,6 +1497,10 @@ int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
> int ok_count;
> int ret;
>
>+ /* Make sure all netdevs sharing this block are offload-capable */
>+ if (block->nonoffload_taint && err_stop)
>+ return -EOPNOTSUPP;
>+
> ret = tcf_block_cb_call(block, type, type_data, err_stop);
> if (ret < 0)
> return ret;
>
>
>Here a block once tainted with a bad netdev will never be offloadable
>again, so tracking a'la patch 3 would be nicer..
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox