Netdev List
 help / color / mirror / Atom feed
* [net-next PATCH] octeontx2-pf: TC flower offload support for ICMP type and code
From: Geetha sowjanya @ 2023-10-31  6:19 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: kuba, davem, pabeni, edumazet, sgoutham, gakula, sbhatta, hkelam

Adds tc offload support for matching on ICMP type and code.

Example usage:
To enable adding tc ingress rules
        tc qdisc add dev eth0 ingress

TC rule drop the ICMP echo reply:
        tc filter add dev eth0 protocol ip parent ffff: \
        flower ip_proto icmp type 8 code 0 skip_sw action drop

TC rule to drop ICMPv6 echo reply:
        tc filter add dev eth0 protocol ipv6 parent ffff: flower \
        indev eth0 ip_proto icmpv6 type 128 code 0 action drop

Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
Signed-off-by: Geetha sowjanya <gakula@marvell.com>
---
 .../net/ethernet/marvell/octeontx2/af/mbox.h  |  2 ++
 .../net/ethernet/marvell/octeontx2/af/npc.h   |  2 ++
 .../marvell/octeontx2/af/rvu_debugfs.c        |  8 +++++++
 .../marvell/octeontx2/af/rvu_npc_fs.c         | 23 ++++++++++++++-----
 .../ethernet/marvell/octeontx2/nic/otx2_tc.c  | 14 +++++++++++
 5 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
index 6b5b06c2b4e9..78088dd4e2f9 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h
@@ -1473,6 +1473,8 @@ struct flow_msg {
 		u8 next_header;
 	};
 	__be16 vlan_itci;
+	u8 icmp_type;
+	u8 icmp_code;
 };
 
 struct npc_install_flow_req {
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc.h b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
index de9fbd98dfb7..2f1ed5411d75 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/npc.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/npc.h
@@ -206,6 +206,8 @@ enum key_fields {
 	NPC_SPORT_SCTP,
 	NPC_DPORT_SCTP,
 	NPC_IPSEC_SPI,
+	NPC_TYPE_ICMP,
+	NPC_CODE_ICMP,
 	NPC_HEADER_FIELDS_MAX,
 	NPC_CHAN = NPC_HEADER_FIELDS_MAX, /* Valid when Rx */
 	NPC_PF_FUNC, /* Valid when Tx */
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
index d30e84803481..2b32b9d6c625 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
@@ -2836,6 +2836,14 @@ static void rvu_dbg_npc_mcam_show_flows(struct seq_file *s,
 			seq_printf(s, "0x%x ", ntohl(rule->packet.spi));
 			seq_printf(s, "mask 0x%x\n", ntohl(rule->mask.spi));
 			break;
+		case NPC_TYPE_ICMP:
+			seq_printf(s, "%d ", rule->packet.icmp_type);
+			seq_printf(s, "mask 0x%x\n", rule->mask.icmp_type);
+			break;
+		case NPC_CODE_ICMP:
+			seq_printf(s, "%d ", rule->packet.icmp_code);
+			seq_printf(s, "mask 0x%x\n", rule->mask.icmp_code);
+			break;
 		default:
 			seq_puts(s, "\n");
 			break;
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
index 237f82082ebe..ad204e21867b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_fs.c
@@ -43,6 +43,8 @@ static const char * const npc_flow_names[] = {
 	[NPC_DPORT_SCTP] = "sctp destination port",
 	[NPC_LXMB]	= "Mcast/Bcast header ",
 	[NPC_IPSEC_SPI] = "SPI ",
+	[NPC_TYPE_ICMP] = "icmp type",
+	[NPC_CODE_ICMP] = "icmp code",
 	[NPC_UNKNOWN]	= "unknown",
 };
 
@@ -518,6 +520,8 @@ do {									       \
 	NPC_SCAN_HDR(NPC_DPORT_TCP, NPC_LID_LD, NPC_LT_LD_TCP, 2, 2);
 	NPC_SCAN_HDR(NPC_SPORT_SCTP, NPC_LID_LD, NPC_LT_LD_SCTP, 0, 2);
 	NPC_SCAN_HDR(NPC_DPORT_SCTP, NPC_LID_LD, NPC_LT_LD_SCTP, 2, 2);
+	NPC_SCAN_HDR(NPC_TYPE_ICMP, NPC_LID_LD, NPC_LT_LD_ICMP, 0, 1);
+	NPC_SCAN_HDR(NPC_CODE_ICMP, NPC_LID_LD, NPC_LT_LD_ICMP, 1, 1);
 	NPC_SCAN_HDR(NPC_ETYPE_ETHER, NPC_LID_LA, NPC_LT_LA_ETHER, 12, 2);
 	NPC_SCAN_HDR(NPC_ETYPE_TAG1, NPC_LID_LB, NPC_LT_LB_CTAG, 4, 2);
 	NPC_SCAN_HDR(NPC_ETYPE_TAG2, NPC_LID_LB, NPC_LT_LB_STAG_QINQ, 8, 2);
@@ -539,7 +543,7 @@ static void npc_set_features(struct rvu *rvu, int blkaddr, u8 intf)
 {
 	struct npc_mcam *mcam = &rvu->hw->mcam;
 	u64 *features = &mcam->rx_features;
-	u64 tcp_udp_sctp;
+	u64 proto_flags;
 	int hdr;
 
 	if (is_npc_intf_tx(intf))
@@ -550,18 +554,21 @@ static void npc_set_features(struct rvu *rvu, int blkaddr, u8 intf)
 			*features |= BIT_ULL(hdr);
 	}
 
-	tcp_udp_sctp = BIT_ULL(NPC_SPORT_TCP) | BIT_ULL(NPC_SPORT_UDP) |
+	proto_flags = BIT_ULL(NPC_SPORT_TCP) | BIT_ULL(NPC_SPORT_UDP) |
 		       BIT_ULL(NPC_DPORT_TCP) | BIT_ULL(NPC_DPORT_UDP) |
-		       BIT_ULL(NPC_SPORT_SCTP) | BIT_ULL(NPC_DPORT_SCTP);
+		       BIT_ULL(NPC_SPORT_SCTP) | BIT_ULL(NPC_DPORT_SCTP) |
+		       BIT_ULL(NPC_SPORT_SCTP) | BIT_ULL(NPC_DPORT_SCTP) |
+		       BIT_ULL(NPC_TYPE_ICMP) | BIT_ULL(NPC_CODE_ICMP);
 
 	/* for tcp/udp/sctp corresponding layer type should be in the key */
-	if (*features & tcp_udp_sctp) {
+	if (*features & proto_flags) {
 		if (!npc_check_field(rvu, blkaddr, NPC_LD, intf))
-			*features &= ~tcp_udp_sctp;
+			*features &= ~proto_flags;
 		else
 			*features |= BIT_ULL(NPC_IPPROTO_TCP) |
 				     BIT_ULL(NPC_IPPROTO_UDP) |
-				     BIT_ULL(NPC_IPPROTO_SCTP);
+				     BIT_ULL(NPC_IPPROTO_SCTP) |
+				     BIT_ULL(NPC_IPPROTO_ICMP);
 	}
 
 	/* for AH/ICMP/ICMPv6/, check if corresponding layer type is present in the key */
@@ -950,6 +957,10 @@ do {									      \
 		       ntohs(mask->sport), 0);
 	NPC_WRITE_FLOW(NPC_DPORT_SCTP, dport, ntohs(pkt->dport), 0,
 		       ntohs(mask->dport), 0);
+	NPC_WRITE_FLOW(NPC_TYPE_ICMP, icmp_type, pkt->icmp_type, 0,
+		       mask->icmp_type, 0);
+	NPC_WRITE_FLOW(NPC_CODE_ICMP, icmp_code, pkt->icmp_code, 0,
+		       mask->icmp_code, 0);
 
 	NPC_WRITE_FLOW(NPC_IPSEC_SPI, spi, ntohl(pkt->spi), 0,
 		       ntohl(mask->spi), 0);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
index fab9d85bfb37..bede05dfad7b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c
@@ -519,6 +519,7 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
 	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
 	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
 	      BIT(FLOW_DISSECTOR_KEY_IPSEC) |
+	      BIT_ULL(FLOW_DISSECTOR_KEY_ICMP) |
 	      BIT_ULL(FLOW_DISSECTOR_KEY_IP))))  {
 		netdev_info(nic->netdev, "unsupported flow used key 0x%llx",
 			    dissector->used_keys);
@@ -738,6 +739,19 @@ static int otx2_tc_prepare_flow(struct otx2_nic *nic, struct otx2_tc_flow *node,
 		}
 	}
 
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
+		struct flow_match_icmp match;
+
+		flow_rule_match_icmp(rule, &match);
+
+		flow_spec->icmp_type = match.key->type;
+		flow_mask->icmp_type = match.mask->type;
+		req->features |= BIT_ULL(NPC_TYPE_ICMP);
+
+		flow_spec->icmp_code = match.key->code;
+		flow_mask->icmp_code = match.mask->code;
+		req->features |= BIT_ULL(NPC_CODE_ICMP);
+	}
 	return otx2_tc_parse_actions(nic, &rule->action, req, f, node);
 }
 
-- 
2.25.1


^ permalink raw reply related

* [PATCH net] tcp: fix fastopen code vs usec TS
From: Eric Dumazet @ 2023-10-31  6:19 UTC (permalink / raw)
  To: David S . Miller, Jakub Kicinski, Paolo Abeni
  Cc: netdev, eric.dumazet, Eric Dumazet, kernel test robot,
	Neal Cardwell, David Morley

After blamed commit, TFO client-ack-dropped-then-recovery-ms-timestamps
packetdrill test failed.

David Morley and Neal Cardwell started investigating and Neal pointed
that we had :

tcp_conn_request()
  tcp_try_fastopen()
   -> tcp_fastopen_create_child
     -> child = inet_csk(sk)->icsk_af_ops->syn_recv_sock()
       -> tcp_create_openreq_child()
          -> copy req_usec_ts from req:
          newtp->tcp_usec_ts = treq->req_usec_ts;
          // now the new TFO server socket always does usec TS, no matter
          // what the route options are...
  send_synack()
    -> tcp_make_synack()
        // disable tcp_rsk(req)->req_usec_ts if route option is not present:
        if (tcp_rsk(req)->req_usec_ts < 0)
                tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst);

tcp_conn_request() has the initial dst, we can initialize
tcp_rsk(req)->req_usec_ts there instead of later in send_synack();

This means tcp_rsk(req)->req_usec_ts can be a boolean.

Many thanks to David an Neal for their help.

Fixes: 614e8316aa4c ("tcp: add support for usec resolution in TCP TS values")
Reported-by: kernel test robot <oliver.sang@intel.com>
Closes: https://lore.kernel.org/oe-lkp/202310302216.f79d78bc-oliver.sang@intel.com
Suggested-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: David Morley <morleyd@google.com>
---
 include/linux/tcp.h   | 2 +-
 net/ipv4/syncookies.c | 2 +-
 net/ipv4/tcp_input.c  | 7 ++++---
 net/ipv4/tcp_output.c | 2 --
 4 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index ec4e9367f5b03be610f5f88621855f3a512604eb..68f3d315d2e18d93a356b0738e4ed855fac94591 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -152,7 +152,7 @@ struct tcp_request_sock {
 	u64				snt_synack; /* first SYNACK sent time */
 	bool				tfo_listener;
 	bool				is_mptcp;
-	s8				req_usec_ts;
+	bool				req_usec_ts;
 #if IS_ENABLED(CONFIG_MPTCP)
 	bool				drop_req;
 #endif
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 98b25e5d147bac5262982681b0bc5b38434a473a..d37282c06e3da05fd36c48e6b4236d74ac2b7fe2 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -306,7 +306,7 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops,
 	treq->af_specific = af_ops;
 
 	treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield;
-	treq->req_usec_ts = -1;
+	treq->req_usec_ts = false;
 
 #if IS_ENABLED(CONFIG_MPTCP)
 	treq->is_mptcp = sk_is_mptcp(sk);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 50aaa1527150bd8adabce125775aab8b97018d53..bcb55d98004c5213f0095613124d5193b15b2793 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -7115,7 +7115,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	req->syncookie = want_cookie;
 	tcp_rsk(req)->af_specific = af_ops;
 	tcp_rsk(req)->ts_off = 0;
-	tcp_rsk(req)->req_usec_ts = -1;
+	tcp_rsk(req)->req_usec_ts = false;
 #if IS_ENABLED(CONFIG_MPTCP)
 	tcp_rsk(req)->is_mptcp = 0;
 #endif
@@ -7143,9 +7143,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 	if (!dst)
 		goto drop_and_free;
 
-	if (tmp_opt.tstamp_ok)
+	if (tmp_opt.tstamp_ok) {
+		tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst);
 		tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
-
+	}
 	if (!want_cookie && !isn) {
 		int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index f558c054cf6e7538ecc3d711637af0bd44872318..0d8dd5b7e2e5e078d3e4bcc0d4270215f1be366d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3693,8 +3693,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	mss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
 
 	memset(&opts, 0, sizeof(opts));
-	if (tcp_rsk(req)->req_usec_ts < 0)
-		tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst);
 	now = tcp_clock_ns();
 #ifdef CONFIG_SYN_COOKIES
 	if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok))
-- 
2.42.0.820.g83a721a137-goog


^ permalink raw reply related

* Re: [PATCH v2 01/12] dt-bindings: net: snps,dwmac: Allow exclusive usage of ahb reset
From: Krzysztof Kozlowski @ 2023-10-31  5:48 UTC (permalink / raw)
  To: Cristian Ciocaltea, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Rob Herring, Krzysztof Kozlowski, Conor Dooley,
	Emil Renner Berthing, Samin Guo, Paul Walmsley, Palmer Dabbelt,
	Albert Ou, Alexandre Torgue, Jose Abreu, Maxime Coquelin,
	Richard Cochran, Giuseppe Cavallaro
  Cc: netdev, devicetree, linux-kernel, linux-riscv, linux-stm32,
	linux-arm-kernel, kernel
In-Reply-To: <d532514a-524c-4607-b97b-2f89bc563406@collabora.com>

On 30/10/2023 20:07, Cristian Ciocaltea wrote:
> On 10/30/23 09:26, Krzysztof Kozlowski wrote:
>> On 29/10/2023 23:24, Cristian Ciocaltea wrote:
>>> On 10/29/23 13:25, Krzysztof Kozlowski wrote:
>>>> On 29/10/2023 05:27, Cristian Ciocaltea wrote:
>>>>> The Synopsys DesignWare MAC found on the StarFive JH7100 SoC requires
>>>>> just the 'ahb' reset name, but the binding allows selecting it only in
>>>>> conjunction with 'stmmaceth'.
>>>>>
>>>>> Fix the issue by permitting exclusive usage of the 'ahb' reset name.
>>>>>
>>>>> Signed-off-by: Cristian Ciocaltea <cristian.ciocaltea@collabora.com>
>>>>> ---
>>>>>  Documentation/devicetree/bindings/net/snps,dwmac.yaml | 2 +-
>>>>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
>>>>> index 5c2769dc689a..a4d7172ea701 100644
>>>>> --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
>>>>> +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
>>>>> @@ -146,7 +146,7 @@ properties:
>>>>>    reset-names:
>>>>>      minItems: 1
>>>>>      items:
>>>>> -      - const: stmmaceth
>>>>> +      - enum: [stmmaceth, ahb]
>>>>
>>>> Also, this makes sense only with patch #4, so this should be squashed there.
>>>
>>> I added this as a separate patch since it changes the generic schema
>>> which is included by many other bindings.  JH7100 just happens to be the
>>> first use-case requiring this update.  But I can squash the patch if
>>> that's not a good enough reason to keep it separately.
>>
>> If there is no single user of this, why changing this? I would even
>> argue that it is not correct from existing bindings point of view -
>> nothing allows and uses ahb as the only reset. Even the commit msg
>> mentions your hardware from patch 4.
> 
> Sorry, I'm not sure I follow. JH7100 is (or will be) the user of it and,
> as a matter of fact, something similar has been done recently while
> adding support for JH7110.

Every patch should stand on its own and at this point nothing uses it.
We apply this patch #1 and we add dead code, unused case.

> 
> In particular, commit [1] changed this binding before the JH7110
> compatible was introduced in a subsequent patch. On a closer look that
> commit made a statement which is not entirely correct:
> 
> "dwmac controller may require one (stmmaceth) or two (stmmaceth+ahb)
> reset signals"
> 
> That's because stmmaceth is also optional in dwmac's driver, hence the
> correct message would have been:
> 
> "[...] may require one (stmmaceth OR ahb) [...]"

Driver does not describe the hardware. The bindings do, so according to
that description all supported hardware required MAC reset (stmmaceth).
Otherwise please point me to any hardware which skips MAC reset and
requires AHB reset instead (not future hardware, but current).

> 
> Hence, I think it makes sense to keep this patch, after adding the above
> details in the commit message.
> 
> [1] 843f603762a5 ("dt-bindings: net: snps,dwmac: Add 'ahb'
> reset/reset-name")
> 
> Thanks,
> Cristian

Best regards,
Krzysztof


^ permalink raw reply

* Re: linux-next: build failure after merge of the crypto tree
From: Eric Biggers @ 2023-10-31  4:51 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Herbert Xu, Stephen Rothwell, David Miller, Paolo Abeni,
	Networking, Linux Crypto List, Dmitry Safonov, Dmitry Safonov,
	Francesco Ruggeri, Salam Noureddine, Linux Kernel Mailing List,
	Linux Next Mailing List
In-Reply-To: <20231030150243.0e66ba73@kernel.org>

On Mon, Oct 30, 2023 at 03:02:43PM -0700, Jakub Kicinski wrote:
> On Mon, 30 Oct 2023 13:23:53 +0800 Herbert Xu wrote:
> > If we simply apply this patch to the netdev tree then everything
> > should work at the next merge window.  But perhaps you could change
> > the patch description to say something like remove the obsolete
> > crypto_hash_alignmask.  It's not important though.
> 
> I'm happy to massage the commit message and apply the fix to net.
> But is it actually 100% correct to do that? IOW is calling
> crypto_ahash_alignmask() already not necessary in net-next or does
> it only become unnecessary after some prep work in crypto-next?
> 
> We can tell Linus to squash this fix into the merge of either
> crypto-next or net-next, I'm pretty sure he'd be okay with that..

It's safe to fold the patch into net-next.  It actually looks like a bug to be
using the alignmask in the way that net/ipv4/tcp_ao.c is using it.  You don't
want to be erroring out just because the algorithm declared an alignmask.

- Eric

^ permalink raw reply

* Re: [PATCHv2 net] selftests: pmtu.sh: fix result checking
From: Po-Hsu Lin @ 2023-10-31  4:41 UTC (permalink / raw)
  To: Hangbin Liu
  Cc: netdev, David S. Miller, Jakub Kicinski, Eric Dumazet,
	Paolo Abeni, Shuah Khan, David Ahern, linux-kselftest
In-Reply-To: <20231031034732.3531008-1-liuhangbin@gmail.com>

On Tue, Oct 31, 2023 at 11:47 AM Hangbin Liu <liuhangbin@gmail.com> wrote:
>
> In the PMTU test, when all previous tests are skipped and the new test
> passes, the exit code is set to 0. However, the current check mistakenly
> treats this as an assignment, causing the check to pass every time.
>
> Consequently, regardless of how many tests have failed, if the latest test
> passes, the PMTU test will report a pass.
>
> Fixes: 2a9d3716b810 ("selftests: pmtu.sh: improve the test result processing")
> Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
> ---
> v2: use "-eq" instead of "=" to make less error-prone
> ---
>  tools/testing/selftests/net/pmtu.sh | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
> index f838dd370f6a..b3b2dc5a630c 100755
> --- a/tools/testing/selftests/net/pmtu.sh
> +++ b/tools/testing/selftests/net/pmtu.sh
> @@ -2048,7 +2048,7 @@ run_test() {
>         case $ret in
>                 0)
>                         all_skipped=false
> -                       [ $exitcode=$ksft_skip ] && exitcode=0
> +                       [ $exitcode -eq $ksft_skip ] && exitcode=0
>                 ;;
>                 $ksft_skip)
>                         [ $all_skipped = true ] && exitcode=$ksft_skip
> --
> 2.41.0
>
Acked-by: Po-Hsu Lin <po-hsu.lin@canonical.com>

Looking good to me, thanks!

^ permalink raw reply

* Re: [PATCH 4/8] clk: qcom: ipq5332: add gpll0_out_aux clock
From: Kathiravan Thirumoorthy @ 2023-10-31  4:24 UTC (permalink / raw)
  To: Stephen Boyd, Andy Gross, Bjorn Andersson, Catalin Marinas,
	Conor Dooley, Konrad Dybcio, Krzysztof Kozlowski,
	Michael Turquette, Richard Cochran, Rob Herring, Will Deacon
  Cc: linux-arm-msm, linux-clk, linux-kernel, devicetree, netdev,
	linux-arm-kernel
In-Reply-To: <ac223f97efea0d5077a4e3e4dbd805b4.sboyd@kernel.org>



On 10/31/2023 12:27 AM, Stephen Boyd wrote:
> Quoting Kathiravan Thirumoorthy (2023-10-30 02:47:19)
>> Add support for gpll0_out_aux clock which acts as the parent for
>> certain networking subsystem (NSS) clocks.
>>
>> Signed-off-by: Kathiravan Thirumoorthy <quic_kathirav@quicinc.com>
>> ---
>>   drivers/clk/qcom/gcc-ipq5332.c | 14 ++++++++++++++
>>   1 file changed, 14 insertions(+)
>>
>> diff --git a/drivers/clk/qcom/gcc-ipq5332.c b/drivers/clk/qcom/gcc-ipq5332.c
>> index 235849876a9a..966bb7ca8854 100644
>> --- a/drivers/clk/qcom/gcc-ipq5332.c
>> +++ b/drivers/clk/qcom/gcc-ipq5332.c
>> @@ -87,6 +87,19 @@ static struct clk_alpha_pll_postdiv gpll0 = {
>>          },
>>   };
>>   
>> +static struct clk_alpha_pll_postdiv gpll0_out_aux = {
>> +       .offset = 0x20000,
>> +       .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_STROMER_PLUS],
>> +       .width = 4,
>> +       .clkr.hw.init = &(struct clk_init_data) {
> 
> const initdata


Thanks for pointing it out. Some of the clock structure doesn't have the 
"const" qualifier. Will fix all those in V2.

> 
>> +               .name = "gpll0_out_aux",
>> +               .parent_hws = (const struct clk_hw *[]) {
>> +                               &gpll0_main.clkr.hw },
>> +               .num_parents = 1,
>> +               .ops = &clk_alpha_pll_postdiv_ro_ops,
>> +       },
>> +};
>> +
>>   static struct clk_alpha_pll gpll2_main = {
>>          .offset = 0x21000,
>>          .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_STROMER_PLUS],

^ permalink raw reply

* Re: [PATCH net-next v2 6/9] dt-bindings: net: oa-tc6: add PHY register access capability
From: Parthiban.Veerasooran @ 2023-10-31  4:22 UTC (permalink / raw)
  To: andrew
  Cc: davem, edumazet, kuba, pabeni, robh+dt, krzysztof.kozlowski+dt,
	conor+dt, corbet, Steen.Hegelund, rdunlap, horms, casper.casan,
	netdev, devicetree, linux-kernel, linux-doc, Horatiu.Vultur,
	Woojung.Huh, Nicolas.Ferre, UNGLinuxDriver, Thorsten.Kummermehr
In-Reply-To: <3d4b86a5-6a92-4456-a270-9091bdf8157e@lunn.ch>

Hi Andrew,

On 24/10/23 6:51 am, Andrew Lunn wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
> On Mon, Oct 23, 2023 at 09:16:46PM +0530, Parthiban Veerasooran wrote:
>> Direct PHY Register Access Capability indicates if PHY registers are
>> directly accessible within the SPI register memory space. Indirect PHY
>> Register Access Capability indicates if PHY registers are indirectly
>> accessible through the MDIO/MDC registers MDIOACCn.
>>
>> Signed-off-by: Parthiban Veerasooran <Parthiban.Veerasooran@microchip.com>
> 
> It is more normal to put all the bindings into one patch.
> 
> Again, this seems like configuration, not a description of the
> hardware. Its also not clear to my why you would want to configure it.
Yes, will remove this option from DT binding and will read the 
capability register (STDCAP) for the support.

Best Regards,
Parthiban V
> 
>          Andrew


^ permalink raw reply

* Re: [PATCH net-next v2 5/9] net: ethernet: oa_tc6: implement internal PHY initialization
From: Parthiban.Veerasooran @ 2023-10-31  4:20 UTC (permalink / raw)
  To: andrew
  Cc: davem, edumazet, kuba, pabeni, robh+dt, krzysztof.kozlowski+dt,
	conor+dt, corbet, Steen.Hegelund, rdunlap, horms, casper.casan,
	netdev, devicetree, linux-kernel, linux-doc, Horatiu.Vultur,
	Woojung.Huh, Nicolas.Ferre, UNGLinuxDriver, Thorsten.Kummermehr
In-Reply-To: <5c240b3b-60c2-45bb-8861-e3a8de28d00f@lunn.ch>

Hi Andrew,

On 24/10/23 6:26 am, Andrew Lunn wrote:
> EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe
> 
>> +     /* Minimum supported Chunk Payload Size */
>>        mincps = FIELD_GET(MINCPS, regval);
>> +     /* Cut-Through Capability */
>>        ctc = (regval & CTC) ? true : false;
> 
> These comment should be in the patch which added these, not here.
Ah yes. Will correct it.
> 
>> +     /* Direct PHY Register Access Capability */
>> +     dprac = (regval & DPRAC) ? true : false;
>> +     /* Indirect PHY Register access Capability */
>> +     iprac = (regval & IPRAC) ? true : false;
>>
>>        regval = 0;
>>        oa_node = of_get_child_by_name(spi->dev.of_node, "oa-tc6");
>> @@ -242,7 +257,7 @@ static int oa_tc6_configure(struct oa_tc6 *tc6)
>>                        if (tc6->cps < mincps)
>>                                return -ENODEV;
>>                } else {
>> -                     tc6->cps = 64;
>> +                     tc6->cps = OA_TC6_MAX_CPS;
> 
> This also should of been in an earlier patch.
Yes, will correct it.
> 
>>                }
>>                if (of_property_present(oa_node, "oa-txcte")) {
>>                        /* Return error if the tx cut through mode is configured
>> @@ -266,8 +281,26 @@ static int oa_tc6_configure(struct oa_tc6 *tc6)
>>                        regval |= PROTE;
>>                        tc6->prote = true;
>>                }
>> +             if (of_property_present(oa_node, "oa-dprac")) {
>> +                     /* Return error if the direct phy register access mode
>> +                      * is configured but it is not supported by MAC-PHY.
>> +                      */
>> +                     if (dprac)
>> +                             tc6->dprac = true;
>> +                     else
>> +                             return -ENODEV;
>> +             }
> 
> This is not in the binding. Why do we even need to be able to
> configure it. Direct is faster, so use it is available. If not, use
> indirect. And if both dprac and iproc are false, dev_err() and
> -ENODEV.
Ok, I will remove this option even in the next patch and will go with 
the option read from the capability register (STDCAP).
> 
>> +static int oa_tc6_mdiobus_read(struct mii_bus *bus, int phy_id, int idx)
> 
> It would be good to put direct in the name. If somebody implements
> indirect, it will make the naming easier.
Yes sure.
> 
>> +{
>> +     struct oa_tc6 *tc6 = bus->priv;
>> +     u32 regval;
>> +     bool ret;
>> +
>> +     ret = oa_tc6_read_register(tc6, 0xFF00 | (idx & 0xFF), &regval);
>> +     if (ret)
>> +             return -ENODEV;
>> +
>> +     return regval;
>> +}
>> +
>> +static int oa_tc6_mdiobus_write(struct mii_bus *bus, int phy_id, int idx,
>> +                             u16 val)
>> +{
>> +     struct oa_tc6 *tc6 = bus->priv;
>> +
>> +     return oa_tc6_write_register(tc6, 0xFF00 | (idx & 0xFF), val);
>> +}
>> +
>> +static int oa_tc6_phy_init(struct oa_tc6 *tc6)
>> +{
>> +     int ret;
>> +
>> +     if (tc6->dprac) {
> 
> You can avoid the indentation by first checking indirect is the only
> choice, and doing a dev_err() followed by return -ENODEV.
Ah ok, will do it.
> 
>> +             tc6->mdiobus = mdiobus_alloc();
>> +             if (!tc6->mdiobus) {
>> +                     netdev_err(tc6->netdev, "MDIO bus alloc failed\n");
>> +                     return -ENODEV;
>> +             }
>> +
>> +             tc6->mdiobus->phy_mask = ~(u32)BIT(1);
> 
> Does the standard define this ? BIT(1), not BIT(0)?
Ok, I think here is a typo. Will correct it.
> 
>>   /**
>>    * oa_tc6_init - allocates and intializes oa_tc6 structure.
>>    * @spi: device with which data will be exchanged.
>> - * @prote: control data (register) read/write protection enable/disable.
> 
> Something else which should of been in the previous patch. Please look
> through this patch and find all the other instances.
Yes sure. Will correct it.
> 
>> + * @netdev: network device to use.
>>    *
>>    * Returns pointer reference to the oa_tc6 structure if all the memory
>>    * allocation success otherwise NULL.
>>    */
>> -struct oa_tc6 *oa_tc6_init(struct spi_device *spi)
>> +struct oa_tc6 *oa_tc6_init(struct spi_device *spi, struct net_device *netdev)
>>   {
>>        struct oa_tc6 *tc6;
>>
>> @@ -395,15 +521,19 @@ struct oa_tc6 *oa_tc6_init(struct spi_device *spi)
>>        if (!tc6)
>>                return NULL;
>>
>> +     /* Allocate memory for the control tx buffer used for SPI transfer. */
>>        tc6->ctrl_tx_buf = devm_kzalloc(&spi->dev, TC6_CTRL_BUF_SIZE, GFP_KERNEL);
>>        if (!tc6->ctrl_tx_buf)
>>                return NULL;
>>
>> +     /* Allocate memory for the control rx buffer used for SPI transfer. */
>>        tc6->ctrl_rx_buf = devm_kzalloc(&spi->dev, TC6_CTRL_BUF_SIZE, GFP_KERNEL);
>>        if (!tc6->ctrl_rx_buf)
>>                return NULL;
>>
>>        tc6->spi = spi;
>> +     tc6->netdev = netdev;
>> +     SET_NETDEV_DEV(netdev, &spi->dev);
>>
>>        /* Perform MAC-PHY software reset */
>>        if (oa_tc6_sw_reset(tc6)) {
>> @@ -417,10 +547,27 @@ struct oa_tc6 *oa_tc6_init(struct spi_device *spi)
>>                return NULL;
>>        }
>>
>> +     /* Initialize PHY */
>> +     if (oa_tc6_phy_init(tc6)) {
>> +             dev_err(&spi->dev, "PHY initialization failed\n");
>> +             return NULL;
>> +     }
>> +
>>        return tc6;
>>   }
>>   EXPORT_SYMBOL_GPL(oa_tc6_init);
>>
>> +/**
>> + * oa_tc6_exit - exit function.
>> + * @tc6: oa_tc6 struct.
>> + *
>> + */
>> +void oa_tc6_exit(struct oa_tc6 *tc6)
>> +{
>> +     oa_tc6_phy_exit(tc6);
>> +}
>> +EXPORT_SYMBOL_GPL(oa_tc6_exit);
>> +
>>   MODULE_DESCRIPTION("OPEN Alliance 10BASE‑T1x MAC‑PHY Serial Interface Lib");
>>   MODULE_AUTHOR("Parthiban Veerasooran <parthiban.veerasooran@microchip.com>");
>>   MODULE_LICENSE("GPL");
>> diff --git a/include/linux/oa_tc6.h b/include/linux/oa_tc6.h
>> index 378636fd9ca8..36b729c384ac 100644
>> --- a/include/linux/oa_tc6.h
>> +++ b/include/linux/oa_tc6.h
>> @@ -5,54 +5,59 @@
>>    * Author: Parthiban Veerasooran <parthiban.veerasooran@microchip.com>
>>    */
>>
>> +#include <linux/etherdevice.h>
>>   #include <linux/spi/spi.h>
>>
>>   /* Control header */
>> -#define CTRL_HDR_DNC         BIT(31)         /* Data-Not-Control */
>> -#define CTRL_HDR_HDRB                BIT(30)         /* Received Header Bad */
>> -#define CTRL_HDR_WNR         BIT(29)         /* Write-Not-Read */
>> -#define CTRL_HDR_AID         BIT(28)         /* Address Increment Disable */
>> -#define CTRL_HDR_MMS         GENMASK(27, 24) /* Memory Map Selector */
>> -#define CTRL_HDR_ADDR                GENMASK(23, 8)  /* Address */
>> -#define CTRL_HDR_LEN         GENMASK(7, 1)   /* Length */
>> -#define CTRL_HDR_P           BIT(0)          /* Parity Bit */
>> +#define CTRL_HDR_DNC BIT(31)         /* Data-Not-Control */
>> +#define CTRL_HDR_HDRB        BIT(30)         /* Received Header Bad */
>> +#define CTRL_HDR_WNR BIT(29)         /* Write-Not-Read */
>> +#define CTRL_HDR_AID BIT(28)         /* Address Increment Disable */
>> +#define CTRL_HDR_MMS GENMASK(27, 24) /* Memory Map Selector */
>> +#define CTRL_HDR_ADDR        GENMASK(23, 8)  /* Address */
>> +#define CTRL_HDR_LEN GENMASK(7, 1)   /* Length */
>> +#define CTRL_HDR_P   BIT(0)          /* Parity Bit */
> 
> Please don't change the whitespace like this.
Ah yes, will correct it.

Best Regards,
Parthiban V
> 
>         Andrew


^ permalink raw reply

* Re: [PATCH 2/8] dt-bindings: clock: ipq5332: drop the few nss clocks definition
From: Kathiravan Thirumoorthy @ 2023-10-31  4:20 UTC (permalink / raw)
  To: Stephen Boyd, Andy Gross, Bjorn Andersson, Catalin Marinas,
	Conor Dooley, Konrad Dybcio, Krzysztof Kozlowski,
	Michael Turquette, Richard Cochran, Rob Herring, Will Deacon
  Cc: linux-arm-msm, linux-clk, linux-kernel, devicetree, netdev,
	linux-arm-kernel
In-Reply-To: <4ac6b9d1e6dc7859b39fa456cec70fc7.sboyd@kernel.org>



On 10/31/2023 12:26 AM, Stephen Boyd wrote:
> Quoting Kathiravan Thirumoorthy (2023-10-30 02:47:17)
>> gcc_snoc_nssnoc_clk, gcc_snoc_nssnoc_1_clk, gcc_nssnoc_nsscc_clk are
>> enabled by default and it's RCG is properly configured by bootloader.
>>
>> Some of the NSS clocks needs these clocks to be enabled. To avoid
>> these clocks being disabled by clock framework, drop these entries.
>>
>> Signed-off-by: Kathiravan Thirumoorthy <quic_kathirav@quicinc.com>
>> ---
> 
> Instead of this patch just drop the clks from the table and enable the
> clks during probe with register writes.


Thanks for the suggestion Stephen, will handle this way in V2. Between, 
I think still the entries in the dt-bindings can be dropped along with 
the entries in the clock table?

^ permalink raw reply

* Re: [PATCH 7/8] arm64: dts: qcom: ipq5332: add support for the NSSCC
From: Kathiravan Thirumoorthy @ 2023-10-31  4:18 UTC (permalink / raw)
  To: Krzysztof Kozlowski, Bjorn Andersson, Andy Gross, Konrad Dybcio,
	Michael Turquette, Stephen Boyd, Rob Herring, Krzysztof Kozlowski,
	Conor Dooley, Richard Cochran, Catalin Marinas, Will Deacon
  Cc: linux-arm-msm, linux-clk, linux-kernel, devicetree, netdev,
	linux-arm-kernel
In-Reply-To: <6cc57f82-cd65-42b3-99cd-79b5b784c386@linaro.org>



On 10/30/2023 4:43 PM, Krzysztof Kozlowski wrote:
> On 30/10/2023 10:47, Kathiravan Thirumoorthy wrote:
>> Describe the NSS clock controller node and it's relevant external
>> clocks.
>>
>> Signed-off-by: Kathiravan Thirumoorthy <quic_kathirav@quicinc.com>
>> ---
>>   arch/arm64/boot/dts/qcom/ipq5332.dtsi | 28 ++++++++++++++++++++++++++++
>>   1 file changed, 28 insertions(+)
>>
>> diff --git a/arch/arm64/boot/dts/qcom/ipq5332.dtsi b/arch/arm64/boot/dts/qcom/ipq5332.dtsi
>> index 42e2e48b2bc3..291f14a3f10a 100644
>> --- a/arch/arm64/boot/dts/qcom/ipq5332.dtsi
>> +++ b/arch/arm64/boot/dts/qcom/ipq5332.dtsi
>> @@ -15,6 +15,18 @@ / {
>>   	#size-cells = <2>;
>>   
>>   	clocks {
>> +		cmn_pll_nss_clk_200m: cmn-pll-nss-clk-200m {
> 
> Node names should be generic. See also an explanation and list of
> examples (not exhaustive) in DT specification:
> https://devicetree-specification.readthedocs.io/en/latest/chapter2-devicetree-basics.html#generic-names-recommendation
> 
> 
> so with "clk" suffix, e.g. cmn-pll-nss-1-clk.


Sure, will fix it in V2.


> 
>> +			compatible = "fixed-clock";
>> +			clock-frequency = <200000000>;
>> +			#clock-cells = <0>;
>> +		};
>> +
>> +		cmn_pll_nss_clk_300m: cmn-pll-nss-clk-300m {
>> +			compatible = "fixed-clock";
>> +			clock-frequency = <300000000>;
>> +			#clock-cells = <0>;
>> +		};
>> +
>>   		sleep_clk: sleep-clk {
>>   			compatible = "fixed-clock";
>>   			#clock-cells = <0>;
>> @@ -473,6 +485,22 @@ frame@b128000 {
>>   				status = "disabled";
>>   			};
>>   		};
> Best regards,
> Krzysztof
> 

^ permalink raw reply

* Re: [PATCH 5/8] dt-bindings: clock: add IPQ5332 NSSCC clock and reset definitions
From: Kathiravan Thirumoorthy @ 2023-10-31  4:17 UTC (permalink / raw)
  To: Krzysztof Kozlowski, Bjorn Andersson, Andy Gross, Konrad Dybcio,
	Michael Turquette, Stephen Boyd, Rob Herring, Krzysztof Kozlowski,
	Conor Dooley, Richard Cochran, Catalin Marinas, Will Deacon
  Cc: linux-arm-msm, linux-clk, linux-kernel, devicetree, netdev,
	linux-arm-kernel
In-Reply-To: <02e2bd74-2509-4cec-a85e-4acfc13eea84@linaro.org>



On 10/30/2023 4:41 PM, Krzysztof Kozlowski wrote:
> On 30/10/2023 10:47, Kathiravan Thirumoorthy wrote:
>> Add NSSCC clock and reset definitions for IPQ5332.
> 
> Qualcomm IPQ5332
> 
> This applies to all your patches in all your patchsets in entire
> Qualcomm organisation. You add code to common, upstream Linux kernel
> where hundreds of companies also contribute. Except me and few more
> folks, no one knows what is IPQ5332. Other 5000 developers do not know.
> Other millions of users do not know.


Thanks, Understand the concern. Will follow it in the upcoming patches / 
series.

> 
>>
>> Signed-off-by: Kathiravan Thirumoorthy <quic_kathirav@quicinc.com>
>> ---
> 
> Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>


Thanks!

> 
> Best regards,
> Krzysztof
> 

^ permalink raw reply

* Re: [PATCH 8/8] arm64: defconfig: build NSS Clock Controller driver for IPQ5332
From: Kathiravan Thirumoorthy @ 2023-10-31  4:16 UTC (permalink / raw)
  To: Krzysztof Kozlowski, Bjorn Andersson, Andy Gross, Konrad Dybcio,
	Michael Turquette, Stephen Boyd, Rob Herring, Krzysztof Kozlowski,
	Conor Dooley, Richard Cochran, Catalin Marinas, Will Deacon
  Cc: linux-arm-msm, linux-clk, linux-kernel, devicetree, netdev,
	linux-arm-kernel
In-Reply-To: <18ca181a-8aee-46f5-9e2d-bfba4c8bd99e@linaro.org>



On 10/30/2023 4:38 PM, Krzysztof Kozlowski wrote:
> On 30/10/2023 10:47, Kathiravan Thirumoorthy wrote:
>> Build Qualcomm IPQ9574 NSSCC driver as module.
> 
> Why? Commit msg should answer this.


Sure, will fix it in V2.

> 
> Best regards,
> Krzysztof
> 

^ permalink raw reply

* Re: [PATCH v4 1/5] r8169: Coalesce r8169_mac_ocp_write/modify calls to reduce spinlock stalls
From: Mirsad Todorovac @ 2023-10-31  3:51 UTC (permalink / raw)
  To: Jacob Keller, Heiner Kallweit, Jason Gunthorpe, Joerg Roedel,
	Lu Baolu, iommu, linux-kernel, netdev
  Cc: Joerg Roedel, Will Deacon, Robin Murphy, nic_swsd,
	David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Marco Elver
In-Reply-To: <e1c666d8-c7f0-440e-b362-3dbb7a67b242@intel.com>



On 10/31/23 00:14, Jacob Keller wrote:
> 
> 
> On 10/30/2023 3:08 PM, Heiner Kallweit wrote:
>> On 30.10.2023 22:50, Jacob Keller wrote:
>>>
>>>
>>> On 10/29/2023 4:04 AM, Mirsad Goran Todorovac wrote:> A pair of new
>>> helpers r8168_mac_ocp_write_seq() and r8168_mac_ocp_modify_seq()
>>>> are introduced.
>>>>
>>>> The motivation for these helpers was the locking overhead of 130 consecutive
>>>> r8168_mac_ocp_write() calls in the RTL8411b reset after the NIC gets confused
>>>> if the PHY is powered-down.
>>>>
>>>> To quote Heiner:
>>>>
>>>>      On RTL8411b the RX unit gets confused if the PHY is powered-down.
>>>>      This was reported in [0] and confirmed by Realtek. Realtek provided
>>>>      a sequence to fix the RX unit after PHY wakeup.
>>>>
>>>> A series of about 130 r8168_mac_ocp_write() calls is performed to program the
>>>> RTL registers for recovery, each doing an expensive spin_lock_irqsave() and
>>>> spin_unlock_irqrestore().
>>>>
>>>> Each mac ocp write is made of:
>>>>
>>>>      static void __r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg,
>>>>                        u32 data)
>>>>      {
>>>>          if (rtl_ocp_reg_failure(reg))
>>>>              return;
>>>>
>>>>          RTL_W32(tp, OCPDR, OCPAR_FLAG | (reg << 15) | data);
>>>>      }
>>>>
>>>>      static void r8168_mac_ocp_write(struct rtl8169_private *tp, u32 reg,
>>>>                      u32 data)
>>>>      {
>>>>          unsigned long flags;
>>>>
>>>>          raw_spin_lock_irqsave(&tp->mac_ocp_lock, flags);
>>>>          __r8168_mac_ocp_write(tp, reg, data);
>>>>          raw_spin_unlock_irqrestore(&tp->mac_ocp_lock, flags);
>>>>      }
>>>>
>>>> Register programming is done through RTL_W32() macro which expands into
>>>>
>>>>      #define RTL_W32(tp, reg, val32) writel((val32), tp->mmio_addr + (reg))
>>>>
>>>> which is further (on Alpha):
>>>>
>>>>      extern inline void writel(u32 b, volatile void __iomem *addr)
>>>>      {
>>>>          mb();
>>>>          __raw_writel(b, addr);
>>>>      }
>>>>
>>>> or on i386/x86_64:
>>>>
>>>>      #define build_mmio_write(name, size, type, reg, barrier) \
>>>>      static inline void name(type val, volatile void __iomem *addr) \
>>>>      { asm volatile("mov" size " %0,%1": :reg (val), \
>>>>      "m" (*(volatile type __force *)addr) barrier); }
>>>>
>>>>      build_mmio_write(writel, "l", unsigned int, "r", :"memory")
>>>>
>>>> This obviously involves iat least a compiler barrier.
>>>>
>>>> mb() expands into something like this i.e. on x86_64:
>>>>
>>>>      #define mb()    asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
>>>>
>>>> This means a whole lot of memory bus stalls: for spin_lock_irqsave(),
>>>> memory barrier, writel(), and spin_unlock_irqrestore().
>>>>
>>>> With about 130 of these sequential calls to r8168_mac_ocp_write() this looks like
>>>> a lock storm that will stall all of the cores and CPUs on the same memory controller
>>>> for certain time I/O takes to finish.
>>>>
>>>> In a sequential case of RTL register programming, the writes to RTL registers
>>>> can be coalesced under a same raw spinlock. This can dramatically decrease the
>>>> number of bus stalls in a multicore or multi-CPU system.
>>>>
>>>> Macro helpers r8168_mac_ocp_write_seq() and r8168_mac_ocp_modify_seq() are
>>>> provided to reduce lock contention:
>>>>
>>>>      static void rtl_hw_start_8411_2(struct rtl8169_private *tp)
>>>>      {
>>>>
>>>>          ...
>>>>
>>>>          /* The following Realtek-provided magic fixes an issue with the RX unit
>>>>           * getting confused after the PHY having been powered-down.
>>>>           */
>>>>
>>>>          static const struct recover_8411b_info init_zero_seq[] = {
>>>>              { 0xFC28, 0x0000 }, { 0xFC2A, 0x0000 }, { 0xFC2C, 0x0000 },
>>>>              ...
>>>>          };
>>>>
>>>>          ...
>>>>
>>>>          r8168_mac_ocp_write_seq(tp, init_zero_seq);
>>>>
>>>>          ...
>>>>
>>>>      }
>>>>
>>>> The hex data is preserved intact through s/r8168_mac_ocp_write[(]tp,/{ / and s/[)];/ },/
>>>> functions that only changed the function names and the ending of the line, so the actual
>>>> hex data is unchanged.
>>>>
>>>> To repeat, the reason for the introduction of the original commit
>>>> was to enable recovery of the RX unit on the RTL8411b which was confused by the
>>>> powered-down PHY. This sequence of r8168_mac_ocp_write() calls amplifies the problem
>>>> into a series of about 500+ memory bus locks, most waiting for the main memory read,
>>>> modify and write under a LOCK. The memory barrier in RTL_W32 should suffice for
>>>> the programming sequence to reach RTL NIC registers.
>>>>
>>>> [0] https://bugzilla.redhat.com/show_bug.cgi?id=1692075
>>>>
>>>
>>>
>>> I might have chosen to send some of this information as the cover letter
>>> for the series instead of just as part of the commit message for [1/5],
>>> but either way:
>>>
>>> Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
>>
>> Cover letter is still missing, and there's a v5 already.
>> Good example why we have the "max one version per day" rule.
>>
>> There's still some issues with the series, see my review comments
>> for v5. As-is I'd NAK the series.

I realise we need to keep the development process coherent. I am sorry that
my inexperience in the patch submission process made the whole series look bad.

As I previously stated to Mr. Kallweit, I will do the required number of iterations
to ensure the quality of the patches (I saw some go up to over 20 versions).

> Heh, ya. A v5 was sent without there being a single (public) comment on
> the list prior to my reviewing. I didn't notice the v5, and my mail
> scripts pointed out this series didn't have anyone who'd looked at it
> yet.. I guess I could have searched for and noticed a newer version.

Well, dear Sir,

I see I owe you an apology for I did not know about the "max one version per day"
rule. I was warned however not to overwhelm the maintainers by Guillaume Nault in
January and somehow I hypomanicaly OCD'd on this. My fault entirely.

I hope we can mend this.

I guess this is my time to take a break, do some homework and return to the drawing
board.

Besides, now we are in the merge window anyway, so I should thank Mr. Kallweit for
the special attention and for making an exception.

Am I allowed to keep Mr. Keller's Reviewed-by: tags on the reviewed diffs provided
that I fix the cover letter issue and objections?

Have a nice day.

Regards,
Mirsad

^ permalink raw reply

* [PATCHv2 net] selftests: pmtu.sh: fix result checking
From: Hangbin Liu @ 2023-10-31  3:47 UTC (permalink / raw)
  To: netdev
  Cc: David S. Miller, Jakub Kicinski, Eric Dumazet, Paolo Abeni,
	Shuah Khan, David Ahern, linux-kselftest, Po-Hsu Lin, Hangbin Liu

In the PMTU test, when all previous tests are skipped and the new test
passes, the exit code is set to 0. However, the current check mistakenly
treats this as an assignment, causing the check to pass every time.

Consequently, regardless of how many tests have failed, if the latest test
passes, the PMTU test will report a pass.

Fixes: 2a9d3716b810 ("selftests: pmtu.sh: improve the test result processing")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
v2: use "-eq" instead of "=" to make less error-prone
---
 tools/testing/selftests/net/pmtu.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index f838dd370f6a..b3b2dc5a630c 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -2048,7 +2048,7 @@ run_test() {
 	case $ret in
 		0)
 			all_skipped=false
-			[ $exitcode=$ksft_skip ] && exitcode=0
+			[ $exitcode -eq $ksft_skip ] && exitcode=0
 		;;
 		$ksft_skip)
 			[ $all_skipped = true ] && exitcode=$ksft_skip
-- 
2.41.0


^ permalink raw reply related

* Re: [PATCH net v2] net: dsa: tag_rtl4_a: Bump min packet size
From: Florian Fainelli @ 2023-10-31  3:37 UTC (permalink / raw)
  To: Andrew Lunn, Vladimir Oltean
  Cc: Linus Walleij, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, netdev, linux-kernel
In-Reply-To: <494a8bb7-7ca1-40bd-b3a7-babeadfd88a0@lunn.ch>



On 10/30/2023 5:37 PM, Andrew Lunn wrote:
> On Tue, Oct 31, 2023 at 01:09:06AM +0200, Vladimir Oltean wrote:
>> On Mon, Oct 30, 2023 at 11:57:33PM +0100, Linus Walleij wrote:
>>> This of course make no sense, since the padding function should do nothing
>>> when the packet is bigger than 60 bytes.
>>
>> Indeed, this of course makes no sense. Ping doesn't work, or ARP doesn't
>> work? Could you add a static ARP entry for the 192.168.1.137 IP address?
> 
> Probably the ARP, since they are short packets and probably need the
> padding.

Does this also mean that there is a general problem with unicast 
packets, and that broadcast packets happen to work by accident? Could 
you check whether a broadcast ping (ping -b) size sweep is immune to 
what you are reporting?
-- 
Florian

^ permalink raw reply

* Re: [PATCH v3 1/1] r8169: Coalesce RTL8411b PHY power-down recovery programming instructions to reduce spinlock stalls
From: Mirsad Todorovac @ 2023-10-31  3:35 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: Heiner Kallweit, netdev, linux-kernel, nic_swsd, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Marco Elver
In-Reply-To: <bd4a59be-c393-4302-9d32-759e7cbfe255@lunn.ch>

On 10/31/23 02:21, Andrew Lunn wrote:
>> I will not contradict, but the cummulative amount of memory barriers on each MMIO read/write
>> in each single one of the drivers could amount to some degrading of overall performance and
>> latency in a multicore system.
> 
> For optimisations, we like to see benchmark results which show some
> improvements. Do you have any numbers?

Hi, Andrew,

Thank you for your interest in RTL NIC driver optimisations.

My knowledge about the timing costs of synchronisation is mostly theoretical.

By the table below, the cost of LOCK CHPXCHG m,616/32/64 is 52 cycles (in case the
memory location is already in the L3 cache I suppose, cache miss can makes things worse,
but we are hitting the same lock repeatedly).

So, we have (in the best case) 52 clocks for lock, 52 for unlock (for the uncontentended
case).

This means 104 cycle x 130 sequential lock+unlock pairs = 13520 clk ~ 3.38 us on a 4 GHz
machine (5.633 us on a 2.4 GHz CPU) while the multicore system does nothing but locking
and unclocking after therecovery from the loss of PHY.

We are talking about the RTl8411b.

The other case it the rather new 8125/8125b.

     static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
     {
         rtl_pcie_state_l2l3_disable(tp);

         RTL_W16(tp, 0x382, 0x221b);
         RTL_W8(tp, 0x4500, 0);
         RTL_W16(tp, 0x4800, 0);

         /* disable UPS */
         r8168_mac_ocp_modify(tp, 0xd40a, 0x0010, 0x0000);

         RTL_W8(tp, Config1, RTL_R8(tp, Config1) & ~0x10);

         r8168_mac_ocp_write(tp, 0xc140, 0xffff);
         r8168_mac_ocp_write(tp, 0xc142, 0xffff);

         r8168_mac_ocp_modify(tp, 0xd3e2, 0x0fff, 0x03a9);
         r8168_mac_ocp_modify(tp, 0xd3e4, 0x00ff, 0x0000);
         r8168_mac_ocp_modify(tp, 0xe860, 0x0000, 0x0080);

         /* disable new tx descriptor format */
         r8168_mac_ocp_modify(tp, 0xeb58, 0x0001, 0x0000);

         if (tp->mac_version == RTL_GIGA_MAC_VER_63)
                 r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0200);
         else
                 r8168_mac_ocp_modify(tp, 0xe614, 0x0700, 0x0400);

         if (tp->mac_version == RTL_GIGA_MAC_VER_63)
                 r8168_mac_ocp_modify(tp, 0xe63e, 0x0c30, 0x0000);
         else
                 r8168_mac_ocp_modify(tp, 0xe63e, 0x0c30, 0x0020);

         r8168_mac_ocp_modify(tp, 0xc0b4, 0x0000, 0x000c);
         r8168_mac_ocp_modify(tp, 0xeb6a, 0x00ff, 0x0033);
         r8168_mac_ocp_modify(tp, 0xeb50, 0x03e0, 0x0040);
         r8168_mac_ocp_modify(tp, 0xe056, 0x00f0, 0x0030);
         r8168_mac_ocp_modify(tp, 0xe040, 0x1000, 0x0000);
         r8168_mac_ocp_modify(tp, 0xea1c, 0x0003, 0x0001);
         r8168_mac_ocp_modify(tp, 0xe0c0, 0x4f0f, 0x4403);
         r8168_mac_ocp_modify(tp, 0xe052, 0x0080, 0x0068);
         r8168_mac_ocp_modify(tp, 0xd430, 0x0fff, 0x047f);

         r8168_mac_ocp_modify(tp, 0xea1c, 0x0004, 0x0000);
         r8168_mac_ocp_modify(tp, 0xeb54, 0x0000, 0x0001);
         udelay(1);
         r8168_mac_ocp_modify(tp, 0xeb54, 0x0001, 0x0000);
         RTL_W16(tp, 0x1880, RTL_R16(tp, 0x1880) & ~0x0030);

         r8168_mac_ocp_write(tp, 0xe098, 0xc302);

         rtl_loop_wait_low(tp, &rtl_mac_ocp_e00e_cond, 1000, 10);

         if (tp->mac_version == RTL_GIGA_MAC_VER_63)
                 rtl8125b_config_eee_mac(tp);
         else
                 rtl8125a_config_eee_mac(tp);

         rtl_disable_rxdvgate(tp);
    }

There are 22 calls to mac ocp write/modify, each having a raw_spin_lock_irqsave()/
raw_spin_unlock_irqrestore() pair.

The minimal timing cost is 22x104 = 2288 cycles or a total of 0.572 us on a 4 GHz CPU,
preventing all cores from the memory access. (0.953 us on a 2.4 GHz system).

This does happen only at the startup apparently, but also at each rtl_reset_work(tp)
and rtl8169_up(tp), rtl_open() at driver load, and rtl8169_runtime_resume() called by
the rtl8169_resume().

The revised version does only 2 pairs of raw_spin_lock_irqsave()/
raw_sping_unlock_irqrestore(), but I admit it might be harder to read and maintain.

My knowledge of the Linux kernel network stack isn't that deep, and I can't tell how
often rtl8169_resume() is called in real life and how to reproduce and benchmark that
call.

This is a sort of a "loophole optimisation", I don't have the numbers on the impact
on the entire Linux on i.e. 16 core/32 threads Ryzen 9 7950X CPU.

Probably things would be worse on a 64 core CPU, because the 5.633 us and 0.953 us
would have to be multiplied by the number of blocked cores, amounting to estimated
total CPU cost of 360 us and 61 us respectively. Per NIC reset.

However, I am relatively new to the r8169 driver, I came across while analysing
KCSAN reports since the beginning of August. So ATM I don't have any benchmarks
to confirm the theoretical findings.

I realise that maintainers have to cherry pick patches or the kernel of +35M lines
would become way too cluttered and unmaintainable.

NOTE: The numbers are only valid for a x86_64 system.

Regards,
Mirsad

SOURCE AND REFERENCES:

Synchronization (Source: [1])
===============================================
instruction	operands	ops	latency			
===============================================	
LOCK ADD 	m,r		1	 ~55
XADD 		m,r		4	 10
LOCK XADD 	m,r 		4	 ~51
CMPXCHG 	m8,r8		5	 15
LOCK CMPXCHG 	m8,r8		5	 ~51
CMPXCHG 	m,r16/32/64	6	 14
LOCK CMPXCHG 	m,r16/32/64	6	 ~52
CMPXCHG8B 	m64		18	 15
LOCK CMPXCHG8B 	m64		18	 ~53
CMPXCHG16B 	m128		22	 52
LOCK CMPXCHG16B m128		22	 ~94
===============================================

Explanation of column headings:
Instruction:
	Instruction name. cc means any condition code. For example, Jcc can be JB, JNE,
	etc.

Operands:
	i = immediate constant, r = any register, r32 = 32-bit register, etc., mm = 64 bit
	mmx register, x = 128 bit xmm register, y = 256 bit ymm register, m = any memory
	operand including indirect operands, m64 means 64-bit memory operand, etc.
Ops:
	Number of macro-operations issued from instruction decoder to schedulers. In-
	structions with more than 2 macro-operations use microcode.
Latency:
	This is the delay that the instruction generates in a dependency chain. The num-
	bers are minimum values. Cache misses, misalignment, and exceptions may in-
	crease the clock counts considerably. Floating point operands are presumed to be
	normal numbers. Denormal numbers, NAN's, infinity and exceptions increase the
	delays. The latency listed does not include the memory operand where the listing
	for register and memory operand are joined (r/m).

[1] TL;DR https://www.agner.org/optimize/instruction_tables.pdf
[2] TL;DR http://www.rdrop.com/users/paulmck/scalability/paper/whymb.2010.07.23a.pdf


^ permalink raw reply

* RE: [PATCH net-next 1/1] net: stmmac: check CBS input values before configuration
From: Gan, Yi Fang @ 2023-10-31  3:11 UTC (permalink / raw)
  To: Drewek, Wojciech, Alexandre Torgue, Jose Abreu, David S . Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Maxime Coquelin,
	netdev@vger.kernel.org, linux-stm32@st-md-mailman.stormreply.com,
	linux-arm-kernel@lists.infradead.org,
	linux-kernel@vger.kernel.org
  Cc: Looi, Hong Aun, Voon, Weifeng, Song, Yoong Siang,
	Sit, Michael Wei Hong
In-Reply-To: <99233115-89ca-4ae8-8679-a16e1f959727@intel.com>

Hi,

Value zero is allowed. I will update V2 with the commit msg updated.

Best regards,
Gan Yi Fang

> -----Original Message-----
> From: Drewek, Wojciech <wojciech.drewek@intel.com>
> Sent: Friday, October 27, 2023 6:08 PM
> To: Gan, Yi Fang <yi.fang.gan@intel.com>; Alexandre Torgue
> <alexandre.torgue@foss.st.com>; Jose Abreu <joabreu@synopsys.com>;
> David S . Miller <davem@davemloft.net>; Eric Dumazet
> <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>; Paolo Abeni
> <pabeni@redhat.com>; Maxime Coquelin <mcoquelin.stm32@gmail.com>;
> netdev@vger.kernel.org; linux-stm32@st-md-mailman.stormreply.com; linux-
> arm-kernel@lists.infradead.org; linux-kernel@vger.kernel.org
> Cc: Looi, Hong Aun <hong.aun.looi@intel.com>; Voon, Weifeng
> <weifeng.voon@intel.com>; Song, Yoong Siang
> <yoong.siang.song@intel.com>; Sit, Michael Wei Hong
> <michael.wei.hong.sit@intel.com>
> Subject: Re: [PATCH net-next 1/1] net: stmmac: check CBS input values before
> configuration
> 
> 
> 
> On 27.10.2023 08:11, Gan Yi Fang wrote:
> > From: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
> >
> > Add check for below conditions before proceeding to configuration.
> > A message will be prompted if the input value is invalid.
> >
> > Idleslope minus sendslope should equal speed_div.
> > Idleslope is always a positive value.
> > Sendslope is always a negative value.
> > Hicredit is always a positive value.
> > Locredit is always a negative value.
> 
> Can those values be equal to 0? The code allows it but the commit msg
> doesn't mention that.
> Some drivers does not allow 0, lan966x_cbs_add e.g. Would be good to
> double check that.
> 
> >
> > Signed-off-by: Michael Sit Wei Hong <michael.wei.hong.sit@intel.com>
> > Signed-off-by: Gan, Yi Fang <yi.fang.gan@intel.com>
> > ---
> >  drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c | 5 +++++
> >  1 file changed, 5 insertions(+)
> >
> > diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
> b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
> > index ac41ef4cbd2f..e8a079946f84 100644
> > --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
> > +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
> > @@ -381,6 +381,11 @@ static int tc_setup_cbs(struct stmmac_priv *priv,
> >  		return -EOPNOTSUPP;
> >  	}
> >
> > +	if ((qopt->idleslope - qopt->sendslope != speed_div) ||
> > +	    qopt->idleslope < 0 || qopt->sendslope > 0 ||
> > +	    qopt->hicredit < 0 || qopt->locredit > 0)
> > +		return -EINVAL;
> > +
> >  	mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use;
> >  	if (mode_to_use == MTL_QUEUE_DCB && qopt->enable) {
> >  		ret = stmmac_dma_qmode(priv, priv->ioaddr, queue,
> MTL_QUEUE_AVB);

^ permalink raw reply

* Re: [PATCH net 4/5] net/smc: protect connection state transitions in listen work
From: D. Wythe @ 2023-10-31  3:04 UTC (permalink / raw)
  To: Wenjia Zhang, kgraul, jaka, wintera
  Cc: kuba, davem, netdev, linux-s390, linux-rdma
In-Reply-To: <52133656-4dc6-4f32-9881-b63f19bb8859@linux.ibm.com>



On 10/13/23 1:14 AM, Wenjia Zhang wrote:
>
>
> On 11.10.23 09:33, D. Wythe wrote:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> Consider the following scenario:
>>
>>                 smc_close_passive_work
>> smc_listen_out_connected
>>                 lock_sock()
>> if (state  == SMC_INIT)
>>                 if (state  == SMC_INIT)
>>                     state = SMC_APPCLOSEWAIT1;
>>     state = SMC_ACTIVE
>>                 release_sock()
>>
>> This would cause the state machine of the connection to be corrupted.
>> Also, this issue can occur in smc_listen_out_err().
>>
>> To solve this problem, we can protect the state transitions under
>> the lock of sock to avoid collision.
>>
> To this fix, I have to repeat the question from Alexandra.
> Did the scenario occur in real life? Or is it just kind of potencial 
> problem you found during the code review?
>

Hi Wenjia,

This is a real issue that occurred in our environment rather than being 
obtained from code reviews.
Unfortunately, since this patch does not cause panic, but rather 
potential reference leaks, so it is difficult for me
to provide a very intuitive error message.

> If it is the former one, could you please show us the corresponding 
> message, e.g. from dmesg? If it is the latter one, I'd like to deal 
> with it more carefully. Going from this scenario, I noticed that there 
> could also be other similar places where we need to make sure that no 
> race happens. Thus, it would make more sense to find a systematic 
> approach.
>

We agree that we should deal with it with more care, In fact, this issue 
is very complex and we may spend a lot of time discussing it. Therefore, 
I suggest that we can temporarily drop it
so that we can quickly accept the patch we have already agreed on. I 
will send those patches separately in the future.

Best Wishes,
D. Wythe

>> Fixes: 3b2dec2603d5 ("net/smc: restructure client and server code in 
>> af_smc")
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> ---
>>   net/smc/af_smc.c | 5 +++++
>>   1 file changed, 5 insertions(+)
>>
>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> index 5ad2a9f..3bb8265 100644
>> --- a/net/smc/af_smc.c
>> +++ b/net/smc/af_smc.c
>> @@ -1926,8 +1926,10 @@ static void smc_listen_out_connected(struct 
>> smc_sock *new_smc)
>>   {
>>       struct sock *newsmcsk = &new_smc->sk;
>>   +    lock_sock(newsmcsk);
>>       if (newsmcsk->sk_state == SMC_INIT)
>>           newsmcsk->sk_state = SMC_ACTIVE;
>> +    release_sock(newsmcsk);
>>         smc_listen_out(new_smc);
>>   }
>> @@ -1939,9 +1941,12 @@ static void smc_listen_out_err(struct smc_sock 
>> *new_smc)
>>       struct net *net = sock_net(newsmcsk);
>> this_cpu_inc(net->smc.smc_stats->srv_hshake_err_cnt);
>> +
>> +    lock_sock(newsmcsk);
>>       if (newsmcsk->sk_state == SMC_INIT)
>>           sock_put(&new_smc->sk); /* passive closing */
>>       newsmcsk->sk_state = SMC_CLOSED;
>> +    release_sock(newsmcsk);
>>         smc_listen_out(new_smc);
>>   }


^ permalink raw reply

* [PATCH net v3] net: stmmac: xgmac: Enable support for multiple Flexible PPS outputs
From: Furong Xu @ 2023-10-31  2:27 UTC (permalink / raw)
  To: David S. Miller, Alexandre Torgue, Jose Abreu, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Maxime Coquelin, Joao Pinto,
	Simon Horman, Serge Semin, Jacob Keller
  Cc: netdev, linux-stm32, linux-arm-kernel, linux-kernel, xfr, rock.xu,
	Furong Xu

From XGMAC Core 3.20 and later, each Flexible PPS has individual PPSEN bit
to select Fixed mode or Flexible mode. The PPSEN must be set, or it stays
in Fixed PPS mode by default.
XGMAC Core prior 3.20, only PPSEN0(bit 4) is writable. PPSEN{1,2,3} are
read-only reserved, and they are already in Flexible mode by default, our
new code always set PPSEN{1,2,3} do not make things worse ;-)

Fixes: 95eaf3cd0a90 ("net: stmmac: dwxgmac: Add Flexible PPS support")
Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Furong Xu <0x1207@gmail.com>
---
Changes in v3:
  - Tagged Fixes: and sent through net instead of net-next, thanks Jacob Keller.

Changes in v2:
  - Add comment for XGMAC_PPSEN description among different XGMAC core versions.
  - Update commit message, thanks Serge Semin.
---
 drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h     |  2 +-
 .../net/ethernet/stmicro/stmmac/dwxgmac2_core.c    | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 7a8f47e7b728..a4e8b498dea9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -259,7 +259,7 @@
 	((val) << XGMAC_PPS_MINIDX(x))
 #define XGMAC_PPSCMD_START		0x2
 #define XGMAC_PPSCMD_STOP		0x5
-#define XGMAC_PPSEN0			BIT(4)
+#define XGMAC_PPSENx(x)			BIT(4 + (x) * 8)
 #define XGMAC_PPSx_TARGET_TIME_SEC(x)	(0x00000d80 + (x) * 0x10)
 #define XGMAC_PPSx_TARGET_TIME_NSEC(x)	(0x00000d84 + (x) * 0x10)
 #define XGMAC_TRGTBUSY0			BIT(31)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index f352be269deb..453e88b75be0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -1178,7 +1178,19 @@ static int dwxgmac2_flex_pps_config(void __iomem *ioaddr, int index,
 
 	val |= XGMAC_PPSCMDx(index, XGMAC_PPSCMD_START);
 	val |= XGMAC_TRGTMODSELx(index, XGMAC_PPSCMD_START);
-	val |= XGMAC_PPSEN0;
+
+	/* XGMAC Core has 4 PPS outputs at most.
+	 *
+	 * Prior XGMAC Core 3.20, Fixed mode or Flexible mode are selectable for
+	 * PPS0 only via PPSEN0. PPS{1,2,3} are in Flexible mode by default,
+	 * and can not be switched to Fixed mode, since PPSEN{1,2,3} are
+	 * read-only reserved to 0.
+	 * But we always set PPSEN{1,2,3} do not make things worse ;-)
+	 *
+	 * From XGMAC Core 3.20 and later, PPSEN{0,1,2,3} are writable and must
+	 * be set, or the PPS outputs stay in Fixed PPS mode by default.
+	 */
+	val |= XGMAC_PPSENx(index);
 
 	writel(cfg->start.tv_sec, ioaddr + XGMAC_PPSx_TARGET_TIME_SEC(index));
 
-- 
2.34.1


^ permalink raw reply related

* [PATCH] net: ipmr_base: Check iif when returning a (*, G) MFC
From: Yang Sun @ 2023-10-31  1:57 UTC (permalink / raw)
  To: davem, dsahern; +Cc: edumazet, kuba, pabeni, netdev, Yang Sun

Looking for a (*, G) MFC returns the first match without checking
the iif. This can return a MFC not intended for a packet's iif and
forwarding the packet with this MFC will not work correctly.

When looking up for a (*, G) MFC, check that the MFC's iif is
the same as the packet's iif.

Signed-off-by: Yang Sun <sunytt@google.com>
---
 net/ipv4/ipmr_base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 271dc03fc6db..5cf7c7088dfe 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -97,7 +97,7 @@ void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg)
 
 	list = rhltable_lookup(&mrt->mfc_hash, hasharg, *mrt->ops.rht_params);
 	rhl_for_each_entry_rcu(c, tmp, list, mnode) {
-		if (c->mfc_un.res.ttls[vifi] < 255)
+		if (c->mfc_parent == vifi && c->mfc_un.res.ttls[vifi] < 255)
 			return c;
 
 		/* It's ok if the vifi is part of the static tree */
-- 
2.42.0.820.g83a721a137-goog


^ permalink raw reply related

* Re: [PATCH v3 1/1] r8169: Coalesce RTL8411b PHY power-down recovery programming instructions to reduce spinlock stalls
From: Andrew Lunn @ 2023-10-31  1:21 UTC (permalink / raw)
  To: Mirsad Todorovac
  Cc: Heiner Kallweit, netdev, linux-kernel, nic_swsd, David S. Miller,
	Eric Dumazet, Jakub Kicinski, Paolo Abeni, Marco Elver
In-Reply-To: <9f99c3a4-2752-464b-b37d-58a4f8041804@alu.unizg.hr>

> I will not contradict, but the cummulative amount of memory barriers on each MMIO read/write
> in each single one of the drivers could amount to some degrading of overall performance and
> latency in a multicore system.

For optimisations, we like to see benchmark results which show some
improvements. Do you have any numbers?

	Andrew

^ permalink raw reply

* Re: [PATCH net-next v4 2/2] net:dsa:microchip: add property to select
From: Andrew Lunn @ 2023-10-31  1:00 UTC (permalink / raw)
  To: Vladimir Oltean
  Cc: Ante Knezic, conor+dt, o.rempel, UNGLinuxDriver, davem,
	devicetree, edumazet, f.fainelli, krzysztof.kozlowski+dt, kuba,
	linux-kernel, marex, netdev, pabeni, robh+dt, woojung.huh
In-Reply-To: <20231030174225.hqhc3afbayi7dmos@skbuf>

> So, my opinion is that although what Oleksij would like to see is
> admirable, I don't think that the REF_CLK direction is a matter of RMII
> MAC vs PHY role, and thus, we wouldn't need to change "rmii" to "rev-rmii"
> and cause breakage everywhere. It's just that - a matter of REF_CLK
> direction. It's true, though, that this is a generic problem and that
> the generic bindings for RMII that we currently have are under-specified.
> We could try to devise an extended RMII binding which makes it clear for
> both the MAC and the PHY who is responsible to drive this signal. You
> are not attempting that, you are just coming up with yet another
> vendor-specific MAC property which solves a generic problem. I can't say
> I am completely opposed to that, either, which is why I haven't really
> spoken out against it. The PHY maintainers would also have to weigh in,
> and not all of them are CCed here.

I would recommend looking around other PHYs and find a property which
does what you want, and copy it.

We do have all sorts of properties. There are some to enable the
REF_CLK out of the PHY. Some to disable the REF_CLK out, some to
disable it when the link is down, some to indicate what frequency it
should tick at, etc.

If you want to go the extra mile, maybe you can make a summary of all
these properties, and maybe we can produce a guide line for what we
want the properties to be called going forward.

> I am afraid that creating a CCF style binding for REF_CLK will be an
> enormous hammer for a very small nail and will see very limited adoption
> to other drivers, but I might as well be wrong about it. Compatibility
> between RMII MACs and PHYs which may or may not be CCF-ready might also
> be a concern.

I also don't think using the CCF makes too much sense, except for
where the SoC provides the lock, and already has a CCF covering it.

I would also be hesitant to add more dependencies between the MAC and
the PHY. The DT often has circular dependencies and we have had issues
with probing being deferred because the core does not always
understand these circular dependencies.

	   Andrew

^ permalink raw reply

* Re: [PATCH net-next V3] net: pcs: xpcs: Add 2500BASE-X case in get state for XPCS drivers
From: Serge Semin @ 2023-10-31  0:56 UTC (permalink / raw)
  To: Russell King (Oracle)
  Cc: Raju Lakkaraju, netdev, davem, kuba, linux-kernel, andrew,
	Jose.Abreu, UNGLinuxDriver
In-Reply-To: <ZTuvwnGZKEueGDwa@shell.armlinux.org.uk>

On Fri, Oct 27, 2023 at 01:40:34PM +0100, Russell King (Oracle) wrote:
> On Fri, Oct 27, 2023 at 03:06:19PM +0300, Serge Semin wrote:
> > Hi Russell
> > 
> > On Fri, Oct 27, 2023 at 12:54:36PM +0100, Russell King (Oracle) wrote:
> > > On Fri, Oct 27, 2023 at 02:04:15PM +0300, Serge Semin wrote:
> > > > Cc += Russell
> > > > 
> > > > * It's a good practice to add all the reviewers to Cc in the new patch
> > > > * revisions.
> > > > 
> > > > On Fri, Oct 27, 2023 at 10:13:06AM +0530, Raju Lakkaraju wrote:
> > > > > Add DW_2500BASEX case in xpcs_get_state( ) to update speed, duplex and pause
> > > > > 
> > > > > Signed-off-by: Raju Lakkaraju <Raju.Lakkaraju@microchip.com>
> > > > 
> > > > With a nitpick below clarified, feel free to add:
> > > > Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
> > > > 
> > > > > ---
> > > > >  drivers/net/pcs/pcs-xpcs.c | 29 +++++++++++++++++++++++++++++
> > > > >  drivers/net/pcs/pcs-xpcs.h |  2 ++
> > > > >  2 files changed, 31 insertions(+)
> > > > > 
> > > > > diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
> > > > > index 4dbc21f604f2..31f0beba638a 100644
> > > > > --- a/drivers/net/pcs/pcs-xpcs.c
> > > > > +++ b/drivers/net/pcs/pcs-xpcs.c
> > > > > @@ -1090,6 +1090,28 @@ static int xpcs_get_state_c37_1000basex(struct dw_xpcs *xpcs,
> > > > >  	return 0;
> > > > >  }
> > > > >  
> > > > > +static int xpcs_get_state_2500basex(struct dw_xpcs *xpcs,
> > > > > +				    struct phylink_link_state *state)
> > > > > +{
> > > > > +	int ret;
> > > > > +
> > > > > +	ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_MMD_STS);
> > > > > +	if (ret < 0) {
> > > > > +		state->link = 0;
> > > > > +		return ret;
> > > > > +	}
> > > > > +
> > > > > +	state->link = !!(ret & DW_VR_MII_MMD_STS_LINK_STS);
> > > > > +	if (!state->link)
> > > > > +		return 0;
> > > > > +
> > > > > +	state->speed = SPEED_2500;
> > > > 
> > > > > +	state->pause |= MLO_PAUSE_TX | MLO_PAUSE_RX;
> > > > 
> > > > Why is it '|=' instead of just '='? Is it possible to have the 'pause'
> > > > field having some additional flags set which would be required to
> > > > preserve?
> > > 
> > > The code is correct. There are other flags on state->pause other than
> > > these, and phylink initialises state->pause prior to calling the
> > > function. The only flags that should be modified here are these two
> > > bits that the code is setting.
> > > 
> > > Phylink will initialise it to MLO_PAUSE_NONE if expecting autoneg, or
> > > the configured values if autoneg on the link is disabled.
> > 
> > Thanks for clarification. Then no more comments from my side in this
> > patch regard.
> > 
> > Regarding the XPCS driver in general. Based on what you said the rest
> > of the XPCS state getters are wrong in fully re-writing the 'pause'
> > field. Right?
> 
> Yes.
> 
> xpcs_resolve_pma:
>         state->pause = MLO_PAUSE_TX | MLO_PAUSE_RX;
> 
> xpcs_get_state_c37_sgmii:
>         state->pause = 0;
> 
> are both incorrect. The former should be |=, the latter is totally
> unnecessary.
> 
> Documentation:
>  * pcs_get_state() - Read the current inband link state from the hardware
>  * @pcs: a pointer to a &struct phylink_pcs.
>  * @state: a pointer to a &struct phylink_link_state.
>  *
>  * Read the current inband link state from the MAC PCS, reporting the
>  * current speed in @state->speed, duplex mode in @state->duplex, pause
>                                                                   ^^^^^
>  * mode in @state->pause using the %MLO_PAUSE_RX and %MLO_PAUSE_TX bits,
>    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
> 
> I guess I need to make that more explicit that pcs_get_state() methods
> are only expected to _set_ these two bits as appropriate, leaving all
> other bits as-is.

Thanks for the detailed response. I'll send fixup patches for the
denoted problems as soon as I get some free time for it. Hopefully
within a month if nobody does it sooner.

-Serge(y)

> 
> -- 
> RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
> FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!

^ permalink raw reply

* Re: [PATCH net v2] net: dsa: tag_rtl4_a: Bump min packet size
From: Andrew Lunn @ 2023-10-31  0:37 UTC (permalink / raw)
  To: Vladimir Oltean
  Cc: Linus Walleij, Florian Fainelli, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, netdev, linux-kernel
In-Reply-To: <20231030230906.s5feepjcvgbg5e7v@skbuf>

On Tue, Oct 31, 2023 at 01:09:06AM +0200, Vladimir Oltean wrote:
> On Mon, Oct 30, 2023 at 11:57:33PM +0100, Linus Walleij wrote:
> > This of course make no sense, since the padding function should do nothing
> > when the packet is bigger than 60 bytes.
> 
> Indeed, this of course makes no sense. Ping doesn't work, or ARP doesn't
> work? Could you add a static ARP entry for the 192.168.1.137 IP address?

Probably the ARP, since they are short packets and probably need the
padding.

	Andrew

^ permalink raw reply

* Re: [PATCH] net: phy: broadcom: Wire suspend/resume for BCM54612E
From: Andrew Lunn @ 2023-10-31  0:31 UTC (permalink / raw)
  To: Marco von Rosenberg
  Cc: Florian Fainelli, Broadcom internal kernel review list,
	Heiner Kallweit, Russell King, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, netdev, linux-kernel
In-Reply-To: <20231030225446.17422-1-marcovr@selfnet.de>

On Mon, Oct 30, 2023 at 11:54:45PM +0100, Marco von Rosenberg wrote:
> On some devices, the bootloader suspends the PHY before booting the OS.
> Not having a resume callback wired up is a problem in such situations
> since it is then never resumed.

Hi Marco

This description seems odd to me. I'm guessing here:

Are we talking about a device which as been suspended? The PHY has
been left running because there is no suspend callback? Something then
triggers a resume. The bootloader then suspends the active PHY? Linux
then boots, detects its a resume, so does not touch the hardware
because there is no resume callback? The suspended PHY is then
useless.

Adding suspend/resume calls makes sense, i just don't follow the
commit message reasoning.

	Andrew

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox