Netdev List
 help / color / mirror / Atom feed
* [PATCH net 3/5] ipvs: add missing module descriptions
From: Pablo Neira Ayuso @ 2023-11-08 15:58 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev, kuba, pabeni, edumazet, fw, kadlec
In-Reply-To: <20231108155802.84617-1-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

W=1 builds warn on missing MODULE_DESCRIPTION, add them.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_core.c   | 1 +
 net/netfilter/ipvs/ip_vs_dh.c     | 1 +
 net/netfilter/ipvs/ip_vs_fo.c     | 1 +
 net/netfilter/ipvs/ip_vs_ftp.c    | 1 +
 net/netfilter/ipvs/ip_vs_lblc.c   | 1 +
 net/netfilter/ipvs/ip_vs_lblcr.c  | 1 +
 net/netfilter/ipvs/ip_vs_lc.c     | 1 +
 net/netfilter/ipvs/ip_vs_nq.c     | 1 +
 net/netfilter/ipvs/ip_vs_ovf.c    | 1 +
 net/netfilter/ipvs/ip_vs_pe_sip.c | 1 +
 net/netfilter/ipvs/ip_vs_rr.c     | 1 +
 net/netfilter/ipvs/ip_vs_sed.c    | 1 +
 net/netfilter/ipvs/ip_vs_sh.c     | 1 +
 net/netfilter/ipvs/ip_vs_twos.c   | 1 +
 net/netfilter/ipvs/ip_vs_wlc.c    | 1 +
 net/netfilter/ipvs/ip_vs_wrr.c    | 1 +
 16 files changed, 16 insertions(+)

diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 3230506ae3ff..a2c16b501087 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -2450,3 +2450,4 @@ static void __exit ip_vs_cleanup(void)
 module_init(ip_vs_init);
 module_exit(ip_vs_cleanup);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IP Virtual Server");
diff --git a/net/netfilter/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c
index 5e6ec32aff2b..75f4c231f4a0 100644
--- a/net/netfilter/ipvs/ip_vs_dh.c
+++ b/net/netfilter/ipvs/ip_vs_dh.c
@@ -270,3 +270,4 @@ static void __exit ip_vs_dh_cleanup(void)
 module_init(ip_vs_dh_init);
 module_exit(ip_vs_dh_cleanup);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs destination hashing scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_fo.c b/net/netfilter/ipvs/ip_vs_fo.c
index b846cc385279..ab117e5bc34e 100644
--- a/net/netfilter/ipvs/ip_vs_fo.c
+++ b/net/netfilter/ipvs/ip_vs_fo.c
@@ -72,3 +72,4 @@ static void __exit ip_vs_fo_cleanup(void)
 module_init(ip_vs_fo_init);
 module_exit(ip_vs_fo_cleanup);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs weighted failover scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index ef1f45e43b63..f53899d12416 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -635,3 +635,4 @@ static void __exit ip_vs_ftp_exit(void)
 module_init(ip_vs_ftp_init);
 module_exit(ip_vs_ftp_exit);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs ftp helper");
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index cf78ba4ce5ff..8ceec7a2fa8f 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -632,3 +632,4 @@ static void __exit ip_vs_lblc_cleanup(void)
 module_init(ip_vs_lblc_init);
 module_exit(ip_vs_lblc_cleanup);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs locality-based least-connection scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 9eddf118b40e..0fb64707213f 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -817,3 +817,4 @@ static void __exit ip_vs_lblcr_cleanup(void)
 module_init(ip_vs_lblcr_init);
 module_exit(ip_vs_lblcr_cleanup);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs locality-based least-connection with replication scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c
index 9d34d81fc6f1..c2764505e380 100644
--- a/net/netfilter/ipvs/ip_vs_lc.c
+++ b/net/netfilter/ipvs/ip_vs_lc.c
@@ -86,3 +86,4 @@ static void __exit ip_vs_lc_cleanup(void)
 module_init(ip_vs_lc_init);
 module_exit(ip_vs_lc_cleanup);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs least connection scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c
index f56862a87518..ed7f5c889b41 100644
--- a/net/netfilter/ipvs/ip_vs_nq.c
+++ b/net/netfilter/ipvs/ip_vs_nq.c
@@ -136,3 +136,4 @@ static void __exit ip_vs_nq_cleanup(void)
 module_init(ip_vs_nq_init);
 module_exit(ip_vs_nq_cleanup);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs never queue scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_ovf.c b/net/netfilter/ipvs/ip_vs_ovf.c
index c03066fdd5ca..c7708b809700 100644
--- a/net/netfilter/ipvs/ip_vs_ovf.c
+++ b/net/netfilter/ipvs/ip_vs_ovf.c
@@ -79,3 +79,4 @@ static void __exit ip_vs_ovf_cleanup(void)
 module_init(ip_vs_ovf_init);
 module_exit(ip_vs_ovf_cleanup);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs overflow connection scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 0ac6705a61d3..e4ce1d9a63f9 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -185,3 +185,4 @@ static void __exit ip_vs_sip_cleanup(void)
 module_init(ip_vs_sip_init);
 module_exit(ip_vs_sip_cleanup);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs sip helper");
diff --git a/net/netfilter/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c
index 38495c6f6c7c..6baa34dff9f0 100644
--- a/net/netfilter/ipvs/ip_vs_rr.c
+++ b/net/netfilter/ipvs/ip_vs_rr.c
@@ -122,4 +122,5 @@ static void __exit ip_vs_rr_cleanup(void)
 
 module_init(ip_vs_rr_init);
 module_exit(ip_vs_rr_cleanup);
+MODULE_DESCRIPTION("ipvs round-robin scheduler");
 MODULE_LICENSE("GPL");
diff --git a/net/netfilter/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c
index 7663288e5358..a46f99a56618 100644
--- a/net/netfilter/ipvs/ip_vs_sed.c
+++ b/net/netfilter/ipvs/ip_vs_sed.c
@@ -137,3 +137,4 @@ static void __exit ip_vs_sed_cleanup(void)
 module_init(ip_vs_sed_init);
 module_exit(ip_vs_sed_cleanup);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs shortest expected delay scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index c2028e412092..92e77d7a6b50 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -376,3 +376,4 @@ static void __exit ip_vs_sh_cleanup(void)
 module_init(ip_vs_sh_init);
 module_exit(ip_vs_sh_cleanup);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs source hashing scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_twos.c b/net/netfilter/ipvs/ip_vs_twos.c
index 3308e4cc740a..8d5419edde50 100644
--- a/net/netfilter/ipvs/ip_vs_twos.c
+++ b/net/netfilter/ipvs/ip_vs_twos.c
@@ -137,3 +137,4 @@ static void __exit ip_vs_twos_cleanup(void)
 module_init(ip_vs_twos_init);
 module_exit(ip_vs_twos_cleanup);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs power of twos choice scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c
index 09f584b564a0..9fa500927c0a 100644
--- a/net/netfilter/ipvs/ip_vs_wlc.c
+++ b/net/netfilter/ipvs/ip_vs_wlc.c
@@ -109,3 +109,4 @@ static void __exit ip_vs_wlc_cleanup(void)
 module_init(ip_vs_wlc_init);
 module_exit(ip_vs_wlc_cleanup);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs weighted least connection scheduler");
diff --git a/net/netfilter/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c
index 1bc7a0789d85..85ce0d04afac 100644
--- a/net/netfilter/ipvs/ip_vs_wrr.c
+++ b/net/netfilter/ipvs/ip_vs_wrr.c
@@ -263,3 +263,4 @@ static void __exit ip_vs_wrr_cleanup(void)
 module_init(ip_vs_wrr_init);
 module_exit(ip_vs_wrr_cleanup);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ipvs weighted round-robin scheduler");
-- 
2.30.2


^ permalink raw reply related

* [PATCH net 1/5] netfilter: add missing module descriptions
From: Pablo Neira Ayuso @ 2023-11-08 15:57 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev, kuba, pabeni, edumazet, fw, kadlec
In-Reply-To: <20231108155802.84617-1-pablo@netfilter.org>

From: Florian Westphal <fw@strlen.de>

W=1 builds warn on missing MODULE_DESCRIPTION, add them.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebtable_broute.c      | 1 +
 net/bridge/netfilter/ebtable_filter.c      | 1 +
 net/bridge/netfilter/ebtable_nat.c         | 1 +
 net/bridge/netfilter/ebtables.c            | 1 +
 net/bridge/netfilter/nf_conntrack_bridge.c | 1 +
 net/ipv4/netfilter/iptable_nat.c           | 1 +
 net/ipv4/netfilter/iptable_raw.c           | 1 +
 net/ipv4/netfilter/nf_defrag_ipv4.c        | 1 +
 net/ipv4/netfilter/nf_reject_ipv4.c        | 1 +
 net/ipv6/netfilter/ip6table_nat.c          | 1 +
 net/ipv6/netfilter/ip6table_raw.c          | 1 +
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c  | 1 +
 net/ipv6/netfilter/nf_reject_ipv6.c        | 1 +
 net/netfilter/nf_conntrack_broadcast.c     | 1 +
 net/netfilter/nf_conntrack_netlink.c       | 1 +
 net/netfilter/nf_conntrack_proto.c         | 1 +
 net/netfilter/nf_nat_core.c                | 1 +
 net/netfilter/nf_tables_api.c              | 1 +
 net/netfilter/nfnetlink_osf.c              | 1 +
 net/netfilter/nft_chain_nat.c              | 1 +
 net/netfilter/nft_fib.c                    | 1 +
 net/netfilter/nft_fwd_netdev.c             | 1 +
 22 files changed, 22 insertions(+)

diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 8f19253024b0..741360219552 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -135,3 +135,4 @@ static void __exit ebtable_broute_fini(void)
 module_init(ebtable_broute_init);
 module_exit(ebtable_broute_fini);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Force packets to be routed instead of bridged");
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 278f324e6752..dacd81b12e62 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -116,3 +116,4 @@ static void __exit ebtable_filter_fini(void)
 module_init(ebtable_filter_init);
 module_exit(ebtable_filter_fini);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ebtables legacy filter table");
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 9066f7f376d5..0f2a8c6118d4 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -116,3 +116,4 @@ static void __exit ebtable_nat_fini(void)
 module_init(ebtable_nat_init);
 module_exit(ebtable_nat_fini);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ebtables legacy stateless nat table");
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index aa23479b20b2..99d82676f780 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -2595,3 +2595,4 @@ EXPORT_SYMBOL(ebt_do_table);
 module_init(ebtables_init);
 module_exit(ebtables_fini);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ebtables legacy core");
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 71056ee84773..b5c406a6e765 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -416,3 +416,4 @@ module_exit(nf_conntrack_l3proto_bridge_fini);
 
 MODULE_ALIAS("nf_conntrack-" __stringify(AF_BRIDGE));
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Bridge IPv4 and IPv6 connection tracking");
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 56f6ecc43451..4d42d0756fd7 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -170,3 +170,4 @@ module_init(iptable_nat_init);
 module_exit(iptable_nat_exit);
 
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("iptables legacy nat table");
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index ca5e5b21587c..0e7f53964d0a 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -108,3 +108,4 @@ static void __exit iptable_raw_fini(void)
 module_init(iptable_raw_init);
 module_exit(iptable_raw_fini);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("iptables legacy raw table");
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 265b39bc435b..482e733c3375 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -186,3 +186,4 @@ module_init(nf_defrag_init);
 module_exit(nf_defrag_fini);
 
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv4 defragmentation support");
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index f33aeab9424f..f01b038fc1cd 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -336,3 +336,4 @@ void nf_send_unreach(struct sk_buff *skb_in, int code, int hook)
 EXPORT_SYMBOL_GPL(nf_send_unreach);
 
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv4 packet rejection core");
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index bf3cb3a13600..52cf104e3478 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -170,3 +170,4 @@ module_init(ip6table_nat_init);
 module_exit(ip6table_nat_exit);
 
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Ip6tables legacy nat table");
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 08861d5d1f4d..fc9f6754028f 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -106,3 +106,4 @@ static void __exit ip6table_raw_fini(void)
 module_init(ip6table_raw_init);
 module_exit(ip6table_raw_fini);
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Ip6tables legacy raw table");
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index d59b296b4f51..be7817fbc024 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -182,3 +182,4 @@ module_init(nf_defrag_init);
 module_exit(nf_defrag_fini);
 
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6 defragmentation support");
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 58ccdb08c0fd..d45bc54b7ea5 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -413,3 +413,4 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in,
 EXPORT_SYMBOL_GPL(nf_send_unreach6);
 
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv6 packet rejection core");
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 9fb9b8031298..cfa0fe0356de 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -82,3 +82,4 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
 EXPORT_SYMBOL_GPL(nf_conntrack_broadcast_help);
 
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Broadcast connection tracking helper");
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 334db22199c1..fb0ae15e96df 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -57,6 +57,7 @@
 #include "nf_internals.h"
 
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("List and change connection tracking table");
 
 struct ctnetlink_list_dump_ctx {
 	struct nf_conn *last;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index c928ff63b10e..f36727ed91e1 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -699,3 +699,4 @@ MODULE_ALIAS("ip_conntrack");
 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("IPv4 and IPv6 connection tracking");
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index c4e0516a8dfa..c3d7ecbc777c 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1263,6 +1263,7 @@ static void __exit nf_nat_cleanup(void)
 }
 
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Network address translation core");
 
 module_init(nf_nat_init);
 module_exit(nf_nat_cleanup);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3c1fd8283bf4..146b7447a969 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -11386,4 +11386,5 @@ module_exit(nf_tables_module_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("Framework for packet filtering and classification");
 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFTABLES);
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index 50723ba08289..c0fc431991e8 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -447,4 +447,5 @@ module_init(nfnl_osf_init);
 module_exit(nfnl_osf_fini);
 
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Passive OS fingerprint matching");
 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c
index 98e4946100c5..40e230d8b712 100644
--- a/net/netfilter/nft_chain_nat.c
+++ b/net/netfilter/nft_chain_nat.c
@@ -137,6 +137,7 @@ module_init(nft_chain_nat_init);
 module_exit(nft_chain_nat_exit);
 
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("nftables network address translation support");
 #ifdef CONFIG_NF_TABLES_IPV4
 MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
 #endif
diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c
index 04b51f285332..1bfe258018da 100644
--- a/net/netfilter/nft_fib.c
+++ b/net/netfilter/nft_fib.c
@@ -204,4 +204,5 @@ bool nft_fib_reduce(struct nft_regs_track *track,
 EXPORT_SYMBOL_GPL(nft_fib_reduce);
 
 MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Query routing table from nftables");
 MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index a5268e6dd32f..358e742afad7 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -270,4 +270,5 @@ module_exit(nft_fwd_netdev_module_exit);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("nftables netdev packet forwarding support");
 MODULE_ALIAS_NFT_AF_EXPR(5, "fwd");
-- 
2.30.2


^ permalink raw reply related

* [PATCH net 0/5] Netfilter fixes for net
From: Pablo Neira Ayuso @ 2023-11-08 15:57 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev, kuba, pabeni, edumazet, fw, kadlec

Hi,

The following patchset contains Netfilter fixes for net:

1) Add missing netfilter modules description to fix W=1, from Florian Westphal.

2) Fix catch-all element GC with timeout when use with the pipapo set
   backend, this remained broken since I tried to fix it this summer,
   then another attempt to fix it recently.

3) Add missing IPVS modules descriptions to fix W=1, also from Florian.

4) xt_recent allocated a too small buffer to store an IPv4-mapped IPv6
   address which can be parsed by in6_pton(), from Maciej Zenczykowski.
   Broken for many releases.

5) Skip IPv4-mapped IPv6, IPv4-compat IPv6, site/link local scoped IPv6
   addressses to set up IPv6 NAT redirect, also from Florian. This is
   broken since 2012.

Please, pull these changes from:

  git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf.git nf-23-11-08

Thanks.

----------------------------------------------------------------

The following changes since commit d93f9528573e1d419b69ca5ff4130201d05f6b90:

  nfsd: regenerate user space parsers after ynl-gen changes (2023-11-06 09:03:46 +0000)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf.git tags/nf-23-11-08

for you to fetch changes up to 80abbe8a8263106fe45a4f293b92b5c74cc9cc8a:

  netfilter: nat: fix ipv6 nat redirect with mapped and scoped addresses (2023-11-08 16:40:30 +0100)

----------------------------------------------------------------
netfilter pull request 23-11-08

----------------------------------------------------------------
Florian Westphal (3):
      netfilter: add missing module descriptions
      ipvs: add missing module descriptions
      netfilter: nat: fix ipv6 nat redirect with mapped and scoped addresses

Maciej Żenczykowski (1):
      netfilter: xt_recent: fix (increase) ipv6 literal buffer length

Pablo Neira Ayuso (1):
      netfilter: nf_tables: remove catchall element in GC sync path

 net/bridge/netfilter/ebtable_broute.c      |  1 +
 net/bridge/netfilter/ebtable_filter.c      |  1 +
 net/bridge/netfilter/ebtable_nat.c         |  1 +
 net/bridge/netfilter/ebtables.c            |  1 +
 net/bridge/netfilter/nf_conntrack_bridge.c |  1 +
 net/ipv4/netfilter/iptable_nat.c           |  1 +
 net/ipv4/netfilter/iptable_raw.c           |  1 +
 net/ipv4/netfilter/nf_defrag_ipv4.c        |  1 +
 net/ipv4/netfilter/nf_reject_ipv4.c        |  1 +
 net/ipv6/netfilter/ip6table_nat.c          |  1 +
 net/ipv6/netfilter/ip6table_raw.c          |  1 +
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c  |  1 +
 net/ipv6/netfilter/nf_reject_ipv6.c        |  1 +
 net/netfilter/ipvs/ip_vs_core.c            |  1 +
 net/netfilter/ipvs/ip_vs_dh.c              |  1 +
 net/netfilter/ipvs/ip_vs_fo.c              |  1 +
 net/netfilter/ipvs/ip_vs_ftp.c             |  1 +
 net/netfilter/ipvs/ip_vs_lblc.c            |  1 +
 net/netfilter/ipvs/ip_vs_lblcr.c           |  1 +
 net/netfilter/ipvs/ip_vs_lc.c              |  1 +
 net/netfilter/ipvs/ip_vs_nq.c              |  1 +
 net/netfilter/ipvs/ip_vs_ovf.c             |  1 +
 net/netfilter/ipvs/ip_vs_pe_sip.c          |  1 +
 net/netfilter/ipvs/ip_vs_rr.c              |  1 +
 net/netfilter/ipvs/ip_vs_sed.c             |  1 +
 net/netfilter/ipvs/ip_vs_sh.c              |  1 +
 net/netfilter/ipvs/ip_vs_twos.c            |  1 +
 net/netfilter/ipvs/ip_vs_wlc.c             |  1 +
 net/netfilter/ipvs/ip_vs_wrr.c             |  1 +
 net/netfilter/nf_conntrack_broadcast.c     |  1 +
 net/netfilter/nf_conntrack_netlink.c       |  1 +
 net/netfilter/nf_conntrack_proto.c         |  1 +
 net/netfilter/nf_nat_core.c                |  1 +
 net/netfilter/nf_nat_redirect.c            | 27 ++++++++++++++++++++++++++-
 net/netfilter/nf_tables_api.c              | 23 ++++++++++++++++++-----
 net/netfilter/nfnetlink_osf.c              |  1 +
 net/netfilter/nft_chain_nat.c              |  1 +
 net/netfilter/nft_fib.c                    |  1 +
 net/netfilter/nft_fwd_netdev.c             |  1 +
 net/netfilter/xt_recent.c                  |  2 +-
 40 files changed, 82 insertions(+), 7 deletions(-)

^ permalink raw reply

* [PATCH net 2/5] netfilter: nf_tables: remove catchall element in GC sync path
From: Pablo Neira Ayuso @ 2023-11-08 15:57 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev, kuba, pabeni, edumazet, fw, kadlec
In-Reply-To: <20231108155802.84617-1-pablo@netfilter.org>

The expired catchall element is not deactivated and removed from GC sync
path. This path holds mutex so just call nft_setelem_data_deactivate()
and nft_setelem_catchall_remove() before queueing the GC work.

Fixes: 4a9e12ea7e70 ("netfilter: nft_set_pipapo: call nft_trans_gc_queue_sync() in catchall GC")
Reported-by: lonial con <kongln9170@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 146b7447a969..a761ee6796f6 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6520,6 +6520,12 @@ static int nft_setelem_deactivate(const struct net *net,
 	return ret;
 }
 
+static void nft_setelem_catchall_destroy(struct nft_set_elem_catchall *catchall)
+{
+	list_del_rcu(&catchall->list);
+	kfree_rcu(catchall, rcu);
+}
+
 static void nft_setelem_catchall_remove(const struct net *net,
 					const struct nft_set *set,
 					struct nft_elem_priv *elem_priv)
@@ -6528,8 +6534,7 @@ static void nft_setelem_catchall_remove(const struct net *net,
 
 	list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
 		if (catchall->elem == elem_priv) {
-			list_del_rcu(&catchall->list);
-			kfree_rcu(catchall, rcu);
+			nft_setelem_catchall_destroy(catchall);
 			break;
 		}
 	}
@@ -9678,11 +9683,12 @@ static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
 						  unsigned int gc_seq,
 						  bool sync)
 {
-	struct nft_set_elem_catchall *catchall;
+	struct nft_set_elem_catchall *catchall, *next;
 	const struct nft_set *set = gc->set;
+	struct nft_elem_priv *elem_priv;
 	struct nft_set_ext *ext;
 
-	list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+	list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
 		ext = nft_set_elem_ext(set, catchall->elem);
 
 		if (!nft_set_elem_expired(ext))
@@ -9700,7 +9706,13 @@ static struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
 		if (!gc)
 			return NULL;
 
-		nft_trans_gc_elem_add(gc, catchall->elem);
+		elem_priv = catchall->elem;
+		if (sync) {
+			nft_setelem_data_deactivate(gc->net, gc->set, elem_priv);
+			nft_setelem_catchall_destroy(catchall);
+		}
+
+		nft_trans_gc_elem_add(gc, elem_priv);
 	}
 
 	return gc;
-- 
2.30.2


^ permalink raw reply related

* Re: [PATCH v2 net 0/7] qbv cycle time extension/truncation
From: Vladimir Oltean @ 2023-11-08 15:51 UTC (permalink / raw)
  To: Faizal Rahim
  Cc: Vinicius Costa Gomes, Jamal Hadi Salim, Cong Wang, Jiri Pirko,
	David S . Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	netdev, linux-kernel
In-Reply-To: <20231107112023.676016-1-faizal.abdul.rahim@linux.intel.com>

Hi Faizal,

On Tue, Nov 07, 2023 at 06:20:16AM -0500, Faizal Rahim wrote:
> According to IEEE Std. 802.1Q-2018 section Q.5 CycleTimeExtension,
> the Cycle Time Extension variable allows this extension of the last old
> cycle to be done in a defined way. If the last complete old cycle would
> normally end less than OperCycleTimeExtension nanoseconds before the new
> base time, then the last complete cycle before AdminBaseTime is reached
> is extended so that it ends at AdminBaseTime.
> 
> Changes in v2:
> 
> - Added 's64 cycle_time_correction' in 'sched_gate_list struct'.
> - Removed sched_changed created in v1 since the new cycle_time_correction
>   field can also serve to indicate the need for a schedule change.
> - Added 'bool correction_active' in 'struct sched_entry' to represent
>   the correction state from the entry's perspective and return corrected
>   interval value when active.
> - Fix cycle time correction logics for the next entry in advance_sched()
> - Fix and implement proper cycle time correction logics for current
>   entry in taprio_start_sched()
> 
> v1 at:
> https://lore.kernel.org/lkml/20230530082541.495-1-muhammad.husaini.zulkifli@intel.com/

I like what came of this patch series. Thanks for following up and
taking over. I have some comments on individual patches.

^ permalink raw reply

* [REPORT] BPF: Reproducible triggering of BUG() from userspace PoC
From: Lee Jones @ 2023-11-08 15:46 UTC (permalink / raw)
  To: Alexei Starovoitov, Daniel Borkmann, Andrii Nakryiko
  Cc: x86, bpf, linux-kernel, netdev

Good afternoon,

After coming across a recent Syzkaller report [0] I thought I'd take
some time to firstly reproduce the issue, then see if there was a
trivial way to mitigate it.  The report suggests that a BUG() in
prog_array_map_poke_run() [1] can be trivially and reliably triggered
from userspace using the PoC provided [2].

        ret = bpf_arch_text_poke(poke->tailcall_bypass,
                                 BPF_MOD_JUMP,
                                 old_bypass_addr,
                                 poke->bypass_addr);
        BUG_ON(ret < 0 && ret != -EINVAL);

Indeed the PoC does seem to be able to consistently trigger the BUG(),
not only on the reported kernel (v6.1), but also on linux-next.  I went
to the trouble of checking LORE, but failed to find any patches which
may be attempting to fix this.

    kernel BUG at kernel/bpf/arraymap.c:1094!
    invalid opcode: 0000 [#1] PREEMPT SMP KASAN
    CPU: 5 PID: 45 Comm: kworker/5:0 Not tainted 6.6.0-rc3-next-20230929-dirty #74
    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
    Workqueue: events prog_array_map_clear_deferred
    RIP: 0010:prog_array_map_poke_run+0x6b4/0x6d0
    Code: ff 0f 0b e8 1e 27 e1 ff 48 c7 c7 60 80 93 85 48 c7 c6 00 7f 93 85 48 c7 c2 bb c2 39 86 b9 45 04 00 00 45 89 f8 e8 9c 890
    RSP: 0018:ffffc9000036fb50 EFLAGS: 00010246
    RAX: 0000000000000044 RBX: ffff88811f337490 RCX: 63af48a1314f9900
    RDX: 0000000000000000 RSI: 0000000080000000 RDI: 0000000000000000
    RBP: ffffc9000036fbe8 R08: ffffffff815c23c5 R09: 1ffff11084c14eba
    R10: dfffe91084c14ebc R11: ffffed1084c14ebb R12: ffff888116517800
    R13: dffffc0000000000 R14: ffff888125a1a400 R15: 00000000fffffff0
    FS:  0000000000000000(0000) GS:ffff888426080000(0000) knlGS:0000000000000000
    CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
    CR2: 00000000004ab678 CR3: 0000000122ac4000 CR4: 0000000000350eb0
    DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
    DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
    Call Trace:
     <TASK>
     ? __die_body+0x92/0xf0
     ? die+0xa2/0xe0
     ? do_trap+0x12f/0x370
     ? handle_invalid_op+0xa6/0x140
     ? handle_invalid_op+0xdf/0x140
     ? prog_array_map_poke_run+0x6b4/0x6d0
     ? prog_array_map_poke_run+0x6b4/0x6d0
     ? exc_invalid_op+0x32/0x50
     ? asm_exc_invalid_op+0x1b/0x20
     ? __wake_up_klogd+0xd5/0x110
     ? prog_array_map_poke_run+0x6b4/0x6d0
     ? bpf_prog_6781ebc2dae4bad9+0xb/0x53
     fd_array_map_delete_elem+0x152/0x250
     prog_array_map_clear_deferred+0xf6/0x210
     ? __bpf_array_map_seq_show+0xa40/0xa40
     ? kick_pool+0x164/0x350
     ? process_one_work+0x57a/0xd00
     process_one_work+0x5e4/0xd00
     worker_thread+0x9cf/0xea0
     kthread+0x2b4/0x350
     ? pr_cont_work+0x580/0x580
     ? kthread_blkcg+0xd0/0xd0
     ret_from_fork+0x4a/0x80
     ? kthread_blkcg+0xd0/0xd0
     ret_from_fork_asm+0x11/0x20
     </TASK>
    Modules linked in:
    ---[ end trace 0000000000000000 ]---

However, with my very limited BPF subsystem knowledge I was unable to
trivially fix the issue.  Hopefully some knowledgable person would be
kind enough to provide me with some pointers.

bpf_arch_text_poke() seems to be returning -EBUSY due to a negative
memcmp() result from [3].

        ret = -EBUSY;
        mutex_lock(&text_mutex);
        if (memcmp(ip, old_insn, X86_PATCH_SIZE)) {
                goto out;
        [...]

When spitting out the memory at those locations, this is the result:

    ip:        e9 06 00 00 00
    old_insn:  0f 1f 44 00 00
    nop_insn:  0f 1f 44 00 00

As you can see, the information stored in 'ip' does not match that of
the data stored in 'old_insn', causing bpf_arch_text_poke() to return
early with the error -EBUSY, suggesting that the data pointed to by
'old_insn', and by extension 'prog' should have been changed when
emit_call()ing, to the value of 'ip', but wasn't.

It's possible for me to see what is happening, but I'm afraid finding
possible causes of corruption became too time consuming on this
occasion.  Would anyone be able to chime in to provide their take on
possible causes please?

Any help would be gratefully received.

[0] https://syzkaller.appspot.com/bug?extid=97a4fe20470e9bc30810
[1] https://elixir.bootlin.com/linux/latest/source/kernel/bpf/arraymap.c#L1092
[2] https://syzkaller.appspot.com/text?tag=ReproC&x=1397180f680000
[3] https://elixir.bootlin.com/linux/latest/source/arch/x86/net/bpf_jit_comp.c#L387

-- 
Lee Jones [李琼斯]

^ permalink raw reply

* [PATCH net] tty: Fix uninit-value access in ppp_sync_receive()
From: Shigeru Yoshida @ 2023-11-08 15:44 UTC (permalink / raw)
  To: davem, edumazet, kuba, pabeni
  Cc: linux-ppp, netdev, linux-kernel, Shigeru Yoshida

KMSAN reported the following uninit-value access issue:

=====================================================
BUG: KMSAN: uninit-value in ppp_sync_input drivers/net/ppp/ppp_synctty.c:690 [inline]
BUG: KMSAN: uninit-value in ppp_sync_receive+0xdc9/0xe70 drivers/net/ppp/ppp_synctty.c:334
 ppp_sync_input drivers/net/ppp/ppp_synctty.c:690 [inline]
 ppp_sync_receive+0xdc9/0xe70 drivers/net/ppp/ppp_synctty.c:334
 tiocsti+0x328/0x450 drivers/tty/tty_io.c:2295
 tty_ioctl+0x808/0x1920 drivers/tty/tty_io.c:2694
 vfs_ioctl fs/ioctl.c:51 [inline]
 __do_sys_ioctl fs/ioctl.c:871 [inline]
 __se_sys_ioctl+0x211/0x400 fs/ioctl.c:857
 __x64_sys_ioctl+0x97/0xe0 fs/ioctl.c:857
 do_syscall_x64 arch/x86/entry/common.c:51 [inline]
 do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82
 entry_SYSCALL_64_after_hwframe+0x63/0x6b

Uninit was created at:
 __alloc_pages+0x75d/0xe80 mm/page_alloc.c:4591
 __alloc_pages_node include/linux/gfp.h:238 [inline]
 alloc_pages_node include/linux/gfp.h:261 [inline]
 __page_frag_cache_refill+0x9a/0x2c0 mm/page_alloc.c:4691
 page_frag_alloc_align+0x91/0x5d0 mm/page_alloc.c:4722
 page_frag_alloc include/linux/gfp.h:322 [inline]
 __netdev_alloc_skb+0x215/0x6d0 net/core/skbuff.c:728
 netdev_alloc_skb include/linux/skbuff.h:3225 [inline]
 dev_alloc_skb include/linux/skbuff.h:3238 [inline]
 ppp_sync_input drivers/net/ppp/ppp_synctty.c:669 [inline]
 ppp_sync_receive+0x237/0xe70 drivers/net/ppp/ppp_synctty.c:334
 tiocsti+0x328/0x450 drivers/tty/tty_io.c:2295
 tty_ioctl+0x808/0x1920 drivers/tty/tty_io.c:2694
 vfs_ioctl fs/ioctl.c:51 [inline]
 __do_sys_ioctl fs/ioctl.c:871 [inline]
 __se_sys_ioctl+0x211/0x400 fs/ioctl.c:857
 __x64_sys_ioctl+0x97/0xe0 fs/ioctl.c:857
 do_syscall_x64 arch/x86/entry/common.c:51 [inline]
 do_syscall_64+0x44/0x110 arch/x86/entry/common.c:82
 entry_SYSCALL_64_after_hwframe+0x63/0x6b

CPU: 0 PID: 12950 Comm: syz-executor.1 Not tainted 6.6.0-14500-g1c41041124bd #10
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-1.fc38 04/01/2014
=====================================================

ppp_sync_input() checks the first 2 bytes of the data are PPP_ALLSTATIONS
and PPP_UI. However, if the data length is 1 and the first byte is
PPP_ALLSTATIONS, an access to an uninitialized value occurs when checking
PPP_UI. This patch resolves this issue by checking the data length.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
---
 drivers/net/ppp/ppp_synctty.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c
index ebcdffdf4f0e..ea261a628786 100644
--- a/drivers/net/ppp/ppp_synctty.c
+++ b/drivers/net/ppp/ppp_synctty.c
@@ -687,7 +687,7 @@ ppp_sync_input(struct syncppp *ap, const u8 *buf, const u8 *flags, int count)
 
 	/* strip address/control field if present */
 	p = skb->data;
-	if (p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) {
+	if (skb->len >= 2 && p[0] == PPP_ALLSTATIONS && p[1] == PPP_UI) {
 		/* chop off address/control */
 		if (skb->len < 3)
 			goto err;
-- 
2.41.0


^ permalink raw reply related

* Re: [RFC PATCH v3 10/12] tcp: RX path for devmem TCP
From: Edward Cree @ 2023-11-08 15:36 UTC (permalink / raw)
  To: Stanislav Fomichev, Mina Almasry
  Cc: netdev, linux-kernel, linux-arch, linux-kselftest, linux-media,
	dri-devel, linaro-mm-sig, David S. Miller, Eric Dumazet,
	Jakub Kicinski, Paolo Abeni, Jesper Dangaard Brouer,
	Ilias Apalodimas, Arnd Bergmann, David Ahern, Willem de Bruijn,
	Shuah Khan, Sumit Semwal, Christian König, Shakeel Butt,
	Jeroen de Borst, Praveen Kaligineedi, Willem de Bruijn,
	Kaiyuan Zhang
In-Reply-To: <ZUlYB99GK1Q8is-I@google.com>

On 06/11/2023 21:17, Stanislav Fomichev wrote:
> I guess I'm just wondering whether other people have any suggestions
> here. Not sure Jonathan's way was better, but we fundamentally
> have two queues between the kernel and the userspace:
> - userspace receiving tokens (recvmsg + magical flag)
> - userspace refilling tokens (setsockopt + magical flag)
> 
> So having some kind of shared memory producer-consumer queue feels natural.
> And using 'classic' socket api here feels like a stretch, idk.

Do 'refilled tokens' (returned memory areas) get used for anything other
 than subsequent RX?  If not then surely the way to return a memory area
 in an io_uring idiom is just to post a new read sqe ('RX descriptor')
 pointing into it, rather than explicitly returning it with setsockopt.
(Being async means you can post lots of these, unlike recvmsg(), so you
 don't need any kernel management to keep the RX queue filled; it can
 just be all handled by the userland thus simplifying APIs overall.)
Or I'm misunderstanding something?

-e

^ permalink raw reply

* Re: [PATCH net v3 1/4] net: ethernet: cortina: Fix MTU max setting
From: Vladimir Oltean @ 2023-11-08 15:29 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Hans Ulli Kroll, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Michał Mirosław, Andrew Lunn,
	linux-arm-kernel, netdev, linux-kernel
In-Reply-To: <CACRpkdZ0zH6i8xuZGXq2VEd7brp-dmY89KXmKfxMTk=9eX1EQw@mail.gmail.com>

On Wed, Nov 08, 2023 at 03:37:28PM +0100, Linus Walleij wrote:
> On Wed, Nov 8, 2023 at 3:26 PM Vladimir Oltean <olteanv@gmail.com> wrote:
> > On Tue, Nov 07, 2023 at 10:54:26AM +0100, Linus Walleij wrote:
> 
> > > The RX max frame size is over 10000 for the Gemini ethernet,
> > > but the TX max frame size is actually just 2047 (0x7ff after
> > > checking the datasheet). Reflect this in what we offer to Linux,
> > > cap the MTU at the TX max frame minus ethernet headers.
> > >
> > > Use the BIT() macro for related bit flags so these TX settings
> > > are consistent.
> >
> > What does this second paragraph intend to say? The patch doesn't use the
> > BIT() macro.
> 
> Ah it's a leftover from v1 where I did some unrelated cleanup using
> BIT() but Andrew remarked on it so I dropped it.
> 
> Maybe this twoliner in the commit message can be deleted when
> applying?

I think it's better if you do it, there are some more minor fixups which
you could bundle up with a new series.

^ permalink raw reply

* Re: [PATCH net v3 3/4] net: ethernet: cortina: Handle large frames
From: Vladimir Oltean @ 2023-11-08 15:27 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Hans Ulli Kroll, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Michał Mirosław, Andrew Lunn,
	linux-arm-kernel, netdev, linux-kernel
In-Reply-To: <20231107-gemini-largeframe-fix-v3-3-e3803c080b75@linaro.org>

On Tue, Nov 07, 2023 at 10:54:28AM +0100, Linus Walleij wrote:
> The Gemini ethernet controller provides hardware checksumming
> for frames up to 1514 bytes including ethernet headers but not
> FCS.
> 
> If we start sending bigger frames (after first bumping up the MTU
> on both interfaces sending and receiveing the frames), truncated

s/receiveing/receiving/

> packets start to appear on the target such as in this tcpdump
> resulting from ping -s 1474:
> 
> 23:34:17.241983 14:d6:4d:a8:3c:4f (oui Unknown) > bc:ae:c5:6b:a8:3d (oui Unknown),
> ethertype IPv4 (0x0800), length 1514: truncated-ip - 2 bytes missing!
> (tos 0x0, ttl 64, id 32653, offset 0, flags [DF], proto ICMP (1), length 1502)
> OpenWrt.lan > Fecusia: ICMP echo request, id 1672, seq 50, length 1482
> 
> If we bypass the hardware checksumming and provide a software
> fallback, everything starts working fine up to the max TX MTU
> of 2047 bytes, for example ping -s2000 192.168.1.2:
> 
> 00:44:29.587598 bc:ae:c5:6b:a8:3d (oui Unknown) > 14:d6:4d:a8:3c:4f (oui Unknown),
> ethertype IPv4 (0x0800), length 2042:
> (tos 0x0, ttl 64, id 51828, offset 0, flags [none], proto ICMP (1), length 2028)
> Fecusia > OpenWrt.lan: ICMP echo reply, id 1683, seq 4, length 2008
> 
> The bit enabling to bypass hardware checksum (or any of the
> "TSS" bits) are undocumented in the hardware reference manual.
> The entire hardware checksum unit appears undocumented. The
> conclusion that we need to use the "bypass" bit was found by
> trial-and-error.
> 
> Since no hardware checksum will happen, we slot in a software
> checksum fallback.
> 
> Check for the condition where we need to compute checksum on the
> skb with either hardware or software using == CHECKSUM_PARTIAL instead
> of != CHECKSUM_NONE which is an incomplete check according to
> <linux/skbuff.h>.
> 
> We delete the code disabling the hardware checksum for large
> MTU:s: this is suboptimal because it will disable hardware

"MTUs" maybe?

> checksumming also on small packets which the checksumming
> engine can handle just fine, which is a waste of resources.
> 
> On the D-Link DIR-685 router this fixes a bug on the conduit
> interface to the RTL8366RB DSA switch: as the switch needs to add
> space for its tag it increases the MTU on the conduit interface
> to 1504 and that means that when the router sends packages
> of 1500 bytes these get an extra 4 bytes of DSA tag and the
> transfer fails because of the erroneous hardware checksumming,
> affecting such basic functionality as the LuCI web interface.
> 
> Suggested-by: Vladimir Oltean <olteanv@gmail.com>
> Fixes: 4d5ae32f5e1e ("net: ethernet: Add a driver for Gemini gigabit ethernet")
> Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
> ---
>  drivers/net/ethernet/cortina/gemini.c | 34 +++++++++++++++++++++++-----------
>  1 file changed, 23 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
> index b21a94b4ab5c..78287cfcbf63 100644
> --- a/drivers/net/ethernet/cortina/gemini.c
> +++ b/drivers/net/ethernet/cortina/gemini.c
> @@ -1145,6 +1145,7 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb,
>  	dma_addr_t mapping;
>  	unsigned short mtu;
>  	void *buffer;
> +	int ret;
>  
>  	mtu  = ETH_HLEN;
>  	mtu += netdev->mtu;
> @@ -1159,9 +1160,30 @@ static int gmac_map_tx_bufs(struct net_device *netdev, struct sk_buff *skb,
>  		word3 |= mtu;
>  	}
>  
> -	if (skb->ip_summed != CHECKSUM_NONE) {
> +	if (skb->len >= ETH_FRAME_LEN) {
> +		/* Hardware offloaded checksumming isn't working on frames
> +		 * bigger than 1514 bytes. A hypothesis about this is that the
> +		 * checksum buffer is only 1518 bytes, so when the frames get
> +		 * bigger they get truncated, or the last few bytes get
> +		 * overwritten by the FCS.
> +		 *
> +		 * Just use software checksumming and bypass on bigger frames.
> +		 */
> +		if (skb->ip_summed == CHECKSUM_PARTIAL) {
> +			ret = skb_checksum_help(skb);
> +			if (ret)
> +				return ret;
> +		}
> +		word1 |= TSS_BYPASS_BIT;
> +	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
>  		int tcp = 0;
>  
> +		/* We do not switch off the checksumming on non TCP/UDP
> +		 * frames: as is shown from tests, the checksumming engine
> +		 * is smart enough to see that a frame is not actually TCP
> +		 * or UDP and then just pass it through without any changes
> +		 * to the frame.
> +		 */
>  		if (skb->protocol == htons(ETH_P_IP)) {
>  			word1 |= TSS_IP_CHKSUM_BIT;
>  			tcp = ip_hdr(skb)->protocol == IPPROTO_TCP;
> @@ -1978,15 +2000,6 @@ static int gmac_change_mtu(struct net_device *netdev, int new_mtu)
>  	return 0;
>  }
>  
> -static netdev_features_t gmac_fix_features(struct net_device *netdev,
> -					   netdev_features_t features)
> -{
> -	if (netdev->mtu + ETH_HLEN + VLAN_HLEN > MTU_SIZE_BIT_MASK)
> -		features &= ~GMAC_OFFLOAD_FEATURES;
> -
> -	return features;
> -}
> -

I think this entire ndo_fix_features() can be indeed removed, but your
justification was not immediately convincing. I'd point out that after
your patch 1/4 "net: ethernet: cortina: Fix MTU max setting", you
actually made this dead code, because netdev->mtu can't be larger than
netdev->max_mtu.

If you reverse the patch order a bit, such that "net: ethernet: cortina:
Handle large frames" comes first, I think it would be much more logical
for the removal of gmac_fix_features() to be part of the commit
"net: ethernet: cortina: Fix MTU max setting", with the simple
justification: the new MTU makes the code stop having any role.

>  static int gmac_set_features(struct net_device *netdev,
>  			     netdev_features_t features)
>  {
> @@ -2212,7 +2225,6 @@ static const struct net_device_ops gmac_351x_ops = {
>  	.ndo_set_mac_address	= gmac_set_mac_address,
>  	.ndo_get_stats64	= gmac_get_stats64,
>  	.ndo_change_mtu		= gmac_change_mtu,
> -	.ndo_fix_features	= gmac_fix_features,
>  	.ndo_set_features	= gmac_set_features,
>  };
>  
> 
> -- 
> 2.34.1
> 

^ permalink raw reply

* Re: [PATCH v8 2/2] can: esd: add support for esd GmbH PCIe/402 CAN interface family
From: Simon Horman @ 2023-11-08 15:23 UTC (permalink / raw)
  To: Stefan Mätje
  Cc: wg@grandegger.com, davem@davemloft.net, linux-can@vger.kernel.org,
	linux-kernel@vger.kernel.org, mkl@pengutronix.de, kuba@kernel.org,
	netdev@vger.kernel.org, edumazet@google.com, pabeni@redhat.com
In-Reply-To: <1a1d0f4257cd980c58b6e2f83e2456dde5fe9441.camel@esd.eu>

On Tue, Nov 07, 2023 at 01:27:34PM +0000, Stefan Mätje wrote:
> Am Freitag, dem 03.11.2023 um 16:48 +0000 schrieb Simon Horman:
> > On Wed, Oct 25, 2023 at 04:16:35PM +0200, Stefan Mätje wrote:
> > > This patch adds support for the PCI based PCIe/402 CAN interface family
> > > from esd GmbH that is available with various form factors
> > > (https://esd.eu/en/products/402-series-can-interfaces).
> > > 
> > > All boards utilize a FPGA based CAN controller solution developed
> > > by esd (esdACC). For more information on the esdACC see
> > > https://esd.eu/en/products/esdacc.
> > > 
> > > This driver detects all available CAN interface board variants of
> > > the family but atm. operates the CAN-FD capable devices in
> > > Classic-CAN mode only! A later patch will introduce the CAN-FD
> > > functionality in this driver.
> > > 
> > > Co-developed-by: Thomas Körper <thomas.koerper@esd.eu>
> > > Signed-off-by: Thomas Körper <thomas.koerper@esd.eu>
> > > Signed-off-by: Stefan Mätje <stefan.maetje@esd.eu>
> > 
> > ...
> > 
> > > +static int pci402_probe(struct pci_dev *pdev, const struct pci_device_id
> > > *ent)
> > > +{
> > > +       struct pci402_card *card = NULL;
> > > +       int err;
> > > +
> > > +       err = pci_enable_device(pdev);
> > > +       if (err)
> > > +               return err;
> > > +
> > > +       card = devm_kzalloc(&pdev->dev, sizeof(*card), GFP_KERNEL);
> > > +       if (!card)
> > 
> > Hi Thomas and Stefan,
> > 
> > If this condition is met then the function will return err,
> > but err is set to 0. Perhaps it should be set to an error value here?
> > 
> > Flagged by Smatch.
> 
> Hi Simon,
> 
> thank you for reviewing this. Looking at the code it is apparently wrong.
> 
> I was not aware of smatch. I got a copy and could reproduce the error report.
> This will add another tool of static code analysis to my release routine.
> 
> An upgraded patch with a fix will follow.

Thanks Stefan, that sounds good to me on both counts.

...

^ permalink raw reply

* Re: [PATCH net] net/sched: act_ct: Always fill offloading tuple iifidx
From: Simon Horman @ 2023-11-08 15:20 UTC (permalink / raw)
  To: Vlad Buslov
  Cc: davem, kuba, pabeni, netdev, jhs, xiyou.wangcong, jiri, pablo,
	Paul Blakey
In-Reply-To: <87il6dldlp.fsf@nvidia.com>

On Tue, Nov 07, 2023 at 06:30:24PM +0200, Vlad Buslov wrote:
> 
> On Tue 07 Nov 2023 at 11:27, Simon Horman <horms@kernel.org> wrote:
> > On Fri, Nov 03, 2023 at 04:14:10PM +0100, Vlad Buslov wrote:
> >> Referenced commit doesn't always set iifidx when offloading the flow to
> >> hardware. Fix the following cases:
> >> 
> >> - nf_conn_act_ct_ext_fill() is called before extension is created with
> >> nf_conn_act_ct_ext_add() in tcf_ct_act(). This can cause rule offload with
> >> unspecified iifidx when connection is offloaded after only single
> >> original-direction packet has been processed by tc data path. Always fill
> >> the new nf_conn_act_ct_ext instance after creating it in
> >> nf_conn_act_ct_ext_add().
> >> 
> >> - Offloading of unidirectional UDP NEW connections is now supported, but ct
> >> flow iifidx field is not updated when connection is promoted to
> >> bidirectional which can result reply-direction iifidx to be zero when
> >> refreshing the connection. Fill in the extension and update flow iifidx
> >> before calling flow_offload_refresh().
> >
> > Hi Vlad,
> >
> > these changes look good to me. However, I do wonder if the changes for each
> > of the two points above should be split into two patches, and
> > if the fixes tag for the second point should be.
> >
> > Fixes: 6a9bad0069cf ("net/sched: act_ct: offload UDP NEW connections")
> 
> Hi Simon,
> 
> I considered this but decided to send as single patch because
> connections 'refresh' mechanism has already existed before the UDP NEW
> offload and it didn't update the iifidx. While yes, it wasn't
> technically necessary because only established connections were
> considered for offloading I'm still leaning more towards considering it
> a flow in original implementation since UDP NEW support wasn't the first
> change modifying the offload behavior (43332cf97425 ("net/sched: act_ct:
> Offload only ASSURED connections") was before that), so further changes
> should have been anticipated. Hope this clarifies my motivation.
> 
> Note that I don't have strong opinion about it and willing to split the
> patch, if necessary but to me it appears as just more trouble for
> maintainers without any benefits...

Hi Vlad,

thanks for clarifying, I appreciate it.  I also don't have a strong feeling
on this, and with your clarification above I am now happy with the patch
arranged as-is.

Reviewed-by: Simon Horman <horms@kernel.org>

...

^ permalink raw reply

* Re: [PATCH net v3 4/4] net: ethernet: cortina: Checksum only TCP and UDP
From: Vladimir Oltean @ 2023-11-08 15:19 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Hans Ulli Kroll, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Michał Mirosław, Andrew Lunn,
	linux-arm-kernel, netdev, linux-kernel
In-Reply-To: <20231107-gemini-largeframe-fix-v3-4-e3803c080b75@linaro.org>

On Tue, Nov 07, 2023 at 10:54:29AM +0100, Linus Walleij wrote:
> It is a bit odd that frames that are neither TCP or UDP
> (such as ICMP or ARP) are still sent to the checksumming
> engine, where they are clearly just ignored.
> 
> Rewrite the logic slightly so that we first check if the
> frame is TCP or UDP, in that case bypass the checksum
> engine.
> 
> Reported-by: Vladimir Oltean <olteanv@gmail.com>
> Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
> ---

If this patch doesn't fix anything and isn't a dependency for anything,
it shouldn't be present in a series targeted for "net". And anyway, I
think it's not needed in general after the discussion on v2.

^ permalink raw reply

* Re: [PATCH net v2 4/4] net: ethernet: cortina: Handle large frames
From: Vladimir Oltean @ 2023-11-08 15:18 UTC (permalink / raw)
  To: Linus Walleij
  Cc: Hans Ulli Kroll, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Michał Mirosław, Andrew Lunn,
	linux-arm-kernel, netdev, linux-kernel
In-Reply-To: <CACRpkdb-iAQdw3S_1iBX=SFt6LCPvdW8+K0nvuXxD01q1K9A1A@mail.gmail.com>

On Mon, Nov 06, 2023 at 11:44:37PM +0100, Linus Walleij wrote:
> That indeed seems like the intention. But it's a bit suboptimal, because it
> disables hardware checksum just because the MTU goes over a
> certain level, and stops using the hardware checksum also for all
> packets smaller than the MTU :(
> 
> I'll delete this and make the driver slot in the SW fallback per-packet
> instead, I think it is fair to assume that most packets will be < MTU
> and it is really just a question of where the fallback gets called.

Performing the software checksum per packet instead of doing it for all
packets seems more sensible. I haven't looked at the other offload bits
from GMAC_OFFLOAD_FEATURES to determine if changing those is going to be
problematic.

> > > Suggested-by: Vladimir Oltean <olteanv@gmail.com>
> >
> > To be clear, I didn't suggest any of this. I just pointed towards the gemini.c
> > driver as being the problem. Please remove my Suggested-by tag.
> 
> OK sorry, it was just my way of trying to provide credit where
> credit is due, because you helped so much with this bug.

Ok, but asserting the validity of the DSA code and vaguely pointing
towards an unrelated driver and not even specifying what might be wrong
with its xmit procedure still does not count as worth a Suggested-by tag
to me. So still, please remove it.

> > We say this in Documentation/networking/dsa/dsa.rst:
> >
> > Checksum offload should work with category 1 and 2 taggers when the DSA conduit
> > driver declares NETIF_F_HW_CSUM in vlan_features
> > and looks at csum_start and csum_offset.
> > For those cases, DSA will shift the checksum start and offset by
> > the tag size. If the DSA conduit driver still uses the legacy NETIF_F_IP_CSUM
> > or NETIF_F_IPV6_CSUM in vlan_features, the offload might only work if the
> > offload hardware already expects that specific tag (perhaps due to matching
> > vendors).
> 
> Since things work smoothly I can only assume that the Gemini
> checksum engine actually knows about the Realtek ethertype (0x8899)
> and the protocol (0xa) and takes action on that, since the switch works.
> 
> OR: it has some heuristic on for how to handle it. (Such as looking for
> a valid TCP or UDP header to figure out where to put the checksum.)

The second option seems more plausible to me. It seems unlikely that a
non-Realtek controller would have information about a Realtek
proprietary protocol, that even Linux people have trouble finding a lot
of information about, when explicitly trying to do so.

> But I have no idea how it does it. It doesn't have a firmware AFAIK.
> 
> Examples listed were ICMP so just IP checksums but I tried for example
> SSH, and HTTP and packets look like this:
> 
> 22:51:35.457191 9a:ec:30:5a:46:96 (oui Unknown) > bc:ae:c5:6b:a8:3d
> (oui Unknown),
> ethertype IPv4 (0x0800), length 434: (tos 0x48, ttl 64, id 8221,
> offset 0, flags [DF], proto TCP (6), length 420)
>     _gateway.48102 > fedora.ssh: Flags [P.], cksum 0xcf1b (correct),
> seq 811:1179, ack 2310,
>    win 2054, options [nop,nop,TS val 74858741 ecr 1981407207], length 368
> 
> Checksum correct. So...

Well, the checksum is correct because it's done in software by the
network stack, prior to dsa_user_xmit() being even called, and thus
prior to the DSA tag being added. "ethtool -k lan0 | grep sum" will tell
you as much (compare it to "ethtool -k eth0" which will list some
offload bits set).

> > Shouldn't "word1 |= TSS_BYPASS_BIT;" be done depending on skb->protocol,
> > rather than depending on skb->len?!
> >
> >                 if (skb->protocol == htons(ETH_P_IP)) {
> >                         word1 |= TSS_IP_CHKSUM_BIT;
> >                         tcp = ip_hdr(skb)->protocol == IPPROTO_TCP;
> >                 } else { /* IPv6 */
> >                         word1 |= TSS_IPV6_ENABLE_BIT;
> >                         tcp = ipv6_hdr(skb)->nexthdr == IPPROTO_TCP;
> >                 } // here
> >                         word1 |= TSS_BYPASS_BIT;
> 
> Oddly it assumes everything is either TCP or UDP on
> IPv4 or IPv6. And yet things such as ICMP work just fine.
> 
> I think the checksum engine can contain some various
> heuristics, such as if it cannot recognize what is coming
> in as the selected TCP or UDP, it will pass right through.
> 
> > Gemini should never attempt to provide checksums for DSA-tagged packets
> > unless it is able to take skb->csum_start into consideration, otherwise
> > it will get it wrong.
> >
> > This is somewhat independent of the other problem you've found, which
> > seems to be that large non-DSA packets get truncated anyway. But not
> > bypassing TX checksum offload truncates a packet? Hmm, strange.
> 
> I have a theory about that in the comment, which is that when they
> engineered the hardware they only put in a hardware buffer for
> 1518 bytes in the checksum engine. If you try to put in any more
> it gets truncated. It's a reasonable guess.
> 
> If you do not set the checkumming engine to "bypass" it will try
> to fit the incoming paket into the checksumming buffer, and then
> it will look to see if it can find the right TCP or UDP headers.
> If it can't it will pass the packet out from the buffer without doing
> any changes. But the buffer is just 1518 bytes, which means that
> no matter what kind of package it is, it will get truncated if it
> does not fit into the checksumming buffer.
> 
> This would give exactly the behaviour we're seeing.

The evidence you've presented seems to support your position and not
mine. The controller does not attempt to provide checksums for
DSA-tagged packets, that is not a problem because those have checksums
already, but the packet size is the actual problem, irrespective of
whether the packets are DSA-tagged or not.

But in that case I have some comments on your latest version of the
patch set.

^ permalink raw reply

* Re: PRP with VLAN support - or how to contribute to a Linux network driver
From: Andrew Lunn @ 2023-11-08 15:17 UTC (permalink / raw)
  To: Heiko Gerstung; +Cc: netdev@vger.kernel.org
In-Reply-To: <75E355CF-3621-40D7-A31C-BA829804DFA2@meinberg.de>

> I would like to discuss if it makes sense to remove the PRP
> functionality from the HSR driver (which is based on the bridge
> kernel module AFAICS) and instead implement PRP as a separate module
> (based on the Bonding driver, which would make more sense for PRP).

Seems like nobody replied. I don't know PRP or HSR, so i can only make
general remarks.

The general policy is that we don't rip something out and replace it
with new code. We try to improve what already exists to meet the
demands. This is partially because of backwards compatibility. There
could be users using the code as is. You cannot break that. Can you
step by step modify the current code to make use of bonding, and in
the process show you don't break the current use cases? You also need
to consider offloading to hardware. The bridge code has infrastructure
to offload. Does the bond driver? I've no idea about that.

> Hoping for advise what the next steps could be. Happy to discuss
> this off-list as it may not be of interest for most people.

You probably want to get together with others who are interested in
PRP and HSR. linutronix, ti, microchip, etc.

	Andrew

^ permalink raw reply

* Re: [PATCH 17/22] powerpc: ps3: move udbg_shutdown_ps3gelic prototype
From: Jakub Kicinski @ 2023-11-08 15:10 UTC (permalink / raw)
  To: Geoff Levand
  Cc: Arnd Bergmann, Andrew Morton, linux-kernel, Masahiro Yamada,
	linux-kbuild, Arnd Bergmann, Matt Turner, Vineet Gupta,
	Russell King, Catalin Marinas, Will Deacon, Steven Rostedt,
	Masami Hiramatsu, Mark Rutland, Guo Ren, Peter Zijlstra,
	Ard Biesheuvel, Huacai Chen, Greg Ungerer, Michal Simek,
	Thomas Bogendoerfer, Dinh Nguyen, Michael Ellerman,
	Nicholas Piggin, Christophe Leroy, Palmer Dabbelt, Heiko Carstens,
	John Paul Adrian Glaubitz, David S. Miller, Andy Lutomirski,
	Thomas Gleixner, Ingo Molnar, x86, Helge Deller, Sudip Mukherjee,
	Greg Kroah-Hartman, Timur Tabi, Kent Overstreet, David Woodhouse,
	Naveen N. Rao, Anil S Keshavamurthy, Kees Cook, Vincenzo Frascino,
	Juri Lelli, Vincent Guittot, Nathan Chancellor, Nick Desaulniers,
	Nicolas Schier, Al Viro, Uwe Kleine-König, linux-alpha,
	linux-snps-arc, linux-arm-kernel, linux-trace-kernel, linux-csky,
	loongarch, linux-m68k, linux-mips, linuxppc-dev, linux-riscv,
	linux-s390, linux-sh, sparclinux, netdev, linux-parisc, linux-usb,
	linux-fbdev, dri-devel, linux-bcachefs, linux-mtd
In-Reply-To: <1b3ccc4a-41f7-46ad-9c5c-5ef44a96426e@infradead.org>

On Wed, 8 Nov 2023 14:18:09 +0000 Geoff Levand wrote:
> Seems good to me.  I'll test it next chance I get.
> 
> Signed-off-by: Geoff Levand <geoff@infradead.org>

Seems like this is best routed via powerpc:

Acked-by: Jakub Kicinski <kuba@kernel.org>

^ permalink raw reply

* Re: EIO on send with UDP_SEGMENT
From: Willem de Bruijn @ 2023-11-08 15:10 UTC (permalink / raw)
  To: Jakub Sitnicki; +Cc: Willem de Bruijn, netdev, kernel-team
In-Reply-To: <87jzqsld6q.fsf@cloudflare.com>

On Wed, Nov 8, 2023 at 6:03 AM Jakub Sitnicki <jakub@cloudflare.com> wrote:
>
> Hi Willem et al,
>
> We have hit the EIO error path in udp_send_skb introduced in commit bec1f6f69736
> ("udp: generate gso with UDP_SEGMENT") [0]:
>
>         if (skb->ip_summed != CHECKSUM_PARTIAL || ...) {
>                 kfree_skb(skb);
>                 return -EIO;
>         }
>
> ... when attempting to send a GSO packet, using UDP_SEGMENT option, from
> a TUN device which didn't have any offloads enabled (the default case).
>
> A trivial reproducer for that would be:
>
>   ip tuntap add dev tun0 mode tun
>   ip addr add dev tun0 192.0.2.1/24
>   ip link set dev tun0 up
>
>   strace -e %net python -c '
>   from socket import *
>   s = socket(AF_INET, SOCK_DGRAM)
>   s.setsockopt(SOL_UDP, 103, 1200)
>   s.sendto(b"x" * 3000, ("192.0.2.2", 9))
>   '
>
> which yields:
>
>   socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC, IPPROTO_IP) = 3
>   setsockopt(3, SOL_UDP, UDP_SEGMENT, [1200], 4) = 0
>   sendto(3, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"..., 3000, 0, {sa_family=AF_INET, sin_port=htons(9), sin_addr=inet_addr("192.0.2.2")}, 16) = -1 EIO (Input/output error)
>
> This has been a surprise and caused us some pain. I think it comes down
> to that anyone using UDP_SEGMENT has to implement a segmentation
> fallback in user-space. Just to be on the safe side.  We can't really
> assume that any TUN/TAP interface, which happens to be our egress
> device, has at least checksum offload enabled and implemented.
>
> Which is not ideal.
> So it made us wonder if anything can be done about it?
>
> As it turns out, skb_segment() in GSO path implements a software
> fallback not only for segmentation but also for checksumming [1].
>
> What is more, when we removed the skb->ip_summed == CHECKSUM_PARTIAL
> restriction in udp_send, as an experiment, we were able to observe fully
> checksummed segments in packet capture.
>
> Which brings me to my question -
>
> Do you think the restriction in udp_send_skb can be lifted or tweaked?

The argument against has been that segmentation offload offers no
performance benefit if the stack has to fall back onto software
checksumming.

If this limitation makes userspace code more complex, by having to
branch between segmentation offload and not depending on device
features, that would be an argument to drop it. As you point out, it
is not needed for correctness.

>
> Thanks,
> Jakub
>
> [0] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=bec1f6f697362c5bc635dacd7ac8499d0a10a4e7
> [1] https://elixir.bootlin.com/linux/v6.6/source/net/core/skbuff.c#L4626
>

^ permalink raw reply

* [PATCH iwl-net] i40e: Fix max frame size check
From: Ivan Vecera @ 2023-11-08 15:10 UTC (permalink / raw)
  To: intel-wired-lan
  Cc: Jacob Keller, Wojciech Drewek, Todd Fujinaka, Jesse Brandeburg,
	Tony Nguyen, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Jeff Kirsher, open list:NETWORKING DRIVERS,
	open list

Commit 3a2c6ced90e1 ("i40e: Add a check to see if MFS is set") added
a check for port's MFS (max frame size) value. The value is stored
in PRTGL_SAH register for each physical port. According datasheet this
register is defined as:

PRTGL_SAH[PRT]: (0x001E2140 + 0x4*PRT, PRT=0...3)

where PRT is physical port number.

The existing check does not take port number into account and reads
actually MFS value always for port 0 that is correct for PF 0 but
not for other PFs.

Update PRTGL_SAH register definition so it takes a port number as
an argument and fix the check by passing the port number.
Also fix the warning message that use for a port number a local
variable 'i' that really does not contain such information.

Fixes: 3a2c6ced90e1 ("i40e: Add a check to see if MFS is set")
Cc: Todd Fujinaka <todd.fujinaka@intel.com>
Signed-off-by: Ivan Vecera <ivecera@redhat.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c     | 4 ++--
 drivers/net/ethernet/intel/i40e/i40e_register.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 4f445f6835de..6a2907674583 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -16219,11 +16219,11 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	/* make sure the MFS hasn't been set lower than the default */
 #define MAX_FRAME_SIZE_DEFAULT 0x2600
-	val = (rd32(&pf->hw, I40E_PRTGL_SAH) &
+	val = (rd32(&pf->hw, I40E_PRTGL_SAH(pf->hw.port)) &
 	       I40E_PRTGL_SAH_MFS_MASK) >> I40E_PRTGL_SAH_MFS_SHIFT;
 	if (val < MAX_FRAME_SIZE_DEFAULT)
 		dev_warn(&pdev->dev, "MFS for port %x has been set below the default: %x\n",
-			 i, val);
+			 pf->hw.port, val);
 
 	/* Add a filter to drop all Flow control frames from any VSI from being
 	 * transmitted. By doing so we stop a malicious VF from sending out
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
index f408fcf23ce8..75edfe3d43f7 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_register.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_register.h
@@ -498,7 +498,7 @@
 #define I40E_VSILAN_QBASE_VSIQTABLE_ENA_SHIFT 11
 #define I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK I40E_MASK(0x1, I40E_VSILAN_QBASE_VSIQTABLE_ENA_SHIFT)
 #define I40E_VSILAN_QTABLE(_i, _VSI) (0x00200000 + ((_i) * 2048 + (_VSI) * 4)) /* _i=0...7, _VSI=0...383 */ /* Reset: PFR */
-#define I40E_PRTGL_SAH 0x001E2140 /* Reset: GLOBR */
+#define I40E_PRTGL_SAH(_PRT) (0x001E2140 + ((_PRT) * 4)) /* _PRT=0...3 */ /* Reset: GLOBR */
 #define I40E_PRTGL_SAH_FC_SAH_SHIFT 0
 #define I40E_PRTGL_SAH_FC_SAH_MASK I40E_MASK(0xFFFF, I40E_PRTGL_SAH_FC_SAH_SHIFT)
 #define I40E_PRTGL_SAH_MFS_SHIFT 16
-- 
2.41.0


^ permalink raw reply related

* Re: [PATCH net 1/3] dpll: fix pin dump crash after module unbind
From: Jiri Pirko @ 2023-11-08 15:08 UTC (permalink / raw)
  To: Arkadiusz Kubalewski
  Cc: netdev, vadim.fedorenko, michal.michalik, milena.olech, pabeni,
	kuba
In-Reply-To: <20231108103226.1168500-2-arkadiusz.kubalewski@intel.com>

Wed, Nov 08, 2023 at 11:32:24AM CET, arkadiusz.kubalewski@intel.com wrote:
>Disallow dump of unregistered parent pins, it is possible when parent
>pin and dpll device registerer kernel module instance unbinds, and
>other kernel module instances of the same dpll device have pins
>registered with the parent pin. The user can invoke a pin-dump but as
>the parent was unregistered, thus shall not be accessed by the
>userspace, prevent that by checking if parent pin is still registered.
>
>Fixes: 9d71b54b65b1 ("dpll: netlink: Add DPLL framework base functions")
>Signed-off-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
>---
> drivers/dpll/dpll_netlink.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
>diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c
>index a6dc3997bf5c..93fc6c4b8a78 100644
>--- a/drivers/dpll/dpll_netlink.c
>+++ b/drivers/dpll/dpll_netlink.c
>@@ -328,6 +328,13 @@ dpll_msg_add_pin_parents(struct sk_buff *msg, struct dpll_pin *pin,
> 		void *parent_priv;
> 
> 		ppin = ref->pin;
>+		/*
>+		 * dump parent only if it is registered, thus prevent crash on
>+		 * pin dump called when driver which registered the pin unbinds
>+		 * and different instance registered pin on that parent pin

Read this sentence like 10 times, still don't get what you mean.
Shouldn't comments be easy to understand?


>+		 */
>+		if (!xa_get_mark(&dpll_pin_xa, ppin->id, DPLL_REGISTERED))
>+			continue;
> 		parent_priv = dpll_pin_on_dpll_priv(dpll_ref->dpll, ppin);
> 		ret = ops->state_on_pin_get(pin,
> 					    dpll_pin_on_pin_priv(ppin, pin),
>-- 
>2.38.1
>

^ permalink raw reply

* Re: [PATCH net 3/3] dpll: fix register pin with unregistered parent pin
From: Jiri Pirko @ 2023-11-08 15:07 UTC (permalink / raw)
  To: Arkadiusz Kubalewski
  Cc: netdev, vadim.fedorenko, michal.michalik, milena.olech, pabeni,
	kuba
In-Reply-To: <20231108103226.1168500-4-arkadiusz.kubalewski@intel.com>

Wed, Nov 08, 2023 at 11:32:26AM CET, arkadiusz.kubalewski@intel.com wrote:
>In case of multiple kernel module instances using the same dpll device:
>if only one registers dpll device, then only that one can register

They why you don't register in multiple instances? See mlx5 for a
reference.


>directly connected pins with a dpll device. If unregistered parent
>determines if the muxed pin can be register with it or not, it forces
>serialized driver load order - first the driver instance which registers
>the direct pins needs to be loaded, then the other instances could
>register muxed type pins.
>
>Allow registration of a pin with a parent even if the parent was not
>yet registered, thus allow ability for unserialized driver instance

Weird.


>load order.
>Do not WARN_ON notification for unregistered pin, which can be invoked
>for described case, instead just return error.
>
>Fixes: 9431063ad323 ("dpll: core: Add DPLL framework base functions")
>Fixes: 9d71b54b65b1 ("dpll: netlink: Add DPLL framework base functions")
>Signed-off-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
>---
> drivers/dpll/dpll_core.c    | 4 ----
> drivers/dpll/dpll_netlink.c | 2 +-
> 2 files changed, 1 insertion(+), 5 deletions(-)
>
>diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c
>index 4077b562ba3b..ae884b92d68c 100644
>--- a/drivers/dpll/dpll_core.c
>+++ b/drivers/dpll/dpll_core.c
>@@ -28,8 +28,6 @@ static u32 dpll_xa_id;
> 	WARN_ON_ONCE(!xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED))
> #define ASSERT_DPLL_NOT_REGISTERED(d)	\
> 	WARN_ON_ONCE(xa_get_mark(&dpll_device_xa, (d)->id, DPLL_REGISTERED))
>-#define ASSERT_PIN_REGISTERED(p)	\
>-	WARN_ON_ONCE(!xa_get_mark(&dpll_pin_xa, (p)->id, DPLL_REGISTERED))
> 
> struct dpll_device_registration {
> 	struct list_head list;
>@@ -641,8 +639,6 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin,
> 	    WARN_ON(!ops->state_on_pin_get) ||
> 	    WARN_ON(!ops->direction_get))
> 		return -EINVAL;
>-	if (ASSERT_PIN_REGISTERED(parent))
>-		return -EINVAL;
> 
> 	mutex_lock(&dpll_lock);
> 	ret = dpll_xa_ref_pin_add(&pin->parent_refs, parent, ops, priv);
>diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c
>index 963bbbbe6660..ff430f43304f 100644
>--- a/drivers/dpll/dpll_netlink.c
>+++ b/drivers/dpll/dpll_netlink.c
>@@ -558,7 +558,7 @@ dpll_pin_event_send(enum dpll_cmd event, struct dpll_pin *pin)
> 	int ret = -ENOMEM;
> 	void *hdr;
> 
>-	if (WARN_ON(!xa_get_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED)))
>+	if (!xa_get_mark(&dpll_pin_xa, pin->id, DPLL_REGISTERED))
> 		return -ENODEV;
> 
> 	msg = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
>-- 
>2.38.1
>

^ permalink raw reply

* Re: [PATCH net v1] net/smc: avoid data corruption caused by decline
From: Jakub Kicinski @ 2023-11-08 14:58 UTC (permalink / raw)
  To: D. Wythe
  Cc: kgraul, wenjia, jaka, wintera, davem, netdev, linux-s390,
	linux-rdma
In-Reply-To: <1699436909-22767-1-git-send-email-alibuda@linux.alibaba.com>

On Wed,  8 Nov 2023 17:48:29 +0800 D. Wythe wrote:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
> 
> We found a data corruption issue during testing of SMC-R on Redis
> applications.

Please make sure you CC all relevant people pointed out
by get_maintainers. Make sure you run get_maintainers
on the generated patch, not just on file paths.

You seem to have missed the following people:
 pabeni@redhat.com
 guwen@linux.alibaba.com
 tonylu@linux.alibaba.com
 edumazet@google.com 
-- 
pw-bot: cr
pv-bot: cc

^ permalink raw reply

* RE: [RFC PATCH v3 09/12] net: add support for skbs with unreadable frags
From: David Laight @ 2023-11-08 14:43 UTC (permalink / raw)
  To: 'Mina Almasry', netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
	linux-kselftest@vger.kernel.org, linux-media@vger.kernel.org,
	dri-devel@lists.freedesktop.org, linaro-mm-sig@lists.linaro.org
  Cc: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni,
	Jesper Dangaard Brouer, Ilias Apalodimas, Arnd Bergmann,
	David Ahern, Willem de Bruijn, Shuah Khan, Sumit Semwal,
	Christian König, Shakeel Butt, Jeroen de Borst,
	Praveen Kaligineedi, Willem de Bruijn, Kaiyuan Zhang
In-Reply-To: <20231106024413.2801438-10-almasrymina@google.com>

From: Mina Almasry
> Sent: 06 November 2023 02:44
> 
> For device memory TCP, we expect the skb headers to be available in host
> memory for access, and we expect the skb frags to be in device memory
> and unaccessible to the host. We expect there to be no mixing and
> matching of device memory frags (unaccessible) with host memory frags
> (accessible) in the same skb.
> 
> Add a skb->devmem flag which indicates whether the frags in this skb
> are device memory frags or not.
> 
...
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 1fae276c1353..8fb468ff8115 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -805,6 +805,8 @@ typedef unsigned char *sk_buff_data_t;
>   *	@csum_level: indicates the number of consecutive checksums found in
>   *		the packet minus one that have been verified as
>   *		CHECKSUM_UNNECESSARY (max 3)
> + *	@devmem: indicates that all the fragments in this skb are backed by
> + *		device memory.
>   *	@dst_pending_confirm: need to confirm neighbour
>   *	@decrypted: Decrypted SKB
>   *	@slow_gro: state present at GRO time, slower prepare step required
> @@ -991,7 +993,7 @@ struct sk_buff {
>  #if IS_ENABLED(CONFIG_IP_SCTP)
>  	__u8			csum_not_inet:1;
>  #endif
> -
> +	__u8			devmem:1;
>  #if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS)
>  	__u16			tc_index;	/* traffic control index */
>  #endif
> @@ -1766,6 +1768,12 @@ static inline void skb_zcopy_downgrade_managed(struct sk_buff *skb)
>  		__skb_zcopy_downgrade_managed(skb);
>  }

Doesn't that bloat struct sk_buff?
I'm not sure there are any spare bits available.
Although CONFIG_NET_SWITCHDEV and CONFIG_NET_SCHED seem to
already add padding.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

^ permalink raw reply

* Re: [PATCH net v3 1/4] net: ethernet: cortina: Fix MTU max setting
From: Linus Walleij @ 2023-11-08 14:37 UTC (permalink / raw)
  To: Vladimir Oltean
  Cc: Hans Ulli Kroll, David S. Miller, Eric Dumazet, Jakub Kicinski,
	Paolo Abeni, Michał Mirosław, Andrew Lunn,
	linux-arm-kernel, netdev, linux-kernel
In-Reply-To: <20231108142640.tmly4ifgsoeo7m3e@skbuf>

On Wed, Nov 8, 2023 at 3:26 PM Vladimir Oltean <olteanv@gmail.com> wrote:
> On Tue, Nov 07, 2023 at 10:54:26AM +0100, Linus Walleij wrote:

> > The RX max frame size is over 10000 for the Gemini ethernet,
> > but the TX max frame size is actually just 2047 (0x7ff after
> > checking the datasheet). Reflect this in what we offer to Linux,
> > cap the MTU at the TX max frame minus ethernet headers.
> >
> > Use the BIT() macro for related bit flags so these TX settings
> > are consistent.
>
> What does this second paragraph intend to say? The patch doesn't use the
> BIT() macro.

Ah it's a leftover from v1 where I did some unrelated cleanup using
BIT() but Andrew remarked on it so I dropped it.

Maybe this twoliner in the commit message can be deleted when
applying?

Yours,
Linus Walleij

^ permalink raw reply

* Re: [PATCH bpf-next] selftests/xsk: fix for SEND_RECEIVE_UNALIGNED test.
From: Maciej Fijalkowski @ 2023-11-08 14:30 UTC (permalink / raw)
  To: Tushar Vyavahare
  Cc: bpf, netdev, bjorn, magnus.karlsson, jonathan.lemon, davem, kuba,
	pabeni, ast, daniel, tirthendu.sarkar
In-Reply-To: <20231103142936.393654-1-tushar.vyavahare@intel.com>

On Fri, Nov 03, 2023 at 02:29:36PM +0000, Tushar Vyavahare wrote:
> Fix test broken by shared umem test and framework enhancement commit.
> 
> Correct the current implementation of pkt_stream_replace_half() by
> ensuring that nb_valid_entries are not set to half, as this is not true
> for all the tests.

Please be more specific - so what is the expected value for
nb_valid_entries for unaligned mode test then, if not the half?

> 
> Create a new function called pkt_modify() that allows for packet
> modification to meet specific requirements while ensuring the accurate
> maintenance of the valid packet count to prevent inconsistencies in packet
> tracking.
> 
> Fixes: 6d198a89c004 ("selftests/xsk: Add a test for shared umem feature")
> Reported-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> Signed-off-by: Tushar Vyavahare <tushar.vyavahare@intel.com>
> ---
>  tools/testing/selftests/bpf/xskxceiver.c | 71 ++++++++++++++++--------
>  1 file changed, 47 insertions(+), 24 deletions(-)
> 
> diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
> index 591ca9637b23..f7d3a4a9013f 100644
> --- a/tools/testing/selftests/bpf/xskxceiver.c
> +++ b/tools/testing/selftests/bpf/xskxceiver.c
> @@ -634,16 +634,35 @@ static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pk
>  	return nb_frags;
>  }
>  
> -static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
> +static bool pkt_valid(bool unaligned_mode, int offset, u32 len)

kinda confusing to have is_pkt_valid() and pkt_valid() functions...
maybe name this as set_pkt_valid() ? doesn't help much but anyways.

> +{
> +	if (len > MAX_ETH_JUMBO_SIZE || (!unaligned_mode && offset < 0))
> +		return false;
> +
> +	return true;
> +}
> +
> +static void pkt_set(struct pkt_stream *pkt_stream, struct xsk_umem_info *umem, struct pkt *pkt,
> +		    int offset, u32 len)

How about adding a bool unaligned to pkt_stream instead of passing whole
xsk_umem_info to pkt_set - wouldn't this make the diff smaller?

>  {
>  	pkt->offset = offset;
>  	pkt->len = len;
> -	if (len > MAX_ETH_JUMBO_SIZE) {
> -		pkt->valid = false;
> -	} else {
> -		pkt->valid = true;
> +
> +	pkt->valid = pkt_valid(umem->unaligned_mode, offset, len);
> +	if (pkt->valid)
>  		pkt_stream->nb_valid_entries++;
> -	}
> +}
> +
> +static void pkt_modify(struct pkt_stream *pkt_stream, struct xsk_umem_info *umem, struct pkt *pkt,
> +		       int offset, u32 len)
> +{
> +	bool mod_valid;
> +
> +	pkt->offset = offset;
> +	pkt->len = len;
> +	mod_valid  = pkt_valid(umem->unaligned_mode, offset, len);

double space

> +	pkt_stream->nb_valid_entries += mod_valid - pkt->valid;
> +	pkt->valid = mod_valid;
>  }
>  
>  static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
> @@ -651,7 +670,8 @@ static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
>  	return ceil_u32(len, umem->frame_size) * umem->frame_size;
>  }
>  
> -static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off)
> +static struct pkt_stream *__pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts,
> +						u32 pkt_len, u32 nb_start, u32 nb_off)
>  {
>  	struct pkt_stream *pkt_stream;
>  	u32 i;
> @@ -665,30 +685,31 @@ static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb
>  	for (i = 0; i < nb_pkts; i++) {
>  		struct pkt *pkt = &pkt_stream->pkts[i];
>  
> -		pkt_set(pkt_stream, pkt, 0, pkt_len);
> +		pkt_set(pkt_stream, umem, pkt, 0, pkt_len);
>  		pkt->pkt_nb = nb_start + i * nb_off;
>  	}
>  
>  	return pkt_stream;
>  }
>  
> -static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
> +static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len)
>  {
> -	return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1);
> +	return __pkt_stream_generate(umem, nb_pkts, pkt_len, 0, 1);
>  }
>  
> -static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
> +static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream,
> +					   struct xsk_umem_info *umem)
>  {
> -	return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
> +	return pkt_stream_generate(umem, pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
>  }
>  
>  static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
>  {
>  	struct pkt_stream *pkt_stream;
>  
> -	pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
> +	pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, nb_pkts, pkt_len);
>  	test->ifobj_tx->xsk->pkt_stream = pkt_stream;
> -	pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
> +	pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, nb_pkts, pkt_len);
>  	test->ifobj_rx->xsk->pkt_stream = pkt_stream;
>  }
>  
> @@ -698,12 +719,11 @@ static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
>  	struct pkt_stream *pkt_stream;
>  	u32 i;
>  
> -	pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream);
> +	pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream, ifobj->umem);
>  	for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2)
> -		pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len);
> +		pkt_modify(pkt_stream, ifobj->umem, &pkt_stream->pkts[i], offset, pkt_len);
>  
>  	ifobj->xsk->pkt_stream = pkt_stream;
> -	pkt_stream->nb_valid_entries /= 2;
>  }
>  
>  static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
> @@ -715,9 +735,10 @@ static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int off
>  static void pkt_stream_receive_half(struct test_spec *test)
>  {
>  	struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream;
> +	struct xsk_umem_info *umem = test->ifobj_rx->umem;
>  	u32 i;
>  
> -	test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts,
> +	test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(umem, pkt_stream->nb_pkts,
>  							      pkt_stream->pkts[0].len);
>  	pkt_stream = test->ifobj_rx->xsk->pkt_stream;
>  	for (i = 1; i < pkt_stream->nb_pkts; i += 2)
> @@ -733,12 +754,12 @@ static void pkt_stream_even_odd_sequence(struct test_spec *test)
>  
>  	for (i = 0; i < test->nb_sockets; i++) {
>  		pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream;
> -		pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
> +		pkt_stream = __pkt_stream_generate(test->ifobj_tx->umem, pkt_stream->nb_pkts / 2,
>  						   pkt_stream->pkts[0].len, i, 2);
>  		test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream;
>  
>  		pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream;
> -		pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
> +		pkt_stream = __pkt_stream_generate(test->ifobj_rx->umem, pkt_stream->nb_pkts / 2,
>  						   pkt_stream->pkts[0].len, i, 2);
>  		test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream;
>  	}
> @@ -1961,7 +1982,8 @@ static int testapp_stats_tx_invalid_descs(struct test_spec *test)
>  static int testapp_stats_rx_full(struct test_spec *test)
>  {
>  	pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
> -	test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
> +	test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
> +							      DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
>  
>  	test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
>  	test->ifobj_rx->release_rx = false;
> @@ -1972,7 +1994,8 @@ static int testapp_stats_rx_full(struct test_spec *test)
>  static int testapp_stats_fill_empty(struct test_spec *test)
>  {
>  	pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
> -	test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
> +	test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
> +							      DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
>  
>  	test->ifobj_rx->use_fill_ring = false;
>  	test->ifobj_rx->validation_func = validate_fill_empty;
> @@ -2526,8 +2549,8 @@ int main(int argc, char **argv)
>  	init_iface(ifobj_tx, worker_testapp_validate_tx);
>  
>  	test_spec_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
> -	tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
> -	rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
> +	tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
> +	rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
>  	if (!tx_pkt_stream_default || !rx_pkt_stream_default)
>  		exit_with_error(ENOMEM);
>  	test.tx_pkt_stream_default = tx_pkt_stream_default;
> -- 
> 2.34.1
> 

^ permalink raw reply

* Re: [PATCH net 2/3] dpll: fix pin dump crash for rebound module
From: Jiri Pirko @ 2023-11-08 14:30 UTC (permalink / raw)
  To: Arkadiusz Kubalewski
  Cc: netdev, vadim.fedorenko, michal.michalik, milena.olech, pabeni,
	kuba
In-Reply-To: <20231108103226.1168500-3-arkadiusz.kubalewski@intel.com>

Wed, Nov 08, 2023 at 11:32:25AM CET, arkadiusz.kubalewski@intel.com wrote:
>When a kernel module is unbound but the pin resources were not entirely
>freed (other kernel module instance have had kept the reference to that
>pin), and kernel module is again bound, the pin properties would not be
>updated (the properties are only assigned when memory for the pin is
>allocated), prop pointer still points to the kernel module memory of
>the kernel module which was deallocated on the unbind.
>
>If the pin dump is invoked in this state, the result is a kernel crash.
>Prevent the crash by storing persistent pin properties in dpll subsystem,
>copy the content from the kernel module when pin is allocated, instead of
>using memory of the kernel module.
>
>Fixes: 9431063ad323 ("dpll: core: Add DPLL framework base functions")
>Fixes: 9d71b54b65b1 ("dpll: netlink: Add DPLL framework base functions")
>Signed-off-by: Arkadiusz Kubalewski <arkadiusz.kubalewski@intel.com>
>---
> drivers/dpll/dpll_core.c    |  4 ++--
> drivers/dpll/dpll_core.h    |  4 ++--
> drivers/dpll/dpll_netlink.c | 28 ++++++++++++++--------------
> 3 files changed, 18 insertions(+), 18 deletions(-)
>
>diff --git a/drivers/dpll/dpll_core.c b/drivers/dpll/dpll_core.c
>index 3568149b9562..4077b562ba3b 100644
>--- a/drivers/dpll/dpll_core.c
>+++ b/drivers/dpll/dpll_core.c
>@@ -442,7 +442,7 @@ dpll_pin_alloc(u64 clock_id, u32 pin_idx, struct module *module,
> 		ret = -EINVAL;
> 		goto err;
> 	}
>-	pin->prop = prop;
>+	memcpy(&pin->prop, prop, sizeof(pin->prop));

Odd, you don't care about the pointer within this structure?


> 	refcount_set(&pin->refcount, 1);
> 	xa_init_flags(&pin->dpll_refs, XA_FLAGS_ALLOC);
> 	xa_init_flags(&pin->parent_refs, XA_FLAGS_ALLOC);
>@@ -634,7 +634,7 @@ int dpll_pin_on_pin_register(struct dpll_pin *parent, struct dpll_pin *pin,
> 	unsigned long i, stop;
> 	int ret;
> 
>-	if (WARN_ON(parent->prop->type != DPLL_PIN_TYPE_MUX))
>+	if (WARN_ON(parent->prop.type != DPLL_PIN_TYPE_MUX))
> 		return -EINVAL;
> 
> 	if (WARN_ON(!ops) ||
>diff --git a/drivers/dpll/dpll_core.h b/drivers/dpll/dpll_core.h
>index 5585873c5c1b..717f715015c7 100644
>--- a/drivers/dpll/dpll_core.h
>+++ b/drivers/dpll/dpll_core.h
>@@ -44,7 +44,7 @@ struct dpll_device {
>  * @module:		module of creator
>  * @dpll_refs:		hold referencees to dplls pin was registered with
>  * @parent_refs:	hold references to parent pins pin was registered with
>- * @prop:		pointer to pin properties given by registerer
>+ * @prop:		pin properties copied from the registerer
>  * @rclk_dev_name:	holds name of device when pin can recover clock from it
>  * @refcount:		refcount
>  **/
>@@ -55,7 +55,7 @@ struct dpll_pin {
> 	struct module *module;
> 	struct xarray dpll_refs;
> 	struct xarray parent_refs;
>-	const struct dpll_pin_properties *prop;
>+	struct dpll_pin_properties prop;
> 	refcount_t refcount;
> };
> 
>diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c
>index 93fc6c4b8a78..963bbbbe6660 100644
>--- a/drivers/dpll/dpll_netlink.c
>+++ b/drivers/dpll/dpll_netlink.c
>@@ -278,17 +278,17 @@ dpll_msg_add_pin_freq(struct sk_buff *msg, struct dpll_pin *pin,
> 	if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY, sizeof(freq), &freq,
> 			  DPLL_A_PIN_PAD))
> 		return -EMSGSIZE;
>-	for (fs = 0; fs < pin->prop->freq_supported_num; fs++) {
>+	for (fs = 0; fs < pin->prop.freq_supported_num; fs++) {
> 		nest = nla_nest_start(msg, DPLL_A_PIN_FREQUENCY_SUPPORTED);
> 		if (!nest)
> 			return -EMSGSIZE;
>-		freq = pin->prop->freq_supported[fs].min;
>+		freq = pin->prop.freq_supported[fs].min;
> 		if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MIN, sizeof(freq),
> 				  &freq, DPLL_A_PIN_PAD)) {
> 			nla_nest_cancel(msg, nest);
> 			return -EMSGSIZE;
> 		}
>-		freq = pin->prop->freq_supported[fs].max;
>+		freq = pin->prop.freq_supported[fs].max;
> 		if (nla_put_64bit(msg, DPLL_A_PIN_FREQUENCY_MAX, sizeof(freq),
> 				  &freq, DPLL_A_PIN_PAD)) {
> 			nla_nest_cancel(msg, nest);
>@@ -304,9 +304,9 @@ static bool dpll_pin_is_freq_supported(struct dpll_pin *pin, u32 freq)
> {
> 	int fs;
> 
>-	for (fs = 0; fs < pin->prop->freq_supported_num; fs++)
>-		if (freq >= pin->prop->freq_supported[fs].min &&
>-		    freq <= pin->prop->freq_supported[fs].max)
>+	for (fs = 0; fs < pin->prop.freq_supported_num; fs++)
>+		if (freq >= pin->prop.freq_supported[fs].min &&
>+		    freq <= pin->prop.freq_supported[fs].max)
> 			return true;
> 	return false;
> }
>@@ -403,7 +403,7 @@ static int
> dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin,
> 		     struct netlink_ext_ack *extack)
> {
>-	const struct dpll_pin_properties *prop = pin->prop;
>+	const struct dpll_pin_properties *prop = &pin->prop;
> 	struct dpll_pin_ref *ref;
> 	int ret;
> 
>@@ -696,7 +696,7 @@ dpll_pin_on_pin_state_set(struct dpll_pin *pin, u32 parent_idx,
> 	int ret;
> 
> 	if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE &
>-	      pin->prop->capabilities)) {
>+	      pin->prop.capabilities)) {
> 		NL_SET_ERR_MSG(extack, "state changing is not allowed");
> 		return -EOPNOTSUPP;
> 	}
>@@ -732,7 +732,7 @@ dpll_pin_state_set(struct dpll_device *dpll, struct dpll_pin *pin,
> 	int ret;
> 
> 	if (!(DPLL_PIN_CAPABILITIES_STATE_CAN_CHANGE &
>-	      pin->prop->capabilities)) {
>+	      pin->prop.capabilities)) {
> 		NL_SET_ERR_MSG(extack, "state changing is not allowed");
> 		return -EOPNOTSUPP;
> 	}
>@@ -759,7 +759,7 @@ dpll_pin_prio_set(struct dpll_device *dpll, struct dpll_pin *pin,
> 	int ret;
> 
> 	if (!(DPLL_PIN_CAPABILITIES_PRIORITY_CAN_CHANGE &
>-	      pin->prop->capabilities)) {
>+	      pin->prop.capabilities)) {
> 		NL_SET_ERR_MSG(extack, "prio changing is not allowed");
> 		return -EOPNOTSUPP;
> 	}
>@@ -787,7 +787,7 @@ dpll_pin_direction_set(struct dpll_pin *pin, struct dpll_device *dpll,
> 	int ret;
> 
> 	if (!(DPLL_PIN_CAPABILITIES_DIRECTION_CAN_CHANGE &
>-	      pin->prop->capabilities)) {
>+	      pin->prop.capabilities)) {
> 		NL_SET_ERR_MSG(extack, "direction changing is not allowed");
> 		return -EOPNOTSUPP;
> 	}
>@@ -817,8 +817,8 @@ dpll_pin_phase_adj_set(struct dpll_pin *pin, struct nlattr *phase_adj_attr,
> 	int ret;
> 
> 	phase_adj = nla_get_s32(phase_adj_attr);
>-	if (phase_adj > pin->prop->phase_range.max ||
>-	    phase_adj < pin->prop->phase_range.min) {
>+	if (phase_adj > pin->prop.phase_range.max ||
>+	    phase_adj < pin->prop.phase_range.min) {
> 		NL_SET_ERR_MSG_ATTR(extack, phase_adj_attr,
> 				    "phase adjust value not supported");
> 		return -EINVAL;
>@@ -999,7 +999,7 @@ dpll_pin_find(u64 clock_id, struct nlattr *mod_name_attr,
> 	unsigned long i;
> 
> 	xa_for_each_marked(&dpll_pin_xa, i, pin, DPLL_REGISTERED) {
>-		prop = pin->prop;
>+		prop = &pin->prop;
> 		cid_match = clock_id ? pin->clock_id == clock_id : true;
> 		mod_match = mod_name_attr && module_name(pin->module) ?
> 			!nla_strcmp(mod_name_attr,
>-- 
>2.38.1
>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox