* [PATCH net-next v3 01/10] net: ip: refactor fib_validate_source/__fib_validate_source
2024-10-15 14:07 [PATCH net-next v3 00/10] net: ip: add drop reasons to input route Menglong Dong
@ 2024-10-15 14:07 ` Menglong Dong
2024-10-21 10:03 ` Paolo Abeni
2024-10-15 14:07 ` [PATCH net-next v3 02/10] net: ip: make fib_validate_source() return drop reason Menglong Dong
` (8 subsequent siblings)
9 siblings, 1 reply; 20+ messages in thread
From: Menglong Dong @ 2024-10-15 14:07 UTC (permalink / raw)
To: pabeni
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
The only caller of __fib_validate_source() is fib_validate_source(), so
we can combine fib_validate_source() into __fib_validate_source(), and
make fib_validate_source() an inline call to __fib_validate_source().
This will make it easier to make fib_validate_source() return drop reasons
in the next patch.
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
include/net/ip_fib.h | 15 ++++++++--
net/ipv4/fib_frontend.c | 64 +++++++++++++++++------------------------
2 files changed, 38 insertions(+), 41 deletions(-)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index b6e44f4eaa4c..90ff815f212b 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -448,9 +448,18 @@ int fib_gw_from_via(struct fib_config *cfg, struct nlattr *nla,
struct netlink_ext_ack *extack);
__be32 fib_compute_spec_dst(struct sk_buff *skb);
bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev);
-int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
- dscp_t dscp, int oif, struct net_device *dev,
- struct in_device *idev, u32 *itag);
+int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
+ dscp_t dscp, int oif, struct net_device *dev,
+ struct in_device *idev, u32 *itag);
+
+static inline int
+fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
+ dscp_t dscp, int oif, struct net_device *dev,
+ struct in_device *idev, u32 *itag)
+{
+ return __fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
+ itag);
+}
#ifdef CONFIG_IP_ROUTE_CLASSID
static inline int fib_num_tclassid_users(struct net *net)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 8353518b110a..f74138f4d748 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -341,10 +341,11 @@ EXPORT_SYMBOL_GPL(fib_info_nh_uses_dev);
* - check, that packet arrived from expected physical interface.
* called with rcu_read_lock()
*/
-static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
- dscp_t dscp, int oif, struct net_device *dev,
- int rpf, struct in_device *idev, u32 *itag)
+int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
+ dscp_t dscp, int oif, struct net_device *dev,
+ struct in_device *idev, u32 *itag)
{
+ int rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
struct net *net = dev_net(dev);
struct flow_keys flkeys;
int ret, no_addr;
@@ -352,6 +353,28 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
struct flowi4 fl4;
bool dev_match;
+ /* Ignore rp_filter for packets protected by IPsec. */
+ if (!rpf && !fib_num_tclassid_users(net) &&
+ (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
+ if (IN_DEV_ACCEPT_LOCAL(idev))
+ goto last_resort;
+ /* with custom local routes in place, checking local addresses
+ * only will be too optimistic, with custom rules, checking
+ * local addresses only can be too strict, e.g. due to vrf
+ */
+ if (net->ipv4.fib_has_custom_local_routes ||
+ fib4_has_custom_rules(net))
+ goto full_check;
+ /* Within the same container, it is regarded as a martian source,
+ * and the same host but different containers are not.
+ */
+ if (inet_lookup_ifaddr_rcu(net, src))
+ return -EINVAL;
+
+ goto last_resort;
+ }
+
+full_check:
fl4.flowi4_oif = 0;
fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev);
fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
@@ -417,41 +440,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
return -EXDEV;
}
-/* Ignore rp_filter for packets protected by IPsec. */
-int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
- dscp_t dscp, int oif, struct net_device *dev,
- struct in_device *idev, u32 *itag)
-{
- int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
- struct net *net = dev_net(dev);
-
- if (!r && !fib_num_tclassid_users(net) &&
- (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
- if (IN_DEV_ACCEPT_LOCAL(idev))
- goto ok;
- /* with custom local routes in place, checking local addresses
- * only will be too optimistic, with custom rules, checking
- * local addresses only can be too strict, e.g. due to vrf
- */
- if (net->ipv4.fib_has_custom_local_routes ||
- fib4_has_custom_rules(net))
- goto full_check;
- /* Within the same container, it is regarded as a martian source,
- * and the same host but different containers are not.
- */
- if (inet_lookup_ifaddr_rcu(net, src))
- return -EINVAL;
-
-ok:
- *itag = 0;
- return 0;
- }
-
-full_check:
- return __fib_validate_source(skb, src, dst, dscp, oif, dev, r, idev,
- itag);
-}
-
static inline __be32 sk_extract_addr(struct sockaddr *addr)
{
return ((struct sockaddr_in *) addr)->sin_addr.s_addr;
--
2.39.5
^ permalink raw reply related [flat|nested] 20+ messages in thread* Re: [PATCH net-next v3 01/10] net: ip: refactor fib_validate_source/__fib_validate_source
2024-10-15 14:07 ` [PATCH net-next v3 01/10] net: ip: refactor fib_validate_source/__fib_validate_source Menglong Dong
@ 2024-10-21 10:03 ` Paolo Abeni
0 siblings, 0 replies; 20+ messages in thread
From: Paolo Abeni @ 2024-10-21 10:03 UTC (permalink / raw)
To: Menglong Dong
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
On 10/15/24 16:07, Menglong Dong wrote:
> @@ -352,6 +353,28 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
> struct flowi4 fl4;
> bool dev_match;
>
> + /* Ignore rp_filter for packets protected by IPsec. */
> + if (!rpf && !fib_num_tclassid_users(net) &&
> + (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) {
> + if (IN_DEV_ACCEPT_LOCAL(idev))
> + goto last_resort;
IMHO the re-usage of the 'last_resort' macro makes the patch a little
hard to read, as this is an 'accept' condition. I think it would be
better to retain the original code. If you really want to avoid the
small duplication, you could instead introduce an 'ok' label towards the
end of this function:
last_resort:
if (rpf)
goto e_rpf;
ok:
*itag = 0;
return 0;
And jump there.
Thanks,
Paolo
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH net-next v3 02/10] net: ip: make fib_validate_source() return drop reason
2024-10-15 14:07 [PATCH net-next v3 00/10] net: ip: add drop reasons to input route Menglong Dong
2024-10-15 14:07 ` [PATCH net-next v3 01/10] net: ip: refactor fib_validate_source/__fib_validate_source Menglong Dong
@ 2024-10-15 14:07 ` Menglong Dong
2024-10-21 10:20 ` Paolo Abeni
2024-10-15 14:07 ` [PATCH net-next v3 03/10] net: ip: make ip_route_input_mc() " Menglong Dong
` (7 subsequent siblings)
9 siblings, 1 reply; 20+ messages in thread
From: Menglong Dong @ 2024-10-15 14:07 UTC (permalink / raw)
To: pabeni
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
In this commit, we make __fib_validate_source return -reason instead of
errno on error.
The return value of __fib_validate_source can be -errno, 0, and 1.
It's hard to make __fib_validate_source() return drop reasons directly.
The __fib_validate_source() will return 1 if the scope of the
source(revert) route is HOST. And the __mkroute_input() will mark the skb
with IPSKB_DOREDIRECT in this case (combine with some other conditions).
And then, a REDIRECT ICMP will be sent in ip_forward() if this flag
exists. We can't pass this information to __mkroute_input if we make
__fib_validate_source() return drop reasons.
However, we can make fib_validate_source() return drop reasons, and call
__fib_validate_source() directly in __mkroute_input().
In the origin logic, LINUX_MIB_IPRPFILTER will be counted if
__fib_validate_source() return -EXDEV. And now, we need to adjust it by
checking "reason == SKB_DROP_REASON_IP_RPFILTER". However, this will take
effect only after the patch "net: ip: make ip_route_input_noref() return
drop reasons", as we can't pass the drop reasons from
fib_validate_source() to ip_rcv_finish_core() in this patch.
Following new drop reasons are added in this patch:
SKB_DROP_REASON_IP_LOCAL_SOURCE
SKB_DROP_REASON_IP_INVALID_SOURCE
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
v2:
- make fib_validate_source() return drop reasons, instead of -reason.
---
include/net/dropreason-core.h | 10 ++++++++++
include/net/ip_fib.h | 9 ++++++---
net/ipv4/fib_frontend.c | 19 ++++++++++++------
net/ipv4/ip_input.c | 4 +---
net/ipv4/route.c | 37 ++++++++++++++++++++---------------
5 files changed, 51 insertions(+), 28 deletions(-)
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index d59bb96c5a02..62a60be1db84 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -76,6 +76,8 @@
FN(INVALID_PROTO) \
FN(IP_INADDRERRORS) \
FN(IP_INNOROUTES) \
+ FN(IP_LOCAL_SOURCE) \
+ FN(IP_INVALID_SOURCE) \
FN(PKT_TOO_BIG) \
FN(DUP_FRAG) \
FN(FRAG_REASM_TIMEOUT) \
@@ -373,6 +375,14 @@ enum skb_drop_reason {
* IPSTATS_MIB_INADDRERRORS
*/
SKB_DROP_REASON_IP_INNOROUTES,
+ /** @SKB_DROP_REASON_IP_LOCAL_SOURCE: the source ip is local */
+ SKB_DROP_REASON_IP_LOCAL_SOURCE,
+ /**
+ * @SKB_DROP_REASON_IP_INVALID_SOURCE: the source ip is invalid:
+ * 1) source ip is multicast or limited broadcast
+ * 2) source ip is zero and not IGMP
+ */
+ SKB_DROP_REASON_IP_INVALID_SOURCE,
/**
* @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the
* MTU)
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 90ff815f212b..b3f7a1562140 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -452,13 +452,16 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
dscp_t dscp, int oif, struct net_device *dev,
struct in_device *idev, u32 *itag);
-static inline int
+static inline enum skb_drop_reason
fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
dscp_t dscp, int oif, struct net_device *dev,
struct in_device *idev, u32 *itag)
{
- return __fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
- itag);
+ int err = __fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
+ itag);
+ if (err < 0)
+ return -err;
+ return SKB_NOT_DROPPED_YET;
}
#ifdef CONFIG_IP_ROUTE_CLASSID
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f74138f4d748..71fa9cee9149 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -347,6 +347,7 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
{
int rpf = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev);
struct net *net = dev_net(dev);
+ enum skb_drop_reason reason;
struct flow_keys flkeys;
int ret, no_addr;
struct fib_result res;
@@ -369,7 +370,7 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
* and the same host but different containers are not.
*/
if (inet_lookup_ifaddr_rcu(net, src))
- return -EINVAL;
+ return -SKB_DROP_REASON_IP_LOCAL_SOURCE;
goto last_resort;
}
@@ -400,9 +401,15 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (fib_lookup(net, &fl4, &res, 0))
goto last_resort;
- if (res.type != RTN_UNICAST &&
- (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
- goto e_inval;
+ if (res.type != RTN_UNICAST) {
+ if (res.type != RTN_LOCAL) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
+ goto e_inval;
+ } else if (!IN_DEV_ACCEPT_LOCAL(idev)) {
+ reason = SKB_DROP_REASON_IP_LOCAL_SOURCE;
+ goto e_inval;
+ }
+ }
fib_combine_itag(itag, &res);
dev_match = fib_info_nh_uses_dev(res.fi, dev);
@@ -435,9 +442,9 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
return 0;
e_inval:
- return -EINVAL;
+ return -reason;
e_rpf:
- return -EXDEV;
+ return -SKB_DROP_REASON_IP_RPFILTER;
}
static inline __be32 sk_extract_addr(struct sockaddr *addr)
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 89bb63da6852..c40a26972884 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -425,10 +425,8 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
return NET_RX_DROP;
drop_error:
- if (err == -EXDEV) {
- drop_reason = SKB_DROP_REASON_IP_RPFILTER;
+ if (drop_reason == SKB_DROP_REASON_IP_RPFILTER)
__NET_INC_STATS(net, LINUX_MIB_IPRPFILTER);
- }
goto drop;
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a0b091a7df87..df5401efbf56 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1669,7 +1669,7 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
dscp_t dscp, struct net_device *dev,
struct in_device *in_dev, u32 *itag)
{
- int err;
+ enum skb_drop_reason reason;
/* Primary sanity checks. */
if (!in_dev)
@@ -1687,10 +1687,10 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
ip_hdr(skb)->protocol != IPPROTO_IGMP)
return -EINVAL;
} else {
- err = fib_validate_source(skb, saddr, 0, dscp, 0, dev, in_dev,
- itag);
- if (err < 0)
- return err;
+ reason = fib_validate_source(skb, saddr, 0, dscp, 0, dev,
+ in_dev, itag);
+ if (reason)
+ return -EINVAL;
}
return 0;
}
@@ -1785,9 +1785,10 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,
return -EINVAL;
}
- err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
- in_dev->dev, in_dev, &itag);
+ err = __fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
+ in_dev->dev, in_dev, &itag);
if (err < 0) {
+ err = -EINVAL;
ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
saddr);
@@ -2140,6 +2141,7 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct rtable *rt = skb_rtable(hint);
struct net *net = dev_net(dev);
+ enum skb_drop_reason reason;
int err = -EINVAL;
u32 tag = 0;
@@ -2158,9 +2160,9 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (rt->rt_type != RTN_LOCAL)
goto skip_validate_source;
- err = fib_validate_source(skb, saddr, daddr, dscp, 0, dev, in_dev,
- &tag);
- if (err < 0)
+ reason = fib_validate_source(skb, saddr, daddr, dscp, 0, dev, in_dev,
+ &tag);
+ if (reason)
goto martian_source;
skip_validate_source:
@@ -2202,6 +2204,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
dscp_t dscp, struct net_device *dev,
struct fib_result *res)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct flow_keys *flkeys = NULL, _flkeys;
struct net *net = dev_net(dev);
@@ -2296,10 +2299,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto brd_input;
}
+ err = -EINVAL;
if (res->type == RTN_LOCAL) {
- err = fib_validate_source(skb, saddr, daddr, dscp, 0, dev,
- in_dev, &itag);
- if (err < 0)
+ reason = fib_validate_source(skb, saddr, daddr, dscp, 0, dev,
+ in_dev, &itag);
+ if (reason)
goto martian_source;
goto local_input;
}
@@ -2320,9 +2324,10 @@ out: return err;
goto e_inval;
if (!ipv4_is_zeronet(saddr)) {
- err = fib_validate_source(skb, saddr, 0, dscp, 0, dev, in_dev,
- &itag);
- if (err < 0)
+ err = -EINVAL;
+ reason = fib_validate_source(skb, saddr, 0, dscp, 0, dev,
+ in_dev, &itag);
+ if (reason)
goto martian_source;
}
flags |= RTCF_BROADCAST;
--
2.39.5
^ permalink raw reply related [flat|nested] 20+ messages in thread* Re: [PATCH net-next v3 02/10] net: ip: make fib_validate_source() return drop reason
2024-10-15 14:07 ` [PATCH net-next v3 02/10] net: ip: make fib_validate_source() return drop reason Menglong Dong
@ 2024-10-21 10:20 ` Paolo Abeni
2024-10-21 10:36 ` Paolo Abeni
2024-10-22 8:47 ` Menglong Dong
0 siblings, 2 replies; 20+ messages in thread
From: Paolo Abeni @ 2024-10-21 10:20 UTC (permalink / raw)
To: Menglong Dong
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
On 10/15/24 16:07, Menglong Dong wrote:
> diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
> index 90ff815f212b..b3f7a1562140 100644
> --- a/include/net/ip_fib.h
> +++ b/include/net/ip_fib.h
> @@ -452,13 +452,16 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
> dscp_t dscp, int oif, struct net_device *dev,
> struct in_device *idev, u32 *itag);
>
> -static inline int
> +static inline enum skb_drop_reason
> fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
> dscp_t dscp, int oif, struct net_device *dev,
> struct in_device *idev, u32 *itag)
> {
> - return __fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
> - itag);
> + int err = __fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
> + itag);
> + if (err < 0)
> + return -err;
> + return SKB_NOT_DROPPED_YET;
> }
It looks like the code churn in patch 1 is not needed??? You could just
define here a fib_validate_source_reason() helper doing the above, and
replace fib_validate_source with the the new helper as needed. Would
that work?
> @@ -1785,9 +1785,10 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,
> return -EINVAL;
> }
>
> - err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
> - in_dev->dev, in_dev, &itag);
> + err = __fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
> + in_dev->dev, in_dev, &itag);
> if (err < 0) {
> + err = -EINVAL;
> ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
> saddr);
I'm sorry for not noticing this issue before, but must preserve (at
least) the -EXDEV error code from the unpatched version or RP Filter MIB
accounting in ip_rcv_finish_core() will be fooled.
Thanks,
Paolo
^ permalink raw reply [flat|nested] 20+ messages in thread* Re: [PATCH net-next v3 02/10] net: ip: make fib_validate_source() return drop reason
2024-10-21 10:20 ` Paolo Abeni
@ 2024-10-21 10:36 ` Paolo Abeni
2024-10-22 8:47 ` Menglong Dong
1 sibling, 0 replies; 20+ messages in thread
From: Paolo Abeni @ 2024-10-21 10:36 UTC (permalink / raw)
To: Menglong Dong
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
On 10/21/24 12:20, Paolo Abeni wrote:
> On 10/15/24 16:07, Menglong Dong wrote:
>> @@ -1785,9 +1785,10 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,
>> return -EINVAL;
>> }
>>
>> - err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
>> - in_dev->dev, in_dev, &itag);
>> + err = __fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
>> + in_dev->dev, in_dev, &itag);
>> if (err < 0) {
>> + err = -EINVAL;
>> ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
>> saddr);
>
> I'm sorry for not noticing this issue before, but must preserve (at
> least) the -EXDEV error code from the unpatched version or RP Filter MIB
> accounting in ip_rcv_finish_core() will be fooled.
Please, ignore this comment. ENOCOFFEE here, sorry.
/P
^ permalink raw reply [flat|nested] 20+ messages in thread* Re: [PATCH net-next v3 02/10] net: ip: make fib_validate_source() return drop reason
2024-10-21 10:20 ` Paolo Abeni
2024-10-21 10:36 ` Paolo Abeni
@ 2024-10-22 8:47 ` Menglong Dong
1 sibling, 0 replies; 20+ messages in thread
From: Menglong Dong @ 2024-10-22 8:47 UTC (permalink / raw)
To: Paolo Abeni
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
On Mon, Oct 21, 2024 at 6:20 PM Paolo Abeni <pabeni@redhat.com> wrote:
>
> On 10/15/24 16:07, Menglong Dong wrote:
> > diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
> > index 90ff815f212b..b3f7a1562140 100644
> > --- a/include/net/ip_fib.h
> > +++ b/include/net/ip_fib.h
> > @@ -452,13 +452,16 @@ int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
> > dscp_t dscp, int oif, struct net_device *dev,
> > struct in_device *idev, u32 *itag);
> >
> > -static inline int
> > +static inline enum skb_drop_reason
> > fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
> > dscp_t dscp, int oif, struct net_device *dev,
> > struct in_device *idev, u32 *itag)
> > {
> > - return __fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
> > - itag);
> > + int err = __fib_validate_source(skb, src, dst, dscp, oif, dev, idev,
> > + itag);
> > + if (err < 0)
> > + return -err;
> > + return SKB_NOT_DROPPED_YET;
> > }
>
> It looks like the code churn in patch 1 is not needed??? You could just
> define here a fib_validate_source_reason() helper doing the above, and
> replace fib_validate_source with the the new helper as needed. Would
> that work?
>
Of course, that works fine. I'm just trying to find a graceful way
for this part. Defining a fib_validate_source_reason() here looks
nice too, and we can ignore the 1st patch. I'll do it this way in
the next version.
Thanks!
Menglong Dong
> > @@ -1785,9 +1785,10 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,
> > return -EINVAL;
> > }
> >
> > - err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
> > - in_dev->dev, in_dev, &itag);
> > + err = __fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
> > + in_dev->dev, in_dev, &itag);
> > if (err < 0) {
> > + err = -EINVAL;
> > ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
> > saddr);
>
> I'm sorry for not noticing this issue before, but must preserve (at
> least) the -EXDEV error code from the unpatched version or RP Filter MIB
> accounting in ip_rcv_finish_core() will be fooled.
>
> Thanks,
>
> Paolo
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH net-next v3 03/10] net: ip: make ip_route_input_mc() return drop reason
2024-10-15 14:07 [PATCH net-next v3 00/10] net: ip: add drop reasons to input route Menglong Dong
2024-10-15 14:07 ` [PATCH net-next v3 01/10] net: ip: refactor fib_validate_source/__fib_validate_source Menglong Dong
2024-10-15 14:07 ` [PATCH net-next v3 02/10] net: ip: make fib_validate_source() return drop reason Menglong Dong
@ 2024-10-15 14:07 ` Menglong Dong
2024-10-15 14:07 ` [PATCH net-next v3 04/10] net: ip: make ip_mc_validate_source() " Menglong Dong
` (6 subsequent siblings)
9 siblings, 0 replies; 20+ messages in thread
From: Menglong Dong @ 2024-10-15 14:07 UTC (permalink / raw)
To: pabeni
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
Make ip_route_input_mc() return drop reason, and adjust the call of it
in ip_route_input_rcu().
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
net/ipv4/route.c | 21 +++++++++++----------
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index df5401efbf56..7f989e8eff30 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1696,8 +1696,9 @@ int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
}
/* called in rcu_read_lock() section */
-static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- dscp_t dscp, struct net_device *dev, int our)
+static enum skb_drop_reason
+ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev, int our)
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
unsigned int flags = RTCF_MULTICAST;
@@ -1708,7 +1709,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
err = ip_mc_validate_source(skb, daddr, saddr, dscp, dev, in_dev,
&itag);
if (err)
- return err;
+ return SKB_DROP_REASON_NOT_SPECIFIED;
if (our)
flags |= RTCF_LOCAL;
@@ -1719,7 +1720,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
false);
if (!rth)
- return -ENOBUFS;
+ return SKB_DROP_REASON_NOMEM;
#ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag;
@@ -1735,7 +1736,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
skb_dst_drop(skb);
skb_dst_set(skb, &rth->dst);
- return 0;
+ return SKB_NOT_DROPPED_YET;
}
@@ -2433,12 +2434,12 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
* route cache entry is created eventually.
*/
if (ipv4_is_multicast(daddr)) {
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
int our = 0;
- int err = -EINVAL;
if (!in_dev)
- return err;
+ return -EINVAL;
our = ip_check_mc_rcu(in_dev, daddr, saddr,
ip_hdr(skb)->protocol);
@@ -2459,10 +2460,10 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
IN_DEV_MFORWARD(in_dev))
#endif
) {
- err = ip_route_input_mc(skb, daddr, saddr, dscp, dev,
- our);
+ reason = ip_route_input_mc(skb, daddr, saddr, dscp,
+ dev, our);
}
- return err;
+ return reason ? -EINVAL : 0;
}
return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res);
--
2.39.5
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH net-next v3 04/10] net: ip: make ip_mc_validate_source() return drop reason
2024-10-15 14:07 [PATCH net-next v3 00/10] net: ip: add drop reasons to input route Menglong Dong
` (2 preceding siblings ...)
2024-10-15 14:07 ` [PATCH net-next v3 03/10] net: ip: make ip_route_input_mc() " Menglong Dong
@ 2024-10-15 14:07 ` Menglong Dong
2024-10-15 14:07 ` [PATCH net-next v3 05/10] net: ip: make ip_route_input_slow() return drop reasons Menglong Dong
` (5 subsequent siblings)
9 siblings, 0 replies; 20+ messages in thread
From: Menglong Dong @ 2024-10-15 14:07 UTC (permalink / raw)
To: pabeni
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
Make ip_mc_validate_source() return drop reason, and adjust the call of
it in ip_route_input_mc().
Another caller of it is ip_rcv_finish_core->udp_v4_early_demux, and the
errno is not checked in detail, so we don't do more adjustment for it.
The drop reason "SKB_DROP_REASON_IP_LOCALNET" is added in this commit.
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
include/net/dropreason-core.h | 3 +++
include/net/route.h | 7 ++++---
net/ipv4/route.c | 35 +++++++++++++++++++----------------
3 files changed, 26 insertions(+), 19 deletions(-)
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 62a60be1db84..a2a1fb90e0e5 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -78,6 +78,7 @@
FN(IP_INNOROUTES) \
FN(IP_LOCAL_SOURCE) \
FN(IP_INVALID_SOURCE) \
+ FN(IP_LOCALNET) \
FN(PKT_TOO_BIG) \
FN(DUP_FRAG) \
FN(FRAG_REASM_TIMEOUT) \
@@ -383,6 +384,8 @@ enum skb_drop_reason {
* 2) source ip is zero and not IGMP
*/
SKB_DROP_REASON_IP_INVALID_SOURCE,
+ /** @SKB_DROP_REASON_IP_LOCALNET: source or dest ip is local net */
+ SKB_DROP_REASON_IP_LOCALNET,
/**
* @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the
* MTU)
diff --git a/include/net/route.h b/include/net/route.h
index 586e59f7ed8a..a828a17a6313 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -199,9 +199,10 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4
return ip_route_output_key(net, fl4);
}
-int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- dscp_t dscp, struct net_device *dev,
- struct in_device *in_dev, u32 *itag);
+enum skb_drop_reason
+ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct in_device *in_dev, u32 *itag);
int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
dscp_t dscp, struct net_device *dev);
int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 7f989e8eff30..917f05a0a5ce 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1665,34 +1665,37 @@ struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
EXPORT_SYMBOL(rt_dst_clone);
/* called in rcu_read_lock() section */
-int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- dscp_t dscp, struct net_device *dev,
- struct in_device *in_dev, u32 *itag)
+enum skb_drop_reason
+ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct in_device *in_dev, u32 *itag)
{
enum skb_drop_reason reason;
/* Primary sanity checks. */
if (!in_dev)
- return -EINVAL;
+ return SKB_DROP_REASON_NOT_SPECIFIED;
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
- skb->protocol != htons(ETH_P_IP))
- return -EINVAL;
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ return SKB_DROP_REASON_IP_INVALID_SOURCE;
+
+ if (skb->protocol != htons(ETH_P_IP))
+ return SKB_DROP_REASON_INVALID_PROTO;
if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
- return -EINVAL;
+ return SKB_DROP_REASON_IP_LOCALNET;
if (ipv4_is_zeronet(saddr)) {
if (!ipv4_is_local_multicast(daddr) &&
ip_hdr(skb)->protocol != IPPROTO_IGMP)
- return -EINVAL;
+ return SKB_DROP_REASON_IP_INVALID_SOURCE;
} else {
reason = fib_validate_source(skb, saddr, 0, dscp, 0, dev,
in_dev, itag);
if (reason)
- return -EINVAL;
+ return reason;
}
- return 0;
+ return SKB_NOT_DROPPED_YET;
}
/* called in rcu_read_lock() section */
@@ -1702,14 +1705,14 @@ ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
{
struct in_device *in_dev = __in_dev_get_rcu(dev);
unsigned int flags = RTCF_MULTICAST;
+ enum skb_drop_reason reason;
struct rtable *rth;
u32 itag = 0;
- int err;
- err = ip_mc_validate_source(skb, daddr, saddr, dscp, dev, in_dev,
- &itag);
- if (err)
- return SKB_DROP_REASON_NOT_SPECIFIED;
+ reason = ip_mc_validate_source(skb, daddr, saddr, dscp, dev, in_dev,
+ &itag);
+ if (reason)
+ return reason;
if (our)
flags |= RTCF_LOCAL;
--
2.39.5
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH net-next v3 05/10] net: ip: make ip_route_input_slow() return drop reasons
2024-10-15 14:07 [PATCH net-next v3 00/10] net: ip: add drop reasons to input route Menglong Dong
` (3 preceding siblings ...)
2024-10-15 14:07 ` [PATCH net-next v3 04/10] net: ip: make ip_mc_validate_source() " Menglong Dong
@ 2024-10-15 14:07 ` Menglong Dong
2024-10-21 10:52 ` Paolo Abeni
2024-10-15 14:07 ` [PATCH net-next v3 06/10] net: ip: make ip_route_input_rcu() " Menglong Dong
` (4 subsequent siblings)
9 siblings, 1 reply; 20+ messages in thread
From: Menglong Dong @ 2024-10-15 14:07 UTC (permalink / raw)
To: pabeni
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
In this commit, we make ip_route_input_slow() return skb drop reasons,
and following new skb drop reasons are added:
SKB_DROP_REASON_IP_INVALID_DEST
The only caller of ip_route_input_slow() is ip_route_input_rcu(), and we
adjust it by making it return -EINVAL on error.
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
include/net/dropreason-core.h | 6 ++++
net/ipv4/route.c | 55 ++++++++++++++++++++++-------------
2 files changed, 40 insertions(+), 21 deletions(-)
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index a2a1fb90e0e5..74624d369d48 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -79,6 +79,7 @@
FN(IP_LOCAL_SOURCE) \
FN(IP_INVALID_SOURCE) \
FN(IP_LOCALNET) \
+ FN(IP_INVALID_DEST) \
FN(PKT_TOO_BIG) \
FN(DUP_FRAG) \
FN(FRAG_REASM_TIMEOUT) \
@@ -386,6 +387,11 @@ enum skb_drop_reason {
SKB_DROP_REASON_IP_INVALID_SOURCE,
/** @SKB_DROP_REASON_IP_LOCALNET: source or dest ip is local net */
SKB_DROP_REASON_IP_LOCALNET,
+ /**
+ * @SKB_DROP_REASON_IP_INVALID_DEST: the dest ip is invalid:
+ * 1) dest ip is 0
+ */
+ SKB_DROP_REASON_IP_INVALID_DEST,
/**
* @SKB_DROP_REASON_PKT_TOO_BIG: packet size is too big (maybe exceed the
* MTU)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 917f05a0a5ce..33bf83bcccdb 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2204,9 +2204,10 @@ static struct net_device *ip_rt_get_dev(struct net *net,
* called with rcu_read_lock()
*/
-static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- dscp_t dscp, struct net_device *dev,
- struct fib_result *res)
+static enum skb_drop_reason
+ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct fib_result *res)
{
enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -2236,8 +2237,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.flowi4_tun_key.tun_id = 0;
skb_dst_drop(skb);
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
res->fi = NULL;
res->table = NULL;
@@ -2247,21 +2250,29 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
/* Accept zero addresses only to limited broadcast;
* I even do not know to fix it or not. Waiting for complains :-)
*/
- if (ipv4_is_zeronet(saddr))
+ if (ipv4_is_zeronet(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
- if (ipv4_is_zeronet(daddr))
+ if (ipv4_is_zeronet(daddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_DEST;
goto martian_destination;
+ }
/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
* and call it once if daddr or/and saddr are loopback addresses
*/
if (ipv4_is_loopback(daddr)) {
- if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) {
+ reason = SKB_DROP_REASON_IP_LOCALNET;
goto martian_destination;
+ }
} else if (ipv4_is_loopback(saddr)) {
- if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) {
+ reason = SKB_DROP_REASON_IP_LOCALNET;
goto martian_source;
+ }
}
/*
@@ -2316,19 +2327,25 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
err = -EHOSTUNREACH;
goto no_route;
}
- if (res->type != RTN_UNICAST)
+ if (res->type != RTN_UNICAST) {
+ reason = SKB_DROP_REASON_IP_INVALID_DEST;
goto martian_destination;
+ }
make_route:
err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, dscp, flkeys);
-out: return err;
+ if (!err)
+ reason = SKB_NOT_DROPPED_YET;
+
+out: return reason;
brd_input:
- if (skb->protocol != htons(ETH_P_IP))
- goto e_inval;
+ if (skb->protocol != htons(ETH_P_IP)) {
+ reason = SKB_DROP_REASON_INVALID_PROTO;
+ goto out;
+ }
if (!ipv4_is_zeronet(saddr)) {
- err = -EINVAL;
reason = fib_validate_source(skb, saddr, 0, dscp, 0, dev,
in_dev, &itag);
if (reason)
@@ -2349,7 +2366,7 @@ out: return err;
rth = rcu_dereference(nhc->nhc_rth_input);
if (rt_cache_valid(rth)) {
skb_dst_set_noref(skb, &rth->dst);
- err = 0;
+ reason = SKB_NOT_DROPPED_YET;
goto out;
}
}
@@ -2386,7 +2403,7 @@ out: return err;
rt_add_uncached_list(rth);
}
skb_dst_set(skb, &rth->dst);
- err = 0;
+ reason = SKB_NOT_DROPPED_YET;
goto out;
no_route:
@@ -2407,12 +2424,8 @@ out: return err;
&daddr, &saddr, dev->name);
#endif
-e_inval:
- err = -EINVAL;
- goto out;
-
e_nobufs:
- err = -ENOBUFS;
+ reason = SKB_DROP_REASON_NOMEM;
goto out;
martian_source:
@@ -2469,7 +2482,7 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
return reason ? -EINVAL : 0;
}
- return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res);
+ return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res) ? -EINVAL : 0;
}
int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
--
2.39.5
^ permalink raw reply related [flat|nested] 20+ messages in thread* Re: [PATCH net-next v3 05/10] net: ip: make ip_route_input_slow() return drop reasons
2024-10-15 14:07 ` [PATCH net-next v3 05/10] net: ip: make ip_route_input_slow() return drop reasons Menglong Dong
@ 2024-10-21 10:52 ` Paolo Abeni
2024-10-22 8:50 ` Menglong Dong
0 siblings, 1 reply; 20+ messages in thread
From: Paolo Abeni @ 2024-10-21 10:52 UTC (permalink / raw)
To: Menglong Dong
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
On 10/15/24 16:07, Menglong Dong wrote:
> @@ -2316,19 +2327,25 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
> err = -EHOSTUNREACH;
> goto no_route;
> }
> - if (res->type != RTN_UNICAST)
> + if (res->type != RTN_UNICAST) {
> + reason = SKB_DROP_REASON_IP_INVALID_DEST;
> goto martian_destination;
> + }
>
> make_route:
> err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, dscp, flkeys);
> -out: return err;
> + if (!err)
> + reason = SKB_NOT_DROPPED_YET;
> +
> +out: return reason;
Since you are touching this line, please rewrite the code with a more
natural indentation:
out:
return reason;
Thanks,
Paolo
^ permalink raw reply [flat|nested] 20+ messages in thread* Re: [PATCH net-next v3 05/10] net: ip: make ip_route_input_slow() return drop reasons
2024-10-21 10:52 ` Paolo Abeni
@ 2024-10-22 8:50 ` Menglong Dong
0 siblings, 0 replies; 20+ messages in thread
From: Menglong Dong @ 2024-10-22 8:50 UTC (permalink / raw)
To: Paolo Abeni
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
On Mon, Oct 21, 2024 at 6:52 PM Paolo Abeni <pabeni@redhat.com> wrote:
>
> On 10/15/24 16:07, Menglong Dong wrote:
> > @@ -2316,19 +2327,25 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
> > err = -EHOSTUNREACH;
> > goto no_route;
> > }
> > - if (res->type != RTN_UNICAST)
> > + if (res->type != RTN_UNICAST) {
> > + reason = SKB_DROP_REASON_IP_INVALID_DEST;
> > goto martian_destination;
> > + }
> >
> > make_route:
> > err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, dscp, flkeys);
> > -out: return err;
> > + if (!err)
> > + reason = SKB_NOT_DROPPED_YET;
> > +
> > +out: return reason;
>
> Since you are touching this line, please rewrite the code with a more
> natural indentation:
>
> out:
> return reason;
>
Okay!
> Thanks,
>
> Paolo
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH net-next v3 06/10] net: ip: make ip_route_input_rcu() return drop reasons
2024-10-15 14:07 [PATCH net-next v3 00/10] net: ip: add drop reasons to input route Menglong Dong
` (4 preceding siblings ...)
2024-10-15 14:07 ` [PATCH net-next v3 05/10] net: ip: make ip_route_input_slow() return drop reasons Menglong Dong
@ 2024-10-15 14:07 ` Menglong Dong
2024-10-15 14:07 ` [PATCH net-next v3 07/10] net: ip: make ip_route_input_noref() " Menglong Dong
` (3 subsequent siblings)
9 siblings, 0 replies; 20+ messages in thread
From: Menglong Dong @ 2024-10-15 14:07 UTC (permalink / raw)
To: pabeni
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
In this commit, we make ip_route_input_rcu() return drop reasons, which
come from ip_route_input_mc() and ip_route_input_slow().
The only caller of ip_route_input_rcu() is ip_route_input_noref(). We
adjust it by making it return -EINVAL on error and ignore the reasons that
ip_route_input_rcu() returns. In the following patch, we will make
ip_route_input_noref() returns the drop reasons.
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
net/ipv4/route.c | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 33bf83bcccdb..8ac298d69c8c 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2434,9 +2434,10 @@ out: return reason;
}
/* called with rcu_read_lock held */
-static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- dscp_t dscp, struct net_device *dev,
- struct fib_result *res)
+static enum skb_drop_reason
+ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ struct fib_result *res)
{
/* Multicast recognition logic is moved from route cache to here.
* The problem was that too many Ethernet cards have broken/missing
@@ -2479,23 +2480,23 @@ static int ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
reason = ip_route_input_mc(skb, daddr, saddr, dscp,
dev, our);
}
- return reason ? -EINVAL : 0;
+ return reason;
}
- return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res) ? -EINVAL : 0;
+ return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res);
}
int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
dscp_t dscp, struct net_device *dev)
{
+ enum skb_drop_reason reason;
struct fib_result res;
- int err;
rcu_read_lock();
- err = ip_route_input_rcu(skb, daddr, saddr, dscp, dev, &res);
+ reason = ip_route_input_rcu(skb, daddr, saddr, dscp, dev, &res);
rcu_read_unlock();
- return err;
+ return reason ? -EINVAL : 0;
}
EXPORT_SYMBOL(ip_route_input_noref);
@@ -3308,6 +3309,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
err = ip_route_input_rcu(skb, dst, src,
inet_dsfield_to_dscp(rtm->rtm_tos),
dev, &res);
+ err = err ? -EINVAL : 0;
rt = skb_rtable(skb);
if (err == 0 && rt->dst.error)
--
2.39.5
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH net-next v3 07/10] net: ip: make ip_route_input_noref() return drop reasons
2024-10-15 14:07 [PATCH net-next v3 00/10] net: ip: add drop reasons to input route Menglong Dong
` (5 preceding siblings ...)
2024-10-15 14:07 ` [PATCH net-next v3 06/10] net: ip: make ip_route_input_rcu() " Menglong Dong
@ 2024-10-15 14:07 ` Menglong Dong
2024-10-21 10:44 ` Paolo Abeni
2024-10-15 14:07 ` [PATCH net-next v3 08/10] net: ip: make ip_route_input() " Menglong Dong
` (2 subsequent siblings)
9 siblings, 1 reply; 20+ messages in thread
From: Menglong Dong @ 2024-10-15 14:07 UTC (permalink / raw)
To: pabeni
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
In this commit, we make ip_route_input_noref() return drop reasons, which
come from ip_route_input_rcu().
We need adjust the callers of ip_route_input_noref() to make sure the
return value of ip_route_input_noref() is used properly.
The errno that ip_route_input_noref() returns comes from ip_route_input
and bpf_lwt_input_reroute in the origin logic, and we make them return
-EINVAL on error instead. In the following patch, we will make
ip_route_input() returns drop reasons too.
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
include/net/route.h | 15 ++++++++-------
net/core/lwt_bpf.c | 1 +
net/ipv4/ip_fragment.c | 12 +++++++-----
net/ipv4/ip_input.c | 7 ++++---
net/ipv4/route.c | 7 ++++---
5 files changed, 24 insertions(+), 18 deletions(-)
diff --git a/include/net/route.h b/include/net/route.h
index a828a17a6313..11674f7c6be6 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -203,8 +203,9 @@ enum skb_drop_reason
ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
dscp_t dscp, struct net_device *dev,
struct in_device *in_dev, u32 *itag);
-int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- dscp_t dscp, struct net_device *dev);
+enum skb_drop_reason
+ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev);
int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
dscp_t dscp, struct net_device *dev,
const struct sk_buff *hint);
@@ -212,18 +213,18 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
dscp_t dscp, struct net_device *devin)
{
- int err;
+ enum skb_drop_reason reason;
rcu_read_lock();
- err = ip_route_input_noref(skb, dst, src, dscp, devin);
- if (!err) {
+ reason = ip_route_input_noref(skb, dst, src, dscp, devin);
+ if (!reason) {
skb_dst_force(skb);
if (!skb_dst(skb))
- err = -EINVAL;
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
}
rcu_read_unlock();
- return err;
+ return reason ? -EINVAL : 0;
}
void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif,
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index e0ca24a58810..a4652f2a103a 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -98,6 +98,7 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb)
skb_dst_drop(skb);
err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
ip4h_dscp(iph), dev);
+ err = err ? -EINVAL : 0;
dev_put(dev);
} else if (skb->protocol == htons(ETH_P_IPV6)) {
skb_dst_drop(skb);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 48e2810f1f27..52b991e976ba 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -132,12 +132,12 @@ static bool frag_expire_skip_icmp(u32 user)
*/
static void ip_expire(struct timer_list *t)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT;
struct inet_frag_queue *frag = from_timer(frag, t, timer);
const struct iphdr *iph;
struct sk_buff *head = NULL;
struct net *net;
struct ipq *qp;
- int err;
qp = container_of(frag, struct ipq, q);
net = qp->q.fqdir->net;
@@ -175,10 +175,12 @@ static void ip_expire(struct timer_list *t)
/* skb has no dst, perform route lookup again */
iph = ip_hdr(head);
- err = ip_route_input_noref(head, iph->daddr, iph->saddr, ip4h_dscp(iph),
- head->dev);
- if (err)
+ reason = ip_route_input_noref(head, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), head->dev);
+ if (reason)
goto out;
+ else
+ reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT;
/* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792.
@@ -195,7 +197,7 @@ static void ip_expire(struct timer_list *t)
spin_unlock(&qp->q.lock);
out_rcu_unlock:
rcu_read_unlock();
- kfree_skb_reason(head, SKB_DROP_REASON_FRAG_REASM_TIMEOUT);
+ kfree_skb_reason(head, reason);
ipq_put(qp);
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index c40a26972884..513eb0c6435a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -362,10 +362,11 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
* how the packet travels inside Linux networking.
*/
if (!skb_valid_dst(skb)) {
- err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
- ip4h_dscp(iph), dev);
- if (unlikely(err))
+ drop_reason = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), dev);
+ if (unlikely(drop_reason))
goto drop_error;
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
} else {
struct in_device *in_dev = __in_dev_get_rcu(dev);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8ac298d69c8c..86a964734b1d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2486,8 +2486,9 @@ ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res);
}
-int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- dscp_t dscp, struct net_device *dev)
+enum skb_drop_reason ip_route_input_noref(struct sk_buff *skb, __be32 daddr,
+ __be32 saddr, dscp_t dscp,
+ struct net_device *dev)
{
enum skb_drop_reason reason;
struct fib_result res;
@@ -2496,7 +2497,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
reason = ip_route_input_rcu(skb, daddr, saddr, dscp, dev, &res);
rcu_read_unlock();
- return reason ? -EINVAL : 0;
+ return reason;
}
EXPORT_SYMBOL(ip_route_input_noref);
--
2.39.5
^ permalink raw reply related [flat|nested] 20+ messages in thread* Re: [PATCH net-next v3 07/10] net: ip: make ip_route_input_noref() return drop reasons
2024-10-15 14:07 ` [PATCH net-next v3 07/10] net: ip: make ip_route_input_noref() " Menglong Dong
@ 2024-10-21 10:44 ` Paolo Abeni
2024-10-21 10:49 ` Paolo Abeni
0 siblings, 1 reply; 20+ messages in thread
From: Paolo Abeni @ 2024-10-21 10:44 UTC (permalink / raw)
To: Menglong Dong
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
On 10/15/24 16:07, Menglong Dong wrote:
> diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
> index e0ca24a58810..a4652f2a103a 100644
> --- a/net/core/lwt_bpf.c
> +++ b/net/core/lwt_bpf.c
> @@ -98,6 +98,7 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb)
> skb_dst_drop(skb);
> err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
> ip4h_dscp(iph), dev);
> + err = err ? -EINVAL : 0;
Please introduce and use a drop_reason variable here instead of 'err',
to make it clear the type conversion.
Thanks,
Paolo
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH net-next v3 07/10] net: ip: make ip_route_input_noref() return drop reasons
2024-10-21 10:44 ` Paolo Abeni
@ 2024-10-21 10:49 ` Paolo Abeni
2024-10-22 8:49 ` Menglong Dong
0 siblings, 1 reply; 20+ messages in thread
From: Paolo Abeni @ 2024-10-21 10:49 UTC (permalink / raw)
To: Menglong Dong
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
On 10/21/24 12:44, Paolo Abeni wrote:
> On 10/15/24 16:07, Menglong Dong wrote:
>> diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
>> index e0ca24a58810..a4652f2a103a 100644
>> --- a/net/core/lwt_bpf.c
>> +++ b/net/core/lwt_bpf.c
>> @@ -98,6 +98,7 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb)
>> skb_dst_drop(skb);
>> err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
>> ip4h_dscp(iph), dev);
>> + err = err ? -EINVAL : 0;
>
> Please introduce and use a drop_reason variable here instead of 'err',
> to make it clear the type conversion.
Or even better, collapse the 2 statements:
err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
ip4h_dscp(iph), dev) ? -EINVAL : 0;
There are other places which could use a similar changes.
Thanks,
Paolo
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH net-next v3 07/10] net: ip: make ip_route_input_noref() return drop reasons
2024-10-21 10:49 ` Paolo Abeni
@ 2024-10-22 8:49 ` Menglong Dong
0 siblings, 0 replies; 20+ messages in thread
From: Menglong Dong @ 2024-10-22 8:49 UTC (permalink / raw)
To: Paolo Abeni
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
On Mon, Oct 21, 2024 at 6:49 PM Paolo Abeni <pabeni@redhat.com> wrote:
>
>
>
> On 10/21/24 12:44, Paolo Abeni wrote:
> > On 10/15/24 16:07, Menglong Dong wrote:
> >> diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
> >> index e0ca24a58810..a4652f2a103a 100644
> >> --- a/net/core/lwt_bpf.c
> >> +++ b/net/core/lwt_bpf.c
> >> @@ -98,6 +98,7 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb)
> >> skb_dst_drop(skb);
> >> err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
> >> ip4h_dscp(iph), dev);
> >> + err = err ? -EINVAL : 0;
> >
> > Please introduce and use a drop_reason variable here instead of 'err',
> > to make it clear the type conversion.
>
> Or even better, collapse the 2 statements:
>
> err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
> ip4h_dscp(iph), dev) ? -EINVAL : 0;
>
> There are other places which could use a similar changes.
>
Yeah, it makes things much more clear.
Thanks!
Menglong Dong
> Thanks,
>
> Paolo
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH net-next v3 08/10] net: ip: make ip_route_input() return drop reasons
2024-10-15 14:07 [PATCH net-next v3 00/10] net: ip: add drop reasons to input route Menglong Dong
` (6 preceding siblings ...)
2024-10-15 14:07 ` [PATCH net-next v3 07/10] net: ip: make ip_route_input_noref() " Menglong Dong
@ 2024-10-15 14:07 ` Menglong Dong
2024-10-15 14:07 ` [PATCH net-next v3 09/10] net: ip: make ip_mkroute_input/__mkroute_input " Menglong Dong
2024-10-15 14:08 ` [PATCH net-next v3 10/10] net: ip: make ip_route_use_hint() " Menglong Dong
9 siblings, 0 replies; 20+ messages in thread
From: Menglong Dong @ 2024-10-15 14:07 UTC (permalink / raw)
To: pabeni
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
In this commit, we make ip_route_input() return skb drop reasons that come
from ip_route_input_noref().
Meanwhile, adjust all the call to it.
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
include/net/route.h | 7 ++++---
net/bridge/br_netfilter_hooks.c | 11 ++++++-----
net/ipv4/icmp.c | 1 +
3 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/include/net/route.h b/include/net/route.h
index 11674f7c6be6..f4ab5412c9c9 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -210,8 +210,9 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
dscp_t dscp, struct net_device *dev,
const struct sk_buff *hint);
-static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
- dscp_t dscp, struct net_device *devin)
+static inline enum skb_drop_reason
+ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, dscp_t dscp,
+ struct net_device *devin)
{
enum skb_drop_reason reason;
@@ -224,7 +225,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
}
rcu_read_unlock();
- return reason ? -EINVAL : 0;
+ return reason;
}
void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif,
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 17a5f5923d61..110cffc24a1d 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -373,8 +373,8 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct net_device *dev = skb->dev, *br_indev;
const struct iphdr *iph = ip_hdr(skb);
+ enum skb_drop_reason reason;
struct rtable *rt;
- int err;
br_indev = nf_bridge_get_physindev(skb, net);
if (!br_indev) {
@@ -390,9 +390,9 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
}
nf_bridge->in_prerouting = 0;
if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
- err = ip_route_input(skb, iph->daddr, iph->saddr,
- ip4h_dscp(iph), dev);
- if (err) {
+ reason = ip_route_input(skb, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), dev);
+ if (reason) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
/* If err equals -EHOSTUNREACH the error is due to a
@@ -402,7 +402,8 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_
* martian destinations: loopback destinations and destination
* 0.0.0.0. In both cases the packet will be dropped because the
* destination is the loopback device and not the bridge. */
- if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
+ if (reason != SKB_DROP_REASON_IP_INADDRERRORS || !in_dev ||
+ IN_DEV_FORWARD(in_dev))
goto free_skb;
rt = ip_route_output(net, iph->daddr, 0,
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 23664434922e..c3bafff093e0 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -546,6 +546,7 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4,
skb_dst_set(skb_in, NULL);
err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
dscp, rt2->dst.dev);
+ err = err ? -EINVAL : 0;
dst_release(&rt2->dst);
rt2 = skb_rtable(skb_in);
--
2.39.5
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH net-next v3 09/10] net: ip: make ip_mkroute_input/__mkroute_input return drop reasons
2024-10-15 14:07 [PATCH net-next v3 00/10] net: ip: add drop reasons to input route Menglong Dong
` (7 preceding siblings ...)
2024-10-15 14:07 ` [PATCH net-next v3 08/10] net: ip: make ip_route_input() " Menglong Dong
@ 2024-10-15 14:07 ` Menglong Dong
2024-10-15 14:08 ` [PATCH net-next v3 10/10] net: ip: make ip_route_use_hint() " Menglong Dong
9 siblings, 0 replies; 20+ messages in thread
From: Menglong Dong @ 2024-10-15 14:07 UTC (permalink / raw)
To: pabeni
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
In this commit, we make ip_mkroute_input() and __mkroute_input() return
drop reasons.
The drop reason "SKB_DROP_REASON_ARP_PVLAN_DISABLE" is introduced for
the case: the packet which is not IP is forwarded to the in_dev, and
the proxy_arp_pvlan is not enabled. This name is ugly, and I have not
figure out a suitable name for this case yet :/
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
include/net/dropreason-core.h | 7 +++++++
net/ipv4/route.c | 35 +++++++++++++++++++----------------
2 files changed, 26 insertions(+), 16 deletions(-)
diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h
index 74624d369d48..6c5a1ea209a2 100644
--- a/include/net/dropreason-core.h
+++ b/include/net/dropreason-core.h
@@ -104,6 +104,7 @@
FN(IP_TUNNEL_ECN) \
FN(TUNNEL_TXINFO) \
FN(LOCAL_MAC) \
+ FN(ARP_PVLAN_DISABLE) \
FNe(MAX)
/**
@@ -477,6 +478,12 @@ enum skb_drop_reason {
* the MAC address of the local netdev.
*/
SKB_DROP_REASON_LOCAL_MAC,
+ /**
+ * @SKB_DROP_REASON_ARP_PVLAN_DISABLE: packet which is not IP is
+ * forwarded to the in_dev, and the proxy_arp_pvlan is not
+ * enabled.
+ */
+ SKB_DROP_REASON_ARP_PVLAN_DISABLE,
/**
* @SKB_DROP_REASON_MAX: the maximum of core drop reasons, which
* shouldn't be used as a real 'reason' - only for tracing code gen
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 86a964734b1d..cb6beb270265 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1769,10 +1769,12 @@ static void ip_handle_martian_source(struct net_device *dev,
}
/* called in rcu_read_lock() section */
-static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,
- struct in_device *in_dev, __be32 daddr,
- __be32 saddr, dscp_t dscp)
+static enum skb_drop_reason
+__mkroute_input(struct sk_buff *skb, const struct fib_result *res,
+ struct in_device *in_dev, __be32 daddr,
+ __be32 saddr, dscp_t dscp)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct fib_nh_common *nhc = FIB_RES_NHC(*res);
struct net_device *dev = nhc->nhc_dev;
struct fib_nh_exception *fnhe;
@@ -1786,13 +1788,13 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,
out_dev = __in_dev_get_rcu(dev);
if (!out_dev) {
net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
- return -EINVAL;
+ return reason;
}
err = __fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res),
in_dev->dev, in_dev, &itag);
if (err < 0) {
- err = -EINVAL;
+ reason = -err;
ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
saddr);
@@ -1820,7 +1822,8 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,
*/
if (out_dev == in_dev &&
IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
- err = -EINVAL;
+ /* what do we name this situation? */
+ reason = SKB_DROP_REASON_ARP_PVLAN_DISABLE;
goto cleanup;
}
}
@@ -1843,7 +1846,7 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,
rth = rt_dst_alloc(out_dev->dev, 0, res->type,
IN_DEV_ORCONF(out_dev, NOXFRM));
if (!rth) {
- err = -ENOBUFS;
+ reason = SKB_DROP_REASON_NOMEM;
goto cleanup;
}
@@ -1857,9 +1860,9 @@ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res,
lwtunnel_set_redirect(&rth->dst);
skb_dst_set(skb, &rth->dst);
out:
- err = 0;
- cleanup:
- return err;
+ reason = SKB_NOT_DROPPED_YET;
+cleanup:
+ return reason;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -2117,9 +2120,10 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4,
}
#endif /* CONFIG_IP_ROUTE_MULTIPATH */
-static int ip_mkroute_input(struct sk_buff *skb, struct fib_result *res,
- struct in_device *in_dev, __be32 daddr,
- __be32 saddr, dscp_t dscp, struct flow_keys *hkeys)
+static enum skb_drop_reason
+ip_mkroute_input(struct sk_buff *skb, struct fib_result *res,
+ struct in_device *in_dev, __be32 daddr,
+ __be32 saddr, dscp_t dscp, struct flow_keys *hkeys)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res->fi && fib_info_num_path(res->fi) > 1) {
@@ -2333,9 +2337,8 @@ ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
}
make_route:
- err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, dscp, flkeys);
- if (!err)
- reason = SKB_NOT_DROPPED_YET;
+ reason = ip_mkroute_input(skb, res, in_dev, daddr, saddr, dscp,
+ flkeys);
out: return reason;
--
2.39.5
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH net-next v3 10/10] net: ip: make ip_route_use_hint() return drop reasons
2024-10-15 14:07 [PATCH net-next v3 00/10] net: ip: add drop reasons to input route Menglong Dong
` (8 preceding siblings ...)
2024-10-15 14:07 ` [PATCH net-next v3 09/10] net: ip: make ip_mkroute_input/__mkroute_input " Menglong Dong
@ 2024-10-15 14:08 ` Menglong Dong
9 siblings, 0 replies; 20+ messages in thread
From: Menglong Dong @ 2024-10-15 14:08 UTC (permalink / raw)
To: pabeni
Cc: davem, edumazet, kuba, dsahern, pablo, kadlec, roopa, razor,
gnault, bigeasy, idosch, ast, dongml2, netdev, linux-kernel,
netfilter-devel, coreteam, bridge, bpf
In this commit, we make ip_route_use_hint() return drop reasons. The
drop reasons that we return are similar to what we do in
ip_route_input_slow(), and no drop reasons are added in this commit.
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
include/net/route.h | 7 ++++---
net/ipv4/ip_input.c | 9 ++++-----
net/ipv4/route.c | 26 ++++++++++++++++----------
3 files changed, 24 insertions(+), 18 deletions(-)
diff --git a/include/net/route.h b/include/net/route.h
index f4ab5412c9c9..4debc335d276 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -206,9 +206,10 @@ ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
enum skb_drop_reason
ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
dscp_t dscp, struct net_device *dev);
-int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- dscp_t dscp, struct net_device *dev,
- const struct sk_buff *hint);
+enum skb_drop_reason
+ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ const struct sk_buff *hint);
static inline enum skb_drop_reason
ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src, dscp_t dscp,
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 513eb0c6435a..f0a4dda246ab 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -322,15 +322,14 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
int err, drop_reason;
struct rtable *rt;
- drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
-
if (ip_can_use_hint(skb, iph, hint)) {
- err = ip_route_use_hint(skb, iph->daddr, iph->saddr,
- ip4h_dscp(iph), dev, hint);
- if (unlikely(err))
+ drop_reason = ip_route_use_hint(skb, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), dev, hint);
+ if (unlikely(drop_reason))
goto drop_error;
}
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
!skb_dst(skb) &&
!skb->sk &&
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cb6beb270265..fe57f6abf53e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2142,28 +2142,34 @@ ip_mkroute_input(struct sk_buff *skb, struct fib_result *res,
* assuming daddr is valid and the destination is not a local broadcast one.
* Uses the provided hint instead of performing a route lookup.
*/
-int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- dscp_t dscp, struct net_device *dev,
- const struct sk_buff *hint)
+enum skb_drop_reason
+ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev,
+ const struct sk_buff *hint)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
struct in_device *in_dev = __in_dev_get_rcu(dev);
struct rtable *rt = skb_rtable(hint);
struct net *net = dev_net(dev);
- enum skb_drop_reason reason;
- int err = -EINVAL;
u32 tag = 0;
if (!in_dev)
- return -EINVAL;
+ return reason;
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
- if (ipv4_is_zeronet(saddr))
+ if (ipv4_is_zeronet(saddr)) {
+ reason = SKB_DROP_REASON_IP_INVALID_SOURCE;
goto martian_source;
+ }
- if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) {
+ reason = SKB_DROP_REASON_IP_LOCALNET;
goto martian_source;
+ }
if (rt->rt_type != RTN_LOCAL)
goto skip_validate_source;
@@ -2179,7 +2185,7 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
martian_source:
ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
- return err;
+ return reason;
}
/* get device for dst_alloc with local routes */
--
2.39.5
^ permalink raw reply related [flat|nested] 20+ messages in thread