From: Menglong Dong <menglong8.dong@gmail.com>
To: pabeni@redhat.com
Cc: davem@davemloft.net, edumazet@google.com, kuba@kernel.org,
dsahern@kernel.org, pablo@netfilter.org, kadlec@netfilter.org,
roopa@nvidia.com, razor@blackwall.org, gnault@redhat.com,
bigeasy@linutronix.de, idosch@nvidia.com, ast@kernel.org,
dongml2@chinatelecom.cn, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, netfilter-devel@vger.kernel.org,
coreteam@netfilter.org, bridge@lists.linux.dev,
bpf@vger.kernel.org
Subject: [PATCH bpf-next v4 6/9] net: ip: make ip_route_input_noref() return drop reasons
Date: Thu, 24 Oct 2024 17:33:45 +0800 [thread overview]
Message-ID: <20241024093348.353245-7-dongml2@chinatelecom.cn> (raw)
In-Reply-To: <20241024093348.353245-1-dongml2@chinatelecom.cn>
In this commit, we make ip_route_input_noref() return drop reasons, which
come from ip_route_input_rcu().
We need to adjust the callers of ip_route_input_noref() to make sure that
its return value is used properly.
In the original logic, the errno that ip_route_input() and
bpf_lwt_input_reroute() return comes from ip_route_input_noref(); we make
them return -EINVAL on error instead. In the following patch, we will make
ip_route_input() return drop reasons too.
Signed-off-by: Menglong Dong <dongml2@chinatelecom.cn>
---
v4:
- introduce the variable "reason" in bpf_lwt_input_reroute() to make
things clear
---
include/net/route.h | 15 ++++++++-------
net/core/lwt_bpf.c | 6 ++++--
net/ipv4/ip_fragment.c | 12 +++++++-----
net/ipv4/ip_input.c | 7 ++++---
net/ipv4/route.c | 7 ++++---
5 files changed, 27 insertions(+), 20 deletions(-)
diff --git a/include/net/route.h b/include/net/route.h
index a828a17a6313..11674f7c6be6 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -203,8 +203,9 @@ enum skb_drop_reason
ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
dscp_t dscp, struct net_device *dev,
struct in_device *in_dev, u32 *itag);
-int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- dscp_t dscp, struct net_device *dev);
+enum skb_drop_reason
+ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ dscp_t dscp, struct net_device *dev);
int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
dscp_t dscp, struct net_device *dev,
const struct sk_buff *hint);
@@ -212,18 +213,18 @@ int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
dscp_t dscp, struct net_device *devin)
{
- int err;
+ enum skb_drop_reason reason;
rcu_read_lock();
- err = ip_route_input_noref(skb, dst, src, dscp, devin);
- if (!err) {
+ reason = ip_route_input_noref(skb, dst, src, dscp, devin);
+ if (!reason) {
skb_dst_force(skb);
if (!skb_dst(skb))
- err = -EINVAL;
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
}
rcu_read_unlock();
- return err;
+ return reason ? -EINVAL : 0;
}
void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif,
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index e0ca24a58810..8a78bff53b2c 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -88,6 +88,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt,
static int bpf_lwt_input_reroute(struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
int err = -EINVAL;
if (skb->protocol == htons(ETH_P_IP)) {
@@ -96,8 +97,9 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb)
dev_hold(dev);
skb_dst_drop(skb);
- err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
- ip4h_dscp(iph), dev);
+ reason = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), dev);
+ err = reason ? -EINVAL : 0;
dev_put(dev);
} else if (skb->protocol == htons(ETH_P_IPV6)) {
skb_dst_drop(skb);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 48e2810f1f27..52b991e976ba 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -132,12 +132,12 @@ static bool frag_expire_skip_icmp(u32 user)
*/
static void ip_expire(struct timer_list *t)
{
+ enum skb_drop_reason reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT;
struct inet_frag_queue *frag = from_timer(frag, t, timer);
const struct iphdr *iph;
struct sk_buff *head = NULL;
struct net *net;
struct ipq *qp;
- int err;
qp = container_of(frag, struct ipq, q);
net = qp->q.fqdir->net;
@@ -175,10 +175,12 @@ static void ip_expire(struct timer_list *t)
/* skb has no dst, perform route lookup again */
iph = ip_hdr(head);
- err = ip_route_input_noref(head, iph->daddr, iph->saddr, ip4h_dscp(iph),
- head->dev);
- if (err)
+ reason = ip_route_input_noref(head, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), head->dev);
+ if (reason)
goto out;
+ else
+ reason = SKB_DROP_REASON_FRAG_REASM_TIMEOUT;
/* Only an end host needs to send an ICMP
* "Fragment Reassembly Timeout" message, per RFC792.
@@ -195,7 +197,7 @@ static void ip_expire(struct timer_list *t)
spin_unlock(&qp->q.lock);
out_rcu_unlock:
rcu_read_unlock();
- kfree_skb_reason(head, SKB_DROP_REASON_FRAG_REASM_TIMEOUT);
+ kfree_skb_reason(head, reason);
ipq_put(qp);
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index c40a26972884..513eb0c6435a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -362,10 +362,11 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
* how the packet travels inside Linux networking.
*/
if (!skb_valid_dst(skb)) {
- err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
- ip4h_dscp(iph), dev);
- if (unlikely(err))
+ drop_reason = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+ ip4h_dscp(iph), dev);
+ if (unlikely(drop_reason))
goto drop_error;
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
} else {
struct in_device *in_dev = __in_dev_get_rcu(dev);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 4b0daf3510d7..757526e450fd 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2487,8 +2487,9 @@ ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr,
return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res);
}
-int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
- dscp_t dscp, struct net_device *dev)
+enum skb_drop_reason ip_route_input_noref(struct sk_buff *skb, __be32 daddr,
+ __be32 saddr, dscp_t dscp,
+ struct net_device *dev)
{
enum skb_drop_reason reason;
struct fib_result res;
@@ -2497,7 +2498,7 @@ int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
reason = ip_route_input_rcu(skb, daddr, saddr, dscp, dev, &res);
rcu_read_unlock();
- return reason ? -EINVAL : 0;
+ return reason;
}
EXPORT_SYMBOL(ip_route_input_noref);
--
2.39.5
next prev parent reply other threads:[~2024-10-24 9:35 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-24 9:33 [PATCH bpf-next v4 0/9] net: ip: add drop reasons to input route Menglong Dong
2024-10-24 9:33 ` [PATCH bpf-next v4 1/9] net: ip: make fib_validate_source() support drop reasons Menglong Dong
2024-10-24 9:33 ` [PATCH bpf-next v4 2/9] net: ip: make ip_route_input_mc() return drop reason Menglong Dong
2024-10-24 9:33 ` [PATCH bpf-next v4 3/9] net: ip: make ip_mc_validate_source() " Menglong Dong
2024-10-24 9:33 ` [PATCH bpf-next v4 4/9] net: ip: make ip_route_input_slow() return drop reasons Menglong Dong
2024-10-24 9:33 ` [PATCH bpf-next v4 5/9] net: ip: make ip_route_input_rcu() " Menglong Dong
2024-10-24 9:33 ` Menglong Dong [this message]
2024-10-24 9:33 ` [PATCH bpf-next v4 7/9] net: ip: make ip_route_input() " Menglong Dong
2024-10-24 9:33 ` [PATCH bpf-next v4 8/9] net: ip: make ip_mkroute_input/__mkroute_input " Menglong Dong
2024-10-24 9:33 ` [PATCH bpf-next v4 9/9] net: ip: make ip_route_use_hint() " Menglong Dong
2024-10-30 0:03 ` [PATCH bpf-next v4 0/9] net: ip: add drop reasons to input route Jakub Kicinski
2024-10-30 1:35 ` Menglong Dong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241024093348.353245-7-dongml2@chinatelecom.cn \
--to=menglong8.dong@gmail.com \
--cc=ast@kernel.org \
--cc=bigeasy@linutronix.de \
--cc=bpf@vger.kernel.org \
--cc=bridge@lists.linux.dev \
--cc=coreteam@netfilter.org \
--cc=davem@davemloft.net \
--cc=dongml2@chinatelecom.cn \
--cc=dsahern@kernel.org \
--cc=edumazet@google.com \
--cc=gnault@redhat.com \
--cc=idosch@nvidia.com \
--cc=kadlec@netfilter.org \
--cc=kuba@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=netfilter-devel@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=pablo@netfilter.org \
--cc=razor@blackwall.org \
--cc=roopa@nvidia.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox