From: Stephen Suryaputra <ssuryaextr@gmail.com>
To: netdev@vger.kernel.org
Cc: dsahern@gmail.com, Stephen Suryaputra <ssuryaextr@gmail.com>
Subject: [PATCH net] vrf: reset rt_iif for recirculated mcast out pkts
Date: Tue, 25 Jun 2019 06:33:59 -0400 [thread overview]
Message-ID: <20190625103359.31102-1-ssuryaextr@gmail.com> (raw)
Multicast egress packets have skb_rtable(skb)->rt_iif set to the oif.
Depending on the socket, these packets might be recirculated back as
input, and the raw sockets that are opened for them are bound to the VRF.
But since skb_rtable(skb) is set and its rt_iif is non-zero, the inet_iif()
function returns rt_iif instead of skb_iif (the VRF netdev). Hence, the
socket lookup fails.
Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
---
include/net/route.h | 1 +
net/ipv4/ip_output.c | 25 ++++++++++++++++++++++++-
net/ipv4/route.c | 33 +++++++++++++++++++++++++++++++++
3 files changed, 58 insertions(+), 1 deletion(-)
diff --git a/include/net/route.h b/include/net/route.h
index 065b47754f05..55ff71ffb796 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -221,6 +221,7 @@ void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt);
struct rtable *rt_dst_alloc(struct net_device *dev,
unsigned int flags, u16 type,
bool nopolicy, bool noxfrm, bool will_cache);
+struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt);
struct in_ifaddr;
void fib_add_ifaddr(struct in_ifaddr *);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 16f9159234a2..a5e240bad3ce 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -329,6 +329,19 @@ static int ip_mc_finish_output(struct net *net, struct sock *sk,
return dev_loopback_xmit(net, sk, skb);
}
+static void ip_mc_reset_rt_iif(struct net *net, struct rtable *rt,
+ struct sk_buff *newskb)
+{
+ struct rtable *new_rt;
+
+ new_rt = rt_dst_clone(net->loopback_dev, rt);
+ if (new_rt) {
+ new_rt->rt_iif = 0;
+ skb_dst_drop(newskb);
+ skb_dst_set(newskb, &new_rt->dst);
+ }
+}
+
int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct rtable *rt = skb_rtable(skb);
@@ -363,10 +376,20 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
#endif
) {
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
- if (newskb)
+ if (newskb) {
+ /* Reset rt_iif so that inet_iif() will return
+ * skb->dev->ifindex which is the VRF device for
+ * socket lookup. Setting this to VRF ifindex
+ * causes ipi_ifindex in in_pktinfo to be
+ * overwritten, see ipv4_pktinfo_prepare().
+ */
+ if (netif_is_l3_slave(dev))
+ ip_mc_reset_rt_iif(net, rt, newskb);
+
NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
net, sk, newskb, NULL, newskb->dev,
ip_mc_finish_output);
+ }
}
/* Multicasts with ttl 0 must not go beyond the host */
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6cb7cff22db9..8ea0735a6754 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1647,6 +1647,39 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
}
EXPORT_SYMBOL(rt_dst_alloc);
+struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt)
+{
+ struct rtable *new_rt;
+
+ new_rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
+ rt->dst.flags);
+
+ if (new_rt) {
+ new_rt->rt_genid = rt_genid_ipv4(dev_net(dev));
+ new_rt->rt_flags = rt->rt_flags;
+ new_rt->rt_type = rt->rt_type;
+ new_rt->rt_is_input = rt->rt_is_input;
+ new_rt->rt_iif = rt->rt_iif;
+ new_rt->rt_pmtu = rt->rt_pmtu;
+ new_rt->rt_mtu_locked = rt->rt_mtu_locked;
+ new_rt->rt_gw_family = rt->rt_gw_family;
+ if (rt->rt_gw_family == AF_INET)
+ new_rt->rt_gw4 = rt->rt_gw4;
+ else if (rt->rt_gw_family == AF_INET6)
+ new_rt->rt_gw6 = rt->rt_gw6;
+ INIT_LIST_HEAD(&new_rt->rt_uncached);
+
+ new_rt->dst.flags |= DST_HOST;
+ new_rt->dst.input = rt->dst.input;
+ new_rt->dst.output = rt->dst.output;
+ new_rt->dst.error = rt->dst.error;
+ new_rt->dst.lastuse = jiffies;
+ new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate);
+ }
+ return new_rt;
+}
+EXPORT_SYMBOL(rt_dst_clone);
+
/* called in rcu_read_lock() section */
int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev,
--
2.17.1
next reply other threads:[~2019-06-25 12:14 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-06-25 10:33 Stephen Suryaputra [this message]
2019-06-25 20:22 ` [PATCH net] vrf: reset rt_iif for recirculated mcast out pkts David Ahern
2019-06-25 20:36 ` David Ahern
2019-06-25 20:42 ` Stephen Suryaputra
2019-06-25 20:52 ` David Ahern
2019-06-25 20:31 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190625103359.31102-1-ssuryaextr@gmail.com \
--to=ssuryaextr@gmail.com \
--cc=dsahern@gmail.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).