From: Ahmed Abdelsalam <ahabdels@gmail.com>
To: "David S. Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>,
Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>,
Hideaki YOSHIFUJI <yoshfuji@linux-ipv6.org>,
Ahmed Abdelsalam <ahabdels@gmail.com>,
David Ahern <dsahern@gmail.com>, Paolo Abeni <pabeni@redhat.com>,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: andrea.mayer@uniroma2.it
Subject: [net-next v5 1/2] seg6: inherit DSCP of inner IPv4 packets
Date: Tue, 25 Aug 2020 16:02:33 +0000 [thread overview]
Message-ID: <20200825160236.1123-1-ahabdels@gmail.com> (raw)
This patch allows SRv6 encapsulation to inherit the DSCP value of
the inner IPv4 packet.
This allows packets to be forwarded across the SRv6 fabric based on
their original traffic class.
The option is controlled through a sysctl (seg6_inherit_inner_ipv4_dscp).
The sysctl has to be set to 1 to enable this feature.
Signed-off-by: Ahmed Abdelsalam <ahabdels@gmail.com>
---
include/net/netns/ipv6.h | 1 +
net/ipv6/seg6_iptunnel.c | 37 ++++++++++++++++++++-----------------
net/ipv6/sysctl_net_ipv6.c | 9 +++++++++
3 files changed, 30 insertions(+), 17 deletions(-)
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 5ec054473d81..6ed73951f479 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -50,6 +50,7 @@ struct netns_sysctl_ipv6 {
int max_dst_opts_len;
int max_hbh_opts_len;
int seg6_flowlabel;
+ bool seg6_inherit_inner_ipv4_dscp;
bool skip_notify_on_dev_down;
};
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 897fa59c47de..9cc168462e11 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -104,8 +104,7 @@ static void set_tun_src(struct net *net, struct net_device *dev,
}
/* Compute flowlabel for outer IPv6 header */
-static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
- struct ipv6hdr *inner_hdr)
+static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb)
{
int do_flowlabel = net->ipv6.sysctl.seg6_flowlabel;
__be32 flowlabel = 0;
@@ -116,7 +115,7 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
hash = rol32(hash, 16);
flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
} else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
- flowlabel = ip6_flowlabel(inner_hdr);
+ flowlabel = ip6_flowlabel(ipv6_hdr(skb));
}
return flowlabel;
}
@@ -129,6 +128,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
struct ipv6hdr *hdr, *inner_hdr;
struct ipv6_sr_hdr *isrh;
int hdrlen, tot_len, err;
+ u8 tos = 0, hop_limit;
__be32 flowlabel;
hdrlen = (osrh->hdrlen + 1) << 3;
@@ -138,30 +138,33 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
if (unlikely(err))
return err;
- inner_hdr = ipv6_hdr(skb);
- flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
-
- skb_push(skb, tot_len);
- skb_reset_network_header(skb);
- skb_mac_header_rebuild(skb);
- hdr = ipv6_hdr(skb);
-
/* inherit tc, flowlabel and hlim
* hlim will be decremented in ip6_forward() afterwards and
* decapsulation will overwrite inner hlim with outer hlim
*/
+ flowlabel = seg6_make_flowlabel(net, skb);
+ hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+
if (skb->protocol == htons(ETH_P_IPV6)) {
- ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
- flowlabel);
- hdr->hop_limit = inner_hdr->hop_limit;
+ inner_hdr = ipv6_hdr(skb);
+ hop_limit = inner_hdr->hop_limit;
+ tos = ip6_tclass(ip6_flowinfo(inner_hdr));
+ } else if (skb->protocol == htons(ETH_P_IP)) {
+ if (net->ipv6.sysctl.seg6_inherit_inner_ipv4_dscp)
+ tos = ip_hdr(skb)->tos;
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
} else {
- ip6_flow_hdr(hdr, 0, flowlabel);
- hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
-
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
}
+ skb_push(skb, tot_len);
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
+ hdr = ipv6_hdr(skb);
+ ip6_flow_hdr(hdr, tos, flowlabel);
+ hdr->hop_limit = hop_limit;
hdr->nexthdr = NEXTHDR_ROUTING;
isrh = (void *)hdr + sizeof(*hdr);
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index fac2135aa47b..4b2cf8764524 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -159,6 +159,15 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "seg6_inherit_inner_ipv4_dscp",
+ .data = &init_net.ipv6.sysctl.seg6_inherit_inner_ipv4_dscp,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
{ }
};
--
2.17.1
next reply other threads:[~2020-08-25 16:02 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-08-25 16:02 Ahmed Abdelsalam [this message]
2020-08-25 16:45 ` [net-next v5 1/2] seg6: inherit DSCP of inner IPv4 packets David Ahern
2020-08-25 23:45 ` Ahmed Abdelsalam
2020-08-26 0:45 ` David Ahern
2020-08-26 12:12 ` Ahmed Abdelsalam
2020-08-26 19:41 ` David Ahern
2020-08-27 10:52 ` Ahmed Abdelsalam
-- strict thread matches above, loose matches on Subject: below --
2020-08-25 12:17 Ahmed Abdelsalam
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200825160236.1123-1-ahabdels@gmail.com \
--to=ahabdels@gmail.com \
--cc=andrea.mayer@uniroma2.it \
--cc=davem@davemloft.net \
--cc=dsahern@gmail.com \
--cc=kuba@kernel.org \
--cc=kuznet@ms2.inr.ac.ru \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=yoshfuji@linux-ipv6.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).