From: Dan Siemon <dan@coverfire.com>
To: netdev <netdev@vger.kernel.org>
Subject: [PATCH] cls_flow: Add tunnel support to the flow classifier
Date: Sun, 16 Oct 2011 19:06:12 -0400 [thread overview]
Message-ID: <1318806373.7169.35.camel@ganymede> (raw)
[-- Attachment #1: Type: text/plain, Size: 9437 bytes --]
When used on an interface carrying tunneled traffic, the flow classifier
cannot look inside the tunnels, so all of the traffic within a tunnel is
treated as a single flow. This prevents any kind of intelligent
queuing from taking place. This patch adds new keys to the flow classifier
that look inside the tunnel. Presently IP-IP, IP-IPv6, IPv6-IPv6 and IPv6-IP
tunnels are supported.
If you are interested I have posted some background and experimental
results at:
http://www.coverfire.com/archives/2011/10/16/making-the-linux-flow-classifier-tunnel-aware/
The related iproute2 patch can be found at the above URL as well.
Signed-off-by: Dan Siemon <dan@coverfire.com>
diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index defbde2..2f80fa0 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -333,6 +333,11 @@ enum {
FLOW_KEY_SKGID,
FLOW_KEY_VLAN_TAG,
FLOW_KEY_RXHASH,
+ FLOW_KEY_TUNNEL_SRC,
+ FLOW_KEY_TUNNEL_DST,
+ FLOW_KEY_TUNNEL_PROTO,
+ FLOW_KEY_TUNNEL_PROTO_SRC,
+ FLOW_KEY_TUNNEL_PROTO_DST,
__FLOW_KEY_MAX,
};
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 6994214..f0bd3ad 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -311,6 +311,301 @@ static u32 flow_get_rxhash(struct sk_buff *skb)
return skb_get_rxhash(skb);
}
+/*
+ * Source address of the inner IPv4 header (as returned by ipip_hdr()),
+ * converted to host byte order.
+ *
+ * Returns 0 when the outer header plus a basic inner IPv4 header cannot be
+ * pulled.
+ */
+static u32 tunnel_inner_ip_src(struct sk_buff *skb)
+{
+	unsigned int need = skb_network_header_len(skb) + sizeof(struct iphdr);
+
+	if (!pskb_network_may_pull(skb, need))
+		return 0;
+
+	return ntohl(ipip_hdr(skb)->saddr);
+}
+
+/*
+ * Last 32-bit word of the inner IPv6 source address, in host byte order.
+ * The inner header is read at the skb's transport header.
+ *
+ * Returns 0 when the outer header plus a fixed inner IPv6 header cannot be
+ * pulled.
+ */
+static u32 tunnel_inner_ipv6_src(struct sk_buff *skb)
+{
+	const struct ipv6hdr *inner;
+	unsigned int need = skb_network_header_len(skb) +
+			    sizeof(struct ipv6hdr);
+
+	if (!pskb_network_may_pull(skb, need))
+		return 0;
+
+	inner = (struct ipv6hdr *)skb_transport_header(skb);
+
+	return ntohl(inner->saddr.s6_addr32[3]);
+}
+
+/*
+ * FLOW_KEY_TUNNEL_SRC: source address of the encapsulated packet.
+ *
+ * Returns 0 when skb->protocol is neither IPv4 nor IPv6, when the outer
+ * header cannot be pulled, or when the outer header's payload protocol is
+ * neither IPPROTO_IPIP nor IPPROTO_IPV6.
+ */
+static u32 flow_get_tunnel_src(struct sk_buff *skb)
+{
+	int encap;
+
+	/* Work out which protocol the outer header says it carries. */
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+			return 0;
+		encap = ip_hdr(skb)->protocol;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+			return 0;
+		encap = ipv6_hdr(skb)->nexthdr;
+	} else {
+		return 0;
+	}
+
+	if (encap == IPPROTO_IPIP)
+		return tunnel_inner_ip_src(skb);
+	if (encap == IPPROTO_IPV6)
+		return tunnel_inner_ipv6_src(skb);
+
+	return 0;
+}
+
+/*
+ * Destination address of the inner IPv4 header (as returned by ipip_hdr()),
+ * converted to host byte order.
+ *
+ * Returns 0 when the outer header plus a basic inner IPv4 header cannot be
+ * pulled.
+ */
+static u32 tunnel_inner_ip_dst(struct sk_buff *skb)
+{
+	unsigned int need = skb_network_header_len(skb) + sizeof(struct iphdr);
+
+	if (!pskb_network_may_pull(skb, need))
+		return 0;
+
+	return ntohl(ipip_hdr(skb)->daddr);
+}
+
+/*
+ * Last 32-bit word of the inner IPv6 destination address, in host byte
+ * order.  The inner header is read at the skb's transport header.
+ *
+ * Returns 0 when the outer header plus a fixed inner IPv6 header cannot be
+ * pulled.
+ */
+static u32 tunnel_inner_ipv6_dst(struct sk_buff *skb)
+{
+	const struct ipv6hdr *inner;
+	unsigned int need = skb_network_header_len(skb) +
+			    sizeof(struct ipv6hdr);
+
+	if (!pskb_network_may_pull(skb, need))
+		return 0;
+
+	inner = (struct ipv6hdr *)skb_transport_header(skb);
+
+	return ntohl(inner->daddr.s6_addr32[3]);
+}
+
+/*
+ * FLOW_KEY_TUNNEL_DST: destination address of the encapsulated packet.
+ *
+ * Returns 0 when skb->protocol is neither IPv4 nor IPv6, when the outer
+ * header cannot be pulled, or when the outer header's payload protocol is
+ * neither IPPROTO_IPIP nor IPPROTO_IPV6.
+ */
+static u32 flow_get_tunnel_dst(struct sk_buff *skb)
+{
+	int encap;
+
+	/* Work out which protocol the outer header says it carries. */
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+			return 0;
+		encap = ip_hdr(skb)->protocol;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+			return 0;
+		encap = ipv6_hdr(skb)->nexthdr;
+	} else {
+		return 0;
+	}
+
+	if (encap == IPPROTO_IPIP)
+		return tunnel_inner_ip_dst(skb);
+	if (encap == IPPROTO_IPV6)
+		return tunnel_inner_ipv6_dst(skb);
+
+	return 0;
+}
+
+/*
+ * Protocol field of the inner IPv4 header (as returned by ipip_hdr()).
+ *
+ * Returns 0 when the outer header plus a basic inner IPv4 header cannot be
+ * pulled.
+ */
+static u32 tunnel_inner_ip_proto(struct sk_buff *skb)
+{
+	if (pskb_network_may_pull(skb, skb_network_header_len(skb) +
+				       sizeof(struct iphdr)))
+		return ipip_hdr(skb)->protocol;
+
+	return 0;
+}
+
+/*
+ * Next-header field of the inner IPv6 header, read at the skb's transport
+ * header.  The value is taken straight from the fixed header; no extension
+ * headers are walked.
+ *
+ * Returns 0 when the outer header plus a fixed inner IPv6 header cannot be
+ * pulled.
+ */
+static u32 tunnel_inner_ipv6_proto(struct sk_buff *skb)
+{
+	if (pskb_network_may_pull(skb, skb_network_header_len(skb) +
+				       sizeof(struct ipv6hdr))) {
+		const struct ipv6hdr *inner =
+			(struct ipv6hdr *)skb_transport_header(skb);
+
+		return inner->nexthdr;
+	}
+
+	return 0;
+}
+
+/*
+ * FLOW_KEY_TUNNEL_PROTO: protocol number carried by the encapsulated
+ * packet's own header.
+ *
+ * Returns 0 when skb->protocol is neither IPv4 nor IPv6, when the outer
+ * header cannot be pulled, or when the outer header's payload protocol is
+ * neither IPPROTO_IPIP nor IPPROTO_IPV6.
+ */
+static u32 flow_get_tunnel_proto(struct sk_buff *skb)
+{
+	int encap;
+
+	/* Work out which protocol the outer header says it carries. */
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+			return 0;
+		encap = ip_hdr(skb)->protocol;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+			return 0;
+		encap = ipv6_hdr(skb)->nexthdr;
+	} else {
+		return 0;
+	}
+
+	if (encap == IPPROTO_IPIP)
+		return tunnel_inner_ip_proto(skb);
+	if (encap == IPPROTO_IPV6)
+		return tunnel_inner_ipv6_proto(skb);
+
+	return 0;
+}
+
+/*
+ * Source port of the transport protocol carried by the inner IPv4 header,
+ * in host byte order.
+ *
+ * Returns 0 when the inner header cannot be pulled, when the inner packet
+ * is a fragment, when proto_ports_offset() reports a negative offset for
+ * the inner protocol, or when the port bytes cannot be pulled.
+ */
+static u32 tunnel_inner_ip_proto_src(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+					sizeof(struct iphdr)))
+		return 0;
+
+	iph = ipip_hdr(skb);
+
+	if (ip_is_fragment(iph))
+		return 0;
+
+	poff = proto_ports_offset(iph->protocol);
+	if (poff < 0)
+		return 0;
+
+	/* Outer header + inner header (ihl words) + offset + 2-byte port. */
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+					iph->ihl * 4 + 2 + poff))
+		return 0;
+
+	/*
+	 * A successful pull may reallocate the skb header, so the pointer
+	 * fetched before the pull must not be reused; reload it.
+	 */
+	iph = ipip_hdr(skb);
+
+	return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + poff));
+}
+
+/*
+ * Source port of the transport protocol named by the inner IPv6 header's
+ * next-header field, in host byte order.  The port is read directly after
+ * the fixed IPv6 header; extension headers are not walked.
+ *
+ * Returns 0 when the inner header cannot be pulled, when
+ * proto_ports_offset() reports a negative offset for the next-header value,
+ * or when the port bytes cannot be pulled.
+ */
+static u32 tunnel_inner_ipv6_proto_src(struct sk_buff *skb)
+{
+	struct ipv6hdr *ipv6h;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+					sizeof(struct ipv6hdr)))
+		return 0;
+
+	ipv6h = (struct ipv6hdr *)skb_transport_header(skb);
+
+	poff = proto_ports_offset(ipv6h->nexthdr);
+	if (poff < 0)
+		return 0;
+
+	/*
+	 * The pull length is counted from the outer network header, so it
+	 * has to include the outer header as well as the inner IPv6 header,
+	 * the port offset and the 2-byte port itself.
+	 */
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+					sizeof(*ipv6h) + poff + 2))
+		return 0;
+
+	/*
+	 * A successful pull may reallocate the skb header, so the pointer
+	 * fetched before the pull must not be reused; reload it.
+	 */
+	ipv6h = (struct ipv6hdr *)skb_transport_header(skb);
+
+	return ntohs(*(__be16 *)((void *)ipv6h + sizeof(*ipv6h) + poff));
+}
+
+/*
+ * FLOW_KEY_TUNNEL_PROTO_SRC: transport source port of the encapsulated
+ * packet.
+ *
+ * Returns 0 when skb->protocol is neither IPv4 nor IPv6, when the outer
+ * header cannot be pulled, or when the outer header's payload protocol is
+ * neither IPPROTO_IPIP nor IPPROTO_IPV6.
+ */
+static u32 flow_get_tunnel_proto_src(struct sk_buff *skb)
+{
+	int encap;
+
+	/* Work out which protocol the outer header says it carries. */
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+			return 0;
+		encap = ip_hdr(skb)->protocol;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+			return 0;
+		encap = ipv6_hdr(skb)->nexthdr;
+	} else {
+		return 0;
+	}
+
+	if (encap == IPPROTO_IPIP)
+		return tunnel_inner_ip_proto_src(skb);
+	if (encap == IPPROTO_IPV6)
+		return tunnel_inner_ipv6_proto_src(skb);
+
+	return 0;
+}
+
+/*
+ * Destination port of the transport protocol carried by the inner IPv4
+ * header, in host byte order.  The destination port is read 2 bytes after
+ * the offset reported by proto_ports_offset().
+ *
+ * Returns 0 when the inner header cannot be pulled, when the inner packet
+ * is a fragment, when proto_ports_offset() reports a negative offset for
+ * the inner protocol, or when the port bytes cannot be pulled.
+ */
+static u32 tunnel_inner_ip_proto_dst(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+					sizeof(struct iphdr)))
+		return 0;
+
+	iph = ipip_hdr(skb);
+
+	if (ip_is_fragment(iph))
+		return 0;
+
+	poff = proto_ports_offset(iph->protocol);
+	if (poff < 0)
+		return 0;
+
+	/* Outer header + inner header (ihl words) + offset + both ports. */
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+					iph->ihl * 4 + 4 + poff))
+		return 0;
+
+	/*
+	 * A successful pull may reallocate the skb header, so the pointer
+	 * fetched before the pull must not be reused; reload it.
+	 */
+	iph = ipip_hdr(skb);
+
+	return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2 + poff));
+}
+
+/*
+ * Destination port of the transport protocol named by the inner IPv6
+ * header's next-header field, in host byte order.  The port is read
+ * directly after the fixed IPv6 header, 2 bytes past the offset reported by
+ * proto_ports_offset(); extension headers are not walked.
+ *
+ * Returns 0 when the inner header cannot be pulled, when
+ * proto_ports_offset() reports a negative offset for the next-header value,
+ * or when the port bytes cannot be pulled.
+ */
+static u32 tunnel_inner_ipv6_proto_dst(struct sk_buff *skb)
+{
+	struct ipv6hdr *ipv6h;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+					sizeof(struct ipv6hdr)))
+		return 0;
+
+	ipv6h = (struct ipv6hdr *)skb_transport_header(skb);
+
+	poff = proto_ports_offset(ipv6h->nexthdr);
+	if (poff < 0)
+		return 0;
+
+	/*
+	 * The pull length is counted from the outer network header, so it
+	 * has to include the outer header as well as the inner IPv6 header,
+	 * the port offset and both 2-byte ports.
+	 */
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+					sizeof(*ipv6h) + poff + 4))
+		return 0;
+
+	/*
+	 * A successful pull may reallocate the skb header, so the pointer
+	 * fetched before the pull must not be reused; reload it.
+	 */
+	ipv6h = (struct ipv6hdr *)skb_transport_header(skb);
+
+	return ntohs(*(__be16 *)((void *)ipv6h + sizeof(*ipv6h) + poff + 2));
+}
+
+/*
+ * FLOW_KEY_TUNNEL_PROTO_DST: transport destination port of the encapsulated
+ * packet.
+ *
+ * Returns 0 when skb->protocol is neither IPv4 nor IPv6, when the outer
+ * header cannot be pulled, or when the outer header's payload protocol is
+ * neither IPPROTO_IPIP nor IPPROTO_IPV6.
+ */
+static u32 flow_get_tunnel_proto_dst(struct sk_buff *skb)
+{
+	int encap;
+
+	/* Work out which protocol the outer header says it carries. */
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+			return 0;
+		encap = ip_hdr(skb)->protocol;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+			return 0;
+		encap = ipv6_hdr(skb)->nexthdr;
+	} else {
+		return 0;
+	}
+
+	if (encap == IPPROTO_IPIP)
+		return tunnel_inner_ip_proto_dst(skb);
+	if (encap == IPPROTO_IPV6)
+		return tunnel_inner_ipv6_proto_dst(skb);
+
+	return 0;
+}
+
static u32 flow_key_get(struct sk_buff *skb, int key)
{
switch (key) {
@@ -350,6 +645,16 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
return flow_get_vlan_tag(skb);
case FLOW_KEY_RXHASH:
return flow_get_rxhash(skb);
+ case FLOW_KEY_TUNNEL_SRC:
+ return flow_get_tunnel_src(skb);
+ case FLOW_KEY_TUNNEL_DST:
+ return flow_get_tunnel_dst(skb);
+ case FLOW_KEY_TUNNEL_PROTO:
+ return flow_get_tunnel_proto(skb);
+ case FLOW_KEY_TUNNEL_PROTO_SRC:
+ return flow_get_tunnel_proto_src(skb);
+ case FLOW_KEY_TUNNEL_PROTO_DST:
+ return flow_get_tunnel_proto_dst(skb);
default:
WARN_ON(1);
return 0;
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]
next reply other threads:[~2011-10-16 23:06 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-10-16 23:06 Dan Siemon [this message]
2011-10-17 6:40 ` [PATCH] cls_flow: Add tunnel support to the flow classifier Eric Dumazet
2011-10-24 1:21 ` Dan Siemon
2011-10-24 3:14 ` Eric Dumazet
2011-10-24 3:59 ` Eric Dumazet
2011-10-24 22:36 ` David Miller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1318806373.7169.35.camel@ganymede \
--to=dan@coverfire.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).