netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Dan Siemon <dan@coverfire.com>
To: netdev <netdev@vger.kernel.org>
Subject: [PATCH] cls_flow: Add tunnel support to the flow classifier
Date: Sun, 16 Oct 2011 19:06:12 -0400	[thread overview]
Message-ID: <1318806373.7169.35.camel@ganymede> (raw)

[-- Attachment #1: Type: text/plain, Size: 9437 bytes --]

When used on an interface carrying tunneled traffic, the flow classifier
cannot look inside the tunnels, so all of the traffic within a tunnel is
treated as a single flow. This prevents any kind of intelligent queuing
from taking place. This patch adds new keys to the flow classifier which
look inside the tunnel. Presently IP-IP, IP-IPv6, IPv6-IPv6 and IPv6-IP
tunnels are supported.

If you are interested, I have posted some background and experimental
results at:
http://www.coverfire.com/archives/2011/10/16/making-the-linux-flow-classifier-tunnel-aware/

The related iproute2 patch can be found at the above URL as well.

Signed-off-by: Dan Siemon <dan@coverfire.com>

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index defbde2..2f80fa0 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -333,6 +333,11 @@ enum {
 	FLOW_KEY_SKGID,
 	FLOW_KEY_VLAN_TAG,
 	FLOW_KEY_RXHASH,
+	FLOW_KEY_TUNNEL_SRC,
+	FLOW_KEY_TUNNEL_DST,
+	FLOW_KEY_TUNNEL_PROTO,
+	FLOW_KEY_TUNNEL_PROTO_SRC,
+	FLOW_KEY_TUNNEL_PROTO_DST,
 	__FLOW_KEY_MAX,
 };
 
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 6994214..f0bd3ad 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -311,6 +311,301 @@ static u32 flow_get_rxhash(struct sk_buff *skb)
 	return skb_get_rxhash(skb);
 }
 
+/* Inner IPv4 source address in host byte order; 0 if it cannot be pulled. */
+static u32 tunnel_inner_ip_src(struct sk_buff *skb)
+{
+	u32 pull_len = skb_network_header_len(skb) + sizeof(struct iphdr);
+
+	if (!pskb_network_may_pull(skb, pull_len))
+		return 0;
+	return ntohl(ipip_hdr(skb)->saddr);
+}
+
+/* Last 32 bits of the inner IPv6 source address, host order; else 0. */
+static u32 tunnel_inner_ipv6_src(struct sk_buff *skb)
+{
+	u32 pull_len = skb_network_header_len(skb) + sizeof(struct ipv6hdr);
+	const struct ipv6hdr *inner;
+
+	if (!pskb_network_may_pull(skb, pull_len))
+		return 0;
+	inner = (struct ipv6hdr *)skb_transport_header(skb);
+	return ntohl(inner->saddr.s6_addr32[3]);
+}
+
+/* Flow key: source address (last 32 bits for IPv6) of the packet carried
+ * directly inside an IPv4 or IPv6 outer header; 0 if none can be read.
+ */
+static u32 flow_get_tunnel_src(struct sk_buff *skb)
+{
+	u8 nexthdr;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+			return 0;
+		nexthdr = ip_hdr(skb)->protocol;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+			return 0;
+		nexthdr = ipv6_hdr(skb)->nexthdr;
+	} else {
+		return 0;
+	}
+
+	if (nexthdr == IPPROTO_IPIP)
+		return tunnel_inner_ip_src(skb);
+	if (nexthdr == IPPROTO_IPV6)
+		return tunnel_inner_ipv6_src(skb);
+	return 0;
+}
+
+/* Inner IPv4 destination address in host byte order; 0 if not pullable. */
+static u32 tunnel_inner_ip_dst(struct sk_buff *skb)
+{
+	u32 pull_len = skb_network_header_len(skb) + sizeof(struct iphdr);
+
+	if (!pskb_network_may_pull(skb, pull_len))
+		return 0;
+	return ntohl(ipip_hdr(skb)->daddr);
+}
+
+/* Last 32 bits of the inner IPv6 destination address, host order; else 0. */
+static u32 tunnel_inner_ipv6_dst(struct sk_buff *skb)
+{
+	u32 pull_len = skb_network_header_len(skb) + sizeof(struct ipv6hdr);
+	const struct ipv6hdr *inner;
+
+	if (!pskb_network_may_pull(skb, pull_len))
+		return 0;
+	inner = (struct ipv6hdr *)skb_transport_header(skb);
+	return ntohl(inner->daddr.s6_addr32[3]);
+}
+
+/* Flow key: destination address (last 32 bits for IPv6) of the packet carried
+ * directly inside an IPv4 or IPv6 outer header; 0 if none can be read.
+ */
+static u32 flow_get_tunnel_dst(struct sk_buff *skb)
+{
+	u8 nexthdr;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+			return 0;
+		nexthdr = ip_hdr(skb)->protocol;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+			return 0;
+		nexthdr = ipv6_hdr(skb)->nexthdr;
+	} else {
+		return 0;
+	}
+
+	if (nexthdr == IPPROTO_IPIP)
+		return tunnel_inner_ip_dst(skb);
+	if (nexthdr == IPPROTO_IPV6)
+		return tunnel_inner_ipv6_dst(skb);
+	return 0;
+}
+
+/* Protocol number from the IPv4 header carried in the tunnel payload.
+ * Returns 0 when that header cannot be pulled.
+ */
+static u32 tunnel_inner_ip_proto(struct sk_buff *skb)
+{
+	u32 pull_len = skb_network_header_len(skb) + sizeof(struct iphdr);
+	u32 proto = 0;
+
+	if (pskb_network_may_pull(skb, pull_len))
+		proto = ipip_hdr(skb)->protocol;
+
+	return proto;
+}
+
+/* Next-header value from the IPv6 header carried in the tunnel payload.
+ * Returns 0 when that header cannot be pulled.
+ */
+static u32 tunnel_inner_ipv6_proto(struct sk_buff *skb)
+{
+	u32 pull_len = skb_network_header_len(skb) + sizeof(struct ipv6hdr);
+	u32 proto = 0;
+
+	if (pskb_network_may_pull(skb, pull_len))
+		proto = ((struct ipv6hdr *)skb_transport_header(skb))->nexthdr;
+
+	return proto;
+}
+
+/* Flow key: protocol / next-header value of the packet carried directly
+ * inside an IPv4 or IPv6 outer header; 0 if no inner header can be read.
+ */
+static u32 flow_get_tunnel_proto(struct sk_buff *skb)
+{
+	u8 nexthdr;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+			return 0;
+		nexthdr = ip_hdr(skb)->protocol;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+			return 0;
+		nexthdr = ipv6_hdr(skb)->nexthdr;
+	} else {
+		return 0;
+	}
+
+	if (nexthdr == IPPROTO_IPIP)
+		return tunnel_inner_ip_proto(skb);
+	if (nexthdr == IPPROTO_IPV6)
+		return tunnel_inner_ipv6_proto(skb);
+	return 0;
+}
+
+/* Source port of the IPv4 packet carried inside the tunnel (host order).
+ * Returns 0 for fragments, port-less protocols or unreadable headers.
+ */
+static u32 tunnel_inner_ip_proto_src(struct sk_buff *skb)
+{
+	unsigned int off = skb_network_header_len(skb);
+	struct iphdr *iph;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, off + sizeof(*iph)))
+		return 0;
+	iph = ipip_hdr(skb);
+	if (ip_is_fragment(iph))
+		return 0;
+
+	poff = proto_ports_offset(iph->protocol);
+	if (poff < 0 ||
+	    !pskb_network_may_pull(skb, off + iph->ihl * 4 + 2 + poff))
+		return 0;
+	/* The pull may have reallocated the header; reload the pointer. */
+	iph = ipip_hdr(skb);
+	return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + poff));
+}
+
+/* Source port of the IPv6 packet carried inside the tunnel (host order).
+ * Returns 0 for port-less protocols or unreadable headers.
+ */
+static u32 tunnel_inner_ipv6_proto_src(struct sk_buff *skb)
+{
+	unsigned int off = skb_network_header_len(skb);
+	struct ipv6hdr *ipv6h;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, off + sizeof(*ipv6h)))
+		return 0;
+	ipv6h = (struct ipv6hdr *)skb_transport_header(skb);
+	/* The ports sit behind the outer header, so count it in the pull. */
+	poff = proto_ports_offset(ipv6h->nexthdr);
+	if (poff < 0 ||
+	    !pskb_network_may_pull(skb, off + sizeof(*ipv6h) + poff + 2))
+		return 0;
+	/* The pull may have reallocated the header; reload the pointer. */
+	ipv6h = (struct ipv6hdr *)skb_transport_header(skb);
+	return ntohs(*(__be16 *)((void *)ipv6h + sizeof(*ipv6h) + poff));
+}
+
+/* Flow key: source port of the packet carried directly inside an IPv4 or
+ * IPv6 outer header. Returns 0 when the outer header is neither, when it
+ * does not carry an IPv4/IPv6 packet, or when the inner lookup yields 0.
+ */
+static u32 flow_get_tunnel_proto_src(struct sk_buff *skb)
+{
+	u8 nexthdr;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+			return 0;
+		nexthdr = ip_hdr(skb)->protocol;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+			return 0;
+		nexthdr = ipv6_hdr(skb)->nexthdr;
+	} else {
+		return 0;
+	}
+
+	if (nexthdr == IPPROTO_IPIP)
+		return tunnel_inner_ip_proto_src(skb);
+	if (nexthdr == IPPROTO_IPV6)
+		return tunnel_inner_ipv6_proto_src(skb);
+	return 0;
+}
+
+/* Destination port of the IPv4 packet carried inside the tunnel (host order).
+ * Returns 0 for fragments, port-less protocols or unreadable headers.
+ */
+static u32 tunnel_inner_ip_proto_dst(struct sk_buff *skb)
+{
+	unsigned int off = skb_network_header_len(skb);
+	struct iphdr *iph;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, off + sizeof(*iph)))
+		return 0;
+	iph = ipip_hdr(skb);
+	if (ip_is_fragment(iph))
+		return 0;
+
+	poff = proto_ports_offset(iph->protocol);
+	if (poff < 0 ||
+	    !pskb_network_may_pull(skb, off + iph->ihl * 4 + 4 + poff))
+		return 0;
+	/* The pull may have reallocated the header; reload the pointer. */
+	iph = ipip_hdr(skb);
+	return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2 + poff));
+}
+
+/* Destination port of the IPv6 packet carried inside the tunnel (host order).
+ * Returns 0 for port-less protocols or unreadable headers.
+ */
+static u32 tunnel_inner_ipv6_proto_dst(struct sk_buff *skb)
+{
+	unsigned int off = skb_network_header_len(skb);
+	struct ipv6hdr *ipv6h;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, off + sizeof(*ipv6h)))
+		return 0;
+	ipv6h = (struct ipv6hdr *)skb_transport_header(skb);
+	/* The ports sit behind the outer header, so count it in the pull. */
+	poff = proto_ports_offset(ipv6h->nexthdr);
+	if (poff < 0 ||
+	    !pskb_network_may_pull(skb, off + sizeof(*ipv6h) + poff + 4))
+		return 0;
+	/* The pull may have reallocated the header; reload the pointer. */
+	ipv6h = (struct ipv6hdr *)skb_transport_header(skb);
+	return ntohs(*(__be16 *)((void *)ipv6h + sizeof(*ipv6h) + poff + 2));
+}
+
+/* Flow key: destination port of the packet carried directly inside an
+ * IPv4 or IPv6 outer header; 0 if no inner port can be read.
+ */
+static u32 flow_get_tunnel_proto_dst(struct sk_buff *skb)
+{
+	u8 nexthdr;
+
+	if (skb->protocol == htons(ETH_P_IP)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+			return 0;
+		nexthdr = ip_hdr(skb)->protocol;
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
+			return 0;
+		nexthdr = ipv6_hdr(skb)->nexthdr;
+	} else {
+		return 0;
+	}
+
+	if (nexthdr == IPPROTO_IPIP)
+		return tunnel_inner_ip_proto_dst(skb);
+	if (nexthdr == IPPROTO_IPV6)
+		return tunnel_inner_ipv6_proto_dst(skb);
+	return 0;
+}
+
 static u32 flow_key_get(struct sk_buff *skb, int key)
 {
 	switch (key) {
@@ -350,6 +645,16 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
 		return flow_get_vlan_tag(skb);
 	case FLOW_KEY_RXHASH:
 		return flow_get_rxhash(skb);
+	case FLOW_KEY_TUNNEL_SRC:
+		return flow_get_tunnel_src(skb);
+	case FLOW_KEY_TUNNEL_DST:
+		return flow_get_tunnel_dst(skb);
+	case FLOW_KEY_TUNNEL_PROTO:
+		return flow_get_tunnel_proto(skb);
+	case FLOW_KEY_TUNNEL_PROTO_SRC:
+		return flow_get_tunnel_proto_src(skb);
+	case FLOW_KEY_TUNNEL_PROTO_DST:
+		return flow_get_tunnel_proto_dst(skb);
 	default:
 		WARN_ON(1);
 		return 0;


[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

             reply	other threads:[~2011-10-16 23:06 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-10-16 23:06 Dan Siemon [this message]
2011-10-17  6:40 ` [PATCH] cls_flow: Add tunnel support to the flow classifier Eric Dumazet
2011-10-24  1:21   ` Dan Siemon
2011-10-24  3:14     ` Eric Dumazet
2011-10-24  3:59       ` Eric Dumazet
2011-10-24 22:36         ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1318806373.7169.35.camel@ganymede \
    --to=dan@coverfire.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).