netdev.vger.kernel.org archive mirror
* [PATCH] cls_flow: Add tunnel support to the flow classifier
From: Dan Siemon @ 2011-10-16 23:06 UTC
  To: netdev

When used on an interface carrying tunneled traffic, the flow classifier
can't look inside the tunnels, so all of the traffic within a tunnel is
treated as a single flow and no intelligent per-flow queuing can occur.
This patch adds new keys to the flow classifier which look inside the
tunnel. Presently IPv4-in-IPv4, IPv6-in-IPv4, IPv4-in-IPv6 and
IPv6-in-IPv6 tunnels are supported.
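
For concreteness, a sketch of the encapsulation these keys parse, for
the IPv4-in-IPv4 case (illustrative comment only, not part of the
patch):

/*
 * With the network header on the outer header, the frame looks like:
 *
 *   outer struct iphdr  - protocol == IPPROTO_IPIP
 *   inner struct iphdr  - FLOW_KEY_TUNNEL_SRC/DST read saddr/daddr here
 *   TCP/UDP header      - FLOW_KEY_TUNNEL_PROTO_SRC/DST read the ports
 *
 * The helpers below rely on skb_transport_header() already pointing at
 * the inner header, which is exactly what ipip_hdr(skb) returns.
 */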

If you are interested I have posted some background and experimental
results at:
http://www.coverfire.com/archives/2011/10/16/making-the-linux-flow-classifier-tunnel-aware/

The related iproute2 patch can be found at the above URL as well.

Signed-off-by: Dan Siemon <dan@coverfire.com>

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index defbde2..2f80fa0 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -333,6 +333,11 @@ enum {
 	FLOW_KEY_SKGID,
 	FLOW_KEY_VLAN_TAG,
 	FLOW_KEY_RXHASH,
+	FLOW_KEY_TUNNEL_SRC,
+	FLOW_KEY_TUNNEL_DST,
+	FLOW_KEY_TUNNEL_PROTO,
+	FLOW_KEY_TUNNEL_PROTO_SRC,
+	FLOW_KEY_TUNNEL_PROTO_DST,
 	__FLOW_KEY_MAX,
 };
 
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 6994214..f0bd3ad 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -311,6 +311,301 @@ static u32 flow_get_rxhash(struct sk_buff *skb)
 	return skb_get_rxhash(skb);
 }
 
+static u32 tunnel_inner_ip_src(struct sk_buff *skb)
+{
+	if (pskb_network_may_pull(skb, skb_network_header_len(skb) +
+							sizeof(struct iphdr))) {
+		return ntohl(ipip_hdr(skb)->saddr);
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ipv6_src(struct sk_buff *skb)
+{
+	if (pskb_network_may_pull(skb, skb_network_header_len(skb) +
+						sizeof(struct ipv6hdr))) {
+		struct ipv6hdr *iph = (struct ipv6hdr *)
+					skb_transport_header(skb);
+		return ntohl(iph->saddr.s6_addr32[3]);
+	}
+
+	return 0;
+}
+
+static u32 flow_get_tunnel_src(struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (pskb_network_may_pull(skb, sizeof(struct iphdr))) {
+			if (ip_hdr(skb)->protocol == IPPROTO_IPIP) {
+				return tunnel_inner_ip_src(skb);
+			} else if (ip_hdr(skb)->protocol == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_src(skb);
+			}
+		}
+		break;
+	case htons(ETH_P_IPV6):
+		if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) {
+			if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPIP) {
+				return tunnel_inner_ip_src(skb);
+			} else if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_src(skb);
+			}
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ip_dst(struct sk_buff *skb)
+{
+	if (pskb_network_may_pull(skb, skb_network_header_len(skb) +
+							sizeof(struct iphdr))) {
+		return ntohl(ipip_hdr(skb)->daddr);
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ipv6_dst(struct sk_buff *skb)
+{
+	if (pskb_network_may_pull(skb, skb_network_header_len(skb) +
+						sizeof(struct ipv6hdr))) {
+		struct ipv6hdr *iph = (struct ipv6hdr *)
+					skb_transport_header(skb);
+		return ntohl(iph->daddr.s6_addr32[3]);
+	}
+
+	return 0;
+}
+
+static u32 flow_get_tunnel_dst(struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (pskb_network_may_pull(skb, sizeof(struct iphdr))) {
+			if (ip_hdr(skb)->protocol == IPPROTO_IPIP) {
+				return tunnel_inner_ip_dst(skb);
+			} else if (ip_hdr(skb)->protocol == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_dst(skb);
+			}
+		}
+		break;
+	case htons(ETH_P_IPV6):
+		if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) {
+			if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPIP) {
+				return tunnel_inner_ip_dst(skb);
+			} else if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_dst(skb);
+			}
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ip_proto(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+							sizeof(struct iphdr))) {
+		return 0;
+	}
+
+	iph = ipip_hdr(skb);
+
+	return iph->protocol;
+}
+
+static u32 tunnel_inner_ipv6_proto(struct sk_buff *skb)
+{
+	struct ipv6hdr *ipv6h;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+						sizeof(struct ipv6hdr))) {
+		return 0;
+	}
+
+	ipv6h = (struct ipv6hdr *)skb_transport_header(skb);
+
+	return ipv6h->nexthdr;
+}
+
+static u32 flow_get_tunnel_proto(struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (pskb_network_may_pull(skb, sizeof(struct iphdr))) {
+			if (ip_hdr(skb)->protocol == IPPROTO_IPIP) {
+				return tunnel_inner_ip_proto(skb);
+			} else if (ip_hdr(skb)->protocol == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_proto(skb);
+			}
+		}
+		break;
+	case htons(ETH_P_IPV6):
+		if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) {
+			if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPIP) {
+				return tunnel_inner_ip_proto(skb);
+			} else if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_proto(skb);
+			}
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ip_proto_src(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+							sizeof(struct iphdr))) {
+		return 0;
+	}
+
+	iph = ipip_hdr(skb);
+
+	if (ip_is_fragment(iph))
+		return 0;
+
+	poff = proto_ports_offset(iph->protocol);
+	if (poff >= 0 && pskb_network_may_pull(skb, skb_network_header_len(skb)
+						+ iph->ihl * 4 + 2 + poff)) {
+		return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + poff));
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ipv6_proto_src(struct sk_buff *skb)
+{
+	struct ipv6hdr *ipv6h;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+						sizeof(struct ipv6hdr))) {
+		return 0;
+	}
+
+	ipv6h = (struct ipv6hdr *)skb_transport_header(skb);
+
+	poff = proto_ports_offset(ipv6h->nexthdr);
+	if (poff >= 0 &&
+		    pskb_network_may_pull(skb, sizeof(*ipv6h) + poff + 2)) {
+		return ntohs(*(__be16 *)((void *)ipv6h + sizeof(*ipv6h) +
+						 			poff));
+	}
+
+	return 0;
+}
+
+static u32 flow_get_tunnel_proto_src(struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (pskb_network_may_pull(skb, sizeof(struct iphdr))) {
+			if (ip_hdr(skb)->protocol == IPPROTO_IPIP) {
+				return tunnel_inner_ip_proto_src(skb);
+			} else if (ip_hdr(skb)->protocol == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_proto_src(skb);
+			}
+			return 0;
+		}
+		break;
+	case htons(ETH_P_IPV6):
+		if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) {
+			if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPIP) {
+				return tunnel_inner_ip_proto_src(skb);
+			} else if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_proto_src(skb);
+			}
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ip_proto_dst(struct sk_buff *skb)
+{
+	struct iphdr *iph;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+							sizeof(struct iphdr))) {
+		return 0;
+	}
+
+	iph = ipip_hdr(skb);
+
+	if (ip_is_fragment(iph))
+		return 0;
+
+	poff = proto_ports_offset(iph->protocol);
+	if (poff >= 0 && pskb_network_may_pull(skb, skb_network_header_len(skb)
+						+ iph->ihl * 4 + 4 + poff)) {
+		return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2 + poff));
+	}
+
+	return 0;
+}
+
+static u32 tunnel_inner_ipv6_proto_dst(struct sk_buff *skb)
+{
+	struct ipv6hdr *ipv6h;
+	int poff;
+
+	if (!pskb_network_may_pull(skb, skb_network_header_len(skb) +
+						sizeof(struct ipv6hdr))) {
+		return 0;
+	}
+
+	ipv6h = (struct ipv6hdr *)skb_transport_header(skb);
+
+	poff = proto_ports_offset(ipv6h->nexthdr);
+	if (poff >= 0 &&
+		    pskb_network_may_pull(skb, sizeof(*ipv6h) + poff + 4)) {
+		return ntohs(*(__be16 *)((void *)ipv6h + sizeof(*ipv6h) +
+						 		poff + 2));
+	}
+
+	return 0;
+}
+
+static u32 flow_get_tunnel_proto_dst(struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		if (pskb_network_may_pull(skb, sizeof(struct iphdr))) {
+			if (ip_hdr(skb)->protocol == IPPROTO_IPIP) {
+				return tunnel_inner_ip_proto_dst(skb);
+			} else if (ip_hdr(skb)->protocol == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_proto_dst(skb);
+			}
+		}
+		break;
+	case htons(ETH_P_IPV6):
+		if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) {
+			if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPIP) {
+				return tunnel_inner_ip_proto_dst(skb);
+			} else if (ipv6_hdr(skb)->nexthdr == IPPROTO_IPV6) {
+				return tunnel_inner_ipv6_proto_dst(skb);
+			}
+		}
+		break;
+	}
+
+	return 0;
+}
+
 static u32 flow_key_get(struct sk_buff *skb, int key)
 {
 	switch (key) {
@@ -350,6 +645,16 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
 		return flow_get_vlan_tag(skb);
 	case FLOW_KEY_RXHASH:
 		return flow_get_rxhash(skb);
+	case FLOW_KEY_TUNNEL_SRC:
+		return flow_get_tunnel_src(skb);
+	case FLOW_KEY_TUNNEL_DST:
+		return flow_get_tunnel_dst(skb);
+	case FLOW_KEY_TUNNEL_PROTO:
+		return flow_get_tunnel_proto(skb);
+	case FLOW_KEY_TUNNEL_PROTO_SRC:
+		return flow_get_tunnel_proto_src(skb);
+	case FLOW_KEY_TUNNEL_PROTO_DST:
+		return flow_get_tunnel_proto_dst(skb);
 	default:
 		WARN_ON(1);
 		return 0;


* Re: [PATCH] cls_flow: Add tunnel support to the flow classifier
From: Eric Dumazet @ 2011-10-17  6:40 UTC
  To: Dan Siemon; +Cc: netdev

On Sunday, 16 October 2011 at 19:06 -0400, Dan Siemon wrote:
> When used on an interface carrying tunneled traffic, the flow classifier
> can't look inside the tunnels, so all of the traffic within a tunnel is
> treated as a single flow and no intelligent per-flow queuing can occur.
> This patch adds new keys to the flow classifier which look inside the
> tunnel. Presently IPv4-in-IPv4, IPv6-in-IPv4, IPv4-in-IPv6 and
> IPv6-in-IPv6 tunnels are supported.
> 
> If you are interested I have posted some background and experimental
> results at:
> http://www.coverfire.com/archives/2011/10/16/making-the-linux-flow-classifier-tunnel-aware/
> 
> The related iproute2 patch can be found at the above URL as well.
> 
> Signed-off-by: Dan Siemon <dan@coverfire.com>
> 

Hi Dan

You're adding a lot of code (and you omitted the diffstat :( ) for a
specific use case, yet GRE tunnels are not supported.

The IPv6 part is also a bit limited: it assumes the TCP/UDP header comes
first, with no extension headers. Maybe it's time to use ipv6_skip_exthdr()?
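
To illustrate the idea, a minimal sketch (a hypothetical helper, not
part of any posted patch, using the current three-argument
ipv6_skip_exthdr()):

static int flow_ipv6_transport_offset(struct sk_buff *skb, u8 *nexthdrp)
{
	const struct ipv6hdr *iph;

	if (!pskb_network_may_pull(skb, sizeof(*iph)))
		return -1;
	iph = ipv6_hdr(skb);
	*nexthdrp = iph->nexthdr;
	/* walk the extension header chain, updating *nexthdrp; returns
	 * the offset of the transport header, or a negative value
	 */
	return ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(*iph),
				nexthdrp);
}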

Note also that if we pull (with pskb_network_may_pull()) too many bytes,
we kill routing performance on paged-frags devices, which are now
becoming very common.

Adding tunnel support and deep packet inspection might require the use
of skb_header_pointer(), which copies the needed data without requiring
an expensive reallocation of the skb head.
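
The pattern in its minimal form (a hypothetical helper, for
illustration only):

static u32 flow_port_example(const struct sk_buff *skb, int thoff)
{
	__be16 _port, *port;

	/* returns a pointer into the skb when the bytes are linear,
	 * otherwise copies them into _port; NULL if out of bounds
	 */
	port = skb_header_pointer(skb, thoff, sizeof(_port), &_port);
	return port ? ntohs(*port) : 0;
}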

* Re: [PATCH] cls_flow: Add tunnel support to the flow classifier
From: Dan Siemon @ 2011-10-24  1:21 UTC
  To: Eric Dumazet; +Cc: netdev

On Mon, 2011-10-17 at 08:40 +0200, Eric Dumazet wrote:
> On Sunday, 16 October 2011 at 19:06 -0400, Dan Siemon wrote:
> > When used on an interface carrying tunneled traffic, the flow classifier
> > can't look inside the tunnels, so all of the traffic within a tunnel is
> > treated as a single flow and no intelligent per-flow queuing can occur.
> > This patch adds new keys to the flow classifier which look inside the
> > tunnel. Presently IPv4-in-IPv4, IPv6-in-IPv4, IPv4-in-IPv6 and
> > IPv6-in-IPv6 tunnels are supported.
> > 
> > If you are interested I have posted some background and experimental
> > results at:
> > http://www.coverfire.com/archives/2011/10/16/making-the-linux-flow-classifier-tunnel-aware/
> > 
> > The related iproute2 patch can be found at the above URL as well.
> > 
> > Signed-off-by: Dan Siemon <dan@coverfire.com>
> > 
> 
> Hi Dan
> 
> You're adding a lot of code (and you omitted the diffstat :( ) for a
> specific use case, yet GRE tunnels are not supported.

Thanks for the review.

Are you arguing this use case isn't worth addressing or that there is a
more efficient way to implement this with less code?

> The IPv6 part is also a bit limited: it assumes the TCP/UDP header comes
> first, with no extension headers. Maybe it's time to use ipv6_skip_exthdr()?

I noticed this too, but the existing src-proto and dst-proto keys don't
handle this case either. Maybe I can look into fixing those as well.

> Note also that if we pull (with pskb_network_may_pull()) too many bytes,
> we kill routing performance on paged-frags devices, which are now
> becoming very common.

I don't know what paged-frags devices are, but I trust you are correct :)

The existing keys also use pskb_network_may_pull(). Should they be changed as well?

> Adding tunnel support and deep packet inspection might require the use
> of skb_header_pointer(), which copies the needed data without requiring
> an expensive reallocation of the skb head.

I'll look into this, but it may be a while before I have an updated
patch.

* Re: [PATCH] cls_flow: Add tunnel support to the flow classifier
From: Eric Dumazet @ 2011-10-24  3:14 UTC
  To: Dan Siemon; +Cc: netdev

On Sunday, 23 October 2011 at 21:21 -0400, Dan Siemon wrote:

> Thanks for the review.
> 
> Are you arguing this use case isn't worth addressing or that there is a
> more efficient way to implement this with less code?
> 

It's worth doing, but it also needs more efficient code ;)

As long as we were reading only bytes within the first 64 bytes of the
frame, the existing code was probably fine and efficient.

If we keep adding features, we are going to need more bytes, which can
trigger expensive skb head reallocations.

> > The IPv6 part is also a bit limited: it assumes the TCP/UDP header comes
> > first, with no extension headers. Maybe it's time to use ipv6_skip_exthdr()?
> 
> I noticed this too, but the existing src-proto and dst-proto keys don't
> handle this case either. Maybe I can look into fixing those as well.
> 

Yes.

> > Note also that if we pull (with pskb_network_may_pull()) too many bytes,
> > we kill routing performance on paged-frags devices, which are now
> > becoming very common.
> 
> I don't know what paged-frags devices are, but I trust you are correct :)
> 
> The existing keys also use pskb_network_may_pull(). Should they be changed as well?
> 

A frame delivered by such a device has, for example, 64 bytes present in
the skb head, while the remaining data sits in attached fragment(s).

For example :
drivers/net/ethernet/emulex/benet/be.h
/* Number of bytes of an RX frame that are copied to skb->data */
#define BE_HDR_LEN      ((u16) 64)

This works well as long as the fragment stays as-is until the frame is
delivered to userland or forwarded.

Using pskb_network_may_pull() on data that resides in a fragment may
force a reallocation of the skb head because it was too small, including
a copy of struct skb_shared_info and of all the headroom (usually 64
bytes reserved by dev_alloc_skb()).
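
In other words (an illustrative predicate, not actual stack code):

static bool pull_would_touch_frags(const struct sk_buff *skb, int len)
{
	/* pskb_may_pull() has to move data from the fragments into the
	 * skb head (possibly reallocating it) whenever this is true
	 */
	return len > skb_headlen(skb);
}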

Adding tunnelling code can definitely push the maximum offset of
inspected data beyond 64 bytes into the frame.

skb_header_pointer() can access frag data without reallocations.

You can find many usage examples in net/sched/cls_u32.c and net/netfilter.

If you prefer, I can do the preliminary work.

Here is a patch to give a hint:

diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 6994214..cda6bf1 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -65,19 +65,27 @@ static inline u32 addr_fold(void *addr)
 	return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
 }
 
-static u32 flow_get_src(struct sk_buff *skb)
+static u32 flow_get_src(const struct sk_buff *skb, int nhoff)
 {
+	__be32 *data = NULL, hdata;
+
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-		if (pskb_network_may_pull(skb, sizeof(struct iphdr)))
-			return ntohl(ip_hdr(skb)->saddr);
+		data = skb_header_pointer(skb,
+					  nhoff + offsetof(struct iphdr,
+							   saddr),
+					  4, &hdata);
 		break;
 	case htons(ETH_P_IPV6):
-		if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
-			return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
+		data = skb_header_pointer(skb,
+					 nhoff + offsetof(struct ipv6hdr,
+							  saddr.s6_addr32[3]),
+					 4, &hdata);
 		break;
 	}
 
+	if (data)
+		return ntohl(*data);
 	return addr_fold(skb->sk);
 }
 
@@ -236,7 +244,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
 })
 #endif
 
-static u32 flow_get_nfct_src(struct sk_buff *skb)
+static u32 flow_get_nfct_src(const struct sk_buff *skb, int nhoff)
 {
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
@@ -245,7 +253,7 @@ static u32 flow_get_nfct_src(struct sk_buff *skb)
 		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
 	}
 fallback:
-	return flow_get_src(skb);
+	return flow_get_src(skb, nhoff);
 }
 
 static u32 flow_get_nfct_dst(struct sk_buff *skb)
@@ -313,9 +321,11 @@ static u32 flow_get_rxhash(struct sk_buff *skb)
 
 static u32 flow_key_get(struct sk_buff *skb, int key)
 {
+	int nhoff = skb_network_offset(skb);
+
 	switch (key) {
 	case FLOW_KEY_SRC:
-		return flow_get_src(skb);
+		return flow_get_src(skb, nhoff);
 	case FLOW_KEY_DST:
 		return flow_get_dst(skb);
 	case FLOW_KEY_PROTO:
@@ -333,7 +343,7 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
 	case FLOW_KEY_NFCT:
 		return flow_get_nfct(skb);
 	case FLOW_KEY_NFCT_SRC:
-		return flow_get_nfct_src(skb);
+		return flow_get_nfct_src(skb, nhoff);
 	case FLOW_KEY_NFCT_DST:
 		return flow_get_nfct_dst(skb);
 	case FLOW_KEY_NFCT_PROTO_SRC:

* Re: [PATCH] cls_flow: Add tunnel support to the flow classifier
From: Eric Dumazet @ 2011-10-24  3:59 UTC
  To: Dan Siemon; +Cc: netdev

On Monday, 24 October 2011 at 05:14 +0200, Eric Dumazet wrote:

> If you prefer, I can do the preliminary work.
> 

Since it is a bit tricky, I finished it.

It'll be easier for you to use the existing functions without
copy/paste, since I added an "nhoff" argument that you can play with
(skipping the tunnel header).
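
For example, a hypothetical (untested) IPv4-in-IPv4 tunnel key could
then be layered on top of the reworked helpers like this:

static u32 flow_get_tunnel_src(const struct sk_buff *skb, int nhoff)
{
	struct iphdr *iph, _iph;

	if (skb->protocol != htons(ETH_P_IP))
		return 0;
	iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
	if (!iph || iph->protocol != IPPROTO_IPIP)
		return 0;
	/* the inner IPv4 header starts right after the outer one */
	return flow_get_src(skb, nhoff + iph->ihl * 4);
}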

Please test it!

Thanks

[PATCH net-next] net_sched: cls_flow: use skb_header_pointer()

Dan Siemon would like to add tunnelling support to cls_flow.

This preliminary patch introduces the use of skb_header_pointer() to
help with this task, avoiding skb head reallocation caused by deep
packet inspection.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/sched/cls_flow.c |  188 ++++++++++++++++++++---------------------
 1 file changed, 96 insertions(+), 92 deletions(-)

diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 6994214..9e087d8 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -65,132 +65,134 @@ static inline u32 addr_fold(void *addr)
 	return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
 }
 
-static u32 flow_get_src(struct sk_buff *skb)
+static u32 flow_get_src(const struct sk_buff *skb, int nhoff)
 {
+	__be32 *data = NULL, hdata;
+
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-		if (pskb_network_may_pull(skb, sizeof(struct iphdr)))
-			return ntohl(ip_hdr(skb)->saddr);
+		data = skb_header_pointer(skb,
+					  nhoff + offsetof(struct iphdr,
+							   saddr),
+					  4, &hdata);
 		break;
 	case htons(ETH_P_IPV6):
-		if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
-			return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
+		data = skb_header_pointer(skb,
+					 nhoff + offsetof(struct ipv6hdr,
+							  saddr.s6_addr32[3]),
+					 4, &hdata);
 		break;
 	}
 
+	if (data)
+		return ntohl(*data);
 	return addr_fold(skb->sk);
 }
 
-static u32 flow_get_dst(struct sk_buff *skb)
+static u32 flow_get_dst(const struct sk_buff *skb, int nhoff)
 {
+	__be32 *data = NULL, hdata;
+
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-		if (pskb_network_may_pull(skb, sizeof(struct iphdr)))
-			return ntohl(ip_hdr(skb)->daddr);
+		data = skb_header_pointer(skb,
+					  nhoff + offsetof(struct iphdr,
+							   daddr),
+					  4, &hdata);
 		break;
 	case htons(ETH_P_IPV6):
-		if (pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
-			return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]);
+		data = skb_header_pointer(skb,
+					 nhoff + offsetof(struct ipv6hdr,
+							  daddr.s6_addr32[3]),
+					 4, &hdata);
 		break;
 	}
 
+	if (data)
+		return ntohl(*data);
 	return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
 }
 
-static u32 flow_get_proto(struct sk_buff *skb)
+static u32 flow_get_proto(const struct sk_buff *skb, int nhoff)
 {
+	__u8 *data = NULL, hdata;
+
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-		return pskb_network_may_pull(skb, sizeof(struct iphdr)) ?
-		       ip_hdr(skb)->protocol : 0;
+		data = skb_header_pointer(skb,
+					  nhoff + offsetof(struct iphdr,
+							   protocol),
+					  1, &hdata);
+		break;
 	case htons(ETH_P_IPV6):
-		return pskb_network_may_pull(skb, sizeof(struct ipv6hdr)) ?
-		       ipv6_hdr(skb)->nexthdr : 0;
-	default:
-		return 0;
+		data = skb_header_pointer(skb,
+					 nhoff + offsetof(struct ipv6hdr,
+							  nexthdr),
+					 1, &hdata);
+		break;
 	}
+	if (data)
+		return *data;
+	return 0;
 }
 
-static u32 flow_get_proto_src(struct sk_buff *skb)
+/* helper function to get either src or dst port */
+static __be16 *flow_get_proto_common(const struct sk_buff *skb, int nhoff,
+				     __be16 *_port, int dst)
 {
+	__be16 *port = NULL;
+	int poff;
+
 	switch (skb->protocol) {
 	case htons(ETH_P_IP): {
-		struct iphdr *iph;
-		int poff;
+		struct iphdr *iph, _iph;
 
-		if (!pskb_network_may_pull(skb, sizeof(*iph)))
+		iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+		if (!iph)
 			break;
-		iph = ip_hdr(skb);
 		if (ip_is_fragment(iph))
 			break;
 		poff = proto_ports_offset(iph->protocol);
-		if (poff >= 0 &&
-		    pskb_network_may_pull(skb, iph->ihl * 4 + 2 + poff)) {
-			iph = ip_hdr(skb);
-			return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
-						 poff));
-		}
+		if (poff >= 0)
+			port = skb_header_pointer(skb,
+					nhoff + iph->ihl * 4 + poff + dst,
+					sizeof(*_port), _port);
 		break;
 	}
 	case htons(ETH_P_IPV6): {
-		struct ipv6hdr *iph;
-		int poff;
+		struct ipv6hdr *iph, _iph;
 
-		if (!pskb_network_may_pull(skb, sizeof(*iph)))
+		iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+		if (!iph)
 			break;
-		iph = ipv6_hdr(skb);
 		poff = proto_ports_offset(iph->nexthdr);
-		if (poff >= 0 &&
-		    pskb_network_may_pull(skb, sizeof(*iph) + poff + 2)) {
-			iph = ipv6_hdr(skb);
-			return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
-						 poff));
-		}
+		if (poff >= 0)
+			port = skb_header_pointer(skb,
+					nhoff + sizeof(*iph) + poff + dst,
+					sizeof(*_port), _port);
 		break;
 	}
 	}
 
-	return addr_fold(skb->sk);
+	return port;
 }
 
-static u32 flow_get_proto_dst(struct sk_buff *skb)
+static u32 flow_get_proto_src(const struct sk_buff *skb, int nhoff)
 {
-	switch (skb->protocol) {
-	case htons(ETH_P_IP): {
-		struct iphdr *iph;
-		int poff;
+	__be16 _port, *port = flow_get_proto_common(skb, nhoff, &_port, 0);
 
-		if (!pskb_network_may_pull(skb, sizeof(*iph)))
-			break;
-		iph = ip_hdr(skb);
-		if (ip_is_fragment(iph))
-			break;
-		poff = proto_ports_offset(iph->protocol);
-		if (poff >= 0 &&
-		    pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
-			iph = ip_hdr(skb);
-			return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
-						 2 + poff));
-		}
-		break;
-	}
-	case htons(ETH_P_IPV6): {
-		struct ipv6hdr *iph;
-		int poff;
+	if (port)
+		return ntohs(*port);
 
-		if (!pskb_network_may_pull(skb, sizeof(*iph)))
-			break;
-		iph = ipv6_hdr(skb);
-		poff = proto_ports_offset(iph->nexthdr);
-		if (poff >= 0 &&
-		    pskb_network_may_pull(skb, sizeof(*iph) + poff + 4)) {
-			iph = ipv6_hdr(skb);
-			return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
-						 poff + 2));
-		}
-		break;
-	}
-	}
+	return addr_fold(skb->sk);
+}
+
+static u32 flow_get_proto_dst(const struct sk_buff *skb, int nhoff)
+{
+	__be16 _port, *port = flow_get_proto_common(skb, nhoff, &_port, 2);
+
+	if (port)
+		return ntohs(*port);
 
 	return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
 }
@@ -223,7 +225,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
 #define CTTUPLE(skb, member)						\
 ({									\
 	enum ip_conntrack_info ctinfo;					\
-	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);			\
+	const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);		\
 	if (ct == NULL)							\
 		goto fallback;						\
 	ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member;			\
@@ -236,7 +238,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
 })
 #endif
 
-static u32 flow_get_nfct_src(struct sk_buff *skb)
+static u32 flow_get_nfct_src(const struct sk_buff *skb, int nhoff)
 {
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
@@ -245,10 +247,10 @@ static u32 flow_get_nfct_src(struct sk_buff *skb)
 		return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
 	}
 fallback:
-	return flow_get_src(skb);
+	return flow_get_src(skb, nhoff);
 }
 
-static u32 flow_get_nfct_dst(struct sk_buff *skb)
+static u32 flow_get_nfct_dst(const struct sk_buff *skb, int nhoff)
 {
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
@@ -257,21 +259,21 @@ static u32 flow_get_nfct_dst(struct sk_buff *skb)
 		return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
 	}
 fallback:
-	return flow_get_dst(skb);
+	return flow_get_dst(skb, nhoff);
 }
 
-static u32 flow_get_nfct_proto_src(struct sk_buff *skb)
+static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, int nhoff)
 {
 	return ntohs(CTTUPLE(skb, src.u.all));
 fallback:
-	return flow_get_proto_src(skb);
+	return flow_get_proto_src(skb, nhoff);
 }
 
-static u32 flow_get_nfct_proto_dst(struct sk_buff *skb)
+static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, int nhoff)
 {
 	return ntohs(CTTUPLE(skb, dst.u.all));
 fallback:
-	return flow_get_proto_dst(skb);
+	return flow_get_proto_dst(skb, nhoff);
 }
 
 static u32 flow_get_rtclassid(const struct sk_buff *skb)
@@ -313,17 +315,19 @@ static u32 flow_get_rxhash(struct sk_buff *skb)
 
 static u32 flow_key_get(struct sk_buff *skb, int key)
 {
+	int nhoff = skb_network_offset(skb);
+
 	switch (key) {
 	case FLOW_KEY_SRC:
-		return flow_get_src(skb);
+		return flow_get_src(skb, nhoff);
 	case FLOW_KEY_DST:
-		return flow_get_dst(skb);
+		return flow_get_dst(skb, nhoff);
 	case FLOW_KEY_PROTO:
-		return flow_get_proto(skb);
+		return flow_get_proto(skb, nhoff);
 	case FLOW_KEY_PROTO_SRC:
-		return flow_get_proto_src(skb);
+		return flow_get_proto_src(skb, nhoff);
 	case FLOW_KEY_PROTO_DST:
-		return flow_get_proto_dst(skb);
+		return flow_get_proto_dst(skb, nhoff);
 	case FLOW_KEY_IIF:
 		return flow_get_iif(skb);
 	case FLOW_KEY_PRIORITY:
@@ -333,13 +337,13 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
 	case FLOW_KEY_NFCT:
 		return flow_get_nfct(skb);
 	case FLOW_KEY_NFCT_SRC:
-		return flow_get_nfct_src(skb);
+		return flow_get_nfct_src(skb, nhoff);
 	case FLOW_KEY_NFCT_DST:
-		return flow_get_nfct_dst(skb);
+		return flow_get_nfct_dst(skb, nhoff);
 	case FLOW_KEY_NFCT_PROTO_SRC:
-		return flow_get_nfct_proto_src(skb);
+		return flow_get_nfct_proto_src(skb, nhoff);
 	case FLOW_KEY_NFCT_PROTO_DST:
-		return flow_get_nfct_proto_dst(skb);
+		return flow_get_nfct_proto_dst(skb, nhoff);
 	case FLOW_KEY_RTCLASSID:
 		return flow_get_rtclassid(skb);
 	case FLOW_KEY_SKUID:

* Re: [PATCH] cls_flow: Add tunnel support to the flow classifier
From: David Miller @ 2011-10-24 22:36 UTC
  To: eric.dumazet; +Cc: dan, netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 24 Oct 2011 05:59:41 +0200

> [PATCH net-next] net_sched: cls_flow: use skb_header_pointer()
> 
> Dan Siemon would like to add tunnelling support to cls_flow.
> 
> This preliminary patch introduces the use of skb_header_pointer() to
> help with this task, avoiding skb head reallocation caused by deep
> packet inspection.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied.
