All of lore.kernel.org
 help / color / mirror / Atom feed
From: Simon Horman <horms@verge.net.au>
To: netdev@vger.kernel.org, lvs-devel@vger.kernel.org
Cc: "Malcolm Turnbull" <malcolm@loadbalancer.org>,
	"Siim Põder" <siim@p6drad-teel.net>,
	"Julius Volz" <juliusv@google.com>,
	"Vince Busam" <vbusam@google.com>
Subject: [PATCH 1/2] ipvs: load balance IPv4 connections from a local process
Date: Fri, 5 Sep 2008 11:36:11 +1000	[thread overview]
Message-ID: <20080905013609.GD14128@verge.net.au> (raw)

From: Malcolm Turnbull <malcolm@loadbalancer.org>

ipvs: load balance IPv4 connections from a local process

This allows IPVS to load balance connections made by a local process,
for example a proxy server running locally:

External client --> pound:443 -> Local:443 --> IPVS:80 --> RealServer

Signed-off-by: Siim Põder <siim@p6drad-teel.net>
Signed-off-by: Malcolm Turnbull <malcolm@loadbalancer.org>
Signed-off-by: Simon Horman <horms@verge.net.au>

--- 

 net/ipv4/ipvs/ip_vs_core.c      |  224 ++++++++++++++++++++++-----------------
 net/ipv4/ipvs/ip_vs_proto_tcp.c |    4 
 2 files changed, 134 insertions(+), 94 deletions(-)

* Simon Horman, Wed, 03 Sep 2008 14:50:36 +1000

  I have updated this patch so that it will apply on top
  of the current IPv6 patches.

  http://marc.info/?l=linux-netdev&m=122036407428246&w=2

  I have also updated the patch so that it does not handle IPv6 packets.

  I have an additional patch that I will provide to extend
  the code to handle IPv6 connections.

* Simon Horman, Fri, 05 Sep 2008 11:32:38 +1000

  I have applied this patch to the net-next-2.6 branch of lvs-2.6

  git://git.kernel.org/pub/scm/linux/kernel/git/horms/lvs-2.6.git

Index: lvs-2.6/net/ipv4/ipvs/ip_vs_core.c
===================================================================
--- lvs-2.6.orig/net/ipv4/ipvs/ip_vs_core.c	2008-09-03 11:01:38.000000000 +1000
+++ lvs-2.6/net/ipv4/ipvs/ip_vs_core.c	2008-09-03 12:17:49.000000000 +1000
@@ -651,12 +651,53 @@ void ip_vs_nat_icmp_v6(struct sk_buff *s
 }
 #endif
 
+/* Handle relevant response ICMP messages - forward to the right
+ * destination host. Used for NAT and local client.
+ */
+static int handle_response_icmp(struct sk_buff *skb, struct iphdr *iph,
+				struct iphdr *cih, struct ip_vs_conn *cp,
+				struct ip_vs_protocol *pp,
+				unsigned int offset, unsigned int ihl)
+{
+	unsigned int verdict = NF_DROP;
+
+	if (IP_VS_FWD_METHOD(cp) != 0) {
+		IP_VS_ERR("shouldn't reach here, because the box is on the "
+			  "half connection in the tun/dr module.\n");
+	}
+
+	/* Ensure the checksum is correct */
+	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
+		/* Failed checksum! */
+		IP_VS_DBG(1,
+			  "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
+			  NIPQUAD(iph->saddr));
+		goto out;
+	}
+
+	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
+		offset += 2 * sizeof(__u16);
+	if (!skb_make_writable(skb, offset))
+		goto out;
+
+	ip_vs_nat_icmp(skb, pp, cp, 1);
+
+	/* do the statistics and put it back */
+	ip_vs_out_stats(cp, skb);
+
+	skb->ipvs_property = 1;
+	verdict = NF_ACCEPT;
+
+out:
+	__ip_vs_conn_put(cp);
+
+	return verdict;
+}
+
 /*
  *	Handle ICMP messages in the inside-to-outside direction (outgoing).
- *	Find any that might be relevant, check against existing connections,
- *	forward to the right destination host if relevant.
+ *	Find any that might be relevant, check against existing connections.
  *	Currently handles error types - unreachable, quench, ttl exceeded.
- *	(Only used in VS/NAT)
  */
 static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
 {
@@ -666,7 +707,7 @@ static int ip_vs_out_icmp(struct sk_buff
 	struct ip_vs_iphdr ciph;
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
-	unsigned int offset, ihl, verdict;
+	unsigned int offset, ihl;
 
 	*related = 1;
 
@@ -725,38 +766,7 @@ static int ip_vs_out_icmp(struct sk_buff
 	if (!cp)
 		return NF_ACCEPT;
 
-	verdict = NF_DROP;
-
-	if (IP_VS_FWD_METHOD(cp) != 0) {
-		IP_VS_ERR("shouldn't reach here, because the box is on the "
-			  "half connection in the tun/dr module.\n");
-	}
-
-	/* Ensure the checksum is correct */
-	if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
-		/* Failed checksum! */
-		IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
-			  NIPQUAD(iph->saddr));
-		goto out;
-	}
-
-	if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
-		offset += 2 * sizeof(__u16);
-	if (!skb_make_writable(skb, offset))
-		goto out;
-
-	ip_vs_nat_icmp(skb, pp, cp, 1);
-
-	/* do the statistics and put it back */
-	ip_vs_out_stats(cp, skb);
-
-	skb->ipvs_property = 1;
-	verdict = NF_ACCEPT;
-
-  out:
-	__ip_vs_conn_put(cp);
-
-	return verdict;
+	return handle_response_icmp(skb, iph, cih, cp, pp, offset, ihl);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -875,10 +885,76 @@ static inline int is_tcp_reset(const str
 	return th->rst;
 }
 
+/* Handle response packets: rewrite addresses and send away...
+ * Used for NAT and local client.
+ */
+static unsigned int
+handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+		struct ip_vs_conn *cp, int ihl)
+{
+	IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
+
+	if (!skb_make_writable(skb, ihl))
+		goto drop;
+
+	/* mangle the packet */
+	if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
+		goto drop;
+
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6)
+		ipv6_hdr(skb)->saddr = cp->vaddr.in6;
+	else
+#endif
+	{
+		ip_hdr(skb)->saddr = cp->vaddr.ip;
+		ip_send_check(ip_hdr(skb));
+	}
+
+	/* For policy routing, packets originating from this
+	 * machine itself may be routed differently to packets
+	 * passing through.  We want this packet to be routed as
+	 * if it came from this machine itself.  So re-compute
+	 * the routing information.
+	 */
+#ifdef CONFIG_IP_VS_IPV6
+	if (af == AF_INET6) {
+		if (ip6_route_me_harder(skb) != 0)
+			goto drop;
+	} else
+#endif
+		if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
+			goto drop;
+
+	/* For policy routing, packets originating from this
+	 * machine itself may be routed differently to packets
+	 * passing through.  We want this packet to be routed as
+	 * if it came from this machine itself.  So re-compute
+	 * the routing information.
+	 */
+	if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
+		goto drop;
+
+	IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
+
+	ip_vs_out_stats(cp, skb);
+	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+	ip_vs_conn_put(cp);
+
+	skb->ipvs_property = 1;
+
+	LeaveFunction(11);
+	return NF_ACCEPT;
+
+drop:
+	ip_vs_conn_put(cp);
+	kfree_skb(skb);
+	return NF_STOLEN;
+}
+
 /*
  *	It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
- *	Check if outgoing packet belongs to the established ip_vs_conn,
- *      rewrite addresses of the packet and send it on its way...
+ *	Check if outgoing packet belongs to the established ip_vs_conn.
  */
 static unsigned int
 ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
@@ -987,55 +1063,7 @@ ip_vs_out(unsigned int hooknum, struct s
 		return NF_ACCEPT;
 	}
 
-	IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
-
-	if (!skb_make_writable(skb, iph.len))
-		goto drop;
-
-	/* mangle the packet */
-	if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
-		goto drop;
-
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6)
-		ipv6_hdr(skb)->saddr = cp->vaddr.in6;
-	else
-#endif
-	{
-		ip_hdr(skb)->saddr = cp->vaddr.ip;
-		ip_send_check(ip_hdr(skb));
-	}
-
-	/* For policy routing, packets originating from this
-	 * machine itself may be routed differently to packets
-	 * passing through.  We want this packet to be routed as
-	 * if it came from this machine itself.  So re-compute
-	 * the routing information.
-	 */
-#ifdef CONFIG_IP_VS_IPV6
-	if (af == AF_INET6) {
-		if (ip6_route_me_harder(skb) != 0)
-			goto drop;
-	} else
-#endif
-		if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
-			goto drop;
-
-	IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
-
-	ip_vs_out_stats(cp, skb);
-	ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
-	ip_vs_conn_put(cp);
-
-	skb->ipvs_property = 1;
-
-	LeaveFunction(11);
-	return NF_ACCEPT;
-
-  drop:
-	ip_vs_conn_put(cp);
-	kfree_skb(skb);
-	return NF_STOLEN;
+	return handle_response(af, skb, pp, cp, iph.len);
 }
 
 
@@ -1111,8 +1139,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *
 	ip_vs_fill_iphdr(AF_INET, cih, &ciph);
 	/* The embedded headers contain source and dest in reverse order */
 	cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
-	if (!cp)
+	if (!cp) {
+		/* The packet could also belong to a local client */
+		cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+		if (cp)
+			return handle_response_icmp(skb, iph, cih, cp, pp,
+						    offset, ihl);
 		return NF_ACCEPT;
+	}
 
 	verdict = NF_DROP;
 
@@ -1244,11 +1278,12 @@ ip_vs_in(unsigned int hooknum, struct sk
 	ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
 
 	/*
-	 *	Big tappo: only PACKET_HOST (neither loopback nor mcasts)
-	 *	... don't know why 1st test DOES NOT include 2nd (?)
+	 *	Big tappo: only PACKET_HOST, including loopback for local client
+	 *	Don't handle local packets on IPv6 for now
 	 */
-	if (unlikely(skb->pkt_type != PACKET_HOST
-		     || skb->dev->flags & IFF_LOOPBACK || skb->sk)) {
+	if (unlikely(skb->pkt_type != PACKET_HOST ||
+		     (af == AF_INET6 || (skb->dev->flags & IFF_LOOPBACK ||
+					 skb->sk)))) {
 		IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
 			      skb->pkt_type,
 			      iph.protocol,
@@ -1277,6 +1312,11 @@ ip_vs_in(unsigned int hooknum, struct sk
 	if (unlikely(!cp)) {
 		int v;
 
+		/* For local client packets, it could be a response */
+		cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+		if (cp)
+			return handle_response(af, skb, pp, cp, iph.len);
+
 		if (!pp->conn_schedule(af, skb, pp, &v, &cp))
 			return v;
 	}
Index: lvs-2.6/net/ipv4/ipvs/ip_vs_proto_tcp.c
===================================================================
--- lvs-2.6.orig/net/ipv4/ipvs/ip_vs_proto_tcp.c	2008-09-03 10:56:05.000000000 +1000
+++ lvs-2.6/net/ipv4/ipvs/ip_vs_proto_tcp.c	2008-09-03 11:24:26.000000000 +1000
@@ -166,7 +166,7 @@ tcp_snat_handler(struct sk_buff *skb,
 	tcph->source = cp->vport;
 
 	/* Adjust TCP checksums */
-	if (!cp->app) {
+	if (!cp->app && (tcph->check != 0)) {
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
 				     cp->dport, cp->vport);
@@ -235,7 +235,7 @@ tcp_dnat_handler(struct sk_buff *skb,
 	/*
 	 *	Adjust TCP checksums
 	 */
-	if (!cp->app) {
+	if (!cp->app && (tcph->check != 0)) {
 		/* Only port and addr are changed, do fast csum update */
 		tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
 				     cp->vport, cp->dport);

             reply	other threads:[~2008-09-05  1:36 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-09-05  1:36 Simon Horman [this message]
2008-09-05  1:37 ` [PATCH 2/2] ipvs: load balance ipv6 connections from a local process Simon Horman
2008-09-05 11:40   ` Julius Volz
2008-09-05 15:55     ` Brian Haley
2008-09-05 16:37       ` Julius Volz
2008-09-06  4:14     ` Simon Horman
2008-09-06  9:26       ` Julius Volz
2008-09-08  0:30         ` Simon Horman
2008-09-08  1:48         ` Simon Horman
2008-09-08  9:30           ` Julius Volz
2008-09-08  9:50             ` Simon Horman
2008-09-05  5:12 ` [PATCH 1/2] ipvs: load balance IPv4 " Julian Anastasov
2008-09-05  5:49   ` Siim Põder
2008-09-05  5:49     ` Siim Põder
2008-09-06  7:43     ` Simon Horman
2008-09-05 11:02 ` Julius Volz
2008-09-06  3:56   ` Simon Horman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080905013609.GD14128@verge.net.au \
    --to=horms@verge.net.au \
    --cc=juliusv@google.com \
    --cc=lvs-devel@vger.kernel.org \
    --cc=malcolm@loadbalancer.org \
    --cc=netdev@vger.kernel.org \
    --cc=siim@p6drad-teel.net \
    --cc=vbusam@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.