From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:43547) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bEy5l-0008AR-CR for qemu-devel@nongnu.org; Mon, 20 Jun 2016 08:14:15 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1bEy5g-0007g7-4L for qemu-devel@nongnu.org; Mon, 20 Jun 2016 08:14:12 -0400 Received: from mx1.redhat.com ([209.132.183.28]:44222) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bEy5f-0007g1-SJ for qemu-devel@nongnu.org; Mon, 20 Jun 2016 08:14:08 -0400 Date: Mon, 20 Jun 2016 13:14:03 +0100 From: "Dr. David Alan Gilbert" Message-ID: <20160620121402.GC2891@work-vm> References: <1465902912-27527-1-git-send-email-zhangchen.fnst@cn.fujitsu.com> <1465902912-27527-4-git-send-email-zhangchen.fnst@cn.fujitsu.com> <57678CDA.7020700@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <57678CDA.7020700@redhat.com> Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [RFC PATCH 3/3] filter-rewriter: rewrite tcp packet to keep secondary connection List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Jason Wang Cc: Zhang Chen , qemu devel , Li Zhijian , "eddie . dong" , zhanghailiang , Yang Hongyang * Jason Wang (jasowang@redhat.com) wrote: >=20 >=20 > On 2016=E5=B9=B406=E6=9C=8814=E6=97=A5 19:15, Zhang Chen wrote: > > We will rewrite tcp packet secondary received and sent. >=20 > More verbose please. E.g which fields were rewrote and why. >=20 > > Signed-off-by: Zhang Chen > > Signed-off-by: Li Zhijian > > Signed-off-by: Wen Congyang > > --- > > net/filter-rewriter.c | 94 ++++++++++++++++++++++++++++++++++++++++= +++++++++-- > > trace-events | 3 ++ > > 2 files changed, 95 insertions(+), 2 deletions(-) > >=20 > > diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c > > index 12f88c5..86a2f53 100644 > > --- a/net/filter-rewriter.c > > +++ b/net/filter-rewriter.c > > @@ -21,6 +21,7 @@ > > #include "qemu/main-loop.h" > > #include "qemu/iov.h" > > #include "net/checksum.h" > > +#include "trace.h" > > #define FILTER_COLO_REWRITER(obj) \ > > OBJECT_CHECK(RewriterState, (obj), TYPE_FILTER_REWRITER) > > @@ -64,6 +65,75 @@ static int is_tcp_packet(Packet *pkt) > > } > > } > > +static int handle_primary_tcp_pkt(NetFilterState *nf, > > + Connection *conn, > > + Packet *pkt) > > +{ > > + struct tcphdr *tcp_pkt; > > + > > + tcp_pkt =3D (struct tcphdr *)pkt->transport_layer; > > + > > + if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) { >=20 > Why not use tracepoints directly? Because trace can't cope with you having to do an allocation/free. > > + char *sdebug, *ddebug; > > + sdebug =3D strdup(inet_ntoa(pkt->ip->ip_src)); > > + ddebug =3D strdup(inet_ntoa(pkt->ip->ip_dst)); > > + fprintf(stderr, "%s: src/dst: %s/%s p: seq/ack=3D%u/%u" > > + " flags=3D%x\n", __func__, sdebug, ddebug, > > + ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack), > > + tcp_pkt->th_flags); However, this should use the trace_ call to write the result even if it's using trace_event_get_state to switch the whole block on/off. > > + g_free(sdebug); > > + g_free(ddebug); > > + } > > + > > + if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) =3D=3D TH_ACK)) { > > + /* save primary colo tcp packet seq */ > > + conn->primary_seq =3D ntohl(tcp_pkt->th_ack) - 1; >=20 > Looks like primary_seq will only be updated during handshake, I wonder = how > this works. This code really needs commenting to make it see what's going on; each of these functions should say which way the packet is going (e.g. 'handle packets to the primary from the secondary') - there's a lot of packet flows going on and without the comments it's very hard to follo= w. I think this could be because we're fixing up the sequence numbers on the= =20 secondary once we've received the first response from the primary, so it'= s only the first packet of each connection that the primary has to do this = on - but hmm I'm not sure without some comments. Dave > > + > > + /* adjust tcp seq to make secondary guest handle it */ > > + tcp_pkt->th_ack =3D htonl(conn->secondary_seq + 1); >=20 > I'm not sure this can work for all cases. I believe we should also rewr= ite > seq here. And to me, a better approach is to track the offset of seq be= tween > pri and sec during handshake and rewrite both ack and seq based on this > offset. >=20 > > + net_checksum_calculate((uint8_t *)pkt->data, pkt->size); > > + } > > + > > + return 0; > > +} > > + > > +static int handle_secondary_tcp_pkt(NetFilterState *nf, > > + Connection *conn, > > + Packet *pkt) > > +{ > > + struct tcphdr *tcp_pkt; > > + > > + tcp_pkt =3D (struct tcphdr *)pkt->transport_layer; > > + > > + if (trace_event_get_state(TRACE_COLO_FILTER_REWRITER_DEBUG)) { > > + char *sdebug, *ddebug; > > + sdebug =3D strdup(inet_ntoa(pkt->ip->ip_src)); > > + ddebug =3D strdup(inet_ntoa(pkt->ip->ip_dst)); > > + printf("handle_secondary_tcp_pkt conn->secondary_seq =3D %u,= \n", > > + conn->secondary_seq); > > + printf("handle_secondary_tcp_pkt conn->primary_seq =3D %u,\n= ", > > + conn->primary_seq); > > + fprintf(stderr, "%s: src/dst: %s/%s p: seq/ack=3D%u/%u" > > + " flags=3D%x\n", __func__, sdebug, ddebug, > > + ntohl(tcp_pkt->th_seq), ntohl(tcp_pkt->th_ack), > > + tcp_pkt->th_flags); > > + g_free(sdebug); > > + g_free(ddebug); > > + } > > + > > + if (((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) =3D=3D (TH_ACK | TH= _SYN))) { > > + /* save client's seq */ > > + conn->secondary_seq =3D ntohl(tcp_pkt->th_seq); > > + } > > + > > + if ((tcp_pkt->th_flags & (TH_ACK | TH_SYN)) =3D=3D TH_ACK) { > > + tcp_pkt->th_seq =3D htonl(conn->primary_seq + 1); > > + net_checksum_calculate((uint8_t *)pkt->data, pkt->size); > > + } > > + > > + return 0; > > +} > > + > > static ssize_t colo_rewriter_receive_iov(NetFilterState *nf, > > NetClientState *sender, > > unsigned flags, > > @@ -106,10 +176,30 @@ static ssize_t colo_rewriter_receive_iov(NetFil= terState *nf, > > if (sender =3D=3D nf->netdev) { > > /* This packet is sent by netdev itself */ > > /* NET_FILTER_DIRECTION_TX */ > > - /* handle_primary_tcp_pkt */ > > + if (!handle_primary_tcp_pkt(nf, conn, pkt)) { > > + qemu_net_queue_send(s->incoming_queue, sender, 0, > > + (const uint8_t *)pkt->data, pkt->size, NULL); > > + packet_destroy(pkt, NULL); > > + pkt =3D NULL; > > + /* > > + * We block the packet here,after rewrite pkt > > + * and will send it > > + */ > > + return 1; > > + } > > } else { > > /* NET_FILTER_DIRECTION_RX */ > > - /* handle_secondary_tcp_pkt */ > > + if (!handle_secondary_tcp_pkt(nf, conn, pkt)) { > > + qemu_net_queue_send(s->incoming_queue, sender, 0, > > + (const uint8_t *)pkt->data, pkt->size, NULL); > > + packet_destroy(pkt, NULL); > > + pkt =3D NULL; > > + /* > > + * We block the packet here,after rewrite pkt > > + * and will send it > > + */ > > + return 1; > > + } > > } > > } > > diff --git a/trace-events b/trace-events > > index 6686cdf..5d798c6 100644 > > --- a/trace-events > > +++ b/trace-events > > @@ -1927,3 +1927,6 @@ colo_compare_icmp_miscompare_mtu(const char *st= a, int size) ": %s %d" > > colo_compare_ip_info(int psize, const char *sta, const char *stb, i= nt ssize, const char *stc, const char *std) "ppkt size =3D %d, ip_src =3D= %s, ip_dst =3D %s, spkt size =3D %d, ip_src =3D %s, ip_dst =3D %s" > > colo_old_packet_check_found(int64_t old_time) "%" PRId64 > > colo_compare_miscompare(void) "" > > + > > +# net/filter-rewriter.c > > +colo_filter_rewriter_debug(void) "" >=20 -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK