From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:37387) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1b7ElL-0008OC-MT for qemu-devel@nongnu.org; Mon, 30 May 2016 00:25:13 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1b7ElI-0000B8-Dv for qemu-devel@nongnu.org; Mon, 30 May 2016 00:25:11 -0400 Received: from mx1.redhat.com ([209.132.183.28]:59850) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1b7ElI-0000Az-6B for qemu-devel@nongnu.org; Mon, 30 May 2016 00:25:08 -0400 Received: from int-mx14.intmail.prod.int.phx2.redhat.com (int-mx14.intmail.prod.int.phx2.redhat.com [10.5.11.27]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id BD415C06C9E0 for ; Mon, 30 May 2016 04:25:07 +0000 (UTC) References: <1464453454-5703-1-git-send-email-wexu@redhat.com> <1464453454-5703-3-git-send-email-wexu@redhat.com> From: Jason Wang Message-ID: <574BC09D.5050705@redhat.com> Date: Mon, 30 May 2016 12:25:01 +0800 MIME-Version: 1.0 In-Reply-To: <1464453454-5703-3-git-send-email-wexu@redhat.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [ RFC Patch v6 2/3] virtio-net rsc: support coalescing ipv6 tcp traffic List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: wexu@redhat.com, qemu-devel@nongnu.org Cc: victork@redhat.com, mst@redhat.com, yvugenfi@redhat.com, marcel@redhat.com, dfleytma@redhat.com On 2016=E5=B9=B405=E6=9C=8829=E6=97=A5 00:37, wexu@redhat.com wrote: > From: Wei Xu > > Most stuffs are like ipv4 2 differences between ipv4 and ipv6. > > 1. Fragment length in ipv4 header includes itself, while it's not > included for ipv6, thus means ipv6 can carry a real '65535' payload. > > 2. IPv6 header does not need calculate header checksum. 
> > Signed-off-by: Wei Xu > --- > hw/net/virtio-net.c | 152 +++++++++++++++++++++++++++++++++++++++++++= ++++++--- > 1 file changed, 144 insertions(+), 8 deletions(-) > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c > index b3bb63b..cc8cbe4 100644 > --- a/hw/net/virtio-net.c > +++ b/hw/net/virtio-net.c > @@ -53,6 +53,10 @@ > /* header length value in ip header without option */ > #define VIRTIO_NET_IP4_HEADER_LENGTH 5 > =20 > +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ) > +#define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ > +#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD > + > /* Purge coalesced packets timer interval, This value affects the per= formance > a lot, and should be tuned carefully, '300000'(300us) is the recom= mended > value to pass the WHQL test, '50000' can gain 2x netperf throughpu= t with > @@ -1724,6 +1728,25 @@ static void virtio_net_rsc_extract_unit4(NetRscC= hain *chain, > unit->payload =3D htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_h= drlen; > } > =20 > +static void virtio_net_rsc_extract_unit6(NetRscChain *chain, > + const uint8_t *buf, NetRscUni= t* unit) > +{ > + uint16_t hdr_len; > + struct ip6_header *ip6; > + > + hdr_len =3D ((VirtIONet *)(chain->n))->guest_hdr_len; > + ip6 =3D (struct ip6_header *)(buf + hdr_len + sizeof(struct eth_he= ader)); > + unit->ip =3D ip6; > + unit->ip_plen =3D &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); > + unit->tcp =3D (struct tcp_header *)(((uint8_t *)unit->ip)\ > + + sizeof(struct ip6_header)); > + unit->tcp_hdrlen =3D (htons(unit->tcp->th_offset_flags) & 0xF000) = >> 10; > + > + /* There is a difference between payload lenght in ipv4 and v6, > + ip header is excluded in ipv6 */ > + unit->payload =3D htons(*unit->ip_plen) - unit->tcp_hdrlen; > +} > + > static void virtio_net_rsc_ipv4_checksum(struct virtio_net_hdr *vhdr, > struct ip_header *ip) > { > @@ -1742,7 +1765,9 @@ static size_t virtio_net_rsc_drain_seg(NetRscChai= n *chain, NetRscSeg *seg) > struct 
virtio_net_hdr *h; > =20 > h =3D (struct virtio_net_hdr *)seg->buf; > - virtio_net_rsc_ipv4_checksum(h, seg->unit.ip); > + if ((chain->proto =3D=3D ETH_P_IP) && seg->is_coalesced) { > + virtio_net_rsc_ipv4_checksum(h, seg->unit.ip); > + } > ret =3D virtio_net_do_receive(seg->nc, seg->buf, seg->size); > QTAILQ_REMOVE(&chain->buffers, seg, next); > g_free(seg->buf); > @@ -1798,7 +1823,7 @@ static void virtio_net_rsc_cache_buf(NetRscChain = *chain, NetClientState *nc, > hdr_len =3D chain->n->guest_hdr_len; > seg =3D g_malloc(sizeof(NetRscSeg)); > seg->buf =3D g_malloc(hdr_len + sizeof(struct eth_header)\ > - + VIRTIO_NET_MAX_TCP_PAYLOAD); > + + sizeof(struct ip6_header) + VIRTIO_NET_MAX_TCP_PA= YLOAD); > memcpy(seg->buf, buf, size); > seg->size =3D size; > seg->packets =3D 1; > @@ -1809,7 +1834,18 @@ static void virtio_net_rsc_cache_buf(NetRscChain= *chain, NetClientState *nc, > QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); > chain->stat.cache++; > =20 > - virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); > + switch (chain->proto) { > + case ETH_P_IP: > + virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); > + break; > + > + case ETH_P_IPV6: > + virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); > + break; > + > + default: > + g_assert_not_reached(); > + } > } > =20 > static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, > @@ -1929,6 +1965,24 @@ static int32_t virtio_net_rsc_coalesce4(NetRscCh= ain *chain, NetRscSeg *seg, > return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); > } > =20 > +static int32_t virtio_net_rsc_coalesce6(NetRscChain *chain, NetRscSeg = *seg, > + const uint8_t *buf, size_t size, NetRscUnit *u= nit) > +{ > + struct ip6_header *ip1, *ip2; > + > + ip1 =3D (struct ip6_header *)(unit->ip); > + ip2 =3D (struct ip6_header *)(seg->unit.ip); > + if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address= )) > + || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_addr= ess)) > + || (unit->tcp->th_sport 
^ seg->unit.tcp->th_sport) > + || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { > + chain->stat.no_match++; > + return RSC_NO_MATCH; > + } > + > + return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); > +} > + > /* Pakcets with 'SYN' should bypass, other flag should be sent after = drain > * to prevent out of order */ > static int virtio_net_rsc_tcp_ctrl_check(NetRscChain *chain, > @@ -1981,7 +2035,11 @@ static size_t virtio_net_rsc_do_coalesce(NetRscC= hain *chain, NetClientState *nc, > NetRscSeg *seg, *nseg; > =20 > QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { > - ret =3D virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); > + if (chain->proto =3D=3D ETH_P_IP) { > + ret =3D virtio_net_rsc_coalesce4(chain, seg, buf, size, un= it); > + } else { > + ret =3D virtio_net_rsc_coalesce6(chain, seg, buf, size, un= it); > + } > =20 > if (ret =3D=3D RSC_FINAL) { > if (virtio_net_rsc_drain_seg(chain, seg) =3D=3D 0) { > @@ -2106,13 +2164,82 @@ static size_t virtio_net_rsc_receive4(NetRscCha= in *chain, NetClientState* nc, > return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); > } > =20 > +static int32_t virtio_net_rsc_sanity_check6(NetRscChain *chain, > + struct ip6_header *ip6, > + const uint8_t *buf, size_t= size) > +{ > + uint16_t ip_len; > + uint16_t hdr_len; > + > + hdr_len =3D ((VirtIONet *)(chain->n))->guest_hdr_len; > + if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip= 6_header) > + + sizeof(tcp_header))) { > + return RSC_BYPASS; > + } > + > + if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) > + !=3D IP_HEADER_VERSION_6) { > + return RSC_BYPASS; > + } > + > + /* Both option and protocol is checked in this */ > + if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt !=3D IPPROTO_TCP) { > + chain->stat.bypass_not_tcp++; > + return RSC_BYPASS; > + } > + > + ip_len =3D htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); > + if (ip_len < sizeof(struct tcp_header) > + || ip_len > (size - hdr_len - sizeof(struct eth_header) > + - 
sizeof(struct ip6_header))) { > + chain->stat.ip_hacked++; > + return RSC_BYPASS; > + } > + > + return RSC_WANT; > +} > + > +static size_t virtio_net_rsc_receive6(void *opq, NetClientState* nc, > + const uint8_t *buf, size_t size) > +{ > + int32_t ret; > + uint16_t hdr_len; > + NetRscChain *chain; > + NetRscUnit unit; > + > + chain =3D (NetRscChain *)opq; > + hdr_len =3D ((VirtIONet *)(chain->n))->guest_hdr_len; > + virtio_net_rsc_extract_unit6(chain, buf, &unit); Same issue as ipv4: it does not look safe to parse the header before doing the sanity check. > + if (RSC_WANT !=3D virtio_net_rsc_sanity_check6(chain, > + unit.ip, buf, size)) = { > + return virtio_net_do_receive(nc, buf, size); > + } > + > + ret =3D virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); > + if (ret =3D=3D RSC_BYPASS) { > + return virtio_net_do_receive(nc, buf, size); > + } else if (ret =3D=3D RSC_FINAL) { > + return virtio_net_rsc_drain_flow(chain, nc, buf, size, > + ((hdr_len + sizeof(struct eth_header)) + 8), > + VIRTIO_NET_IP6_ADDR_SIZE, > + hdr_len + sizeof(struct eth_header) + sizeof(struct ip= 6_header), > + VIRTIO_NET_TCP_PORT_SIZE); > + } > + > + if (virtio_net_rsc_empty_cache(chain, nc, buf, size)) { > + return size; > + } > + > + return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); > +} > + > static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n, > NetClientState *nc, > uint16_t proto) > { > NetRscChain *chain; > =20 > - if (proto !=3D (uint16_t)ETH_P_IP) { > + if ((proto !=3D (uint16_t)ETH_P_IP) && (proto !=3D (uint16_t)ETH_P= _IPV6)) { > return NULL; > } > =20 > @@ -2125,8 +2252,13 @@ static NetRscChain *virtio_net_rsc_lookup_chain(= VirtIONet * n, > chain =3D g_malloc(sizeof(*chain)); > chain->n =3D n; > chain->proto =3D proto; > - chain->max_payload =3D VIRTIO_NET_MAX_IP4_PAYLOAD; > - chain->gso_type =3D 
VIRTIO_NET_HDR_GSO_TCPV4; > + } else { > + chain->max_payload =3D VIRTIO_NET_MAX_IP6_PAYLOAD; > + chain->gso_type =3D VIRTIO_NET_HDR_GSO_TCPV6; > + } > chain->drain_timer =3D timer_new_ns(QEMU_CLOCK_VIRTUAL, > virtio_net_rsc_purge, chain); > memset(&chain->stat, 0, sizeof(chain->stat)); > @@ -2158,7 +2290,11 @@ static ssize_t virtio_net_rsc_receive(NetClientS= tate *nc, > return virtio_net_do_receive(nc, buf, size); > } else { > chain->stat.received++; > - return virtio_net_rsc_receive4(chain, nc, buf, size); > + if (proto =3D=3D (uint16_t)ETH_P_IP) { > + return virtio_net_rsc_receive4(chain, nc, buf, size); > + } else { > + return virtio_net_rsc_receive6(chain, nc, buf, size); > + } > } > } > =20