From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:55944) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1b8VJi-00047I-4H for qemu-devel@nongnu.org; Thu, 02 Jun 2016 12:17:56 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1b8VJd-00022s-QX for qemu-devel@nongnu.org; Thu, 02 Jun 2016 12:17:53 -0400 Received: from mx1.redhat.com ([209.132.183.28]:57467) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1b8VJd-00022I-Gr for qemu-devel@nongnu.org; Thu, 02 Jun 2016 12:17:49 -0400 Received: from int-mx13.intmail.prod.int.phx2.redhat.com (int-mx13.intmail.prod.int.phx2.redhat.com [10.5.11.26]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id D5FEDC097259 for ; Thu, 2 Jun 2016 16:17:48 +0000 (UTC) Received: from wei-thinkpad.nay.redhat.com (vpn1-5-56.pek2.redhat.com [10.72.5.56]) by int-mx13.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id u52GHjUu024566 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO) for ; Thu, 2 Jun 2016 12:17:48 -0400 References: <1464453454-5703-1-git-send-email-wexu@redhat.com> <1464453454-5703-3-git-send-email-wexu@redhat.com> <574BC09D.5050705@redhat.com> From: Wei Xu Message-ID: <57505C29.7050402@redhat.com> Date: Fri, 3 Jun 2016 00:17:45 +0800 MIME-Version: 1.0 In-Reply-To: <574BC09D.5050705@redhat.com> Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [ RFC Patch v6 2/3] virtio-net rsc: support coalescing ipv6 tcp traffic List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org On 2016=E5=B9=B405=E6=9C=8830=E6=97=A5 12:25, Jason Wang wrote: > > > On 2016=E5=B9=B405=E6=9C=8829=E6=97=A5 00:37, wexu@redhat.com wrote: >> From: Wei Xu >> >> Most stuffs are like ipv4 2 differences between ipv4 and ipv6. >> >> 1. Fragment length in ipv4 header includes itself, while it's not >> included for ipv6, thus means ipv6 can carry a real '65535' payload. >> >> 2. IPv6 header does not need calculate header checksum. >> >> Signed-off-by: Wei Xu >> --- >> hw/net/virtio-net.c | 152 >> +++++++++++++++++++++++++++++++++++++++++++++++++--- >> 1 file changed, 144 insertions(+), 8 deletions(-) >> >> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c >> index b3bb63b..cc8cbe4 100644 >> --- a/hw/net/virtio-net.c >> +++ b/hw/net/virtio-net.c >> @@ -53,6 +53,10 @@ >> /* header length value in ip header without option */ >> #define VIRTIO_NET_IP4_HEADER_LENGTH 5 >> +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ) >> +#define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ >> +#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD >> + >> /* Purge coalesced packets timer interval, This value affects the >> performance >> a lot, and should be tuned carefully, '300000'(300us) is the >> recommended >> value to pass the WHQL test, '50000' can gain 2x netperf >> throughput with >> @@ -1724,6 +1728,25 @@ static void >> virtio_net_rsc_extract_unit4(NetRscChain *chain, >> unit->payload =3D htons(*unit->ip_plen) - ip_hdrlen - >> unit->tcp_hdrlen; >> } >> +static void virtio_net_rsc_extract_unit6(NetRscChain *chain, >> + const uint8_t *buf, >> NetRscUnit* unit) >> +{ >> + uint16_t hdr_len; >> + struct ip6_header *ip6; >> + >> + hdr_len =3D ((VirtIONet *)(chain->n))->guest_hdr_len; >> + ip6 =3D (struct ip6_header *)(buf + hdr_len + sizeof(struct >> eth_header)); >> + unit->ip =3D ip6; >> + unit->ip_plen =3D &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); >> + unit->tcp =3D (struct tcp_header *)(((uint8_t *)unit->ip)\ >> + + sizeof(struct ip6_header)); >> + unit->tcp_hdrlen =3D (htons(unit->tcp->th_offset_flags) & 0xF000) >> >> 10; >> + >> + /* There is a difference between payload lenght in ipv4 and v6, >> + ip header is excluded in ipv6 */ >> + unit->payload =3D htons(*unit->ip_plen) - unit->tcp_hdrlen; >> +} >> + >> static void virtio_net_rsc_ipv4_checksum(struct virtio_net_hdr *vhdr= , >> struct ip_header *ip) >> { >> @@ -1742,7 +1765,9 @@ static size_t >> virtio_net_rsc_drain_seg(NetRscChain *chain, NetRscSeg *seg) >> struct virtio_net_hdr *h; >> h =3D (struct virtio_net_hdr *)seg->buf; >> - virtio_net_rsc_ipv4_checksum(h, seg->unit.ip); >> + if ((chain->proto =3D=3D ETH_P_IP) && seg->is_coalesced) { >> + virtio_net_rsc_ipv4_checksum(h, seg->unit.ip); >> + } >> ret =3D virtio_net_do_receive(seg->nc, seg->buf, seg->size); >> QTAILQ_REMOVE(&chain->buffers, seg, next); >> g_free(seg->buf); >> @@ -1798,7 +1823,7 @@ static void virtio_net_rsc_cache_buf(NetRscChain >> *chain, NetClientState *nc, >> hdr_len =3D chain->n->guest_hdr_len; >> seg =3D g_malloc(sizeof(NetRscSeg)); >> seg->buf =3D g_malloc(hdr_len + sizeof(struct eth_header)\ >> - + VIRTIO_NET_MAX_TCP_PAYLOAD); >> + + sizeof(struct ip6_header) + >> VIRTIO_NET_MAX_TCP_PAYLOAD); >> memcpy(seg->buf, buf, size); >> seg->size =3D size; >> seg->packets =3D 1; >> @@ -1809,7 +1834,18 @@ static void >> virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc, >> QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); >> chain->stat.cache++; >> - virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); >> + switch (chain->proto) { >> + case ETH_P_IP: >> + virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); >> + break; >> + >> + case ETH_P_IPV6: >> + virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); >> + break; >> + >> + default: >> + g_assert_not_reached(); >> + } >> } >> static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, >> @@ -1929,6 +1965,24 @@ static int32_t >> virtio_net_rsc_coalesce4(NetRscChain *chain, NetRscSeg *seg, >> return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); >> } >> +static int32_t virtio_net_rsc_coalesce6(NetRscChain *chain, NetRscSeg >> *seg, >> + const uint8_t *buf, size_t size, NetRscUnit >> *unit) >> +{ >> + struct ip6_header *ip1, *ip2; >> + >> + ip1 =3D (struct ip6_header *)(unit->ip); >> + ip2 =3D (struct ip6_header *)(seg->unit.ip); >> + if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_addres= s)) >> + || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct >> in6_address)) >> + || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) >> + || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { >> + chain->stat.no_match++; >> + return RSC_NO_MATCH; >> + } >> + >> + return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); >> +} >> + >> /* Pakcets with 'SYN' should bypass, other flag should be sent after >> drain >> * to prevent out of order */ >> static int virtio_net_rsc_tcp_ctrl_check(NetRscChain *chain, >> @@ -1981,7 +2035,11 @@ static size_t >> virtio_net_rsc_do_coalesce(NetRscChain *chain, NetClientState *nc, >> NetRscSeg *seg, *nseg; >> QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { >> - ret =3D virtio_net_rsc_coalesce4(chain, seg, buf, size, unit)= ; >> + if (chain->proto =3D=3D ETH_P_IP) { >> + ret =3D virtio_net_rsc_coalesce4(chain, seg, buf, size, u= nit); >> + } else { >> + ret =3D virtio_net_rsc_coalesce6(chain, seg, buf, size, u= nit); >> + } >> if (ret =3D=3D RSC_FINAL) { >> if (virtio_net_rsc_drain_seg(chain, seg) =3D=3D 0) { >> @@ -2106,13 +2164,82 @@ static size_t >> virtio_net_rsc_receive4(NetRscChain *chain, NetClientState* nc, >> return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); >> } >> +static int32_t virtio_net_rsc_sanity_check6(NetRscChain *chain, >> + struct ip6_header *ip6, >> + const uint8_t *buf, >> size_t size) >> +{ >> + uint16_t ip_len; >> + uint16_t hdr_len; >> + >> + hdr_len =3D ((VirtIONet *)(chain->n))->guest_hdr_len; >> + if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct >> ip6_header) >> + + sizeof(tcp_header))) { >> + return RSC_BYPASS; >> + } >> + >> + if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) >> + !=3D IP_HEADER_VERSION_6) { >> + return RSC_BYPASS; >> + } >> + >> + /* Both option and protocol is checked in this */ >> + if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt !=3D IPPROTO_TCP) { >> + chain->stat.bypass_not_tcp++; >> + return RSC_BYPASS; >> + } >> + >> + ip_len =3D htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); >> + if (ip_len < sizeof(struct tcp_header) >> + || ip_len > (size - hdr_len - sizeof(struct eth_header) >> + - sizeof(struct ip6_header))) { >> + chain->stat.ip_hacked++; >> + return RSC_BYPASS; >> + } >> + >> + return RSC_WANT; >> +} >> + >> +static size_t virtio_net_rsc_receive6(void *opq, NetClientState* nc, >> + const uint8_t *buf, size_t size= ) >> +{ >> + int32_t ret; >> + uint16_t hdr_len; >> + NetRscChain *chain; >> + NetRscUnit unit; >> + >> + chain =3D (NetRscChain *)opq; >> + hdr_len =3D ((VirtIONet *)(chain->n))->guest_hdr_len; >> + virtio_net_rsc_extract_unit6(chain, buf, &unit); > > Same issue as ipv4, looks not safe if you analyze header before doing > sanity check. OK. > >> + if (RSC_WANT !=3D virtio_net_rsc_sanity_check6(chain, >> + unit.ip, buf, size))= { >> + return virtio_net_do_receive(nc, buf, size); >> + } >> + >> + ret =3D virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); >> + if (ret =3D=3D RSC_BYPASS) { >> + return virtio_net_do_receive(nc, buf, size); >> + } else if (ret =3D=3D RSC_FINAL) { >> + return virtio_net_rsc_drain_flow(chain, nc, buf, size, >> + ((hdr_len + sizeof(struct eth_header)) + 8), >> + VIRTIO_NET_IP6_ADDR_SIZE, >> + hdr_len + sizeof(struct eth_header) + sizeof(struct >> ip6_header), >> + VIRTIO_NET_TCP_PORT_SIZE); >> + } >> + >> + if (virtio_net_rsc_empty_cache(chain, nc, buf, size)) { >> + return size; >> + } >> + >> + return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); >> +} >> + >> static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n, >> NetClientState *nc, >> uint16_t proto) >> { >> NetRscChain *chain; >> - if (proto !=3D (uint16_t)ETH_P_IP) { >> + if ((proto !=3D (uint16_t)ETH_P_IP) && (proto !=3D >> (uint16_t)ETH_P_IPV6)) { >> return NULL; >> } >> @@ -2125,8 +2252,13 @@ static NetRscChain >> *virtio_net_rsc_lookup_chain(VirtIONet * n, >> chain =3D g_malloc(sizeof(*chain)); >> chain->n =3D n; >> chain->proto =3D proto; >> - chain->max_payload =3D VIRTIO_NET_MAX_IP4_PAYLOAD; >> - chain->gso_type =3D VIRTIO_NET_HDR_GSO_TCPV4; >> + if (proto =3D=3D (uint16_t)ETH_P_IP) { >> + chain->max_payload =3D VIRTIO_NET_MAX_IP4_PAYLOAD; >> + chain->gso_type =3D VIRTIO_NET_HDR_GSO_TCPV4; >> + } else { >> + chain->max_payload =3D VIRTIO_NET_MAX_IP6_PAYLOAD; >> + chain->gso_type =3D VIRTIO_NET_HDR_GSO_TCPV6; >> + } >> chain->drain_timer =3D timer_new_ns(QEMU_CLOCK_VIRTUAL, >> virtio_net_rsc_purge, chain); >> memset(&chain->stat, 0, sizeof(chain->stat)); >> @@ -2158,7 +2290,11 @@ static ssize_t >> virtio_net_rsc_receive(NetClientState *nc, >> return virtio_net_do_receive(nc, buf, size); >> } else { >> chain->stat.received++; >> - return virtio_net_rsc_receive4(chain, nc, buf, size); >> + if (proto =3D=3D (uint16_t)ETH_P_IP) { >> + return virtio_net_rsc_receive4(chain, nc, buf, size); >> + } else { >> + return virtio_net_rsc_receive6(chain, nc, buf, size); >> + } >> } >> } > >