From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:47323) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1aoQUu-0007Iz-FE for qemu-devel@nongnu.org; Fri, 08 Apr 2016 03:06:33 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1aoQUq-00083N-BR for qemu-devel@nongnu.org; Fri, 08 Apr 2016 03:06:28 -0400 Received: from mx1.redhat.com ([209.132.183.28]:59539) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1aoQUq-000838-3x for qemu-devel@nongnu.org; Fri, 08 Apr 2016 03:06:24 -0400 Received: from int-mx14.intmail.prod.int.phx2.redhat.com (int-mx14.intmail.prod.int.phx2.redhat.com [10.5.11.27]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by mx1.redhat.com (Postfix) with ESMTPS id 16A7E627DF for ; Fri, 8 Apr 2016 07:06:23 +0000 (UTC) Received: from wei-thinkpad.nay.redhat.com (vpn1-4-44.pek2.redhat.com [10.72.4.44]) by int-mx14.intmail.prod.int.phx2.redhat.com (8.14.4/8.14.4) with ESMTP id u3876Jei029357 (version=TLSv1/SSLv3 cipher=DHE-RSA-AES256-SHA bits=256 verify=NO) for ; Fri, 8 Apr 2016 03:06:21 -0400 References: <1459711556-10273-1-git-send-email-wexu@redhat.com> <1459711556-10273-4-git-send-email-wexu@redhat.com> <570327E7.7070908@redhat.com> From: Wei Xu Message-ID: <57075874.8060903@redhat.com> Date: Fri, 8 Apr 2016 15:06:28 +0800 MIME-Version: 1.0 In-Reply-To: <570327E7.7070908@redhat.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [ RFC Patch v4 3/3] virtio-net rsc: support coalescing ipv6 tcp traffic List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org On 2016年04月05日 10:50, Jason Wang wrote: > > On 04/04/2016 03:25 AM, wexu@redhat.com wrote: >> From: Wei Xu >> >> Most things like ipv4 except there is a significant difference between 
ipv4 >> and ipv6, the fragment lenght in ipv4 header includes itself, while it's not > typo Thanks. > >> included for ipv6, thus means ipv6 can carry a real '65535' payload. >> >> Signed-off-by: Wei Xu >> --- >> hw/net/virtio-net.c | 147 +++++++++++++++++++++++++++++++++++++++++++++++++--- >> 1 file changed, 141 insertions(+), 6 deletions(-) >> >> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c >> index 81e8e71..2d09352 100644 >> --- a/hw/net/virtio-net.c >> +++ b/hw/net/virtio-net.c >> @@ -50,6 +50,10 @@ >> /* header lenght value in ip header without option */ >> #define VIRTIO_NET_IP4_HEADER_LENGTH 5 >>  >> +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ) >> +#define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ >> +#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD >> + >> /* Purge coalesced packets timer interval */ >> #define VIRTIO_NET_RSC_INTERVAL 300000 >>  >> @@ -1725,6 +1729,25 @@ static void virtio_net_rsc_extract_unit4(NetRscChain *chain, >> unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen; >> } >>  >> +static void virtio_net_rsc_extract_unit6(NetRscChain *chain, >> + const uint8_t *buf, NetRscUnit* unit) >> +{ >> + uint16_t hdr_len; >> + struct ip6_header *ip6; >> + >> + hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; >> + ip6 = (struct ip6_header *)(buf + hdr_len + sizeof(struct eth_header)); >> + unit->ip = ip6; >> + unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); >> + unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)\ >> + + sizeof(struct ip6_header)); >> + unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10; >> + >> + /* There is a difference between payload lenght in ipv4 and v6, >> + ip header is excluded in ipv6 */ >> + unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen; >> +} >> + >> static void virtio_net_rsc_ipv4_checksum(struct ip_header *ip) >> { >> uint32_t sum; >> @@ -1738,7 +1761,9 @@ static size_t 
virtio_net_rsc_drain_seg(NetRscChain *chain, NetRscSeg *seg) >> { >> int ret; >>  >> - virtio_net_rsc_ipv4_checksum(seg->unit.ip); >> + if ((chain->proto == ETH_P_IP) && seg->is_coalesced) { >> + virtio_net_rsc_ipv4_checksum(seg->unit.ip); >> + } > Why not introduce proto specific checksum function for chain? Since there are only two protocols to support and very limited room for extending this feature, mst suggested using direct calls in the v2 patch to keep things simple, and I took that suggestion. > >> ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); >> QTAILQ_REMOVE(&chain->buffers, seg, next); >> g_free(seg->buf); >> @@ -1804,7 +1829,18 @@ static void virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc, >> QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); >> chain->stat.cache++; >>  >> - virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); >> + switch (chain->proto) { >> + case ETH_P_IP: >> + virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); > Another call for proto specific callbacks maybe? Same as above. 
> >> + break; >> + >> + case ETH_P_IPV6: >> + virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); >> + break; >> + >> + default: >> + g_assert_not_reached(); >> + } >> } >>  >> static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, NetRscSeg *seg, >> @@ -1948,6 +1984,24 @@ static int32_t virtio_net_rsc_coalesce4(NetRscChain *chain, NetRscSeg *seg, >> return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); >> } >>  >> +static int32_t virtio_net_rsc_coalesce6(NetRscChain *chain, NetRscSeg *seg, >> + const uint8_t *buf, size_t size, NetRscUnit *unit) >> +{ >> + struct ip6_header *ip1, *ip2; >> + >> + ip1 = (struct ip6_header *)(unit->ip); >> + ip2 = (struct ip6_header *)(seg->unit.ip); >> + if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address)) >> + || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct in6_address)) >> + || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) >> + || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { >> + chain->stat.no_match++; >> + return RSC_NO_MATCH; >> + } >> + >> + return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); >> +} >> + >> /* Pakcets with 'SYN' should bypass, other flag should be sent after drain >> * to prevent out of order */ >> static int virtio_net_rsc_tcp_ctrl_check(NetRscChain *chain, >> @@ -1991,7 +2045,11 @@ static size_t virtio_net_rsc_do_coalesce(NetRscChain *chain, NetClientState *nc, >> NetRscSeg *seg, *nseg; >>  >> QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { >> - ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); >> + if (chain->proto == ETH_P_IP) { >> + ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit); >> + } else { >> + ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit); > Ditto. 
Ditto here too :) > >> + } >>  >> if (ret == RSC_FINAL) { >> if (virtio_net_rsc_drain_seg(chain, seg) == 0) { >> @@ -2116,13 +2174,82 @@ static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc, >> return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); >> } >>  >> +static int32_t virtio_net_rsc_sanity_check6(NetRscChain *chain, >> + struct ip6_header *ip6, >> + const uint8_t *buf, size_t size) >> +{ >> + uint16_t ip_len; >> + uint16_t hdr_len; >> + >> + hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; >> + if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header) >> + + sizeof(tcp_header))) { >> + return RSC_BYPASS; >> + } >> + >> + if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4) >> + != IP_HEADER_VERSION_6) { >> + return RSC_BYPASS; >> + } >> + >> + /* Both option and protocol is checked in this */ >> + if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) { >> + chain->stat.bypass_not_tcp++; >> + return RSC_BYPASS; >> + } >> + >> + ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen); >> + if (ip_len < sizeof(struct tcp_header) >> + || ip_len > (size - hdr_len - sizeof(struct eth_header) >> + - sizeof(struct ip6_header))) { >> + chain->stat.ip_hacked++; >> + return RSC_BYPASS; >> + } >> + >> + return RSC_WANT; >> +} >> + >> +static size_t virtio_net_rsc_receive6(void *opq, NetClientState* nc, >> + const uint8_t *buf, size_t size) >> +{ >> + int32_t ret; >> + uint16_t hdr_len; >> + NetRscChain *chain; >> + NetRscUnit unit; >> + >> + chain = (NetRscChain *)opq; >> + hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len; >> + virtio_net_rsc_extract_unit6(chain, buf, &unit); >> + if (RSC_WANT != virtio_net_rsc_sanity_check6(chain, >> + unit.ip, buf, size)) { >> + return virtio_net_do_receive(nc, buf, size); >> + } >> + >> + ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); >> + if (ret == RSC_BYPASS) { >> + return virtio_net_do_receive(nc, buf, size); >> + } else if (ret == 
RSC_FINAL) { >> + return virtio_net_rsc_drain_flow(chain, nc, buf, size, >> + ((hdr_len + sizeof(struct eth_header)) + 8), >> + VIRTIO_NET_IP6_ADDR_SIZE, >> + hdr_len + sizeof(struct eth_header) + sizeof(struct ip6_header), >> + VIRTIO_NET_TCP_PORT_SIZE); >> + } >> + >> + if (virtio_net_rsc_empty_cache(chain, nc, buf, size)) { >> + return size; >> + } >> + >> + return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); >> +} >> + >> static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n, >> NetClientState *nc, >> uint16_t proto) >> { >> NetRscChain *chain; >>  >> - if (proto != (uint16_t)ETH_P_IP) { >> + if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) { >> return NULL; >> } >>  >> @@ -2135,7 +2262,11 @@ static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n, >> chain = g_malloc(sizeof(*chain)); >> chain->n = n; >> chain->proto = proto; >> - chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; >> + if (proto == (uint16_t)ETH_P_IP) { >> + chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD; >> + } else { >> + chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD; >> + } >> chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, >> virtio_net_rsc_purge, chain); >> memset(&chain->stat, 0, sizeof(chain->stat)); >> @@ -2167,7 +2298,11 @@ static ssize_t virtio_net_rsc_receive(NetClientState *nc, >> return virtio_net_do_receive(nc, buf, size); >> } else { >> chain->stat.received++; >> - return virtio_net_rsc_receive4(chain, nc, buf, size); >> + if (proto == (uint16_t)ETH_P_IP) { >> + return virtio_net_rsc_receive4(chain, nc, buf, size); >> + } else { >> + return virtio_net_rsc_receive6(chain, nc, buf, size); >> + } >> } >> } >>  >