From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:53582) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1agb7i-0005aL-NG for qemu-devel@nongnu.org; Thu, 17 Mar 2016 12:50:12 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1agb7g-000269-GN for qemu-devel@nongnu.org; Thu, 17 Mar 2016 12:50:10 -0400 Received: from mx1.redhat.com ([209.132.183.28]:44357) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1agb7g-00025z-6a for qemu-devel@nongnu.org; Thu, 17 Mar 2016 12:50:08 -0400 Received: from int-mx09.intmail.prod.int.phx2.redhat.com (int-mx09.intmail.prod.int.phx2.redhat.com [10.5.11.22]) by mx1.redhat.com (Postfix) with ESMTPS id C986B46207 for ; Thu, 17 Mar 2016 16:50:07 +0000 (UTC) References: <1458033424-25414-1-git-send-email-wexu@redhat.com> <1458033424-25414-3-git-send-email-wexu@redhat.com> <56EA6FF2.7030009@redhat.com> From: Wei Xu Message-ID: <56EAE03B.4060907@redhat.com> Date: Fri, 18 Mar 2016 00:50:03 +0800 MIME-Version: 1.0 In-Reply-To: <56EA6FF2.7030009@redhat.com> Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [ Patch 2/2] virtio-net rsc: support coalescing ipv6 tcp traffic List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Jason Wang , qemu-devel@nongnu.org Cc: marcel@redhat.com, victork@redhat.com, dfleytma@redhat.com, yvugenfi@redhat.com, mst@redhat.com On 2016=E5=B9=B403=E6=9C=8817=E6=97=A5 16:50, Jason Wang wrote: > > On 03/15/2016 05:17 PM, wexu@redhat.com wrote: >> From: Wei Xu >> >> Most things like ipv4 except there is a significant difference between= ipv4 >> and ipv6, the fragment lenght in ipv4 header includes itself, while it= 's not >> included for ipv6, thus means ipv6 can carry a real '65535' unit. >> >> Signed-off-by: Wei Xu >> --- >> hw/net/virtio-net.c | 146 +++++++++++++++++++++++++++++++++++= +++++----- >> include/hw/virtio/virtio.h | 5 +- >> 2 files changed, 135 insertions(+), 16 deletions(-) >> >> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c >> index c23b45f..ef61b74 100644 >> --- a/hw/net/virtio-net.c >> +++ b/hw/net/virtio-net.c >> @@ -52,9 +52,14 @@ >> #define MAX_IP4_PAYLOAD (65535 - IP4_HDR_SZ) >> #define MAX_TCP_PAYLOAD 65535 >> =20 >> -/* max payload with virtio header */ >> +#define IP6_HDR_SZ (sizeof(struct ip6_header)) >> +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ) >> +#define IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */ >> +#define MAX_IP6_PAYLOAD MAX_TCP_PAYLOAD >> + >> +/* ip6 max payload, payload in ipv6 don't include the header */ >> #define MAX_VIRTIO_PAYLOAD (sizeof(struct virtio_net_hdr_mrg_rxbuf)= \ >> - + ETH_HDR_SZ + MAX_TCP_PAYLOAD) >> + + ETH_IP6_HDR_SZ + MAX_IP6_PAYLOAD) >> =20 >> #define IP4_HEADER_LEN 5 /* header lenght value in ip header without= option */ >> =20 >> @@ -1722,14 +1727,27 @@ static void virtio_net_rsc_extract_unit4(NetRs= cChain *chain, >> { >> uint16_t ip_hdrlen; >> =20 >> - unit->ip =3D (struct ip_header *)(buf + chain->hdr_size + ETH_HDR= _SZ); >> - ip_hdrlen =3D ((0xF & unit->ip->ip_ver_len) << 2); >> - unit->ip_plen =3D &unit->ip->ip_len; >> - unit->tcp =3D (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hd= rlen); >> + unit->u_ip.ip =3D (struct ip_header *)(buf + chain->hdr_size + ET= H_HDR_SZ); >> + ip_hdrlen =3D ((0xF & unit->u_ip.ip->ip_ver_len) << 2); >> + unit->ip_plen =3D &unit->u_ip.ip->ip_len; >> + unit->tcp =3D (struct tcp_header *)(((uint8_t *)unit->u_ip.ip) + = ip_hdrlen); >> unit->tcp_hdrlen =3D (htons(unit->tcp->th_offset_flags) & 0xF000= ) >> 10; >> unit->payload =3D htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_= hdrlen; >> } >> =20 >> +static void virtio_net_rsc_extract_unit6(NetRscChain *chain, >> + const uint8_t *buf, NetRscUn= it* unit) >> +{ >> + unit->u_ip.ip6 =3D (struct ip6_header *)(buf + chain->hdr_size + = ETH_HDR_SZ); > The u_ip seems a little bit redundant. How about use a simple void * an= d > cast it to ipv4/ipv6 in proto specific callbacks? > > The introducing of u_ip leads unnecessary ipv4 codes changes for ipv6 > coalescing implementation. Sure. >> + unit->ip_plen =3D &(unit->u_ip.ip6->ip6_ctlun.ip6_un1.ip6_un1_ple= n); >> + unit->tcp =3D (struct tcp_header *)(((uint8_t *)unit->u_ip.ip6)\ >> + + IP6_HDR_SZ); >> + unit->tcp_hdrlen =3D (htons(unit->tcp->th_offset_flags) & 0xF000)= >> 10; >> + /* There is a difference between payload lenght in ipv4 and v6, >> + ip header is excluded in ipv6 */ >> + unit->payload =3D htons(*unit->ip_plen) - unit->tcp_hdrlen; >> +} >> + >> static void virtio_net_rsc_ipv4_checksum(struct ip_header *ip) >> { >> uint32_t sum; >> @@ -1743,7 +1761,10 @@ static size_t virtio_net_rsc_drain_seg(NetRscCh= ain *chain, NetRscSeg *seg) >> { >> int ret; >> =20 >> - virtio_net_rsc_ipv4_checksum(seg->unit.ip); >> + if ((chain->proto =3D=3D ETH_P_IP) && seg->is_coalesced) { >> + virtio_net_rsc_ipv4_checksum(seg->unit.u_ip.ip); >> + } >> + >> ret =3D virtio_net_do_receive(seg->nc, seg->buf, seg->size); >> QTAILQ_REMOVE(&chain->buffers, seg, next); >> g_free(seg->buf); >> @@ -1807,7 +1828,11 @@ static void virtio_net_rsc_cache_buf(NetRscChai= n *chain, NetClientState *nc, >> QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); >> chain->stat.cache++; >> =20 >> - virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); >> + if (chain->proto =3D=3D ETH_P_IP) { >> + virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit); >> + } else { > A switch and a g_assert_not_reached() is better than this. sure. > >> + virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit); >> + } >> } >> =20 >> static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, NetRscS= eg *seg, >> @@ -1930,8 +1955,8 @@ coalesce: >> static int32_t virtio_net_rsc_coalesce4(NetRscChain *chain, NetRscSe= g *seg, >> const uint8_t *buf, size_t size, NetRscUnit = *unit) >> { >> - if ((unit->ip->ip_src ^ seg->unit.ip->ip_src) >> - || (unit->ip->ip_dst ^ seg->unit.ip->ip_dst) >> + if ((unit->u_ip.ip->ip_src ^ seg->unit.u_ip.ip->ip_src) >> + || (unit->u_ip.ip->ip_dst ^ seg->unit.u_ip.ip->ip_dst) >> || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) >> || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { >> chain->stat.no_match++; >> @@ -1941,6 +1966,22 @@ static int32_t virtio_net_rsc_coalesce4(NetRscC= hain *chain, NetRscSeg *seg, >> return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); >> } >> =20 >> +static int32_t virtio_net_rsc_coalesce6(NetRscChain *chain, NetRscSeg= *seg, >> + const uint8_t *buf, size_t size, NetRscUnit *= unit) >> +{ >> + if (memcmp(&unit->u_ip.ip6->ip6_src, &seg->unit.u_ip.ip6->ip6_src= , >> + sizeof(struct in6_address)) >> + || memcmp(&unit->u_ip.ip6->ip6_dst, &seg->unit.u_ip.ip6->ip6_= dst, >> + sizeof(struct in6_address)) >> + || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport) >> + || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) { >> + chain->stat.no_match++; >> + return RSC_NO_MATCH; >> + } >> + >> + return virtio_net_rsc_coalesce_data(chain, seg, buf, unit); >> +} >> + >> /* Pakcets with 'SYN' should bypass, other flag should be sent after= drain >> * to prevent out of order */ >> static int virtio_net_rsc_tcp_ctrl_check(NetRscChain *chain, >> @@ -1983,7 +2024,11 @@ static size_t virtio_net_rsc_do_coalesce(NetRsc= Chain *chain, NetClientState *nc, >> NetRscSeg *seg, *nseg; >> =20 >> QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { >> - ret =3D virtio_net_rsc_coalesce4(chain, seg, buf, size, unit)= ; >> + if (chain->proto =3D=3D ETH_P_IP) { >> + ret =3D virtio_net_rsc_coalesce4(chain, seg, buf, size, u= nit); >> + } else { >> + ret =3D virtio_net_rsc_coalesce6(chain, seg, buf, size, u= nit); >> + } >> =20 >> if (ret =3D=3D RSC_FINAL) { >> if (virtio_net_rsc_drain_seg(chain, seg) =3D=3D 0) { >> @@ -2082,7 +2127,8 @@ static size_t virtio_net_rsc_receive4(void *opq,= NetClientState* nc, >> =20 >> chain =3D (NetRscChain *)opq; >> virtio_net_rsc_extract_unit4(chain, buf, &unit); >> - if (RSC_WANT !=3D virtio_net_rsc_sanity_check4(chain, unit.ip, bu= f, size)) { >> + if (RSC_WANT !=3D virtio_net_rsc_sanity_check4(chain, >> + unit.u_ip.ip, buf, s= ize)) { >> return virtio_net_do_receive(nc, buf, size); >> } >> =20 >> @@ -2102,13 +2148,74 @@ static size_t virtio_net_rsc_receive4(void *op= q, NetClientState* nc, >> return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); >> } >> =20 >> +static int32_t virtio_net_rsc_sanity_check6(NetRscChain *chain, >> + struct ip6_header *ip, const uint8_t *buf, si= ze_t size) > Indentation is wrong here. ok. > >> +{ >> + uint16_t ip_len; >> + >> + if (size < (chain->hdr_size + ETH_IP6_HDR_SZ + TCP_HDR_SZ)) { >> + return RSC_BYPASS; >> + } >> + >> + if (((0xF0 & ip->ip6_ctlun.ip6_un1.ip6_un1_flow) >> 4) >> + !=3D IP_HEADER_VERSION_6) { >> + return RSC_BYPASS; >> + } >> + >> + /* Both option and protocol is checked in this */ >> + if (ip->ip6_ctlun.ip6_un1.ip6_un1_nxt !=3D IPPROTO_TCP) { >> + chain->stat.bypass_not_tcp++; >> + return RSC_BYPASS; >> + } >> + >> + /* Sanity check */ > The comment is useless. ok. > >> + ip_len =3D htons(ip->ip6_ctlun.ip6_un1.ip6_un1_plen); >> + if (ip_len < TCP_HDR_SZ >> + || ip_len > (size - chain->hdr_size - ETH_IP6_HDR_SZ)) { >> + chain->stat.ip_hacked++; >> + return RSC_BYPASS; >> + } >> + >> + return RSC_WANT; >> +} >> + >> +static size_t virtio_net_rsc_receive6(void *opq, NetClientState* nc, >> + const uint8_t *buf, size_t size= ) >> +{ > Rather similar to ipv4 version, need to unify the code. ok. > >> + int32_t ret; >> + NetRscChain *chain; >> + NetRscUnit unit; >> + >> + chain =3D (NetRscChain *)opq; >> + virtio_net_rsc_extract_unit6(chain, buf, &unit); >> + if (RSC_WANT !=3D virtio_net_rsc_sanity_check6(chain, >> + unit.u_ip.ip6, buf, = size)) { >> + return virtio_net_do_receive(nc, buf, size); >> + } >> + >> + ret =3D virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp); >> + if (ret =3D=3D RSC_BYPASS) { >> + return virtio_net_do_receive(nc, buf, size); >> + } else if (ret =3D=3D RSC_FINAL) { >> + return virtio_net_rsc_drain_flow(chain, nc, buf, size, >> + ((chain->hdr_size + ETH_HDR_SZ) + 8), IP6_ADD= R_SIZE, >> + (chain->hdr_size + ETH_IP6_HDR_SZ), TCP_PORT_= SIZE); >> + } >> + >> + if (virtio_net_rsc_empty_cache(chain, nc, buf, size)) { >> + return size; >> + } >> + >> + return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit); >> +} >> + >> static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n, >> NetClientState *nc, uint= 16_t proto) >> { >> NetRscChain *chain; >> =20 >> /* Only handle IPv4/6 */ >> - if (proto !=3D (uint16_t)ETH_P_IP) { >> + if ((proto !=3D (uint16_t)ETH_P_IP) && (proto !=3D (uint16_t)ETH_= P_IPV6)) { >> return NULL; >> } >> =20 >> @@ -2121,7 +2228,11 @@ static NetRscChain *virtio_net_rsc_lookup_chain= (VirtIONet * n, >> chain =3D g_malloc(sizeof(*chain)); >> chain->hdr_size =3D n->guest_hdr_len; >> chain->proto =3D proto; >> - chain->max_payload =3D MAX_IP4_PAYLOAD; >> + if (proto =3D=3D (uint16_t)ETH_P_IP) { >> + chain->max_payload =3D MAX_IP4_PAYLOAD; >> + } else { >> + chain->max_payload =3D MAX_IP6_PAYLOAD; >> + } >> chain->drain_timer =3D timer_new_ns(QEMU_CLOCK_VIRTUAL, >> virtio_net_rsc_purge, chain); >> memset(&chain->stat, 0, sizeof(chain->stat)); >> @@ -2153,7 +2264,12 @@ static ssize_t virtio_net_rsc_receive(NetClient= State *nc, >> return virtio_net_do_receive(nc, buf, size); >> } else { >> chain->stat.received++; >> - return virtio_net_rsc_receive4(chain, nc, buf, size); >> + >> + if (proto =3D=3D (uint16_t)ETH_P_IP) { >> + return virtio_net_rsc_receive4(chain, nc, buf, size); >> + } else { >> + return virtio_net_rsc_receive6(chain, nc, buf, size); >> + } >> } >> } >> =20 >> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h >> index 3b1dfa8..13d20a4 100644 >> --- a/include/hw/virtio/virtio.h >> +++ b/include/hw/virtio/virtio.h >> @@ -170,7 +170,10 @@ typedef struct NetRscStat { >> =20 >> /* Rsc unit general info used to checking if can coalescing */ >> typedef struct NetRscUnit { >> - struct ip_header *ip; /* ip header */ >> + union { >> + struct ip_header *ip; /* ip header */ >> + struct ip6_header *ip6; /* ip6 header */ >> + } u_ip; >> uint16_t *ip_plen; /* data len pointer in ip header field */ >> struct tcp_header *tcp; /* tcp header */ >> uint16_t tcp_hdrlen; /* tcp header len */