From: Jason Wang <jasowang@redhat.com>
To: wexu@redhat.com, qemu-devel@nongnu.org
Cc: marcel@redhat.com, victork@redhat.com, dfleytma@redhat.com,
yvugenfi@redhat.com, mst@redhat.com
Subject: Re: [Qemu-devel] [ Patch 2/2] virtio-net rsc: support coalescing ipv6 tcp traffic
Date: Thu, 17 Mar 2016 16:50:58 +0800 [thread overview]
Message-ID: <56EA6FF2.7030009@redhat.com> (raw)
In-Reply-To: <1458033424-25414-3-git-send-email-wexu@redhat.com>
On 03/15/2016 05:17 PM, wexu@redhat.com wrote:
> From: Wei Xu <wexu@redhat.com>
>
> Most things like ipv4 except there is a significant difference between ipv4
> and ipv6, the fragment lenght in ipv4 header includes itself, while it's not
> included for ipv6, thus means ipv6 can carry a real '65535' unit.
>
> Signed-off-by: Wei Xu <wexu@redhat.com>
> ---
> hw/net/virtio-net.c | 146 ++++++++++++++++++++++++++++++++++++++++-----
> include/hw/virtio/virtio.h | 5 +-
> 2 files changed, 135 insertions(+), 16 deletions(-)
>
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index c23b45f..ef61b74 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -52,9 +52,14 @@
> #define MAX_IP4_PAYLOAD (65535 - IP4_HDR_SZ)
> #define MAX_TCP_PAYLOAD 65535
>
> -/* max payload with virtio header */
> +#define IP6_HDR_SZ (sizeof(struct ip6_header))
> +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ)
> +#define IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
> +#define MAX_IP6_PAYLOAD MAX_TCP_PAYLOAD
> +
> +/* ip6 max payload, payload in ipv6 don't include the header */
> #define MAX_VIRTIO_PAYLOAD (sizeof(struct virtio_net_hdr_mrg_rxbuf) \
> - + ETH_HDR_SZ + MAX_TCP_PAYLOAD)
> + + ETH_IP6_HDR_SZ + MAX_IP6_PAYLOAD)
>
> #define IP4_HEADER_LEN 5 /* header lenght value in ip header without option */
>
> @@ -1722,14 +1727,27 @@ static void virtio_net_rsc_extract_unit4(NetRscChain *chain,
> {
> uint16_t ip_hdrlen;
>
> - unit->ip = (struct ip_header *)(buf + chain->hdr_size + ETH_HDR_SZ);
> - ip_hdrlen = ((0xF & unit->ip->ip_ver_len) << 2);
> - unit->ip_plen = &unit->ip->ip_len;
> - unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip) + ip_hdrlen);
> + unit->u_ip.ip = (struct ip_header *)(buf + chain->hdr_size + ETH_HDR_SZ);
> + ip_hdrlen = ((0xF & unit->u_ip.ip->ip_ver_len) << 2);
> + unit->ip_plen = &unit->u_ip.ip->ip_len;
> + unit->tcp = (struct tcp_header *)(((uint8_t *)unit->u_ip.ip) + ip_hdrlen);
> unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
> unit->payload = htons(*unit->ip_plen) - ip_hdrlen - unit->tcp_hdrlen;
> }
>
> +static void virtio_net_rsc_extract_unit6(NetRscChain *chain,
> + const uint8_t *buf, NetRscUnit* unit)
> +{
> + unit->u_ip.ip6 = (struct ip6_header *)(buf + chain->hdr_size + ETH_HDR_SZ);
The u_ip union seems a little redundant. How about using a simple void * and
casting it to ipv4/ipv6 in the protocol-specific callbacks?
Introducing u_ip leads to unnecessary changes to the ipv4 code for the ipv6
coalescing implementation.
> + unit->ip_plen = &(unit->u_ip.ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
> + unit->tcp = (struct tcp_header *)(((uint8_t *)unit->u_ip.ip6)\
> + + IP6_HDR_SZ);
> + unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000) >> 10;
> + /* There is a difference between payload lenght in ipv4 and v6,
> + ip header is excluded in ipv6 */
> + unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
> +}
> +
> static void virtio_net_rsc_ipv4_checksum(struct ip_header *ip)
> {
> uint32_t sum;
> @@ -1743,7 +1761,10 @@ static size_t virtio_net_rsc_drain_seg(NetRscChain *chain, NetRscSeg *seg)
> {
> int ret;
>
> - virtio_net_rsc_ipv4_checksum(seg->unit.ip);
> + if ((chain->proto == ETH_P_IP) && seg->is_coalesced) {
> + virtio_net_rsc_ipv4_checksum(seg->unit.u_ip.ip);
> + }
> +
> ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
> QTAILQ_REMOVE(&chain->buffers, seg, next);
> g_free(seg->buf);
> @@ -1807,7 +1828,11 @@ static void virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc,
> QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
> chain->stat.cache++;
>
> - virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
> + if (chain->proto == ETH_P_IP) {
> + virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
> + } else {
A switch with g_assert_not_reached() would be better than this.
> + virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
> + }
> }
>
> static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, NetRscSeg *seg,
> @@ -1930,8 +1955,8 @@ coalesce:
> static int32_t virtio_net_rsc_coalesce4(NetRscChain *chain, NetRscSeg *seg,
> const uint8_t *buf, size_t size, NetRscUnit *unit)
> {
> - if ((unit->ip->ip_src ^ seg->unit.ip->ip_src)
> - || (unit->ip->ip_dst ^ seg->unit.ip->ip_dst)
> + if ((unit->u_ip.ip->ip_src ^ seg->unit.u_ip.ip->ip_src)
> + || (unit->u_ip.ip->ip_dst ^ seg->unit.u_ip.ip->ip_dst)
> || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
> || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
> chain->stat.no_match++;
> @@ -1941,6 +1966,22 @@ static int32_t virtio_net_rsc_coalesce4(NetRscChain *chain, NetRscSeg *seg,
> return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
> }
>
> +static int32_t virtio_net_rsc_coalesce6(NetRscChain *chain, NetRscSeg *seg,
> + const uint8_t *buf, size_t size, NetRscUnit *unit)
> +{
> + if (memcmp(&unit->u_ip.ip6->ip6_src, &seg->unit.u_ip.ip6->ip6_src,
> + sizeof(struct in6_address))
> + || memcmp(&unit->u_ip.ip6->ip6_dst, &seg->unit.u_ip.ip6->ip6_dst,
> + sizeof(struct in6_address))
> + || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
> + || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
> + chain->stat.no_match++;
> + return RSC_NO_MATCH;
> + }
> +
> + return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
> +}
> +
> /* Pakcets with 'SYN' should bypass, other flag should be sent after drain
> * to prevent out of order */
> static int virtio_net_rsc_tcp_ctrl_check(NetRscChain *chain,
> @@ -1983,7 +2024,11 @@ static size_t virtio_net_rsc_do_coalesce(NetRscChain *chain, NetClientState *nc,
> NetRscSeg *seg, *nseg;
>
> QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
> - ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
> + if (chain->proto == ETH_P_IP) {
> + ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
> + } else {
> + ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
> + }
>
> if (ret == RSC_FINAL) {
> if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
> @@ -2082,7 +2127,8 @@ static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
>
> chain = (NetRscChain *)opq;
> virtio_net_rsc_extract_unit4(chain, buf, &unit);
> - if (RSC_WANT != virtio_net_rsc_sanity_check4(chain, unit.ip, buf, size)) {
> + if (RSC_WANT != virtio_net_rsc_sanity_check4(chain,
> + unit.u_ip.ip, buf, size)) {
> return virtio_net_do_receive(nc, buf, size);
> }
>
> @@ -2102,13 +2148,74 @@ static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
> return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
> }
>
> +static int32_t virtio_net_rsc_sanity_check6(NetRscChain *chain,
> + struct ip6_header *ip, const uint8_t *buf, size_t size)
Indentation is wrong here.
> +{
> + uint16_t ip_len;
> +
> + if (size < (chain->hdr_size + ETH_IP6_HDR_SZ + TCP_HDR_SZ)) {
> + return RSC_BYPASS;
> + }
> +
> + if (((0xF0 & ip->ip6_ctlun.ip6_un1.ip6_un1_flow) >> 4)
> + != IP_HEADER_VERSION_6) {
> + return RSC_BYPASS;
> + }
> +
> + /* Both option and protocol is checked in this */
> + if (ip->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
> + chain->stat.bypass_not_tcp++;
> + return RSC_BYPASS;
> + }
> +
> + /* Sanity check */
The comment is useless.
> + ip_len = htons(ip->ip6_ctlun.ip6_un1.ip6_un1_plen);
> + if (ip_len < TCP_HDR_SZ
> + || ip_len > (size - chain->hdr_size - ETH_IP6_HDR_SZ)) {
> + chain->stat.ip_hacked++;
> + return RSC_BYPASS;
> + }
> +
> + return RSC_WANT;
> +}
> +
> +static size_t virtio_net_rsc_receive6(void *opq, NetClientState* nc,
> + const uint8_t *buf, size_t size)
> +{
This is rather similar to the ipv4 version; the code needs to be unified.
> + int32_t ret;
> + NetRscChain *chain;
> + NetRscUnit unit;
> +
> + chain = (NetRscChain *)opq;
> + virtio_net_rsc_extract_unit6(chain, buf, &unit);
> + if (RSC_WANT != virtio_net_rsc_sanity_check6(chain,
> + unit.u_ip.ip6, buf, size)) {
> + return virtio_net_do_receive(nc, buf, size);
> + }
> +
> + ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
> + if (ret == RSC_BYPASS) {
> + return virtio_net_do_receive(nc, buf, size);
> + } else if (ret == RSC_FINAL) {
> + return virtio_net_rsc_drain_flow(chain, nc, buf, size,
> + ((chain->hdr_size + ETH_HDR_SZ) + 8), IP6_ADDR_SIZE,
> + (chain->hdr_size + ETH_IP6_HDR_SZ), TCP_PORT_SIZE);
> + }
> +
> + if (virtio_net_rsc_empty_cache(chain, nc, buf, size)) {
> + return size;
> + }
> +
> + return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
> +}
> +
> static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n,
> NetClientState *nc, uint16_t proto)
> {
> NetRscChain *chain;
>
> /* Only handle IPv4/6 */
> - if (proto != (uint16_t)ETH_P_IP) {
> + if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
> return NULL;
> }
>
> @@ -2121,7 +2228,11 @@ static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n,
> chain = g_malloc(sizeof(*chain));
> chain->hdr_size = n->guest_hdr_len;
> chain->proto = proto;
> - chain->max_payload = MAX_IP4_PAYLOAD;
> + if (proto == (uint16_t)ETH_P_IP) {
> + chain->max_payload = MAX_IP4_PAYLOAD;
> + } else {
> + chain->max_payload = MAX_IP6_PAYLOAD;
> + }
> chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
> virtio_net_rsc_purge, chain);
> memset(&chain->stat, 0, sizeof(chain->stat));
> @@ -2153,7 +2264,12 @@ static ssize_t virtio_net_rsc_receive(NetClientState *nc,
> return virtio_net_do_receive(nc, buf, size);
> } else {
> chain->stat.received++;
> - return virtio_net_rsc_receive4(chain, nc, buf, size);
> +
> + if (proto == (uint16_t)ETH_P_IP) {
> + return virtio_net_rsc_receive4(chain, nc, buf, size);
> + } else {
> + return virtio_net_rsc_receive6(chain, nc, buf, size);
> + }
> }
> }
>
> diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
> index 3b1dfa8..13d20a4 100644
> --- a/include/hw/virtio/virtio.h
> +++ b/include/hw/virtio/virtio.h
> @@ -170,7 +170,10 @@ typedef struct NetRscStat {
>
> /* Rsc unit general info used to checking if can coalescing */
> typedef struct NetRscUnit {
> - struct ip_header *ip; /* ip header */
> + union {
> + struct ip_header *ip; /* ip header */
> + struct ip6_header *ip6; /* ip6 header */
> + } u_ip;
> uint16_t *ip_plen; /* data len pointer in ip header field */
> struct tcp_header *tcp; /* tcp header */
> uint16_t tcp_hdrlen; /* tcp header len */
next prev parent reply other threads:[~2016-03-17 8:51 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-03-15 9:17 [Qemu-devel] [ Patch 0/2] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest wexu
2016-03-15 9:17 ` [Qemu-devel] [ Patch 1/2] virtio-net rsc: support coalescing ipv4 tcp traffic wexu
2016-03-15 10:00 ` Michael S. Tsirkin
2016-03-16 3:23 ` Wei Xu
2016-03-17 8:42 ` Jason Wang
2016-03-17 16:45 ` Wei Xu
2016-03-18 2:03 ` Jason Wang
2016-03-18 4:17 ` Wei Xu
2016-03-18 5:20 ` Jason Wang
2016-03-18 6:38 ` Wei Xu
2016-03-18 6:56 ` Jason Wang
2016-03-18 14:52 ` Wei Xu
2016-03-15 9:17 ` [Qemu-devel] [ Patch 2/2] virtio-net rsc: support coalescing ipv6 " wexu
2016-03-17 8:50 ` Jason Wang [this message]
2016-03-17 16:50 ` Wei Xu
2016-03-15 10:01 ` [Qemu-devel] [ Patch 0/2] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest Michael S. Tsirkin
2016-03-16 3:08 ` Wei Xu
2016-03-17 6:47 ` Jason Wang
2016-03-17 15:21 ` Wei Xu
2016-03-17 15:44 ` Michael S. Tsirkin
2016-03-17 16:57 ` Wei Xu
2016-03-18 2:22 ` Jason Wang
2016-03-18 4:24 ` Wei Xu
2016-03-18 5:21 ` Jason Wang
2016-03-18 6:30 ` Wei Xu
-- strict thread matches above, loose matches on Subject: below --
2016-10-31 17:41 [Qemu-devel] [ RFC Patch v7 0/2] Support Receive-Segment-Offload(RSC) for WHQL wexu
2016-10-31 17:41 ` [Qemu-devel] [PATCH 2/2] virtio-net rsc: support coalescing ipv6 tcp traffic wexu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=56EA6FF2.7030009@redhat.com \
--to=jasowang@redhat.com \
--cc=dfleytma@redhat.com \
--cc=marcel@redhat.com \
--cc=mst@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=victork@redhat.com \
--cc=wexu@redhat.com \
--cc=yvugenfi@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.