From: Wei Xu <wexu@redhat.com>
To: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [ RFC Patch v6 2/3] virtio-net rsc: support coalescing ipv6 tcp traffic
Date: Fri, 3 Jun 2016 00:17:45 +0800 [thread overview]
Message-ID: <57505C29.7050402@redhat.com> (raw)
In-Reply-To: <574BC09D.5050705@redhat.com>
On 2016年05月30日 12:25, Jason Wang wrote:
>
>
> On 2016年05月29日 00:37, wexu@redhat.com wrote:
>> From: Wei Xu <wexu@redhat.com>
>>
>> Most of the logic is the same as for ipv4; there are 2 differences between ipv4 and ipv6.
>>
>> 1. The length field in the ipv4 header includes the ip header itself, while
>> the ipv6 one does not, which means ipv6 can carry a real '65535' payload.
>>
>> 2. The IPv6 header does not need a header checksum calculation.
>>
>> Signed-off-by: Wei Xu <wexu@redhat.com>
>> ---
>> hw/net/virtio-net.c | 152
>> +++++++++++++++++++++++++++++++++++++++++++++++++---
>> 1 file changed, 144 insertions(+), 8 deletions(-)
>>
>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
>> index b3bb63b..cc8cbe4 100644
>> --- a/hw/net/virtio-net.c
>> +++ b/hw/net/virtio-net.c
>> @@ -53,6 +53,10 @@
>> /* header length value in ip header without option */
>> #define VIRTIO_NET_IP4_HEADER_LENGTH 5
>> +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ)
>> +#define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
>> +#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
>> +
>> /* Purge coalesced packets timer interval, This value affects the
>> performance
>> a lot, and should be tuned carefully, '300000'(300us) is the
>> recommended
>> value to pass the WHQL test, '50000' can gain 2x netperf
>> throughput with
>> @@ -1724,6 +1728,25 @@ static void
>> virtio_net_rsc_extract_unit4(NetRscChain *chain,
>> unit->payload = htons(*unit->ip_plen) - ip_hdrlen -
>> unit->tcp_hdrlen;
>> }
>> +static void virtio_net_rsc_extract_unit6(NetRscChain *chain,
>> + const uint8_t *buf,
>> NetRscUnit* unit)
>> +{
>> + uint16_t hdr_len;
>> + struct ip6_header *ip6;
>> +
>> + hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
>> + ip6 = (struct ip6_header *)(buf + hdr_len + sizeof(struct
>> eth_header));
>> + unit->ip = ip6;
>> + unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
>> + unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)\
>> + + sizeof(struct ip6_header));
>> + unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000)
>> >> 10;
>> +
>> + /* There is a difference between payload length in ipv4 and v6:
>> + the ip header is excluded in ipv6 */
>> + unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
>> +}
>> +
>> static void virtio_net_rsc_ipv4_checksum(struct virtio_net_hdr *vhdr,
>> struct ip_header *ip)
>> {
>> @@ -1742,7 +1765,9 @@ static size_t
>> virtio_net_rsc_drain_seg(NetRscChain *chain, NetRscSeg *seg)
>> struct virtio_net_hdr *h;
>> h = (struct virtio_net_hdr *)seg->buf;
>> - virtio_net_rsc_ipv4_checksum(h, seg->unit.ip);
>> + if ((chain->proto == ETH_P_IP) && seg->is_coalesced) {
>> + virtio_net_rsc_ipv4_checksum(h, seg->unit.ip);
>> + }
>> ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
>> QTAILQ_REMOVE(&chain->buffers, seg, next);
>> g_free(seg->buf);
>> @@ -1798,7 +1823,7 @@ static void virtio_net_rsc_cache_buf(NetRscChain
>> *chain, NetClientState *nc,
>> hdr_len = chain->n->guest_hdr_len;
>> seg = g_malloc(sizeof(NetRscSeg));
>> seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)\
>> - + VIRTIO_NET_MAX_TCP_PAYLOAD);
>> + + sizeof(struct ip6_header) +
>> VIRTIO_NET_MAX_TCP_PAYLOAD);
>> memcpy(seg->buf, buf, size);
>> seg->size = size;
>> seg->packets = 1;
>> @@ -1809,7 +1834,18 @@ static void
>> virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc,
>> QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
>> chain->stat.cache++;
>> - virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
>> + switch (chain->proto) {
>> + case ETH_P_IP:
>> + virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
>> + break;
>> +
>> + case ETH_P_IPV6:
>> + virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
>> + break;
>> +
>> + default:
>> + g_assert_not_reached();
>> + }
>> }
>> static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain,
>> @@ -1929,6 +1965,24 @@ static int32_t
>> virtio_net_rsc_coalesce4(NetRscChain *chain, NetRscSeg *seg,
>> return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
>> }
>> +static int32_t virtio_net_rsc_coalesce6(NetRscChain *chain, NetRscSeg
>> *seg,
>> + const uint8_t *buf, size_t size, NetRscUnit
>> *unit)
>> +{
>> + struct ip6_header *ip1, *ip2;
>> +
>> + ip1 = (struct ip6_header *)(unit->ip);
>> + ip2 = (struct ip6_header *)(seg->unit.ip);
>> + if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
>> + || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct
>> in6_address))
>> + || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
>> + || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
>> + chain->stat.no_match++;
>> + return RSC_NO_MATCH;
>> + }
>> +
>> + return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
>> +}
>> +
>> /* Pakcets with 'SYN' should bypass, other flag should be sent after
>> drain
>> * to prevent out of order */
>> static int virtio_net_rsc_tcp_ctrl_check(NetRscChain *chain,
>> @@ -1981,7 +2035,11 @@ static size_t
>> virtio_net_rsc_do_coalesce(NetRscChain *chain, NetClientState *nc,
>> NetRscSeg *seg, *nseg;
>> QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
>> - ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
>> + if (chain->proto == ETH_P_IP) {
>> + ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
>> + } else {
>> + ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
>> + }
>> if (ret == RSC_FINAL) {
>> if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
>> @@ -2106,13 +2164,82 @@ static size_t
>> virtio_net_rsc_receive4(NetRscChain *chain, NetClientState* nc,
>> return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
>> }
>> +static int32_t virtio_net_rsc_sanity_check6(NetRscChain *chain,
>> + struct ip6_header *ip6,
>> + const uint8_t *buf,
>> size_t size)
>> +{
>> + uint16_t ip_len;
>> + uint16_t hdr_len;
>> +
>> + hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
>> + if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct
>> ip6_header)
>> + + sizeof(tcp_header))) {
>> + return RSC_BYPASS;
>> + }
>> +
>> + if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
>> + != IP_HEADER_VERSION_6) {
>> + return RSC_BYPASS;
>> + }
>> +
>> + /* Both option and protocol are checked by this */
>> + if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
>> + chain->stat.bypass_not_tcp++;
>> + return RSC_BYPASS;
>> + }
>> +
>> + ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
>> + if (ip_len < sizeof(struct tcp_header)
>> + || ip_len > (size - hdr_len - sizeof(struct eth_header)
>> + - sizeof(struct ip6_header))) {
>> + chain->stat.ip_hacked++;
>> + return RSC_BYPASS;
>> + }
>> +
>> + return RSC_WANT;
>> +}
>> +
>> +static size_t virtio_net_rsc_receive6(void *opq, NetClientState* nc,
>> + const uint8_t *buf, size_t size)
>> +{
>> + int32_t ret;
>> + uint16_t hdr_len;
>> + NetRscChain *chain;
>> + NetRscUnit unit;
>> +
>> + chain = (NetRscChain *)opq;
>> + hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
>> + virtio_net_rsc_extract_unit6(chain, buf, &unit);
>
> Same issue as ipv4: it does not look safe to analyze the header before doing
> the sanity check.
OK.
>
>> + if (RSC_WANT != virtio_net_rsc_sanity_check6(chain,
>> + unit.ip, buf, size)) {
>> + return virtio_net_do_receive(nc, buf, size);
>> + }
>> +
>> + ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
>> + if (ret == RSC_BYPASS) {
>> + return virtio_net_do_receive(nc, buf, size);
>> + } else if (ret == RSC_FINAL) {
>> + return virtio_net_rsc_drain_flow(chain, nc, buf, size,
>> + ((hdr_len + sizeof(struct eth_header)) + 8),
>> + VIRTIO_NET_IP6_ADDR_SIZE,
>> + hdr_len + sizeof(struct eth_header) + sizeof(struct
>> ip6_header),
>> + VIRTIO_NET_TCP_PORT_SIZE);
>> + }
>> +
>> + if (virtio_net_rsc_empty_cache(chain, nc, buf, size)) {
>> + return size;
>> + }
>> +
>> + return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
>> +}
>> +
>> static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n,
>> NetClientState *nc,
>> uint16_t proto)
>> {
>> NetRscChain *chain;
>> - if (proto != (uint16_t)ETH_P_IP) {
>> + if ((proto != (uint16_t)ETH_P_IP) && (proto !=
>> (uint16_t)ETH_P_IPV6)) {
>> return NULL;
>> }
>> @@ -2125,8 +2252,13 @@ static NetRscChain
>> *virtio_net_rsc_lookup_chain(VirtIONet * n,
>> chain = g_malloc(sizeof(*chain));
>> chain->n = n;
>> chain->proto = proto;
>> - chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
>> - chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
>> + if (proto == (uint16_t)ETH_P_IP) {
>> + chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
>> + chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
>> + } else {
>> + chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
>> + chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
>> + }
>> chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
>> virtio_net_rsc_purge, chain);
>> memset(&chain->stat, 0, sizeof(chain->stat));
>> @@ -2158,7 +2290,11 @@ static ssize_t
>> virtio_net_rsc_receive(NetClientState *nc,
>> return virtio_net_do_receive(nc, buf, size);
>> } else {
>> chain->stat.received++;
>> - return virtio_net_rsc_receive4(chain, nc, buf, size);
>> + if (proto == (uint16_t)ETH_P_IP) {
>> + return virtio_net_rsc_receive4(chain, nc, buf, size);
>> + } else {
>> + return virtio_net_rsc_receive6(chain, nc, buf, size);
>> + }
>> }
>> }
>
>
next prev parent reply other threads:[~2016-06-02 16:17 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-05-28 16:37 [Qemu-devel] [ RFC Patch v6 0/2] Support Receive-Segment-Offload(RSC) for WHQL wexu
2016-05-28 16:37 ` [Qemu-devel] [ RFC Patch v6 1/3] virtio-net rsc: support coalescing ipv4 tcp traffic wexu
2016-05-30 4:20 ` Jason Wang
2016-06-02 16:16 ` Wei Xu
2016-05-28 16:37 ` [Qemu-devel] [ RFC Patch v6 2/3] virtio-net rsc: support coalescing ipv6 " wexu
2016-05-30 4:25 ` Jason Wang
2016-06-02 16:17 ` Wei Xu [this message]
2016-05-28 16:37 ` [Qemu-devel] [ RFC Patch v6 3/3] virtio-net rsc: add 2 new rsc information fields to 'virtio_net_hdr' wexu
2016-05-30 5:57 ` Jason Wang
2016-06-02 16:23 ` Wei Xu
2016-05-30 4:22 ` [Qemu-devel] [ RFC Patch v6 0/2] Support Receive-Segment-Offload(RSC) for WHQL Jason Wang
2016-05-30 4:50 ` Wei Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=57505C29.7050402@redhat.com \
--to=wexu@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.