From: Wei Xu <wexu@redhat.com>
To: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] [ RFC Patch v6 2/3] virtio-net rsc: support coalescing ipv6 tcp traffic
Date: Fri, 3 Jun 2016 00:17:45 +0800 [thread overview]
Message-ID: <57505C29.7050402@redhat.com> (raw)
In-Reply-To: <574BC09D.5050705@redhat.com>
On 2016年05月30日 12:25, Jason Wang wrote:
>
>
> On 2016年05月29日 00:37, wexu@redhat.com wrote:
>> From: Wei Xu <wexu@redhat.com>
>>
>> Most of the logic is the same as for ipv4; there are 2 differences between ipv4 and ipv6.
>>
>> 1. Fragment length in the ipv4 header includes the header itself, while it
>> is not included for ipv6, which means ipv6 can carry a real '65535' payload.
>>
>> 2. The IPv6 header does not need a header checksum calculation.
>>
>> Signed-off-by: Wei Xu <wexu@redhat.com>
>> ---
>> hw/net/virtio-net.c | 152
>> +++++++++++++++++++++++++++++++++++++++++++++++++---
>> 1 file changed, 144 insertions(+), 8 deletions(-)
>>
>> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
>> index b3bb63b..cc8cbe4 100644
>> --- a/hw/net/virtio-net.c
>> +++ b/hw/net/virtio-net.c
>> @@ -53,6 +53,10 @@
>> /* header length value in ip header without option */
>> #define VIRTIO_NET_IP4_HEADER_LENGTH 5
>> +#define ETH_IP6_HDR_SZ (ETH_HDR_SZ + IP6_HDR_SZ)
>> +#define VIRTIO_NET_IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
>> +#define VIRTIO_NET_MAX_IP6_PAYLOAD VIRTIO_NET_MAX_TCP_PAYLOAD
>> +
>> /* Purge coalesced packets timer interval, This value affects the
>> performance
>> a lot, and should be tuned carefully, '300000'(300us) is the
>> recommended
>> value to pass the WHQL test, '50000' can gain 2x netperf
>> throughput with
>> @@ -1724,6 +1728,25 @@ static void
>> virtio_net_rsc_extract_unit4(NetRscChain *chain,
>> unit->payload = htons(*unit->ip_plen) - ip_hdrlen -
>> unit->tcp_hdrlen;
>> }
>> +static void virtio_net_rsc_extract_unit6(NetRscChain *chain,
>> + const uint8_t *buf,
>> NetRscUnit* unit)
>> +{
>> + uint16_t hdr_len;
>> + struct ip6_header *ip6;
>> +
>> + hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
>> + ip6 = (struct ip6_header *)(buf + hdr_len + sizeof(struct
>> eth_header));
>> + unit->ip = ip6;
>> + unit->ip_plen = &(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
>> + unit->tcp = (struct tcp_header *)(((uint8_t *)unit->ip)\
>> + + sizeof(struct ip6_header));
>> + unit->tcp_hdrlen = (htons(unit->tcp->th_offset_flags) & 0xF000)
>> >> 10;
>> +
>> + /* There is a difference between payload length in ipv4 and v6,
>> + ip header is excluded in ipv6 */
>> + unit->payload = htons(*unit->ip_plen) - unit->tcp_hdrlen;
>> +}
>> +
>> static void virtio_net_rsc_ipv4_checksum(struct virtio_net_hdr *vhdr,
>> struct ip_header *ip)
>> {
>> @@ -1742,7 +1765,9 @@ static size_t
>> virtio_net_rsc_drain_seg(NetRscChain *chain, NetRscSeg *seg)
>> struct virtio_net_hdr *h;
>> h = (struct virtio_net_hdr *)seg->buf;
>> - virtio_net_rsc_ipv4_checksum(h, seg->unit.ip);
>> + if ((chain->proto == ETH_P_IP) && seg->is_coalesced) {
>> + virtio_net_rsc_ipv4_checksum(h, seg->unit.ip);
>> + }
>> ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
>> QTAILQ_REMOVE(&chain->buffers, seg, next);
>> g_free(seg->buf);
>> @@ -1798,7 +1823,7 @@ static void virtio_net_rsc_cache_buf(NetRscChain
>> *chain, NetClientState *nc,
>> hdr_len = chain->n->guest_hdr_len;
>> seg = g_malloc(sizeof(NetRscSeg));
>> seg->buf = g_malloc(hdr_len + sizeof(struct eth_header)\
>> - + VIRTIO_NET_MAX_TCP_PAYLOAD);
>> + + sizeof(struct ip6_header) +
>> VIRTIO_NET_MAX_TCP_PAYLOAD);
>> memcpy(seg->buf, buf, size);
>> seg->size = size;
>> seg->packets = 1;
>> @@ -1809,7 +1834,18 @@ static void
>> virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc,
>> QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
>> chain->stat.cache++;
>> - virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
>> + switch (chain->proto) {
>> + case ETH_P_IP:
>> + virtio_net_rsc_extract_unit4(chain, seg->buf, &seg->unit);
>> + break;
>> +
>> + case ETH_P_IPV6:
>> + virtio_net_rsc_extract_unit6(chain, seg->buf, &seg->unit);
>> + break;
>> +
>> + default:
>> + g_assert_not_reached();
>> + }
>> }
>> static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain,
>> @@ -1929,6 +1965,24 @@ static int32_t
>> virtio_net_rsc_coalesce4(NetRscChain *chain, NetRscSeg *seg,
>> return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
>> }
>> +static int32_t virtio_net_rsc_coalesce6(NetRscChain *chain, NetRscSeg
>> *seg,
>> + const uint8_t *buf, size_t size, NetRscUnit
>> *unit)
>> +{
>> + struct ip6_header *ip1, *ip2;
>> +
>> + ip1 = (struct ip6_header *)(unit->ip);
>> + ip2 = (struct ip6_header *)(seg->unit.ip);
>> + if (memcmp(&ip1->ip6_src, &ip2->ip6_src, sizeof(struct in6_address))
>> + || memcmp(&ip1->ip6_dst, &ip2->ip6_dst, sizeof(struct
>> in6_address))
>> + || (unit->tcp->th_sport ^ seg->unit.tcp->th_sport)
>> + || (unit->tcp->th_dport ^ seg->unit.tcp->th_dport)) {
>> + chain->stat.no_match++;
>> + return RSC_NO_MATCH;
>> + }
>> +
>> + return virtio_net_rsc_coalesce_data(chain, seg, buf, unit);
>> +}
>> +
>> /* Pakcets with 'SYN' should bypass, other flag should be sent after
>> drain
>> * to prevent out of order */
>> static int virtio_net_rsc_tcp_ctrl_check(NetRscChain *chain,
>> @@ -1981,7 +2035,11 @@ static size_t
>> virtio_net_rsc_do_coalesce(NetRscChain *chain, NetClientState *nc,
>> NetRscSeg *seg, *nseg;
>> QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
>> - ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
>> + if (chain->proto == ETH_P_IP) {
>> + ret = virtio_net_rsc_coalesce4(chain, seg, buf, size, unit);
>> + } else {
>> + ret = virtio_net_rsc_coalesce6(chain, seg, buf, size, unit);
>> + }
>> if (ret == RSC_FINAL) {
>> if (virtio_net_rsc_drain_seg(chain, seg) == 0) {
>> @@ -2106,13 +2164,82 @@ static size_t
>> virtio_net_rsc_receive4(NetRscChain *chain, NetClientState* nc,
>> return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
>> }
>> +static int32_t virtio_net_rsc_sanity_check6(NetRscChain *chain,
>> + struct ip6_header *ip6,
>> + const uint8_t *buf,
>> size_t size)
>> +{
>> + uint16_t ip_len;
>> + uint16_t hdr_len;
>> +
>> + hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
>> + if (size < (hdr_len + sizeof(struct eth_header) + sizeof(struct
>> ip6_header)
>> + + sizeof(tcp_header))) {
>> + return RSC_BYPASS;
>> + }
>> +
>> + if (((ip6->ip6_ctlun.ip6_un1.ip6_un1_flow & 0xF0) >> 4)
>> + != IP_HEADER_VERSION_6) {
>> + return RSC_BYPASS;
>> + }
>> +
>> + /* Both option and protocol are checked by this test */
>> + if (ip6->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
>> + chain->stat.bypass_not_tcp++;
>> + return RSC_BYPASS;
>> + }
>> +
>> + ip_len = htons(ip6->ip6_ctlun.ip6_un1.ip6_un1_plen);
>> + if (ip_len < sizeof(struct tcp_header)
>> + || ip_len > (size - hdr_len - sizeof(struct eth_header)
>> + - sizeof(struct ip6_header))) {
>> + chain->stat.ip_hacked++;
>> + return RSC_BYPASS;
>> + }
>> +
>> + return RSC_WANT;
>> +}
>> +
>> +static size_t virtio_net_rsc_receive6(void *opq, NetClientState* nc,
>> + const uint8_t *buf, size_t size)
>> +{
>> + int32_t ret;
>> + uint16_t hdr_len;
>> + NetRscChain *chain;
>> + NetRscUnit unit;
>> +
>> + chain = (NetRscChain *)opq;
>> + hdr_len = ((VirtIONet *)(chain->n))->guest_hdr_len;
>> + virtio_net_rsc_extract_unit6(chain, buf, &unit);
>
> Same issue as ipv4: it does not look safe to parse the header before
> doing the sanity check.
OK.
>
>> + if (RSC_WANT != virtio_net_rsc_sanity_check6(chain,
>> + unit.ip, buf, size)) {
>> + return virtio_net_do_receive(nc, buf, size);
>> + }
>> +
>> + ret = virtio_net_rsc_tcp_ctrl_check(chain, unit.tcp);
>> + if (ret == RSC_BYPASS) {
>> + return virtio_net_do_receive(nc, buf, size);
>> + } else if (ret == RSC_FINAL) {
>> + return virtio_net_rsc_drain_flow(chain, nc, buf, size,
>> + ((hdr_len + sizeof(struct eth_header)) + 8),
>> + VIRTIO_NET_IP6_ADDR_SIZE,
>> + hdr_len + sizeof(struct eth_header) + sizeof(struct
>> ip6_header),
>> + VIRTIO_NET_TCP_PORT_SIZE);
>> + }
>> +
>> + if (virtio_net_rsc_empty_cache(chain, nc, buf, size)) {
>> + return size;
>> + }
>> +
>> + return virtio_net_rsc_do_coalesce(chain, nc, buf, size, &unit);
>> +}
>> +
>> static NetRscChain *virtio_net_rsc_lookup_chain(VirtIONet * n,
>> NetClientState *nc,
>> uint16_t proto)
>> {
>> NetRscChain *chain;
>> - if (proto != (uint16_t)ETH_P_IP) {
>> + if ((proto != (uint16_t)ETH_P_IP) && (proto !=
>> (uint16_t)ETH_P_IPV6)) {
>> return NULL;
>> }
>> @@ -2125,8 +2252,13 @@ static NetRscChain
>> *virtio_net_rsc_lookup_chain(VirtIONet * n,
>> chain = g_malloc(sizeof(*chain));
>> chain->n = n;
>> chain->proto = proto;
>> - chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
>> - chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
>> + if (proto == (uint16_t)ETH_P_IP) {
>> + chain->max_payload = VIRTIO_NET_MAX_IP4_PAYLOAD;
>> + chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
>> + } else {
>> + chain->max_payload = VIRTIO_NET_MAX_IP6_PAYLOAD;
>> + chain->gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
>> + }
>> chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
>> virtio_net_rsc_purge, chain);
>> memset(&chain->stat, 0, sizeof(chain->stat));
>> @@ -2158,7 +2290,11 @@ static ssize_t
>> virtio_net_rsc_receive(NetClientState *nc,
>> return virtio_net_do_receive(nc, buf, size);
>> } else {
>> chain->stat.received++;
>> - return virtio_net_rsc_receive4(chain, nc, buf, size);
>> + if (proto == (uint16_t)ETH_P_IP) {
>> + return virtio_net_rsc_receive4(chain, nc, buf, size);
>> + } else {
>> + return virtio_net_rsc_receive6(chain, nc, buf, size);
>> + }
>> }
>> }
>
>
next prev parent reply other threads:[~2016-06-02 16:17 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-05-28 16:37 [Qemu-devel] [ RFC Patch v6 0/2] Support Receive-Segment-Offload(RSC) for WHQL wexu
2016-05-28 16:37 ` [Qemu-devel] [ RFC Patch v6 1/3] virtio-net rsc: support coalescing ipv4 tcp traffic wexu
2016-05-30 4:20 ` Jason Wang
2016-06-02 16:16 ` Wei Xu
2016-05-28 16:37 ` [Qemu-devel] [ RFC Patch v6 2/3] virtio-net rsc: support coalescing ipv6 " wexu
2016-05-30 4:25 ` Jason Wang
2016-06-02 16:17 ` Wei Xu [this message]
2016-05-28 16:37 ` [Qemu-devel] [ RFC Patch v6 3/3] virtio-net rsc: add 2 new rsc information fields to 'virtio_net_hdr' wexu
2016-05-30 5:57 ` Jason Wang
2016-06-02 16:23 ` Wei Xu
2016-05-30 4:22 ` [Qemu-devel] [ RFC Patch v6 0/2] Support Receive-Segment-Offload(RSC) for WHQL Jason Wang
2016-05-30 4:50 ` Wei Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=57505C29.7050402@redhat.com \
--to=wexu@redhat.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).