* [Qemu-devel] [RFC Patch 01/10] 'Segment', 'Chain' and 'Status' enumeration data structure.
2016-01-25 22:24 [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest wexu
@ 2016-01-25 22:24 ` wexu
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 02/10] Initilize & Cleanup wexu
` (9 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: wexu @ 2016-01-25 22:24 UTC (permalink / raw)
To: qemu-devel; +Cc: victork, mst, jasowang, yvugenfi, Wei Xu, marcel, dfleytma
Signed-off-by: Wei Xu <wexu@redhat.com>
---
include/hw/virtio/virtio.h | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 205fadf..1383220 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -127,6 +127,38 @@ typedef struct VirtioDeviceClass {
int (*load)(VirtIODevice *vdev, QEMUFile *f, int version_id);
} VirtioDeviceClass;
+/* Coalesced packets type & status */
+typedef enum {
+ RSC_COALESCE, /* Data has been coalesced into a cached segment */
+ RSC_FINAL, /* Will terminate current connection */
+ RSC_NO_MATCH, /* No match in the buffer pool */
+ RSC_BYPASS, /* Packet to be bypassed: not tcp, tcp ctrl, etc */
+ RSC_WANT /* Data wants to be coalesced */
+} COALESCE_STATUS;
+
+/* Coalesced segment: one cached in-flight packet awaiting coalescing */
+typedef struct NetRscSeg {
+ QTAILQ_ENTRY(NetRscSeg) next;
+ void *buf; /* packet data, virtio/eth headers included */
+ size_t size; /* current length of buf in bytes */
+ uint32_t dup_ack_count; /* duplicated acks seen for this segment */
+ bool is_coalesced; /* need recal ipv4 header checksum, mark here */
+ NetClientState *nc; /* queue the segment arrived on */
+} NetRscSeg;
+
+/* Receive callback for ipv4/6 */
+typedef size_t (VirtioNetReceive) (void *,
+ NetClientState *, const uint8_t *, size_t);
+
+/* Chain is divided by protocol(ipv4/v6) and NetClientInfo */
+typedef struct NetRscChain {
+ QTAILQ_ENTRY(NetRscChain) next;
+ uint16_t proto; /* ethertype, host order */
+ VirtioNetReceive *do_receive; /* per-protocol receive entry */
+ QEMUTimer *drain_timer; /* periodically purges cached segments */
+ QTAILQ_HEAD(, NetRscSeg) buffers;
+} NetRscChain;
+
void virtio_instance_init_common(Object *proxy_obj, void *data,
size_t vdev_size, const char *vdev_name);
--
2.4.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC Patch 02/10] Initialize & Cleanup.
2016-01-25 22:24 [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest wexu
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 01/10] 'Segment', 'Chain' and 'Status' enumeration data structure wexu
@ 2016-01-25 22:24 ` wexu
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 03/10] Chain lookup and packets caching wexu
` (8 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: wexu @ 2016-01-25 22:24 UTC (permalink / raw)
To: qemu-devel; +Cc: victork, mst, jasowang, yvugenfi, Wei Xu, marcel, dfleytma
Signed-off-by: Wei Xu <wexu@redhat.com>
---
hw/net/virtio-net.c | 22 ++++++++++++++++++++++
include/hw/virtio/virtio-net.h | 1 +
2 files changed, 23 insertions(+)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index a877614..4e9458e 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1603,6 +1603,26 @@ static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
return 0;
}
+
+/* Free every cached segment and every chain; called on unrealize */
+static void virtio_net_rsc_cleanup(VirtIONet *n)
+{
+ NetRscChain *chain, *rn_chain;
+ NetRscSeg *seg, *rn_seg;
+
+ QTAILQ_FOREACH_SAFE(chain, &n->rsc_chains, next, rn_chain) {
+ QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn_seg) {
+ QTAILQ_REMOVE(&chain->buffers, seg, next);
+ g_free(seg->buf);
+ g_free(seg);
+ }
+
+ /* Tear the chain down once, after all its segments; previously
+ * this block sat inside the segment loop, double-freeing the
+ * chain when it held more than one segment and leaking it (and
+ * its timer) when it held none. */
+ timer_del(chain->drain_timer);
+ timer_free(chain->drain_timer);
+ QTAILQ_REMOVE(&n->rsc_chains, chain, next);
+ g_free(chain);
+ }
+}
+
static NetClientInfo net_virtio_info = {
.type = NET_CLIENT_OPTIONS_KIND_NIC,
.size = sizeof(NICState),
@@ -1732,6 +1752,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
nc = qemu_get_queue(n->nic);
nc->rxfilter_notify_enabled = 1;
+ QTAILQ_INIT(&n->rsc_chains);
n->qdev = dev;
register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
virtio_net_save, virtio_net_load, n);
@@ -1766,6 +1787,7 @@ static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
g_free(n->vqs);
qemu_del_nic(n->nic);
virtio_cleanup(vdev);
+ virtio_net_rsc_cleanup(n);
}
static void virtio_net_instance_init(Object *obj)
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
index f3cc25f..6ce8b93 100644
--- a/include/hw/virtio/virtio-net.h
+++ b/include/hw/virtio/virtio-net.h
@@ -59,6 +59,7 @@ typedef struct VirtIONet {
VirtIONetQueue *vqs;
VirtQueue *ctrl_vq;
NICState *nic;
+ QTAILQ_HEAD(, NetRscChain) rsc_chains;
uint32_t tx_timeout;
int32_t tx_burst;
uint32_t has_vnet_hdr;
--
2.4.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC Patch 03/10] Chain lookup and packets caching.
2016-01-25 22:24 [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest wexu
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 01/10] 'Segment', 'Chain' and 'Status' enumeration data structure wexu
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 02/10] Initilize & Cleanup wexu
@ 2016-01-25 22:24 ` wexu
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 04/10] Tcp general data coalescing, the parameters is a little bit horrible, it's complicated to read, should can be optimized later wexu
` (7 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: wexu @ 2016-01-25 22:24 UTC (permalink / raw)
To: qemu-devel; +Cc: victork, mst, jasowang, yvugenfi, Wei Xu, marcel, dfleytma
Look up the chain by protocol and invoke the coalescing
callback; cache the packet if it cannot be handled.
Signed-off-by: Wei Xu <wexu@redhat.com>
---
hw/net/virtio-net.c | 171 +++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 170 insertions(+), 1 deletion(-)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 4e9458e..7a6cd4c 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -14,10 +14,12 @@
#include "qemu/iov.h"
#include "hw/virtio/virtio.h"
#include "net/net.h"
+#include "net/eth.h"
#include "net/checksum.h"
#include "net/tap.h"
#include "qemu/error-report.h"
#include "qemu/timer.h"
+#include "qemu/sockets.h"
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
#include "hw/virtio/virtio-bus.h"
@@ -37,6 +39,21 @@
#define endof(container, field) \
(offsetof(container, field) + sizeof(((container *)0)->field))
+#define VIRTIO_HEADER 12 /* Virtio net header size */
+#define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header))
+
+#define MAX_VIRTIO_IP_PAYLOAD (65535 + IP_OFFSET)
+
+/* Global statistics */
+static uint32_t rsc_chain_no_mem;
+
+/* Switcher to enable/disable rsc */
+static bool virtio_net_rsc_bypass;
+
+/* Coalesce callback for ipv4/6 */
+typedef int32_t (VirtioNetCoalesce) (NetRscChain *chain, NetRscSeg *seg,
+ const uint8_t *buf, size_t size);
+
typedef struct VirtIOFeature {
uint32_t flags;
size_t end;
@@ -1019,7 +1036,8 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
return 0;
}
-static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
+static ssize_t virtio_net_do_receive(NetClientState *nc,
+ const uint8_t *buf, size_t size)
{
VirtIONet *n = qemu_get_nic_opaque(nc);
VirtIONetQueue *q = virtio_net_get_subqueue(nc);
@@ -1623,6 +1641,157 @@ static void virtio_net_rsc_cleanup(VirtIONet *n)
}
}
+/* Cache one packet on the chain for later coalescing.
+ * Returns the number of bytes consumed (the packet is taken over). */
+static int virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc,
+ const uint8_t *buf, size_t size)
+{
+ NetRscSeg *seg;
+
+ /* g_malloc()/g_new() abort on allocation failure, so the previous
+ * NULL checks and the 'out:' error path were dead code. */
+ seg = g_new(NetRscSeg, 1);
+ seg->buf = g_malloc(MAX_VIRTIO_IP_PAYLOAD);
+
+ /* Source and destination cannot overlap, plain memcpy suffices */
+ memcpy(seg->buf, buf, size);
+ seg->size = size;
+ seg->dup_ack_count = 0;
+ seg->is_coalesced = 0;
+ seg->nc = nc;
+
+ QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
+ return size;
+}
+
+
+static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain,
+ NetRscSeg *seg, const uint8_t *buf, size_t size)
+{
+ /* Placeholder: real ipv4 coalescing is added by a later patch.
+ * RSC_FINAL makes every packet flush the cached segment, which is
+ * the safe pass-through behaviour. */
+ return RSC_FINAL;
+}
+
+/* Core dispatch: run 'coalesce' against each cached segment of the
+ * chain; depending on the verdict the packet is merged, cached as a
+ * new segment, or triggers a flush of the matching segment.
+ * Returns bytes consumed, 0 when delivery to the guest failed. */
+static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,
+ const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce)
+{
+ int ret;
+ NetRscSeg *seg, *nseg;
+
+ /* Empty pool: cache this packet as the first segment */
+ if (QTAILQ_EMPTY(&chain->buffers)) {
+ if (!virtio_net_rsc_cache_buf(chain, nc, buf, size)) {
+ return 0;
+ } else {
+ return size;
+ }
+ }
+
+ QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
+ ret = coalesce(chain, seg, buf, size);
+ if (RSC_FINAL == ret) {
+ /* Flush the cached segment first to preserve ordering.
+ * NOTE(review): seg is freed before 'ret' is inspected, so a
+ * failed delivery (ret == 0) silently drops the cached data
+ * — confirm this is intended. */
+ ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
+ QTAILQ_REMOVE(&chain->buffers, seg, next);
+ g_free(seg->buf);
+ g_free(seg);
+ if (ret == 0) {
+ /* Send failed */
+ return 0;
+ }
+
+ /* Send current packet */
+ return virtio_net_do_receive(nc, buf, size);
+ } else if (RSC_NO_MATCH == ret) {
+ continue;
+ } else {
+ /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */
+ seg->is_coalesced = 1;
+ return size;
+ }
+ }
+
+ /* No segment of this chain matched: cache as a new segment */
+ return virtio_net_rsc_cache_buf(chain, nc, buf, size);
+}
+
+/* ipv4 receive entry, registered as chain->do_receive */
+static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
+ const uint8_t *buf, size_t size)
+{
+ NetRscChain *chain;
+
+ chain = (NetRscChain *)opq;
+ return virtio_net_rsc_callback(chain, nc, buf, size,
+ virtio_net_rsc_try_coalesce4);
+}
+
+/* Find (or create) the coalescing chain for 'proto' on this device.
+ * Returns NULL for protocols RSC does not handle. */
+static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc,
+ uint16_t proto)
+{
+ VirtIONet *n;
+ NetRscChain *chain;
+
+ /* Only handle IPv4/6 */
+ if (proto != (uint16_t)ETH_P_IP) {
+ return NULL;
+ }
+
+ /* The previous container_of(&nic, VirtIONet, nic) computed an
+ * offset from the address of a stack-local pointer — garbage.
+ * Use the accessor the rest of this file already uses. */
+ n = qemu_get_nic_opaque(nc);
+ QTAILQ_FOREACH(chain, &n->rsc_chains, next) {
+ if (chain->proto == proto) {
+ return chain;
+ }
+ }
+
+ chain = g_malloc(sizeof(*chain));
+ if (!chain) {
+ /* NOTE(review): g_malloc() aborts on OOM, this path is dead;
+ * kept only so the rsc_chain_no_mem statistic stays referenced */
+ rsc_chain_no_mem++;
+ return NULL;
+ }
+
+ chain->proto = proto;
+ chain->do_receive = virtio_net_rsc_receive4;
+
+ QTAILQ_INIT(&chain->buffers);
+ QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
+ return chain;
+}
+
+/* RSC front end: read the ethertype past the virtio-net header and
+ * route the packet to its per-protocol chain, falling back to the
+ * plain receive path when RSC cannot apply. */
+static ssize_t virtio_net_rsc_receive(NetClientState *nc,
+ const uint8_t *buf, size_t size)
+{
+ uint16_t proto;
+ NetRscChain *chain;
+ struct eth_header *eth;
+
+ /* Too short to even hold an eth header: hand over untouched */
+ if (size < IP_OFFSET) {
+ return virtio_net_do_receive(nc, buf, size);
+ }
+
+ eth = (struct eth_header *)(buf + VIRTIO_HEADER);
+ /* htons() doubles as ntohs() (same byte swap): network -> host */
+ proto = htons(eth->h_proto);
+ chain = virtio_net_rsc_lookup_chain(nc, proto);
+ if (!chain) {
+ return virtio_net_do_receive(nc, buf, size);
+ } else {
+ return chain->do_receive(chain, nc, buf, size);
+ }
+}
+
+/* Top-level receive hook: honour the global RSC on/off switch */
+static ssize_t virtio_net_receive(NetClientState *nc,
+ const uint8_t *buf, size_t size)
+{
+ if (virtio_net_rsc_bypass) {
+ return virtio_net_do_receive(nc, buf, size);
+ } else {
+ return virtio_net_rsc_receive(nc, buf, size);
+ }
+}
+
static NetClientInfo net_virtio_info = {
.type = NET_CLIENT_OPTIONS_KIND_NIC,
.size = sizeof(NICState),
--
2.4.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC Patch 04/10] Tcp general data coalescing, the parameters is a little bit horrible, it's complicated to read, should can be optimized later.
2016-01-25 22:24 [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest wexu
` (2 preceding siblings ...)
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 03/10] Chain lookup and packets caching wexu
@ 2016-01-25 22:24 ` wexu
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 05/10] The draining timer, create a timer to purge the packets from the cached pool wexu
` (6 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: wexu @ 2016-01-25 22:24 UTC (permalink / raw)
To: qemu-devel; +Cc: victork, mst, jasowang, yvugenfi, Wei Xu, marcel, dfleytma
Signed-off-by: Wei Xu <wexu@redhat.com>
---
hw/net/virtio-net.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 124 insertions(+), 1 deletion(-)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 7a6cd4c..d005a56 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -41,6 +41,10 @@
#define VIRTIO_HEADER 12 /* Virtio net header size */
#define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header))
+#define TCP_WINDOW 65535
+
+/* ip4 max payload, 16 bits in the header */
+#define MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
#define MAX_VIRTIO_IP_PAYLOAD (65535 + IP_OFFSET)
@@ -1670,11 +1674,130 @@ out:
return 0;
}
+/* Handle a data-less packet against a cached segment: pure ack,
+ * duplicated ack or window update.  'n_*' is the new packet, 'o_*'
+ * the cached one.  Returns RSC_FINAL (flush) or RSC_COALESCE. */
+static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, NetRscSeg *seg,
+ const uint8_t *buf, struct tcp_header *n_tcp,
+ struct tcp_header *o_tcp)
+{
+ uint32_t nack, oack;
+ uint16_t nwin, owin;
+
+ /* htonl/htons double as ntohl/ntohs: network -> host order */
+ nack = htonl(n_tcp->th_ack);
+ nwin = htons(n_tcp->th_win);
+ oack = htonl(o_tcp->th_ack);
+ owin = htons(o_tcp->th_win);
+
+ /* Ack advanced by a full window or more (unsigned diff also
+ * catches a regressed ack): treat as unrelated and flush */
+ if ((nack - oack) >= TCP_WINDOW) {
+ return RSC_FINAL;
+ } else if (nack == oack) {
+ /* duplicated ack or window probe */
+ if (nwin == owin) {
+ /* duplicated ack, add dup ack count due to whql test up to 1 */
+
+ if (seg->dup_ack_count == 0) {
+ seg->dup_ack_count++;
+ return RSC_COALESCE;
+ } else {
+ /* Spec says should send it directly */
+ return RSC_FINAL;
+ }
+ } else {
+ /* Coalesce window update */
+ o_tcp->th_win = n_tcp->th_win;
+ return RSC_COALESCE;
+ }
+ } else {
+ /* pure ack, update ack */
+ o_tcp->th_ack = n_tcp->th_ack;
+ return RSC_COALESCE;
+ }
+}
+
+/* Protocol-independent tcp coalescing core, shared by the v4 and v6
+ * paths.  'p_ip_len' points at the length field inside the cached ip
+ * header (total length for v4, payload length for v6) so the update
+ * below works for both; 'max_data' caps the coalesced length. */
+static int32_t virtio_net_rsc_coalesce_tcp(NetRscChain *chain, NetRscSeg *seg,
+ const uint8_t *buf, struct tcp_header *n_tcp, uint16_t n_tcp_len,
+ uint16_t n_data, struct tcp_header *o_tcp, uint16_t o_tcp_len,
+ uint16_t o_data, uint16_t *p_ip_len, uint16_t max_data)
+{
+ void *data;
+ uint16_t o_ip_len;
+ uint32_t nseq, oseq;
+
+ /* htons/htonl double as ntohs/ntohl: network -> host order */
+ o_ip_len = htons(*p_ip_len);
+ nseq = htonl(n_tcp->th_seq);
+ oseq = htonl(o_tcp->th_seq);
+
+ /* Ignore packet with more/larger tcp options */
+ if (n_tcp_len > o_tcp_len) {
+ return RSC_FINAL;
+ }
+
+ /* out of order or retransmitted. */
+ if ((nseq - oseq) > TCP_WINDOW) {
+ return RSC_FINAL;
+ }
+
+ /* Payload of the new packet starts right after its tcp header */
+ data = ((uint8_t *)n_tcp) + n_tcp_len;
+ if (nseq == oseq) {
+ if ((0 == o_data) && n_data) {
+ /* From no payload to payload, normal case, not a dup ack or etc */
+ goto coalesce;
+ } else {
+ return virtio_net_rsc_handle_ack(chain, seg, buf, n_tcp, o_tcp);
+ }
+ } else if ((nseq - oseq) != o_data) {
+ /* Not a consistent packet, out of order */
+ return RSC_FINAL;
+ } else {
+coalesce:
+ if ((o_ip_len + n_data) > max_data) {
+ return RSC_FINAL;
+ }
+
+ /* Here comes the right data, the payload length in v4/v6 is different,
+ so use the field value to update */
+ *p_ip_len = htons(o_ip_len + n_data); /* Update new data len */
+ o_tcp->th_offset_flags = n_tcp->th_offset_flags; /* Bring 'PUSH' bit */
+ o_tcp->th_ack = n_tcp->th_ack;
+ o_tcp->th_win = n_tcp->th_win;
+
+ memmove(seg->buf + seg->size, data, n_data);
+ seg->size += n_data;
+ return RSC_COALESCE;
+ }
+}
static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain,
NetRscSeg *seg, const uint8_t *buf, size_t size)
{
- return RSC_FINAL;
+ uint16_t o_ip_len, n_ip_len; /* len in ip header field */
+ uint16_t n_ip_hdrlen, o_ip_hdrlen; /* ipv4 header len */
+ uint16_t n_tcp_len, o_tcp_len; /* tcp header len */
+ uint16_t o_data, n_data; /* payload without virtio/eth/ip/tcp */
+ struct ip_header *n_ip, *o_ip;
+ struct tcp_header *n_tcp, *o_tcp;
+
+ /* Parse the new packet: ihl is in 32-bit words, hence << 2 */
+ n_ip = (struct ip_header *)(buf + IP_OFFSET);
+ n_ip_hdrlen = ((0xF & n_ip->ip_ver_len) << 2);
+ n_ip_len = htons(n_ip->ip_len);
+ n_tcp = (struct tcp_header *)(((uint8_t *)n_ip) + n_ip_hdrlen);
+ /* data offset: top 4 bits in words -> (>> 12) * 4 == >> 10 */
+ n_tcp_len = (htons(n_tcp->th_offset_flags) & 0xF000) >> 10;
+ n_data = n_ip_len - n_ip_hdrlen - n_tcp_len;
+
+ /* Parse the cached segment the same way */
+ o_ip = (struct ip_header *)(seg->buf + IP_OFFSET);
+ o_ip_hdrlen = ((0xF & o_ip->ip_ver_len) << 2);
+ o_ip_len = htons(o_ip->ip_len);
+ o_tcp = (struct tcp_header *)(((uint8_t *)o_ip) + o_ip_hdrlen);
+ o_tcp_len = (htons(o_tcp->th_offset_flags) & 0xF000) >> 10;
+ o_data = o_ip_len - o_ip_hdrlen - o_tcp_len;
+
+ /* 4-tuple match via xor: any nonzero xor means a different flow */
+ if ((n_ip->ip_src ^ o_ip->ip_src) || (n_ip->ip_dst ^ o_ip->ip_dst)
+ || (n_tcp->th_sport ^ o_tcp->th_sport)
+ || (n_tcp->th_dport ^ o_tcp->th_dport)) {
+ return RSC_NO_MATCH;
+ }
+
+ return virtio_net_rsc_coalesce_tcp(chain, seg, buf,
+ n_tcp, n_tcp_len, n_data, o_tcp, o_tcp_len,
+ o_data, &o_ip->ip_len, MAX_IP4_PAYLOAD);
}
static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,
--
2.4.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC Patch 05/10] The draining timer, create a timer to purge the packets from the cached pool.
2016-01-25 22:24 [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest wexu
` (3 preceding siblings ...)
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 04/10] Tcp general data coalescing, the parameters is a little bit horrible, it's complicated to read, should can be optimized later wexu
@ 2016-01-25 22:24 ` wexu
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 06/10] IPv4 checksum wexu
` (5 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: wexu @ 2016-01-25 22:24 UTC (permalink / raw)
To: qemu-devel; +Cc: victork, mst, jasowang, yvugenfi, Wei Xu, marcel, dfleytma
Signed-off-by: Wei Xu <wexu@redhat.com>
---
hw/net/virtio-net.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index d005a56..8da2815 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -48,12 +48,17 @@
#define MAX_VIRTIO_IP_PAYLOAD (65535 + IP_OFFSET)
+/* Purge coalesced packets timer interval */
+#define RSC_TIMER_INTERVAL 500000
+
/* Global statistics */
static uint32_t rsc_chain_no_mem;
/* Switcher to enable/disable rsc */
static bool virtio_net_rsc_bypass;
+static uint32_t rsc_timeout = RSC_TIMER_INTERVAL;
+
/* Coalesce callback for ipv4/6 */
typedef int32_t (VirtioNetCoalesce) (NetRscChain *chain, NetRscSeg *seg,
const uint8_t *buf, size_t size);
@@ -1625,6 +1630,35 @@ static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
return 0;
}
+/* Drain-timer callback: deliver every cached segment of the chain to
+ * the guest and re-arm the timer if anything could not be sent. */
+static void virtio_net_rsc_purge(void *opq)
+{
+ int ret = 0;
+ NetRscChain *chain = (NetRscChain *)opq;
+ NetRscSeg *seg, *rn;
+
+ QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, rn) {
+ if (!qemu_can_send_packet(seg->nc)) {
+ /* Should quit or continue? not sure if one or some
+ * of the queues fail would happen, try continue here */
+ continue;
+ }
+
+ ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
+ QTAILQ_REMOVE(&chain->buffers, seg, next);
+ g_free(seg->buf);
+ g_free(seg);
+
+ /* NOTE(review): the segment is already freed here, so a failed
+ * delivery (ret == 0) drops its data; 'continue' at the end of
+ * the loop body is a no-op */
+ if (ret == 0) {
+ /* Try next queue */
+ continue;
+ }
+ }
+
+ /* Segments skipped above (queue not ready) get another chance */
+ if (!QTAILQ_EMPTY(&chain->buffers)) {
+ timer_mod(chain->drain_timer,
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + rsc_timeout);
+ }
+}
static void virtio_net_rsc_cleanup(VirtIONet *n)
{
@@ -1810,6 +1844,8 @@ static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,
if (!virtio_net_rsc_cache_buf(chain, nc, buf, size)) {
return 0;
} else {
+ timer_mod(chain->drain_timer,
+ qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + rsc_timeout);
return size;
}
}
@@ -1877,6 +1913,8 @@ static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc,
}
chain->proto = proto;
+ chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
+ virtio_net_rsc_purge, chain);
chain->do_receive = virtio_net_rsc_receive4;
QTAILQ_INIT(&chain->buffers);
--
2.4.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC Patch 06/10] IPv4 checksum.
2016-01-25 22:24 [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest wexu
` (4 preceding siblings ...)
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 05/10] The draining timer, create a timer to purge the packets from the cached pool wexu
@ 2016-01-25 22:24 ` wexu
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 07/10] TCP control packet handling wexu
` (4 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: wexu @ 2016-01-25 22:24 UTC (permalink / raw)
To: qemu-devel; +Cc: victork, mst, jasowang, yvugenfi, Wei Xu, marcel, dfleytma
Signed-off-by: Wei Xu <wexu@redhat.com>
---
hw/net/virtio-net.c | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 8da2815..1ca3dd5 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1630,6 +1630,18 @@ static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
return 0;
}
+/* Recompute the ipv4 header checksum of a coalesced segment; needed
+ * because coalescing rewrites ip_len (and tcp fields) in place.
+ * Checksumming sizeof(struct ip_header) bytes assumes the header has
+ * no options — packets with options are bypassed by the filter. */
+static void virtio_net_rsc_ipv4_checksum(NetRscSeg *seg)
+{
+ uint32_t sum;
+ struct ip_header *ip;
+
+ ip = (struct ip_header *)(seg->buf + IP_OFFSET);
+
+ /* Zero the field first: it is included in the checksum range */
+ ip->ip_sum = 0;
+ sum = net_checksum_add_cont(sizeof(struct ip_header), (uint8_t *)ip, 0);
+ ip->ip_sum = cpu_to_be16(net_checksum_finish(sum));
+}
+
static void virtio_net_rsc_purge(void *opq)
{
int ret = 0;
@@ -1643,6 +1655,10 @@ static void virtio_net_rsc_purge(void *opq)
continue;
}
+ if ((chain->proto == ETH_P_IP) && seg->is_coalesced) {
+ virtio_net_rsc_ipv4_checksum(seg);
+ }
+
ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
QTAILQ_REMOVE(&chain->buffers, seg, next);
g_free(seg->buf);
@@ -1853,6 +1869,9 @@ static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,
QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
ret = coalesce(chain, seg, buf, size);
if (RSC_FINAL == ret) {
+ if ((chain->proto == ETH_P_IP) && seg->is_coalesced) {
+ virtio_net_rsc_ipv4_checksum(seg);
+ }
ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size);
QTAILQ_REMOVE(&chain->buffers, seg, next);
g_free(seg->buf);
--
2.4.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC Patch 07/10] TCP control packet handling.
2016-01-25 22:24 [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest wexu
` (5 preceding siblings ...)
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 06/10] IPv4 checksum wexu
@ 2016-01-25 22:24 ` wexu
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 08/10] Sanity check & More bypass cases check wexu
` (3 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: wexu @ 2016-01-25 22:24 UTC (permalink / raw)
To: qemu-devel; +Cc: victork, mst, jasowang, yvugenfi, Wei Xu, marcel, dfleytma
From: Wei Xu <wexu@redhat.com>
All the 'SYN' packets will be bypassed, and a flag with 'FIN/RST' set
will signal all the cached packets in the same connection to be purged;
this is to avoid putting out-of-order data on the line.
Signed-off-by: Wei Xu <wexu@redhat.com>
---
hw/net/virtio-net.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 66 insertions(+)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 1ca3dd5..042b538 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -41,6 +41,12 @@
#define VIRTIO_HEADER 12 /* Virtio net header size */
#define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header))
+
+#define IP4_ADDR_OFFSET (IP_OFFSET + 12) /* ipv4 address start */
+#define TCP4_OFFSET (IP_OFFSET + sizeof(struct ip_header)) /* tcp4 header */
+#define TCP4_PORT_OFFSET TCP4_OFFSET /* tcp4 port offset */
+#define IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */
+#define TCP_PORT_SIZE 4 /* sport + dport */
#define TCP_WINDOW 65535
/* ip4 max payload, 16 bits in the header */
@@ -1850,6 +1856,27 @@ static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain,
o_data, &o_ip->ip_len, MAX_IP4_PAYLOAD);
}
+
+/* Packets with 'SYN' should bypass; FIN/URG/RST make the connection's
+ * cached data drain first to prevent out-of-order delivery. */
+static int virtio_net_rsc_parse_tcp_ctrl(uint8_t *ip, uint16_t offset)
+{
+ uint16_t tcp_flag;
+ struct tcp_header *tcp;
+
+ tcp = (struct tcp_header *)(ip + offset);
+ /* htons() as ntohs(); the low 6 bits of th_offset_flags hold the
+ * tcp control flags */
+ tcp_flag = htons(tcp->th_offset_flags) & 0x3F;
+ if (tcp_flag & TH_SYN) {
+ return RSC_BYPASS;
+ }
+
+ if (tcp_flag & (TH_FIN | TH_URG | TH_RST)) {
+ return RSC_FINAL;
+ }
+
+ /* Was a bare 'return 0', which aliases RSC_COALESCE; return the
+ * enum value that actually means "try to coalesce". Callers only
+ * test for RSC_BYPASS/RSC_FINAL, so behaviour is compatible. */
+ return RSC_WANT;
+}
+
static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,
const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce)
{
@@ -1895,12 +1922,51 @@ static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,
return virtio_net_rsc_cache_buf(chain, nc, buf, size);
}
+/* Drain the cached segment of one connection (matched by address and
+ * port bytes at the given offsets), then deliver the trigger packet;
+ * this avoids putting out-of-order segments on the line. */
+static size_t virtio_net_rsc_drain_one(NetRscChain *chain, NetClientState *nc,
+ const uint8_t *buf, size_t size, uint16_t ip_start,
+ uint16_t ip_size, uint16_t tcp_port, uint16_t port_size)
+{
+ NetRscSeg *seg, *nseg;
+
+ QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) {
+ /* Raw byte compare of the address and port pairs */
+ if (memcmp(buf + ip_start, seg->buf + ip_start, ip_size)
+ || memcmp(buf + tcp_port, seg->buf + tcp_port, port_size)) {
+ continue;
+ }
+ if ((chain->proto == ETH_P_IP) && seg->is_coalesced) {
+ virtio_net_rsc_ipv4_checksum(seg);
+ }
+
+ virtio_net_do_receive(seg->nc, seg->buf, seg->size);
+
+ QTAILQ_REMOVE(&chain->buffers, seg, next);
+ g_free(seg->buf);
+ g_free(seg);
+ /* At most one cached segment per connection */
+ break;
+ }
+
+ return virtio_net_do_receive(nc, buf, size);
+}
static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
 const uint8_t *buf, size_t size)
{
+ int32_t ret;
+ struct ip_header *ip;
 NetRscChain *chain;
 chain = (NetRscChain *)opq;
+ ip = (struct ip_header *)(buf + IP_OFFSET);
+
+ /* Control packets: SYN bypasses, FIN/URG/RST drains the flow.
+ * The offset passed is the ipv4 header length (ihl words * 4). */
+ ret = virtio_net_rsc_parse_tcp_ctrl((uint8_t *)ip,
+ (0xF & ip->ip_ver_len) << 2);
+ if (RSC_BYPASS == ret) {
+ return virtio_net_do_receive(nc, buf, size);
+ } else if (RSC_FINAL == ret) {
+ return virtio_net_rsc_drain_one(chain, nc, buf, size, IP4_ADDR_OFFSET,
+ IP4_ADDR_SIZE, TCP4_PORT_OFFSET, TCP_PORT_SIZE);
+ }
+
 return virtio_net_rsc_callback(chain, nc, buf, size,
 virtio_net_rsc_try_coalesce4);
}
--
2.4.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC Patch 08/10] Sanity check & More bypass cases check.
2016-01-25 22:24 [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest wexu
` (6 preceding siblings ...)
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 07/10] TCP control packet handling wexu
@ 2016-01-25 22:24 ` wexu
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 09/10] IPv6 support wexu
` (2 subsequent siblings)
10 siblings, 0 replies; 15+ messages in thread
From: wexu @ 2016-01-25 22:24 UTC (permalink / raw)
To: qemu-devel; +Cc: victork, mst, jasowang, yvugenfi, Wei Xu, marcel, dfleytma
1. IP options & IP fragment should be bypassed.
2. Sanity size check to prevent buffer overflow attack.
Signed-off-by: Wei Xu <wexu@redhat.com>
---
hw/net/virtio-net.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 042b538..55e8e99 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -1948,6 +1948,46 @@ static size_t virtio_net_rsc_drain_one(NetRscChain *chain, NetClientState *nc,
return virtio_net_do_receive(nc, buf, size);
}
+
+/* Sanity/bypass filter for ipv4: RSC_WANT only for an unfragmented,
+ * option-less TCP/IPv4 packet of sane length; RSC_BYPASS otherwise. */
+static int32_t virtio_net_rsc_filter4(NetRscChain *chain, struct ip_header *ip,
+ const uint8_t *buf, size_t size)
+{
+ uint16_t ip_len;
+
+ /* 'struct' keyword was missing here; sizeof(tcp_header) did not
+ * compile */
+ if (size < (TCP4_OFFSET + sizeof(struct tcp_header))) {
+ return RSC_BYPASS;
+ }
+
+ /* Not an ipv4 one */
+ if (0x4 != ((0xF0 & ip->ip_ver_len) >> 4)) {
+ return RSC_BYPASS;
+ }
+
+ /* Don't handle packets with ip option */
+ if (5 != (0xF & ip->ip_ver_len)) {
+ return RSC_BYPASS;
+ }
+
+ /* Don't handle packets with ip fragment */
+ if (!(htons(ip->ip_off) & IP_DF)) {
+ return RSC_BYPASS;
+ }
+
+ if (ip->ip_p != IPPROTO_TCP) {
+ return RSC_BYPASS;
+ }
+
+ /* Sanity check: total length must cover ip + tcp headers and must
+ * fit inside the received buffer */
+ ip_len = htons(ip->ip_len);
+ if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
+ || ip_len > (size - IP_OFFSET)) {
+ return RSC_BYPASS;
+ }
+
+ return RSC_WANT;
+}
+
+
static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
const uint8_t *buf, size_t size)
{
@@ -1958,6 +1998,10 @@ static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
chain = (NetRscChain *)opq;
ip = (struct ip_header *)(buf + IP_OFFSET);
+ if (RSC_WANT != virtio_net_rsc_filter4(chain, ip, buf, size)) {
+ return virtio_net_do_receive(nc, buf, size);
+ }
+
ret = virtio_net_rsc_parse_tcp_ctrl((uint8_t *)ip,
(0xF & ip->ip_ver_len) << 2);
if (RSC_BYPASS == ret) {
--
2.4.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC Patch 09/10] IPv6 support.
2016-01-25 22:24 [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest wexu
` (7 preceding siblings ...)
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 08/10] Sanity check & More bypass cases check wexu
@ 2016-01-25 22:24 ` wexu
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 10/10] Statistics wexu
2016-01-26 6:44 ` [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest Fam Zheng
10 siblings, 0 replies; 15+ messages in thread
From: wexu @ 2016-01-25 22:24 UTC (permalink / raw)
To: qemu-devel; +Cc: victork, mst, jasowang, yvugenfi, Wei Xu, marcel, dfleytma
1. Callback for IPv6
2. Filter & Sanity Check.
Signed-off-by: Wei Xu <wexu@redhat.com>
---
hw/net/virtio-net.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 102 insertions(+), 2 deletions(-)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 55e8e99..362f304 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -46,12 +46,19 @@
#define TCP4_OFFSET (IP_OFFSET + sizeof(struct ip_header)) /* tcp4 header */
#define TCP4_PORT_OFFSET TCP4_OFFSET /* tcp4 port offset */
#define IP4_ADDR_SIZE 8 /* ipv4 saddr + daddr */
+
+#define IP6_ADDR_OFFSET (IP_OFFSET + 8) /* ipv6 address start */
+#define TCP6_OFFSET (IP_OFFSET + sizeof(struct ip6_header)) /* tcp6 header */
+#define TCP6_PORT_OFFSET TCP6_OFFSET /* tcp6 port offset */
+#define IP6_ADDR_SIZE 32 /* ipv6 saddr + daddr */
#define TCP_PORT_SIZE 4 /* sport + dport */
#define TCP_WINDOW 65535
/* ip4 max payload, 16 bits in the header */
#define MAX_IP4_PAYLOAD (65535 - sizeof(struct ip_header))
+/* ip6 max payload, payload in ipv6 don't include the header */
+#define MAX_IP6_PAYLOAD 65535
#define MAX_VIRTIO_IP_PAYLOAD (65535 + IP_OFFSET)
/* Purge coalesced packets timer interval */
@@ -1856,6 +1863,42 @@ static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain,
o_data, &o_ip->ip_len, MAX_IP4_PAYLOAD);
}
+/* ipv6 counterpart of virtio_net_rsc_try_coalesce4; note that the
+ * ipv6 payload length field excludes the 40-byte fixed header, so no
+ * header length is subtracted when computing the data size. */
+static int32_t virtio_net_rsc_try_coalesce6(NetRscChain *chain,
+ NetRscSeg *seg, const uint8_t *buf, size_t size)
+{
+ uint16_t o_ip_len, n_ip_len; /* len in ip header field */
+ uint16_t n_tcp_len, o_tcp_len; /* tcp header len */
+ uint16_t o_data, n_data; /* payload without virtio/eth/ip/tcp */
+ struct ip6_header *n_ip, *o_ip;
+ struct tcp_header *n_tcp, *o_tcp;
+
+ n_ip = (struct ip6_header *)(buf + IP_OFFSET);
+ n_ip_len = htons(n_ip->ip6_ctlun.ip6_un1.ip6_un1_plen);
+ n_tcp = (struct tcp_header *)(((uint8_t *)n_ip)\
+ + sizeof(struct ip6_header));
+ /* data offset: top 4 bits in words -> (>> 12) * 4 == >> 10 */
+ n_tcp_len = (htons(n_tcp->th_offset_flags) & 0xF000) >> 10;
+ n_data = n_ip_len - n_tcp_len;
+
+ o_ip = (struct ip6_header *)(seg->buf + IP_OFFSET);
+ o_ip_len = htons(o_ip->ip6_ctlun.ip6_un1.ip6_un1_plen);
+ o_tcp = (struct tcp_header *)(((uint8_t *)o_ip)\
+ + sizeof(struct ip6_header));
+ o_tcp_len = (htons(o_tcp->th_offset_flags) & 0xF000) >> 10;
+ o_data = o_ip_len - o_tcp_len;
+
+ /* 4-tuple match: addresses via memcmp, ports via xor */
+ if (memcmp(&n_ip->ip6_src, &o_ip->ip6_src, sizeof(struct in6_address))
+ || memcmp(&n_ip->ip6_dst, &o_ip->ip6_dst, sizeof(struct in6_address))
+ || (n_tcp->th_sport ^ o_tcp->th_sport)
+ || (n_tcp->th_dport ^ o_tcp->th_dport)) {
+ return RSC_NO_MATCH;
+ }
+
+ /* There is a difference between payload length in ipv4 and v6,
+ ip header is excluded in ipv6 */
+ return virtio_net_rsc_coalesce_tcp(chain, seg, buf,
+ n_tcp, n_tcp_len, n_data, o_tcp, o_tcp_len, o_data,
+ &o_ip->ip6_ctlun.ip6_un1.ip6_un1_plen, MAX_IP6_PAYLOAD);
+}
/* Pakcets with 'SYN' should bypass, other flag should be sent after drain
* to prevent out of order */
@@ -2015,6 +2058,59 @@ static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
virtio_net_rsc_try_coalesce4);
}
+/* Decide whether an ipv6 packet is a candidate for coalescing.
+ * Returns RSC_WANT for plain ipv6/tcp packets of sane length,
+ * RSC_BYPASS for everything else (mirrors virtio_net_rsc_filter4).
+ */
+static int32_t virtio_net_rsc_filter6(NetRscChain *chain, struct ip6_header *ip,
+    const uint8_t *buf, size_t size)
+{
+    uint16_t ip_len;
+
+    /* Too small to hold an ipv6 header plus a tcp header.
+     * Fix: 'sizeof(tcp_header)' lacked the 'struct' keyword and would
+     * not compile - eth.h declares 'struct tcp_header', no typedef. */
+    if (size < (TCP6_OFFSET + sizeof(struct tcp_header))) {
+        return RSC_BYPASS;
+    }
+
+    /* The version is the top nibble of the first byte of the flow field;
+     * '0xF & flow' tested the wrong bits (traffic class, not version). */
+    if (0x60 != (0xF0 & ip->ip6_ctlun.ip6_un1.ip6_un1_flow)) {
+        return RSC_BYPASS;
+    }
+
+    /* 'nxt' covers both extension headers and protocol in this check:
+     * anything other than plain tcp is bypassed */
+    if (ip->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
+        return RSC_BYPASS;
+    }
+
+    /* Sanity check: payload must hold at least a tcp header and must
+     * not claim more data than the buffer actually carries */
+    ip_len = ntohs(ip->ip6_ctlun.ip6_un1.ip6_un1_plen);
+    if (ip_len < sizeof(struct tcp_header)
+        || ip_len > (size - TCP6_OFFSET)) {
+        return RSC_BYPASS;
+    }
+
+    /* Fix: returning 0 (== RSC_COALESCE) made the caller's
+     * 'RSC_WANT != filter6(...)' test fail for every packet, so ipv6
+     * coalescing never happened; return RSC_WANT like filter4 does. */
+    return RSC_WANT;
+}
+
+/* Per-chain receive entry point for ipv6 (installed as chain->do_receive).
+ * Filters out non-candidates, drains on control packets, otherwise hands
+ * the packet to the generic coalescing callback.
+ */
+static size_t virtio_net_rsc_receive6(void *opq, NetClientState* nc,
+    const uint8_t *buf, size_t size)
+{
+    int32_t ret;
+    NetRscChain *chain;
+    struct ip6_header *ip;
+
+    chain = (NetRscChain *)opq;
+    ip = (struct ip6_header *)(buf + IP_OFFSET);
+    /* Not a coalescing candidate (not plain tcp, bad length, ...):
+     * deliver to the guest unmodified */
+    if (RSC_WANT != virtio_net_rsc_filter6(chain, ip, buf, size)) {
+        return virtio_net_do_receive(nc, buf, size);
+    }
+
+    /* ipv6 has a fixed-size header, so the tcp header sits at
+     * sizeof(*ip) past the ip header (no ihl field as in ipv4) */
+    ret = virtio_net_rsc_parse_tcp_ctrl((uint8_t *)ip, sizeof(*ip));
+    if (RSC_BYPASS == ret) {
+        /* e.g. SYN: forward immediately without caching */
+        return virtio_net_do_receive(nc, buf, size);
+    } else if (RSC_FINAL == ret) {
+        /* Control flag terminates the flow: flush the matching cached
+         * segment first to preserve ordering, then send this packet */
+        return virtio_net_rsc_drain_one(chain, nc, buf, size, IP6_ADDR_OFFSET,
+                                IP6_ADDR_SIZE, TCP6_PORT_OFFSET, TCP_PORT_SIZE);
+    }
+
+    /* Plain data packet: try to coalesce it into the chain's cache */
+    return virtio_net_rsc_callback(chain, nc, buf, size,
+                                   virtio_net_rsc_try_coalesce6);
+}
+
static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc,
uint16_t proto)
{
@@ -2023,7 +2119,7 @@ static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc,
NICState *nic;
/* Only handle IPv4/6 */
- if (proto != (uint16_t)ETH_P_IP) {
+ if ((proto != (uint16_t)ETH_P_IP) && (proto != (uint16_t)ETH_P_IPV6)) {
return NULL;
}
@@ -2044,7 +2140,11 @@ static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc,
chain->proto = proto;
chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
virtio_net_rsc_purge, chain);
- chain->do_receive = virtio_net_rsc_receive4;
+ if (ETH_P_IP == proto) {
+ chain->do_receive = virtio_net_rsc_receive4;
+ } else {
+ chain->do_receive = virtio_net_rsc_receive6;
+ }
QTAILQ_INIT(&chain->buffers);
QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next);
--
2.4.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [Qemu-devel] [RFC Patch 10/10] Statistics.
2016-01-25 22:24 [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest wexu
` (8 preceding siblings ...)
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 09/10] IPv6 support wexu
@ 2016-01-25 22:24 ` wexu
2016-01-26 6:44 ` [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest Fam Zheng
10 siblings, 0 replies; 15+ messages in thread
From: wexu @ 2016-01-25 22:24 UTC (permalink / raw)
To: qemu-devel; +Cc: victork, mst, jasowang, yvugenfi, Wei Xu, marcel, dfleytma
From: Wei Xu <wexu@redhat.com>
Signed-off-by: Wei Xu <wexu@redhat.com>
---
hw/net/virtio-net.c | 49 +++++++++++++++++++++++++++++++++++++++++++---
include/hw/virtio/virtio.h | 35 +++++++++++++++++++++++++++++++++
2 files changed, 81 insertions(+), 3 deletions(-)
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index 362f304..b2f8ae8 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -66,6 +66,7 @@
/* Global statistics */
static uint32_t rsc_chain_no_mem;
+static uint64_t virtio_net_received;
/* Switcher to enable/disable rsc */
static bool virtio_net_rsc_bypass;
@@ -1679,10 +1680,12 @@ static void virtio_net_rsc_purge(void *opq)
if (ret == 0) {
/* Try next queue */
+ chain->stat.purge_failed++;
continue;
}
}
+ chain->stat.timer++;
if (!QTAILQ_EMPTY(&chain->buffers)) {
timer_mod(chain->drain_timer,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + rsc_timeout);
@@ -1715,6 +1718,7 @@ static int virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc,
seg = g_malloc(sizeof(NetRscSeg));
if (!seg) {
+ chain->stat.no_buf++;
return 0;
}
@@ -1730,9 +1734,11 @@ static int virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc,
seg->nc = nc;
QTAILQ_INSERT_TAIL(&chain->buffers, seg, next);
+ chain->stat.cache++;
return size;
out:
+ chain->stat.no_buf++;
g_free(seg);
return 0;
}
@@ -1750,27 +1756,33 @@ static int32_t virtio_net_rsc_handle_ack(NetRscChain *chain, NetRscSeg *seg,
owin = htons(o_tcp->th_win);
if ((nack - oack) >= TCP_WINDOW) {
+ chain->stat.ack_out_of_win++;
return RSC_FINAL;
} else if (nack == oack) {
/* duplicated ack or window probe */
if (nwin == owin) {
/* duplicated ack, add dup ack count due to whql test up to 1 */
+ chain->stat.dup_ack++;
if (seg->dup_ack_count == 0) {
seg->dup_ack_count++;
+ chain->stat.dup_ack1++;
return RSC_COALESCE;
} else {
/* Spec says should send it directly */
+ chain->stat.dup_ack2++;
return RSC_FINAL;
}
} else {
/* Coalesce window update */
o_tcp->th_win = n_tcp->th_win;
+ chain->stat.win_update++;
return RSC_COALESCE;
}
} else {
/* pure ack, update ack */
o_tcp->th_ack = n_tcp->th_ack;
+ chain->stat.pure_ack++;
return RSC_COALESCE;
}
}
@@ -1788,13 +1800,20 @@ static int32_t virtio_net_rsc_coalesce_tcp(NetRscChain *chain, NetRscSeg *seg,
nseq = htonl(n_tcp->th_seq);
oseq = htonl(o_tcp->th_seq);
+ if (n_tcp_len > sizeof(struct tcp_header)) {
+ /* Log this only for debugging observation */
+ chain->stat.tcp_option++;
+ }
+
/* Ignore packet with more/larger tcp options */
if (n_tcp_len > o_tcp_len) {
+ chain->stat.tcp_larger_option++;
return RSC_FINAL;
}
/* out of order or retransmitted. */
if ((nseq - oseq) > TCP_WINDOW) {
+ chain->stat.data_out_of_win++;
return RSC_FINAL;
}
@@ -1802,16 +1821,19 @@ static int32_t virtio_net_rsc_coalesce_tcp(NetRscChain *chain, NetRscSeg *seg,
if (nseq == oseq) {
if ((0 == o_data) && n_data) {
/* From no payload to payload, normal case, not a dup ack or etc */
+ chain->stat.data_after_pure_ack++;
goto coalesce;
} else {
return virtio_net_rsc_handle_ack(chain, seg, buf, n_tcp, o_tcp);
}
} else if ((nseq - oseq) != o_data) {
/* Not a consistent packet, out of order */
+ chain->stat.data_out_of_order++;
return RSC_FINAL;
} else {
coalesce:
if ((o_ip_len + n_data) > max_data) {
+ chain->stat.over_size++;
return RSC_FINAL;
}
@@ -1824,6 +1846,7 @@ coalesce:
memmove(seg->buf + seg->size, data, n_data);
seg->size += n_data;
+ chain->stat.coalesced++;
return RSC_COALESCE;
}
}
@@ -1855,6 +1878,7 @@ static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain,
if ((n_ip->ip_src ^ o_ip->ip_src) || (n_ip->ip_dst ^ o_ip->ip_dst)
|| (n_tcp->th_sport ^ o_tcp->th_sport)
|| (n_tcp->th_dport ^ o_tcp->th_dport)) {
+ chain->stat.no_match++;
return RSC_NO_MATCH;
}
@@ -1890,6 +1914,7 @@ static int32_t virtio_net_rsc_try_coalesce6(NetRscChain *chain,
|| memcmp(&n_ip->ip6_dst, &o_ip->ip6_dst, sizeof(struct in6_address))
|| (n_tcp->th_sport ^ o_tcp->th_sport)
|| (n_tcp->th_dport ^ o_tcp->th_dport)) {
+ chain->stat.no_match++;
return RSC_NO_MATCH;
}
@@ -1927,6 +1952,7 @@ static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,
NetRscSeg *seg, *nseg;
if (QTAILQ_EMPTY(&chain->buffers)) {
+ chain->stat.empty_cache++;
if (!virtio_net_rsc_cache_buf(chain, nc, buf, size)) {
return 0;
} else {
@@ -1948,6 +1974,7 @@ static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,
g_free(seg);
if (ret == 0) {
/* Send failed */
+ chain->stat.final_failed++;
return 0;
}
@@ -1962,6 +1989,7 @@ static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc,
}
}
+ chain->stat.no_match_cache++;
return virtio_net_rsc_cache_buf(chain, nc, buf, size);
}
@@ -1980,8 +2008,9 @@ static size_t virtio_net_rsc_drain_one(NetRscChain *chain, NetClientState *nc,
if ((chain->proto == ETH_P_IP) && seg->is_coalesced) {
virtio_net_rsc_ipv4_checksum(seg);
}
-
- virtio_net_do_receive(seg->nc, seg->buf, seg->size);
+ if (!virtio_net_do_receive(seg->nc, seg->buf, seg->size)) {
+ chain->stat.drain_failed++;
+ }
QTAILQ_REMOVE(&chain->buffers, seg, next);
g_free(seg->buf);
@@ -2003,20 +2032,24 @@ static int32_t virtio_net_rsc_filter4(NetRscChain *chain, struct ip_header *ip,
/* Not an ipv4 one */
if (0x4 != ((0xF0 & ip->ip_ver_len) >> 4)) {
+ chain->stat.ip_option++;
return RSC_BYPASS;
}
/* Don't handle packets with ip option */
if (5 != (0xF & ip->ip_ver_len)) {
+ chain->stat.ip_option++;
return RSC_BYPASS;
}
/* Don't handle packets with ip fragment */
if (!(htons(ip->ip_off) & IP_DF)) {
+ chain->stat.ip_frag++;
return RSC_BYPASS;
}
if (ip->ip_p != IPPROTO_TCP) {
+ chain->stat.bypass_not_tcp++;
return RSC_BYPASS;
}
@@ -2024,13 +2057,13 @@ static int32_t virtio_net_rsc_filter4(NetRscChain *chain, struct ip_header *ip,
ip_len = htons(ip->ip_len);
if (ip_len < (sizeof(struct ip_header) + sizeof(struct tcp_header))
|| ip_len > (size - IP_OFFSET)) {
+ chain->stat.ip_hacked++;
return RSC_BYPASS;
}
return RSC_WANT;
}
-
static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
const uint8_t *buf, size_t size)
{
@@ -2048,8 +2081,10 @@ static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc,
ret = virtio_net_rsc_parse_tcp_ctrl((uint8_t *)ip,
(0xF & ip->ip_ver_len) << 2);
if (RSC_BYPASS == ret) {
+ chain->stat.tcp_syn++;
return virtio_net_do_receive(nc, buf, size);
} else if (RSC_FINAL == ret) {
+ chain->stat.tcp_ctrl_drain++;
return virtio_net_rsc_drain_one(chain, nc, buf, size, IP4_ADDR_OFFSET,
IP4_ADDR_SIZE, TCP4_PORT_OFFSET, TCP_PORT_SIZE);
}
@@ -2073,6 +2108,7 @@ static int32_t virtio_net_rsc_filter6(NetRscChain *chain, struct ip6_header *ip,
/* Both option and protocol is checked in this */
if (ip->ip6_ctlun.ip6_un1.ip6_un1_nxt != IPPROTO_TCP) {
+ chain->stat.bypass_not_tcp++;
return RSC_BYPASS;
}
@@ -2080,6 +2116,7 @@ static int32_t virtio_net_rsc_filter6(NetRscChain *chain, struct ip6_header *ip,
ip_len = htons(ip->ip6_ctlun.ip6_un1.ip6_un1_plen);
if (ip_len < sizeof(struct tcp_header)
|| ip_len > (size - TCP6_OFFSET)) {
+ chain->stat.ip_hacked++;
return RSC_BYPASS;
}
@@ -2101,8 +2138,10 @@ static size_t virtio_net_rsc_receive6(void *opq, NetClientState* nc,
ret = virtio_net_rsc_parse_tcp_ctrl((uint8_t *)ip, sizeof(*ip));
if (RSC_BYPASS == ret) {
+ chain->stat.tcp_syn++;
return virtio_net_do_receive(nc, buf, size);
} else if (RSC_FINAL == ret) {
+ chain->stat.tcp_ctrl_drain++;
return virtio_net_rsc_drain_one(chain, nc, buf, size, IP6_ADDR_OFFSET,
IP6_ADDR_SIZE, TCP6_PORT_OFFSET, TCP_PORT_SIZE);
}
@@ -2140,6 +2179,7 @@ static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc,
chain->proto = proto;
chain->drain_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
virtio_net_rsc_purge, chain);
+ memset(&chain->stat, 0, sizeof(chain->stat));
if (ETH_P_IP == proto) {
chain->do_receive = virtio_net_rsc_receive4;
} else {
@@ -2168,6 +2208,7 @@ static ssize_t virtio_net_rsc_receive(NetClientState *nc,
if (!chain) {
return virtio_net_do_receive(nc, buf, size);
} else {
+ chain->stat.received++;
return chain->do_receive(chain, nc, buf, size);
}
}
@@ -2175,6 +2216,8 @@ static ssize_t virtio_net_rsc_receive(NetClientState *nc,
static ssize_t virtio_net_receive(NetClientState *nc,
const uint8_t *buf, size_t size)
{
+ virtio_net_received++;
+
if (virtio_net_rsc_bypass) {
return virtio_net_do_receive(nc, buf, size);
} else {
diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
index 1383220..f399d3d 100644
--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
@@ -136,6 +136,40 @@ typedef enum {
RSC_WANT /* Data want to be coalesced */
} COALESCE_STATUS;
+typedef struct NetRscStat {
+ uint32_t received;
+ uint32_t coalesced;
+ uint32_t over_size;
+ uint32_t cache;
+ uint32_t empty_cache;
+ uint32_t no_match_cache;
+ uint32_t no_buf;
+ uint32_t new_tos;
+ uint32_t win_update;
+ uint32_t no_match;
+ uint32_t tcp_syn;
+ uint32_t tcp_ctrl_drain;
+ uint32_t dup_ack;
+ uint32_t dup_ack1;
+ uint32_t dup_ack2;
+ uint32_t pure_ack;
+ uint32_t ack_out_of_win;
+ uint32_t data_out_of_win;
+ uint32_t data_out_of_order;
+ uint32_t data_after_pure_ack;
+ uint32_t bypass_not_tcp;
+ uint32_t bypass_tcp_ctrl;
+ uint32_t tcp_option;
+ uint32_t tcp_larger_option;
+ uint32_t ip_frag;
+ uint32_t ip_hacked;
+ uint32_t ip_option;
+ uint32_t purge_failed;
+ uint32_t drain_failed;
+ uint32_t final_failed;
+ int64_t timer;
+} NetRscStat;
+
/* Coalesced segmant */
typedef struct NetRscSeg {
QTAILQ_ENTRY(NetRscSeg) next;
@@ -157,6 +191,7 @@ typedef struct NetRscChain {
VirtioNetReceive *do_receive;
QEMUTimer *drain_timer;
QTAILQ_HEAD(, NetRscSeg) buffers;
+ NetRscStat stat;
} NetRscChain;
void virtio_instance_init_common(Object *proxy_obj, void *data,
--
2.4.0
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest
2016-01-25 22:24 [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest wexu
` (9 preceding siblings ...)
2016-01-25 22:24 ` [Qemu-devel] [RFC Patch 10/10] Statistics wexu
@ 2016-01-26 6:44 ` Fam Zheng
2016-01-26 13:29 ` Wei Xu
2016-01-27 4:52 ` Jason Wang
10 siblings, 2 replies; 15+ messages in thread
From: Fam Zheng @ 2016-01-26 6:44 UTC (permalink / raw)
To: wexu; +Cc: victork, mst, jasowang, yvugenfi, qemu-devel, marcel, dfleytma
On Tue, 01/26 06:24, wexu@redhat.com wrote:
> Wei Xu (10):
> 'Segment', 'Chain' and 'Status' enumeration.
> Initilize & Cleanup.
> Chain lookup and packets caching.
> Tcp general data coalescing
> The draining timer
> IPv4 checksum.
> TCP control packet handling
> Sanity check & More bypass cases check.
> IPv6 support.
> Statistics.
Please add subsystem prefixes to subjects, like:
"virtio-net: IPv6 support"
"virtio-net: Statistics"
This applies to the cover letter too.
(nit-pick: period "." is not necessary)
Fam
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest
2016-01-26 6:44 ` [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest Fam Zheng
@ 2016-01-26 13:29 ` Wei Xu
2016-01-27 4:52 ` Jason Wang
1 sibling, 0 replies; 15+ messages in thread
From: Wei Xu @ 2016-01-26 13:29 UTC (permalink / raw)
To: qemu-devel
On 01/26/2016 02:44 PM, Fam Zheng wrote:
> On Tue, 01/26 06:24, wexu@redhat.com wrote:
>> Wei Xu (10):
>> 'Segment', 'Chain' and 'Status' enumeration.
>> Initilize & Cleanup.
>> Chain lookup and packets caching.
>> Tcp general data coalescing
>> The draining timer
>> IPv4 checksum.
>> TCP control packet handling
>> Sanity check & More bypass cases check.
>> IPv6 support.
>> Statistics.
> Please add subsystem prefixes to subjects, like:
>
> "virtio-net: IPv6 support"
> "virtio-net: Statistics"
>
> This applies to the cover letter too.
>
> (nit-pick: period "." is not necessary)
Thanks Fam.
Wei
>
> Fam
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest
2016-01-26 6:44 ` [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest Fam Zheng
2016-01-26 13:29 ` Wei Xu
@ 2016-01-27 4:52 ` Jason Wang
2016-01-27 8:03 ` Wei Xu
1 sibling, 1 reply; 15+ messages in thread
From: Jason Wang @ 2016-01-27 4:52 UTC (permalink / raw)
To: Fam Zheng, wexu; +Cc: victork, mst, yvugenfi, qemu-devel, marcel, dfleytma
On 01/26/2016 02:44 PM, Fam Zheng wrote:
> On Tue, 01/26 06:24, wexu@redhat.com wrote:
>> Wei Xu (10):
>> 'Segment', 'Chain' and 'Status' enumeration.
>> Initilize & Cleanup.
>> Chain lookup and packets caching.
>> Tcp general data coalescing
>> The draining timer
>> IPv4 checksum.
>> TCP control packet handling
>> Sanity check & More bypass cases check.
>> IPv6 support.
>> Statistics.
> Please add subsystem prefixes to subjects, like:
>
> "virtio-net: IPv6 support"
> "virtio-net: Statistics
And need to be more verbose. E.g:
- "Statistics" is too generic, something like "TCP coalescing
statistics" is much better.
- "virtio-net: IPv6 support" which is really confusing since it lacks
some context. Reviewers may suspect there's no ipv6 support in the past.
- "Tcp general data coalescing, the parameters is a little bit horrible,
it's complicated to read, should can be optimized later." is too long to
be a subject. "Tcp general data coalescing" should be ok. For personal
comment like "the parameters is a little bit horrible, it's complicated
to read, should can be optimized later." could be places below '---' in
the patch.
And need a more verbose commit log please. At least I could not figure
out what is happening just form most of the commit logs. [1] is a very
good documentation for how to describe your changes, please have a look
at that and describe the changes correctly in each commit log. You can
also have a look at git history to see how it was done.
[1]
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/SubmittingPatches#n106
Thanks
>
> This applies to the cover letter too.
>
> (nit-pick: period "." is not necessary)
>
> Fam
>
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [Qemu-devel] [RFC 0/10] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest
2016-01-27 4:52 ` Jason Wang
@ 2016-01-27 8:03 ` Wei Xu
0 siblings, 0 replies; 15+ messages in thread
From: Wei Xu @ 2016-01-27 8:03 UTC (permalink / raw)
To: Jason Wang, Fam Zheng
Cc: victork, mst, yvugenfi, qemu-devel, marcel, dfleytma
On 01/27/2016 12:52 PM, Jason Wang wrote:
>
> On 01/26/2016 02:44 PM, Fam Zheng wrote:
>> On Tue, 01/26 06:24, wexu@redhat.com wrote:
>>> Wei Xu (10):
>>> 'Segment', 'Chain' and 'Status' enumeration.
>>> Initilize & Cleanup.
>>> Chain lookup and packets caching.
>>> Tcp general data coalescing
>>> The draining timer
>>> IPv4 checksum.
>>> TCP control packet handling
>>> Sanity check & More bypass cases check.
>>> IPv6 support.
>>> Statistics.
>> Please add subsystem prefixes to subjects, like:
>>
>> "virtio-net: IPv6 support"
>> "virtio-net: Statistics
> And need to be more verbose. E.g:
>
> - "Statistics" is too generic, something like "TCP coalescing
> statistics" is much better.
> - "virtio-net: IPv6 support" which is really confusing since it lacks
> some context. Reviewers may suspect there's no ipv6 support in the past.
> - "Tcp general data coalescing, the parameters is a little bit horrible,
> it's complicated to read, should can be optimized later." is too long to
> be a subject. "Tcp general data coalescing" should be ok. For personal
> comment like "the parameters is a little bit horrible, it's complicated
> to read, should can be optimized later." could be places below '---' in
> the patch.
>
> And need a more verbose commit log please. At least I could not figure
> out what is happening just form most of the commit logs. [1] is a very
> good documentation for how to describe your changes, please have a look
> at that and describe the changes correctly in each commit log. You can
> also have a look at git history to see how it was done.
>
> [1]
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/SubmittingPatches#n106
OK, Thanks a lot.
Wei
>
> Thanks
>> This applies to the cover letter too.
>>
>> (nit-pick: period "." is not necessary)
>>
>> Fam
>>
>
^ permalink raw reply [flat|nested] 15+ messages in thread