From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:53130) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1aQ7SM-0007Ei-Ao for qemu-devel@nongnu.org; Mon, 01 Feb 2016 00:55:23 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1aQ7SJ-0005d3-3O for qemu-devel@nongnu.org; Mon, 01 Feb 2016 00:55:22 -0500 Received: from mx1.redhat.com ([209.132.183.28]:57116) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1aQ7SI-0005cx-PN for qemu-devel@nongnu.org; Mon, 01 Feb 2016 00:55:19 -0500 Received: from int-mx10.intmail.prod.int.phx2.redhat.com (int-mx10.intmail.prod.int.phx2.redhat.com [10.5.11.23]) by mx1.redhat.com (Postfix) with ESMTPS id 7BA43804E6 for ; Mon, 1 Feb 2016 05:55:17 +0000 (UTC) References: <1454264009-24094-1-git-send-email-wexu@redhat.com> <1454264009-24094-4-git-send-email-wexu@redhat.com> From: Jason Wang Message-ID: <56AEF339.20504@redhat.com> Date: Mon, 1 Feb 2016 13:55:05 +0800 MIME-Version: 1.0 In-Reply-To: <1454264009-24094-4-git-send-email-wexu@redhat.com> Content-Type: text/plain; charset=windows-1252 Content-Transfer-Encoding: 7bit Subject: Re: [Qemu-devel] [RFC Patch v2 03/10] virtio-net rsc: Chain Lookup, Packet Caching and Framework of IPv4 List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: wexu@redhat.com, qemu-devel@nongnu.org Cc: Wei Xu , victork@redhat.com, mst@redhat.com, yvugenfi@redhat.com, marcel@redhat.com, dfleytma@redhat.com On 02/01/2016 02:13 AM, wexu@redhat.com wrote: > From: Wei Xu > > Upon a packet is arriving, a corresponding chain will be selected or created, > or be bypassed if it's not an IPv4 packets. > > The callback in the chain will be invoked to call the real coalescing. > > Since the coalescing is based on the TCP connection, so the packets will be > cached if there is no previous data within the same connection. > > The framework of IPv4 is also introduced. > > This patch depends on patch 2918cf2 (Detailed IPv4 and General TCP data > coalescing) Then looks like the order needs to be changed? > > Signed-off-by: Wei Xu > --- > hw/net/virtio-net.c | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 172 insertions(+), 1 deletion(-) > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c > index 4e9458e..cfbac6d 100644 > --- a/hw/net/virtio-net.c > +++ b/hw/net/virtio-net.c > @@ -14,10 +14,12 @@ > #include "qemu/iov.h" > #include "hw/virtio/virtio.h" > #include "net/net.h" > +#include "net/eth.h" > #include "net/checksum.h" > #include "net/tap.h" > #include "qemu/error-report.h" > #include "qemu/timer.h" > +#include "qemu/sockets.h" > #include "hw/virtio/virtio-net.h" > #include "net/vhost_net.h" > #include "hw/virtio/virtio-bus.h" > @@ -37,6 +39,21 @@ > #define endof(container, field) \ > (offsetof(container, field) + sizeof(((container *)0)->field)) > > +#define VIRTIO_HEADER 12 /* Virtio net header size */ This looks wrong if mrg_rxbuf (VIRTIO_NET_F_MRG_RXBUF) is off. > +#define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header)) > + > +#define MAX_VIRTIO_IP_PAYLOAD (65535 + IP_OFFSET) > + > +/* Global statistics */ > +static uint32_t rsc_chain_no_mem; This is meaningless, see below comments. > + > +/* Switcher to enable/disable rsc */ > +static bool virtio_net_rsc_bypass; > + > +/* Coalesce callback for ipv4/6 */ > +typedef int32_t (VirtioNetCoalesce) (NetRscChain *chain, NetRscSeg *seg, > + const uint8_t *buf, size_t size); > + > typedef struct VirtIOFeature { > uint32_t flags; > size_t end; > @@ -1019,7 +1036,8 @@ static int receive_filter(VirtIONet *n, const uint8_t *buf, int size) > return 0; > } > > -static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size) > +static ssize_t virtio_net_do_receive(NetClientState *nc, > + const uint8_t *buf, size_t size) > { > VirtIONet *n = qemu_get_nic_opaque(nc); > VirtIONetQueue *q = virtio_net_get_subqueue(nc); > @@ -1623,6 +1641,159 @@ static void virtio_net_rsc_cleanup(VirtIONet *n) > } > } > > +static int virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc, > + const uint8_t *buf, size_t size) > +{ > + NetRscSeg *seg; > + > + seg = g_malloc(sizeof(NetRscSeg)); > + if (!seg) { > + return 0; > + } g_malloc() can't fail, no need to check if it succeeded. > + > + seg->buf = g_malloc(MAX_VIRTIO_IP_PAYLOAD); > + if (!seg->buf) { > + goto out; > + } > + > + memmove(seg->buf, buf, size); > + seg->size = size; > + seg->dup_ack_count = 0; > + seg->is_coalesced = 0; > + seg->nc = nc; > + > + QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); > + return size; > + > +out: > + g_free(seg); > + return 0; > +} > + > + > +static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain, > + NetRscSeg *seg, const uint8_t *buf, size_t size) > +{ > + /* This real part of this function will be introduced in next patch, just > + * return a 'final' to feed the compilation. */ > + return RSC_FINAL; > +} > + > +static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc, > + const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce) > +{ Looks like this function was called directly, so "callback" suffix is not accurate. > + int ret; > + NetRscSeg *seg, *nseg; > + > + if (QTAILQ_EMPTY(&chain->buffers)) { > + if (!virtio_net_rsc_cache_buf(chain, nc, buf, size)) { > + return 0; > + } else { > + return size; > + } > + } > + > + QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { > + ret = coalesce(chain, seg, buf, size); > + if (RSC_FINAL == ret) { Let's use "ret == RSC_FINAL" for a consistent coding style with other qemu codes. > + ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); > + QTAILQ_REMOVE(&chain->buffers, seg, next); > + g_free(seg->buf); > + g_free(seg); > + if (ret == 0) { > + /* Send failed */ > + return 0; > + } > + > + /* Send current packet */ > + return virtio_net_do_receive(nc, buf, size); > + } else if (RSC_NO_MATCH == ret) { > + continue; > + } else { > + /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ > + seg->is_coalesced = 1; > + return size; > + } > + } > + > + return virtio_net_rsc_cache_buf(chain, nc, buf, size); Maybe you can move the seg traversing info coalesce() function? This can greatly simplify the codes above? (E.g no need to call virtio_net_rsc_cache_buf() in two places?) > +} > + > +static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc, > + const uint8_t *buf, size_t size) > +{ > + NetRscChain *chain; > + > + chain = (NetRscChain *)opq; > + return virtio_net_rsc_callback(chain, nc, buf, size, > + virtio_net_rsc_try_coalesce4); > +} > + > +static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc, > + uint16_t proto) > +{ > + VirtIONet *n; > + NetRscChain *chain; > + NICState *nic; > + > + /* Only handle IPv4/6 */ > + if (proto != (uint16_t)ETH_P_IP) { The comments is inconsistent with code, the code can only handle IPv4 currently. > + return NULL; > + } > + > + nic = (NICState *)nc; This cast is wrong for multiqueue backend. You can refer the exist virtio_net_receive() for how to vet VirtIONet structure. E.g: ... VirtIONet *n = qemu_get_nic_opaque(nc); ... > + n = container_of(&nic, VirtIONet, nic); > + QTAILQ_FOREACH(chain, &n->rsc_chains, next) { > + if (chain->proto == proto) { > + return chain; > + } > + } > + > + chain = g_malloc(sizeof(*chain)); > + if (!chain) { > + rsc_chain_no_mem++; Since g_malloc() can't fail, this is meaningless. > + return NULL; > + } > + > + chain->proto = proto; > + chain->do_receive = virtio_net_rsc_receive4; > + > + QTAILQ_INIT(&chain->buffers); > + QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); > + return chain; > +} Better to split the chain initialization from lookup. And we can initialize ipv4 chain during initialization. > + > +static ssize_t virtio_net_rsc_receive(NetClientState *nc, > + const uint8_t *buf, size_t size) > +{ > + uint16_t proto; > + NetRscChain *chain; > + struct eth_header *eth; > + > + if (size < IP_OFFSET) { > + return virtio_net_do_receive(nc, buf, size); > + } > + > + eth = (struct eth_header *)(buf + VIRTIO_HEADER); > + proto = htons(eth->h_proto); > + chain = virtio_net_rsc_lookup_chain(nc, proto); > + if (!chain) { > + return virtio_net_do_receive(nc, buf, size); > + } else { > + return chain->do_receive(chain, nc, buf, size); > + } > +} > + > +static ssize_t virtio_net_receive(NetClientState *nc, > + const uint8_t *buf, size_t size) > +{ > + if (virtio_net_rsc_bypass) { > + return virtio_net_do_receive(nc, buf, size); > + } else { > + return virtio_net_rsc_receive(nc, buf, size); > + } > +} > + > static NetClientInfo net_virtio_info = { > .type = NET_CLIENT_OPTIONS_KIND_NIC, > .size = sizeof(NICState),