Add a zero-copy receive path between tap and virtio-net.

A VLAN client may now provide fd_read_zc.  When the tap device has exactly one
other client on its VLAN and that client provides the hook, tap_send() lets the
client readv() from the tap fd directly into the buffers the client supplies.
The copying virtio_net_receive() is kept and reuses the same code path.

diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index 5538979..dc13630 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -63,6 +63,9 @@ typedef struct VirtIONet
     VLANClientState *vc;
     QEMUTimer *tx_timer;
     int tx_timer_active;
+    /* RX element popped from rx_vq but not yet pushed back to the guest */
+    int last_elem_valid;
+    VirtQueueElement last_elem;
 } VirtIONet;
 
 /* TODO
@@ -112,35 +115,88 @@ static int virtio_net_can_receive(void *opaque)
     return 1;
 }
 
-static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
+/*
+ * Deliver one packet to the guest.  zc(data, iov, iovcnt) fills the receive
+ * buffers that follow the virtio_net_hdr and returns the number of bytes
+ * stored, or -1 with errno set.  The element popped from rx_vq is cached in
+ * n->last_elem until it has been filled and pushed, so an EAGAIN or an I/O
+ * error from zc does not lose it.
+ */
+static void virtio_net_receive_zc(void *opaque, IOZeroCopyHandler *zc,
+                                  void *data)
 {
     VirtIONet *n = opaque;
-    VirtQueueElement elem;
+    VirtQueueElement *elem = &n->last_elem;
     struct virtio_net_hdr *hdr;
-    int offset, i;
+    ssize_t err;
 
-    if (virtqueue_pop(n->rx_vq, &elem) == 0) {
+    if (!n->last_elem_valid && virtqueue_pop(n->rx_vq, elem) == 0) {
         fprintf(stderr, "virtio_net: this should not happen\n");
         return;
     }
 
-    hdr = (void *)elem.in_sg[0].iov_base;
+    n->last_elem_valid = 1;
+
+    hdr = (void *)elem->in_sg[0].iov_base;
     hdr->flags = 0;
     hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
 
+    do {
+        err = zc(data, &elem->in_sg[1], elem->in_num - 1);
+    } while (err == -1 && errno == EINTR);
+
+    if (err == -1 && errno == EAGAIN)
+        return;
+
+    if (err < 0) {
+        fprintf(stderr, "virtio_net: error during IO\n");
+        return;
+    }
+
+    /* signal other side */
+    n->last_elem_valid = 0;
+    virtqueue_push(n->rx_vq, elem, sizeof(*hdr) + err);
+    virtio_notify(&n->vdev, n->rx_vq);
+}
+
+/* Linear packet handed to compat_copy() by the copying receive path */
+struct compat_data
+{
+    const uint8_t *buf;
+    int size;
+};
+
+/*
+ * IOZeroCopyHandler that copies the linear packet described by opaque into
+ * iov[]; returns the bytes copied, which is short if iov[] is too small.
+ */
+static ssize_t compat_copy(void *opaque, struct iovec *iov, int iovcnt)
+{
+    struct compat_data *compat = opaque;
+    int offset, i;
+
     /* copy in packet.  ugh */
     offset = 0;
-    i = 1;
-    while (offset < size && i < elem.in_num) {
-        int len = MIN(elem.in_sg[i].iov_len, size - offset);
-        memcpy(elem.in_sg[i].iov_base, buf + offset, len);
+    i = 0;
+    while (offset < compat->size && i < iovcnt) {
+        int len = MIN(iov[i].iov_len, compat->size - offset);
+        memcpy(iov[i].iov_base, compat->buf + offset, len);
         offset += len;
         i++;
     }
 
-    /* signal other side */
-    virtqueue_push(n->rx_vq, &elem, sizeof(*hdr) + offset);
-    virtio_notify(&n->vdev, n->rx_vq);
+    return offset;
+}
+
+/* Copying receive path: wraps buf/size and reuses virtio_net_receive_zc() */
+static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
+{
+    struct compat_data compat;
+
+    compat.buf = buf;
+    compat.size = size;
+
+    virtio_net_receive_zc(opaque, compat_copy, &compat);
 }
 
 /* TX */
@@ -220,6 +276,7 @@ PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
     memcpy(n->mac, nd->macaddr, 6);
     n->vc = qemu_new_vlan_client(nd->vlan, virtio_net_receive,
                                  virtio_net_can_receive, n);
+    n->vc->fd_read_zc = virtio_net_receive_zc;
 
     n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
     n->tx_timer_active = 0;
diff --git a/qemu/net.h b/qemu/net.h
index dfdf9af..478518e 100644
--- a/qemu/net.h
+++ b/qemu/net.h
@@ -1,11 +1,19 @@
 #ifndef QEMU_NET_H
 #define QEMU_NET_H
 
+#include <sys/uio.h>
+
+/* Fills the iovec array; returns bytes stored, or -1 with errno set */
+typedef ssize_t (IOZeroCopyHandler)(void *, struct iovec *, int);
+/* Receive hook: is given a handler (and its opaque) that fills its buffers */
+typedef void (IOReadZCHandler)(void *, IOZeroCopyHandler *, void *);
+
 /* VLANs support */
 
 typedef struct VLANClientState VLANClientState;
 
 struct VLANClientState {
+    IOReadZCHandler *fd_read_zc;
     IOReadHandler *fd_read;
     /* Packets may still be sent if this returns zero.  It's used to
        rate-limit the slirp code.  */
diff --git a/qemu/vl.c b/qemu/vl.c
index 74c34b6..a222c7c 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -3974,6 +3974,7 @@ typedef struct TAPState {
     char down_script[1024];
     char buf[4096];
     int size;
+    int received_eagain;        /* set by tap_sendv() to end the zero-copy loop */
 } TAPState;
 
 static void tap_receive(void *opaque, const uint8_t *buf, int size)
@@ -3989,12 +3990,65 @@ static void tap_receive(void *opaque, const uint8_t *buf, int size)
     }
 }
 
+#ifndef __sun__
+/*
+ * Return the only other client on the VLAN if it implements fd_read_zc,
+ * otherwise NULL (zero-copy needs exactly one capable receiver).
+ */
+static VLANClientState *tap_can_zero_copy(TAPState *s)
+{
+    VLANClientState *vc, *vc1 = NULL;
+    int vc_count = 0;
+
+    for (vc = s->vc->vlan->first_client; vc; vc = vc->next) {
+        if (vc == s->vc)
+            continue;
+
+        if (!vc->fd_read_zc || vc_count)
+            return NULL;
+
+        vc_count++;
+        vc1 = vc;
+    }
+
+    return vc1;
+}
+
+/* IOZeroCopyHandler: readv() from the tap fd straight into the iovecs */
+static ssize_t tap_sendv(void *opaque, struct iovec *iov, int iovcnt)
+{
+    TAPState *s = opaque;
+    ssize_t ret;
+
+    ret = readv(s->fd, iov, iovcnt);
+    /*
+     * Stop the receive loop when nothing more can be read.  EINTR is
+     * retried by the caller; any other failure must also end the loop or
+     * tap_send_zero_copy() would spin on a persistent error.
+     */
+    if (ret == 0 || (ret == -1 && errno != EINTR))
+        s->received_eagain = 1;
+
+    return ret;
+}
+
+/* Let vc pull packets from the tap fd until the fd or vc cannot continue */
+static void tap_send_zero_copy(TAPState *s, VLANClientState *vc)
+{
+    s->received_eagain = 0;
+    while (s->received_eagain == 0 &&
+           (!vc->fd_can_read || vc->fd_can_read(vc->opaque))) {
+        vc->fd_read_zc(vc->opaque, tap_sendv, s);
+    }
+}
+#endif
+
 static int tap_can_send(void *opaque)
 {
     TAPState *s = opaque;
     VLANClientState *vc;
     int can_receive = 0;
-    
+
     /* Check to see if any of our clients can receive a packet */
     for (vc = s->vc->vlan->first_client; vc; vc = vc->next) {
         /* Skip ourselves */
@@ -4018,6 +4072,15 @@ static int tap_can_send(void *opaque)
 static void tap_send(void *opaque)
 {
     TAPState *s = opaque;
+#ifndef __sun__
+    VLANClientState *zc;
+
+    zc = tap_can_zero_copy(s);
+    if (zc) {
+        tap_send_zero_copy(s, zc);
+        return;
+    }
+#endif
 
     /* First try to send any buffered packet */
     if (s->size > 0) {