* [Qemu-devel] [PATCH v2] VMXNET3 paravirtual NIC device implementation
@ 2012-02-29 12:49 Dmitry Fleytman
2012-03-01 11:48 ` Michael Tokarev
2012-03-03 16:55 ` Gerhard Wiesinger
0 siblings, 2 replies; 5+ messages in thread
From: Dmitry Fleytman @ 2012-02-29 12:49 UTC (permalink / raw)
To: qemu-devel
Cc: Anthony Liguori, Alex Fishman, Dmitry Fleytman, yvugenfi,
Izik Eidus, Yan Vugenfirer, Michael S. Tsirkin, Dmitry Fleytman
Changes in V2:
License text changed accoring to community suggestions
Standard license header from GPLv2+ - licensed QEMU files used
Implementation of VMWare VMXNET3 paravirtual NIC device.
Supports of all the device features including offload capabilties,
VLANs and etc.
The device is tested on different OSes:
Fedora 15
Ubuntu 10.4
Centos 6.2
Windows 2008R2
Windows 2008 64bit
Windows 2008 32bit
Windows 2003 64bit
Windows 2003 32bit
Currently live migration is not supported.
Signed-off-by: Dmitry Fleytman <dmitry@daynix.com>
Signed-off-by: Yan Vugenfirer <yan@daynix.com>
---
qemu/Makefile.objs | 1 +
qemu/default-configs/pci.mak | 1 +
qemu/hw/pci.c | 2 +
qemu/hw/pci.h | 1 +
qemu/hw/virtio-net.h | 13 +-
qemu/hw/vmware_utils.h | 131 +++
qemu/hw/vmxnet3.c | 2559 ++++++++++++++++++++++++++++++++++++++++++
qemu/hw/vmxnet3.h | 727 ++++++++++++
qemu/hw/vmxnet3_debug.h | 104 ++
qemu/hw/vmxnet_utils.c | 172 +++
qemu/hw/vmxnet_utils.h | 242 ++++
qemu/net.c | 2 +-
qemu/net/checksum.h | 7 +
13 files changed, 3955 insertions(+), 7 deletions(-)
create mode 100644 qemu/hw/vmware_utils.h
create mode 100644 qemu/hw/vmxnet3.c
create mode 100644 qemu/hw/vmxnet3.h
create mode 100644 qemu/hw/vmxnet3_debug.h
create mode 100644 qemu/hw/vmxnet_utils.c
create mode 100644 qemu/hw/vmxnet_utils.h
diff --git a/qemu/Makefile.objs b/qemu/Makefile.objs
index 808de6a..3f846a6 100644
--- a/qemu/Makefile.objs
+++ b/qemu/Makefile.objs
@@ -264,6 +264,7 @@ hw-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o
hw-obj-$(CONFIG_PCNET_COMMON) += pcnet.o
hw-obj-$(CONFIG_E1000_PCI) += e1000.o
hw-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o
+hw-obj-$(CONFIG_VMXNET3_PCI) += vmxnet3.o vmxnet_utils.o
hw-obj-$(CONFIG_SMC91C111) += smc91c111.o
hw-obj-$(CONFIG_LAN9118) += lan9118.o
diff --git a/qemu/default-configs/pci.mak b/qemu/default-configs/pci.mak
index 21e4ccf..f8e6ee1 100644
--- a/qemu/default-configs/pci.mak
+++ b/qemu/default-configs/pci.mak
@@ -13,6 +13,7 @@ CONFIG_PCNET_COMMON=y
CONFIG_LSI_SCSI_PCI=y
CONFIG_RTL8139_PCI=y
CONFIG_E1000_PCI=y
+CONFIG_VMXNET3_PCI=y
CONFIG_IDE_CORE=y
CONFIG_IDE_QDEV=y
CONFIG_IDE_PCI=y
diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
index bf046bf..f0fb1ee 100644
--- a/qemu/hw/pci.c
+++ b/qemu/hw/pci.c
@@ -1350,6 +1350,7 @@ static const char * const pci_nic_models[] = {
"e1000",
"pcnet",
"virtio",
+ "vmxnet3",
NULL
};
@@ -1362,6 +1363,7 @@ static const char * const pci_nic_names[] = {
"e1000",
"pcnet",
"virtio-net-pci",
+ "vmxnet3",
NULL
};
diff --git a/qemu/hw/pci.h b/qemu/hw/pci.h
index 4f19fdb..fee8250 100644
--- a/qemu/hw/pci.h
+++ b/qemu/hw/pci.h
@@ -60,6 +60,7 @@
#define PCI_DEVICE_ID_VMWARE_NET 0x0720
#define PCI_DEVICE_ID_VMWARE_SCSI 0x0730
#define PCI_DEVICE_ID_VMWARE_IDE 0x1729
+#define PCI_DEVICE_ID_VMWARE_VMXNET3 0x07B0
/* Intel (0x8086) */
#define PCI_DEVICE_ID_INTEL_82551IT 0x1209
diff --git a/qemu/hw/virtio-net.h b/qemu/hw/virtio-net.h
index 4468741..fa3c17b 100644
--- a/qemu/hw/virtio-net.h
+++ b/qemu/hw/virtio-net.h
@@ -78,13 +78,14 @@ struct virtio_net_config
* specify GSO or CSUM features, you can simply ignore the header. */
struct virtio_net_hdr
{
-#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start, csum_offset
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
+#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
uint8_t flags;
-#define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame
-#define VIRTIO_NET_HDR_GSO_TCPV4 1 // GSO frame, IPv4 TCP (TSO)
-#define VIRTIO_NET_HDR_GSO_UDP 3 // GSO frame, IPv4 UDP (UFO)
-#define VIRTIO_NET_HDR_GSO_TCPV6 4 // GSO frame, IPv6 TCP
-#define VIRTIO_NET_HDR_GSO_ECN 0x80 // TCP has ECN set
+#define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
+#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
+#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */
uint8_t gso_type;
uint16_t hdr_len;
uint16_t gso_size;
diff --git a/qemu/hw/vmware_utils.h b/qemu/hw/vmware_utils.h
new file mode 100644
index 0000000..304bb48
--- /dev/null
+++ b/qemu/hw/vmware_utils.h
@@ -0,0 +1,131 @@
+/*
+ * QEMU VMWARE paravirtual devices - auxiliary code
+ *
+ * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Dmitry Fleytman <dmitry@daynix.com>
+ * Yan Vugenfirer <yan@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+/* Shared memory access functions with byte swap support */
+static inline void
+vmw_shmem_read(target_phys_addr_t addr, void *buf, int len)
+{
+ DSHPRINTF("SHMEM r: %" PRIx64 ", len: %d to %p",
+ (uint64_t) addr, len, buf);
+ cpu_physical_memory_read(addr, buf, len);
+}
+
+static inline void
+vmw_shmem_write(target_phys_addr_t addr, void *buf, int len)
+{
+ DSHPRINTF("SHMEM w: %" PRIx64 ", len: %d to %p",
+ (uint64_t) addr, len, buf);
+ cpu_physical_memory_write(addr, buf, len);
+}
+
+static inline void
+vmw_shmem_rw(target_phys_addr_t addr, void *buf, int len, int is_write)
+{
+ DSHPRINTF("SHMEM r/w: %" PRIx64 ", len: %d (to %p), is write: %d",
+ (uint64_t) addr, len, buf, is_write);
+
+ cpu_physical_memory_rw(addr, buf, len, is_write);
+}
+
+static inline void
+vmw_shmem_set(target_phys_addr_t addr, uint8 val, int len)
+{
+ int i;
+ DSHPRINTF("SHMEM set: %" PRIx64 ", len: %d (value 0x%X)",
+ (uint64_t) addr, len, val);
+
+ for (i = 0; i < len; i++) {
+ cpu_physical_memory_write(addr + i, &val, 1);
+ }
+}
+
+static inline uint32_t
+vmw_shmem_ld8(target_phys_addr_t addr)
+{
+ uint8_t res = ldub_phys(addr);
+ DSHPRINTF("SHMEM load8: %" PRIx64 " (value 0x%X)",
+ (uint64_t) addr, res);
+ return res;
+}
+
+static inline void
+vmw_shmem_st8(target_phys_addr_t addr, uint8_t value)
+{
+ DSHPRINTF("SHMEM store8: %" PRIx64 " (value 0x%X)",
+ (uint64_t) addr, value);
+ stb_phys(addr, value);
+}
+
+static inline uint32_t
+vmw_shmem_ld16(target_phys_addr_t addr)
+{
+ uint16_t res = lduw_le_phys(addr);
+ DSHPRINTF("SHMEM load16: %" PRIx64 " (value 0x%X)",
+ (uint64_t) addr, res);
+ return res;
+}
+
+static inline void
+vmw_shmem_st16(target_phys_addr_t addr, uint16_t value)
+{
+ DSHPRINTF("SHMEM store16: %" PRIx64 " (value 0x%X)",
+ (uint64_t) addr, value);
+ stw_le_phys(addr, value);
+}
+
+static inline uint32_t
+vmw_shmem_ld32(target_phys_addr_t addr)
+{
+ uint32_t res = ldl_le_phys(addr);
+ DSHPRINTF("SHMEM load32: %" PRIx64 " (value 0x%X)",
+ (uint64_t) addr, res);
+ return res;
+}
+
+static inline void
+vmw_shmem_st32(target_phys_addr_t addr, uint32_t value)
+{
+ DSHPRINTF("SHMEM store32: %" PRIx64 " (value 0x%X)",
+ (uint64_t) addr, value);
+ stl_le_phys(addr, value);
+}
+
+static inline uint64_t
+vmw_shmem_ld64(target_phys_addr_t addr)
+{
+ uint64_t res = ldq_le_phys(addr);
+ DSHPRINTF("SHMEM load64: %" PRIx64 " (value %" PRIx64 ")",
+ (uint64_t) addr, res);
+ return res;
+}
+
+static inline void
+vmw_shmem_st64(target_phys_addr_t addr, uint64_t value)
+{
+ DSHPRINTF("SHMEM store64: %" PRIx64 " (value %" PRIx64 ")",
+ (uint64_t) addr, value);
+ stq_le_phys(addr, value);
+}
+
+/* MACROS for simplification of operations on array-style registers */
+#define IS_MULTIREG_ADDR(addr, base, cnt, regsize) \
+ (((addr) >= (base)) && ((addr) < (base) + (cnt) * (regsize)))
+
+#define MULTIREG_IDX_BY_ADDR(addr, base, regsize) \
+ (((addr) - (base)) / (regsize))
+
+/* Bitfields */
+#define FLAG_IS_SET(field, flag) (((field) & (flag)) == (flag))
diff --git a/qemu/hw/vmxnet3.c b/qemu/hw/vmxnet3.c
new file mode 100644
index 0000000..112d3b9
--- /dev/null
+++ b/qemu/hw/vmxnet3.c
@@ -0,0 +1,2559 @@
+/*
+ * QEMU VMWARE VMXNET3 paravirtual NIC
+ *
+ * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Dmitry Fleytman <dmitry@daynix.com>
+ * Yan Vugenfirer <yan@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#define VMXNET3_ENABLE_MSIX
+#define VMXNET3_ENABLE_MSI
+
+/* Define this constant to non-zero to enable IP4 */
+/* fragmentation feature */
+
+/* #define VMXNET_MAX_IP_PLOAD_LEN ETH_MAX_IP_PLOAD_LEN */
+#define VMXNET3_MAX_IP_PLOAD_LEN 0
+
+#include "hw.h"
+#include "pci.h"
+#include "net.h"
+#include "virtio-net.h"
+#include "net/tap.h"
+#include "sysemu.h"
+#include "iov.h"
+#include "bswap.h"
+#ifdef VMXNET3_ENABLE_MSIX
+#include "msix.h"
+#endif
+#ifdef VMXNET3_ENABLE_MSI
+#include "msi.h"
+#endif
+
+#include "vmxnet3_debug.h"
+#include "vmxnet3.h"
+#include "vmware_utils.h"
+#include "vmxnet_utils.h"
+
+#define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
+#define VMXNET3_MSIX_BAR_SIZE 0x2000
+
+#define VMXNET3_BAR0_IDX (0)
+#define VMXNET3_BAR1_IDX (1)
+#define VMXNET3_MSIX_BAR_IDX (2)
+
+/* Link speed in Mbps should be shifted by 16 */
+#define VMXNET3_LINK_SPEED (1000 << 16)
+
+/* Link status: 1 - up, 0 - down. */
+#define VMXNET3_LINK_STATUS_UP 0x1
+
+/* Least significant bit should be set for revision and version */
+#define VMXNET3_DEVICE_VERSION 0x1
+#define VMXNET3_DEVICE_REVISION 0x1
+
+/* Macros for rings descriptors access */
+#define VMXNET3_READ_TX_QUEUE_DESCR8(dpa, field) \
+ (vmw_shmem_ld8(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
+
+#define VMXNET3_WRITE_TX_QUEUE_DESCR8(dpa, field, value) \
+ (vmw_shmem_st8(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field, value)))
+
+#define VMXNET3_READ_TX_QUEUE_DESCR32(dpa, field) \
+ (vmw_shmem_ld32(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
+
+#define VMXNET3_WRITE_TX_QUEUE_DESCR32(dpa, field, value) \
+ (vmw_shmem_st32(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))
+
+#define VMXNET3_READ_TX_QUEUE_DESCR64(dpa, field) \
+ (vmw_shmem_ld64(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
+
+#define VMXNET3_WRITE_TX_QUEUE_DESCR64(dpa, field, value) \
+ (vmw_shmem_st64(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))
+
+#define VMXNET3_READ_RX_QUEUE_DESCR64(dpa, field) \
+ (vmw_shmem_ld64(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))
+
+#define VMXNET3_READ_RX_QUEUE_DESCR32(dpa, field) \
+ (vmw_shmem_ld32(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))
+
+#define VMXNET3_WRITE_RX_QUEUE_DESCR64(dpa, field, value) \
+ (vmw_shmem_st64(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))
+
+#define VMXNET3_WRITE_RX_QUEUE_DESCR8(dpa, field, value) \
+ (vmw_shmem_st8(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))
+
+/* Macros for guest driver shared area access */
+#define VMXNET3_READ_DRV_SHARED64(shpa, field) \
+ (vmw_shmem_ld64(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
+
+#define VMXNET3_READ_DRV_SHARED32(shpa, field) \
+ (vmw_shmem_ld32(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
+
+#define VMXNET3_WRITE_DRV_SHARED32(shpa, field, val) \
+ (vmw_shmem_st32(shpa + offsetof(struct Vmxnet3_DriverShared, field), val))
+
+#define VMXNET3_READ_DRV_SHARED16(shpa, field) \
+ (vmw_shmem_ld16(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
+
+#define VMXNET3_READ_DRV_SHARED8(shpa, field) \
+ (vmw_shmem_ld8(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
+
+#define VMXNET3_READ_DRV_SHARED(shpa, field, b, l) \
+ (vmw_shmem_read(shpa + offsetof(struct Vmxnet3_DriverShared, field), b, l))
+
+/* TX/RX packets abstractions */
+typedef struct Vmxnet3_TxPktMdata {
+ uint32_t offload_mode;
+ uint32_t cso_or_gso_size;
+ uint32_t hdr_length;
+ eth_pkt_types_e packet_type;
+} Vmxnet3_TxPktMdata;
+
+typedef struct _Vmxnet3_RxPktMdata {
+ uint32_t tot_len;
+ uint16_t vlan_tag;
+ bool vlan_stripped;
+ bool vhdr_valid;
+ eth_pkt_types_e packet_type;
+} Vmxnet3_RxPktMdata;
+
+#define VMXNET3_TXPKT_REBUILT_HDR_LEN (1024)
+
+typedef struct _Vmxnet3_TxPkt {
+ Vmxnet3_TxPktMdata mdata;
+ struct virtio_net_hdr virt_hdr;
+ bool has_virt_hdr;
+
+ struct iovec *vec;
+
+ uint8_t __l2_hdr[ETH_MAX_L2_HDR_LEN];
+ uint8_t __l3_hdr[ETH_MAX_L3_HDR_LEN];
+
+ uint32_t payload_len;
+ uint32_t max_payload_len;
+
+ uint32_t payload_frags;
+ uint32_t max_payload_frags;
+
+ struct {
+ uint32_t offset;
+ bool more_frags;
+ bool orig_more_frags;
+ } fragmentation;
+} Vmxnet3_TxPkt;
+
+#define VMXNET3_TXPKT_VHDR_FRAG (0)
+#define VMXNET3_TXPKT_L2HDR_FRAG (1)
+#define VMXNET3_TXPKT_L3HDR_FRAG (2)
+#define VMXNET3_TXPKT_PL_START_FRAG (3)
+
+#define vmxnet3_txpkt_get_mdata(p) (&((p)->mdata))
+#define vmxnet3_txpkt_get_vhdr(p) (&((p)->virt_hdr))
+
+#define vmxnet3_txpkt_get_l2hdr(p) \
+ ((p)->vec[VMXNET3_TXPKT_L2HDR_FRAG].iov_base)
+#define vmxnet3_txpkt_get_l2hdr_len(p) \
+ ((p)->vec[VMXNET3_TXPKT_L2HDR_FRAG].iov_len)
+#define vmxnet3_txpkt_set_l2hdr_len(p, l) \
+ ((p)->vec[VMXNET3_TXPKT_L2HDR_FRAG].iov_len = l)
+#define vmxnet3_txpkt_get_l3hdr(p) \
+ ((p)->vec[VMXNET3_TXPKT_L3HDR_FRAG].iov_base)
+#define vmxnet3_txpkt_get_l3hdr_len(p) \
+ ((p)->vec[VMXNET3_TXPKT_L3HDR_FRAG].iov_len)
+#define vmxnet3_txpkt_set_l3hdr_len(p, l) \
+ ((p)->vec[VMXNET3_TXPKT_L3HDR_FRAG].iov_len = l)
+#define vmxnet3_txpkt_get_payload_len(p) \
+ ((p)->payload_len)
+
+#define vmxnet3_txpkt_set_more_frags(p, mf) \
+ ((p)->fragmentation.more_frags = mf)
+#define vmxnet3_txpkt_get_more_frags(p) \
+ ((p)->fragmentation.more_frags | \
+ (p)->fragmentation.orig_more_frags)
+#define vmxnet3_txpkt_set_frag_off(p, off) \
+ ((p)->fragmentation.offset = off)
+#define vmxnet3_txpkt_get_frag_off(p) \
+ ((p)->fragmentation.offset)
+#define vmxnet3_txpkt_advance_frag_off(p, off) \
+ ((p)->fragmentation.offset += off)
+
+static inline size_t
+vmxnet3_txpkt_get_total_len(const Vmxnet3_TxPkt *p)
+{
+ return vmxnet3_txpkt_get_l2hdr_len(p) +
+ vmxnet3_txpkt_get_l3hdr_len(p) +
+ vmxnet3_txpkt_get_payload_len(p);
+}
+
+static inline struct iovec*
+vmxnet3_txpkt_get_payload_frag(Vmxnet3_TxPkt *p, uint32_t num)
+{
+ assert(num < p->max_payload_frags);
+ return &p->vec[num + VMXNET3_TXPKT_PL_START_FRAG];
+}
+
+static inline void
+vmxnet3_txpkt_set_num_pl_frags(Vmxnet3_TxPkt *p, uint32_t num)
+{
+ assert(num <= p->max_payload_frags);
+ p->payload_frags = num;
+}
+
+static inline void
+vmxnet3_txpkt_reset_payload(Vmxnet3_TxPkt *p)
+{
+ p->payload_len = 0;
+}
+
+static void vmxnet3_txpkt_reset(Vmxnet3_TxPkt *p)
+{
+ memset(&p->mdata, 0, sizeof(p->mdata));
+ vmxnet3_txpkt_set_num_pl_frags(p, 0);
+ vmxnet3_txpkt_reset_payload(p);
+ vmxnet3_txpkt_set_more_frags(p, 0);
+ vmxnet3_txpkt_set_frag_off(p, 0);
+ p->max_payload_len = 0;
+
+ if (NULL != p->vec) {
+ p->vec[VMXNET3_TXPKT_L2HDR_FRAG].iov_len = 0;
+ p->vec[VMXNET3_TXPKT_L3HDR_FRAG].iov_len = 0;
+ }
+}
+
+static bool
+vmxnet3_txpkt_prealloc(Vmxnet3_TxPkt *p, uint32_t max_frags, bool has_virt_hdr)
+{
+ if (NULL != p->vec) {
+ g_free(p->vec);
+ }
+
+ p->vec =
+ g_malloc(sizeof(*p->vec) * (max_frags + VMXNET3_TXPKT_PL_START_FRAG));
+ if (NULL == p->vec) {
+ return false;
+ }
+
+ p->max_payload_frags = max_frags;
+ p->has_virt_hdr = has_virt_hdr;
+ p->vec[VMXNET3_TXPKT_VHDR_FRAG].iov_base = &p->virt_hdr;
+ p->vec[VMXNET3_TXPKT_VHDR_FRAG].iov_len =
+ p->has_virt_hdr ? sizeof(p->virt_hdr) : 0;
+ p->vec[VMXNET3_TXPKT_L2HDR_FRAG].iov_base = &p->__l2_hdr;
+ p->vec[VMXNET3_TXPKT_L3HDR_FRAG].iov_base = &p->__l3_hdr;
+ vmxnet3_txpkt_reset(p);
+ return true;
+}
+
+static void vmxnet3_txpkt_init(Vmxnet3_TxPkt *p)
+{
+ p->vec = NULL;
+}
+
+static void vmxnet3_txpkt_cleanup(Vmxnet3_TxPkt *p)
+{
+ g_free(p->vec);
+}
+
+static void vmxnet3_txpkt_unmap(Vmxnet3_TxPkt *p, bool is_write)
+{
+ int i;
+
+ for (i = VMXNET3_TXPKT_PL_START_FRAG;
+ i < p->payload_frags + VMXNET3_TXPKT_PL_START_FRAG; i++) {
+ cpu_physical_memory_unmap(p->vec[i].iov_base, p->vec[i].iov_len,
+ is_write, p->vec[i].iov_len);
+ }
+}
+
+static void*
+vmxnet3_txpkt_map(Vmxnet3_TxPkt *p, uint32_t *mapped_fragments, bool is_write)
+{
+ int i;
+
+ for (i = VMXNET3_TXPKT_PL_START_FRAG;
+ i < p->payload_frags + VMXNET3_TXPKT_PL_START_FRAG; i++) {
+ target_phys_addr_t mapped_len = p->vec[i].iov_len;
+ size_t orig_len = p->vec[i].iov_len;
+ p->vec[i].iov_base =
+ cpu_physical_memory_map((uint64_t) p->vec[i].iov_base,
+ &mapped_len, is_write);
+ p->vec[i].iov_len = mapped_len;
+
+ if ((NULL == p->vec[i].iov_base) || (orig_len != mapped_len)) {
+ p->payload_frags = i + !!p->vec[i].iov_base;
+ vmxnet3_txpkt_unmap(p, is_write);
+ return NULL;
+ }
+ }
+
+ *mapped_fragments = VMXNET3_TXPKT_PL_START_FRAG + p->payload_frags;
+ return p->vec;
+}
+
+static inline void
+vmxnet3_txpkt_dump(Vmxnet3_TxPkt *p)
+{
+#ifdef DEBUG_VMXNET3_PACKETS
+ Vmxnet3_TxPktMdata *m = vmxnet3_txpkt_get_mdata(p);
+#endif
+
+ DPKPRINTF("TXPKT MDATA: om: %d, cso/gso_size: %d, hdr_len: %d, "
+ "pkt_type: 0x%X, l2hdr_len: %lu l3hdr_len: %lu, payload_len: %u",
+ m->offload_mode, m->cso_or_gso_size,
+ m->hdr_length, m->packet_type,
+ vmxnet3_txpkt_get_l2hdr_len(p),
+ vmxnet3_txpkt_get_l3hdr_len(p),
+ vmxnet3_txpkt_get_payload_len(p));
+};
+
+/* RX packet may contain up to 2 fragments - rebuilt eth header */
+/* in case of VLAN tag stripping */
+/* and payload received from QEMU - in any case */
+#define VMXNET3_MAX_RX_PACKET_FRAGMENTS (2)
+
+typedef struct _Vmxnet3_RxPkt {
+ Vmxnet3_RxPktMdata mdata;
+ struct virtio_net_hdr virt_hdr;
+ struct eth_header eth_hdr;
+ struct iovec vec[VMXNET3_MAX_RX_PACKET_FRAGMENTS];
+ uint16 vec_len;
+} Vmxnet3_RxPkt;
+
+#define vmxnet3_rxpkt_get_mdata(p) (&((p)->mdata))
+#define vmxnet3_rxpkt_get_ehdr(p) (&((p)->eth_hdr))
+#define vmxnet3_rxpkt_get_vhdr(p) (&((p)->virt_hdr))
+#define vmxnet3_rxpkt_get_frag(p, n) (&((p)->vec[(n)]))
+#define vmxnet3_rxpkt_set_num_frags(p, n) ((p)->vec_len = (n))
+#define vmxnet3_rxpkt_get_num_frags(p) ((p)->vec_len)
+
+static inline void vmxnet3_rxpkt_attach_ehdr(Vmxnet3_RxPkt *p)
+{
+ vmxnet3_rxpkt_get_frag(p, 0)->iov_base = &p->eth_hdr;
+ vmxnet3_rxpkt_get_frag(p, 0)->iov_len = sizeof(p->eth_hdr);
+}
+
+static inline void vmxnet3_rxpkt_reset(Vmxnet3_RxPkt *p)
+{
+ memset(&p->mdata, 0, sizeof(p->mdata));
+ memset(&p->virt_hdr, 0, sizeof(p->virt_hdr));
+ vmxnet3_rxpkt_set_num_frags(p, 0);
+}
+
+static void vmxnet3_rxpkt_init(Vmxnet3_RxPkt *p)
+{
+ vmxnet3_rxpkt_reset(p);
+}
+
+static inline void
+vmxnet3_rxpkt_dump(Vmxnet3_RxPkt *p)
+{
+#ifdef DEBUG_VMXNET3_PACKETS
+ Vmxnet3_RxPktMdata *m = vmxnet3_rxpkt_get_mdata(p);
+#endif
+
+ DPKPRINTF("RXPKT MDATA: tot_len: %d, pkt_type: 0x%X, "
+ "vlan_stripped: %d, vlan_tag: %d, vhdr_valid: %d",
+ m->tot_len, m->packet_type,
+ m->vlan_stripped, m->vlan_tag, m->vhdr_valid);
+};
+
+/* Cyclic ring abstraction */
+typedef struct _Vmxnet3_Ring {
+ target_phys_addr_t pa;
+ size_t size;
+ size_t cell_size;
+ size_t next;
+ uint8_t gen;
+} Vmxnet3_Ring;
+
+static inline void vmxnet3_ring_init(Vmxnet3_Ring *ring,
+ target_phys_addr_t pa,
+ size_t size,
+ size_t cell_size,
+ bool zero_region)
+{
+ ring->pa = pa;
+ ring->size = size;
+ ring->cell_size = cell_size;
+ ring->gen = VMXNET3_INIT_GEN;
+ ring->next = 0;
+
+ if (zero_region) {
+ vmw_shmem_set(pa, 0, size*cell_size);
+ }
+}
+
+#define vmxnet3_ring_dump(macro, ring_name, ridx, r) \
+ macro("%s#%d: base %" PRIx64 " size %lu cell_size %lu gen %d next %lu", \
+ (ring_name), (ridx), (uint64_t) (r)->pa, \
+ (r)->size, (r)->cell_size, (r)->gen, (r)->next)
+
+static inline void vmxnet3_ring_inc(Vmxnet3_Ring *ring)
+{
+ if (++ring->next >= ring->size) {
+ ring->next = 0;
+ ring->gen ^= 1;
+ }
+}
+
+static inline void vmxnet3_ring_dec(Vmxnet3_Ring *ring)
+{
+ if (0 == ring->next--) {
+ ring->next = ring->size - 1;
+ ring->gen ^= 1;
+ }
+}
+
+static inline target_phys_addr_t vmxnet3_ring_curr_cell_pa(Vmxnet3_Ring *ring)
+{
+ return ring->pa + ring->next * ring->cell_size;
+}
+
+static inline void vmxnet3_ring_read_curr_cell(Vmxnet3_Ring *ring, void *buff)
+{
+ vmw_shmem_read(vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
+}
+
+static inline void vmxnet3_ring_write_curr_cell(Vmxnet3_Ring *ring, void *buff)
+{
+ vmw_shmem_write(vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
+}
+
+static inline size_t vmxnet3_ring_curr_cell_idx(Vmxnet3_Ring *ring)
+{
+ return ring->next;
+}
+
+static inline uint8_t vmxnet3_ring_curr_gen(Vmxnet3_Ring *ring)
+{
+ return ring->gen;
+}
+
+/* Debug trace-related functions */
+static inline void
+vmxnet3_dump_tx_descr(struct Vmxnet3_TxDesc *descr)
+{
+ DPKPRINTF("TX DESCR: "
+ "addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
+ "dtype: %d, ext1: %d, msscof: %d, hlen: %d, om: %d, "
+ "eop: %d, cq: %d, ext2: %d, ti: %d, tci: %d",
+ le64_to_cpu(descr->addr), descr->len, descr->gen, descr->rsvd,
+ descr->dtype, descr->ext1, descr->msscof, descr->hlen, descr->om,
+ descr->eop, descr->cq, descr->ext2, descr->ti, descr->tci);
+}
+
+static inline void
+vmxnet3_dump_virt_hdr(struct virtio_net_hdr *vhdr)
+{
+ DPKPRINTF("VHDR: flags 0x%x, gso_type: 0x%x, hdr_len: %d, gso_size: %d, "
+ "csum_start: %d, csum_offset: %d",
+ vhdr->flags, vhdr->gso_type, vhdr->hdr_len, vhdr->gso_size,
+ vhdr->csum_start, vhdr->csum_offset);
+}
+
+static inline void
+vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr)
+{
+ DPKPRINTF("RX DESCR: addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
+ "dtype: %d, ext1: %d, btype: %d",
+ le64_to_cpu(descr->addr), descr->len, descr->gen,
+ descr->rsvd, descr->dtype, descr->ext1, descr->btype);
+}
+
+/* Device state and helper functions */
+#define VMXNET3_RX_RINGS_PER_QUEUE (2)
+
+typedef struct _VMXNET3_State {
+ PCIDevice dev;
+ NICState *nic;
+ NICConf conf;
+ MemoryRegion bar0;
+ MemoryRegion bar1;
+ MemoryRegion msix_bar;
+
+#ifdef VMXNET3_ENABLE_MSIX
+ /* Whether MSI-X support was installed successfully */
+ uint8_t msix_used;
+#endif
+#ifdef VMXNET3_ENABLE_MSI
+ /* Whether MSI support was installed successfully */
+ uint8_t msi_used;
+#endif
+
+ target_phys_addr_t drv_shmem;
+ target_phys_addr_t temp_shared_guest_driver_memory;
+
+ uint8_t txq_num;
+ struct {
+ Vmxnet3_Ring tx_ring;
+ Vmxnet3_Ring comp_ring;
+
+ uint8_t intr_idx;
+ target_phys_addr_t tx_stats_pa;
+ struct UPT1_TxStats txq_stats;
+ } txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES];
+
+ /* This boolean tells whether RX packet being indicated has to */
+ /* be split into head and body chunks from different RX rings */
+ bool rx_packets_compound;
+
+ bool rx_vlan_stripping;
+ bool lro_supported;
+
+ uint8_t rxq_num;
+ struct {
+ Vmxnet3_Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE];
+ Vmxnet3_Ring comp_ring;
+ uint8_t intr_idx;
+ target_phys_addr_t rx_stats_pa;
+ struct UPT1_RxStats rxq_stats;
+ } rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES];
+
+ /* Network MTU */
+ uint32_t mtu;
+
+ /* Maximum number of fragments for indicated TX packets */
+ uint32_t max_tx_frags;
+
+ /* Maximum number of fragments for indicated RX packets */
+ uint16_t max_rx_frags;
+
+ /* Index for events interrupt */
+ uint8_t event_int_idx;
+
+ /* Whether automatic interrupts masking enabled */
+ uint8_t auto_int_masking;
+
+ bool peer_has_vhdr;
+
+ /* TX packets to QEMU interface */
+ Vmxnet3_TxPkt curr_txpkt;
+ size_t curr_txpkt_pl_frags;
+ bool curr_txpkt_skip;
+ bool curr_txpkt_header_processed;
+
+ uint32_t device_active;
+ uint32_t last_command;
+
+ uint32_t link_status_and_speed;
+
+ struct {
+ bool is_masked;
+ bool is_pending;
+ bool is_asserted;
+ } interrupt_states[VMXNET3_MAX_INTRS];
+
+ uint32_t temp_mac; /* To store the low part first */
+
+ MACAddr perm_mac;
+ uint32_t vlan_table[VMXNET3_VFT_SIZE];
+ uint32_t rx_mode;
+ MACAddr *mcast_list;
+ uint16_t mcast_list_len;
+} VMXNET3_State;
+
+/* Interrupt management */
+
+/* This function returns sign whether interrupt line is in asserted state */
+/* This depends on the type of interrupt used. For INTX interrupt line will */
+/* be asserted until explicit deassertion, for MSI(X) interrupt line will */
+/* be deasserted automatically due to notifiction symantics of the MSI(X) */
+/* interrupts */
+static bool _vmxnet3_assert_interrupt_line(VMXNET3_State *s, uint32_t int_idx)
+{
+#ifdef VMXNET3_ENABLE_MSIX
+ if (s->msix_used && msix_enabled(&s->dev)) {
+ DIRPRINTF("Sending MSI-X notification for vector %u", int_idx);
+ msix_notify(&s->dev, int_idx);
+ return false;
+ }
+#endif
+#ifdef VMXNET3_ENABLE_MSI
+ if (s->msi_used && msi_enabled(&s->dev)) {
+ DIRPRINTF("Sending MSI notification for vector %u", int_idx);
+ msi_notify(&s->dev, int_idx);
+ return false;
+ }
+#endif
+
+ DIRPRINTF("Asserting line for interrupt %u", int_idx);
+ qemu_set_irq(s->dev.irq[int_idx], 1);
+ return true;
+}
+
+static void _vmxnet3_deassert_interrupt_line(VMXNET3_State *s, int lidx)
+{
+#ifdef VMXNET3_ENABLE_MSIX
+ /* This function should never be called for MSI(X) interrupts */
+ /* because deassertion never required for message interrupts */
+ assert(!s->msix_used || !msix_enabled(&s->dev));
+#endif
+#ifdef VMXNET3_ENABLE_MSI
+ /* This function should never be called for MSI(X) interrupts */
+ /* because deassertion never required for message interrupts */
+ assert(!s->msi_used || !msi_enabled(&s->dev));
+#endif
+
+ DIRPRINTF("Deasserting line for interrupt %u", lidx);
+ qemu_set_irq(s->dev.irq[lidx], 0);
+}
+
+static void vmxnet3_update_interrupt_line_state(VMXNET3_State *s, int lidx)
+{
+ if (!s->interrupt_states[lidx].is_pending &&
+ s->interrupt_states[lidx].is_asserted) {
+ DIRPRINTF("New interrupt line state for index %d is DOWN", lidx);
+ _vmxnet3_deassert_interrupt_line(s, lidx);
+ s->interrupt_states[lidx].is_asserted = false;
+ return;
+ }
+
+ if (s->interrupt_states[lidx].is_pending &&
+ !s->interrupt_states[lidx].is_masked &&
+ !s->interrupt_states[lidx].is_asserted) {
+ DIRPRINTF("New interrupt line state for index %d is UP", lidx);
+ s->interrupt_states[lidx].is_asserted =
+ _vmxnet3_assert_interrupt_line(s, lidx);
+ s->interrupt_states[lidx].is_pending = false;
+ return;
+ }
+}
+
+static void vmxnet3_trigger_interrupt(VMXNET3_State *s, int lidx)
+{
+ s->interrupt_states[lidx].is_pending = true;
+ vmxnet3_update_interrupt_line_state(s, lidx);
+
+#ifdef VMXNET3_ENABLE_MSIX
+ if (s->msix_used && msix_enabled(&s->dev) && s->auto_int_masking) {
+ goto do_automask;
+ }
+#endif
+
+#ifdef VMXNET3_ENABLE_MSI
+ if (s->msi_used && msi_enabled(&s->dev) && s->auto_int_masking) {
+ goto do_automask;
+ }
+#endif
+
+ return;
+
+do_automask:
+ s->interrupt_states[lidx].is_masked = true;
+ vmxnet3_update_interrupt_line_state(s, lidx);
+}
+
+static bool vmxnet3_interrupt_asserted(VMXNET3_State *s, int lidx)
+{
+ return s->interrupt_states[lidx].is_asserted;
+}
+
+static void vmxnet3_clear_interrupt(VMXNET3_State *s, int int_idx)
+{
+ s->interrupt_states[int_idx].is_pending = false;
+ if (s->auto_int_masking) {
+ s->interrupt_states[int_idx].is_masked = true;
+ }
+ vmxnet3_update_interrupt_line_state(s, int_idx);
+}
+
+static void
+vmxnet3_on_interrupt_mask_changed(VMXNET3_State *s, int lidx, bool is_masked)
+{
+ s->interrupt_states[lidx].is_masked = is_masked;
+ vmxnet3_update_interrupt_line_state(s, lidx);
+}
+
+static bool vmxnet3_verify_driver_magic(target_phys_addr_t dshmem)
+{
+ return (VMXNET3_REV1_MAGIC == VMXNET3_READ_DRV_SHARED32(dshmem, magic));
+}
+
+#define _GET_BYTE(x, byte_num) (((x) >> (byte_num)*8) & 0xFF)
+#define _MAKE_BYTE(byte_num, val) (((uint32_t)((val) & 0xFF)) << (byte_num)*8)
+
+static void vmxnet3_set_variable_mac(VMXNET3_State *s, uint32_t h, uint32_t l)
+{
+ s->conf.macaddr.a[0] = _GET_BYTE(l, 0);
+ s->conf.macaddr.a[1] = _GET_BYTE(l, 1);
+ s->conf.macaddr.a[2] = _GET_BYTE(l, 2);
+ s->conf.macaddr.a[3] = _GET_BYTE(l, 3);
+ s->conf.macaddr.a[4] = _GET_BYTE(h, 0);
+ s->conf.macaddr.a[5] = _GET_BYTE(h, 1);
+
+ DCFPRINTF("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
+
+ qemu_format_nic_info_str(&s->nic->nc, s->conf.macaddr.a);
+}
+
+static uint64_t vmxnet3_get_mac_low(MACAddr *addr)
+{
+ return _MAKE_BYTE(0, addr->a[0]) |
+ _MAKE_BYTE(1, addr->a[1]) |
+ _MAKE_BYTE(2, addr->a[2]) |
+ _MAKE_BYTE(3, addr->a[3]);
+}
+
+static uint64_t vmxnet3_get_mac_high(MACAddr *addr)
+{
+ return _MAKE_BYTE(0, addr->a[4]) |
+ _MAKE_BYTE(1, addr->a[5]);
+}
+
+static void
+vmxnet3_inc_tx_consumption_counter(VMXNET3_State *s, int qidx)
+{
+ vmxnet3_ring_inc(&s->txq_descr[qidx].tx_ring);
+}
+
+static inline void
+vmxnet3_inc_rx_consumption_counter(VMXNET3_State *s, int qidx, int ridx)
+{
+ vmxnet3_ring_inc(&s->rxq_descr[qidx].rx_ring[ridx]);
+}
+
+static inline void
+vmxnet3_inc_tx_completion_counter(VMXNET3_State *s, int qidx)
+{
+ vmxnet3_ring_inc(&s->txq_descr[qidx].comp_ring);
+}
+
+static void
+vmxnet3_inc_rx_completion_counter(VMXNET3_State *s, int qidx)
+{
+ vmxnet3_ring_inc(&s->rxq_descr[qidx].comp_ring);
+}
+
+static void
+vmxnet3_dec_rx_completion_counter(VMXNET3_State *s, int qidx)
+{
+ vmxnet3_ring_dec(&s->rxq_descr[qidx].comp_ring);
+}
+
+static inline void vmxnet3_flush_shmem_changes(void)
+{
+ /* Flush shared memory changes */
+ /* Needed before transferring comntrol to guest */
+ smp_wmb();
+}
+
+static void vmxnet3_complete_packet(VMXNET3_State *s, int qidx, uint32 tx_ridx)
+{
+ struct Vmxnet3_TxCompDesc txcq_descr;
+
+ vmxnet3_ring_dump(DRIPRINTF, "TXC", qidx, &s->txq_descr[qidx].comp_ring);
+
+ txcq_descr.txdIdx = tx_ridx;
+ txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring);
+
+ vmxnet3_ring_write_curr_cell(&s->txq_descr[qidx].comp_ring, &txcq_descr);
+ vmxnet3_inc_tx_completion_counter(s, qidx);
+
+ vmxnet3_flush_shmem_changes();
+ vmxnet3_trigger_interrupt(s, s->txq_descr[qidx].intr_idx);
+}
+
+static bool
+vmxnet3_setup_tx_offloads(Vmxnet3_TxPkt *pkt)
+{
+ Vmxnet3_TxPktMdata *mdata = vmxnet3_txpkt_get_mdata(pkt);
+ struct virtio_net_hdr *vhdr = vmxnet3_txpkt_get_vhdr(pkt);
+ bool res = true;
+
+ vhdr->hdr_len = mdata->hdr_length;
+
+ switch (mdata->offload_mode) {
+ case VMXNET3_OM_NONE:
+ vhdr->flags = 0;
+ vhdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+ vhdr->gso_size = 0;
+ break;
+
+ case VMXNET3_OM_CSUM: {
+ DPKPRINTF("L4 CSO requested data_offset: %d, csoff: %d",
+ mdata->hdr_length, mdata->cso_or_gso_size);
+ vhdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vhdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+ vhdr->gso_size = 0;
+ vhdr->csum_start = mdata->hdr_length;
+ vhdr->csum_offset = mdata->cso_or_gso_size - mdata->hdr_length;
+ }
+ break;
+
+ case VMXNET3_OM_TSO: {
+ uint8_t *l2hdr = vmxnet3_txpkt_get_l2hdr(pkt);
+ size_t l2hdr_len = vmxnet3_txpkt_get_l2hdr_len(pkt);
+ uint8_t *l3hdr = vmxnet3_txpkt_get_l3hdr(pkt);
+ size_t l3hdr_len = vmxnet3_txpkt_get_l3hdr_len(pkt);
+ uint16_t l3_proto = eth_get_l3_proto(l2hdr, l2hdr_len);
+ size_t payload_len = vmxnet3_txpkt_get_payload_len(pkt);
+
+ vhdr->gso_type = eth_get_gso_type(l3_proto, l3hdr);
+ vhdr->gso_size = mdata->cso_or_gso_size;
+ res = eth_setup_tx_offloads(l3hdr, l3hdr_len, l2hdr_len,
+ payload_len, vhdr,
+ vmxnet3_txpkt_get_more_frags(pkt),
+ vmxnet3_txpkt_get_frag_off(pkt));
+ DPKPRINTF("GSO offload type %d requested.", vhdr->gso_type);
+ }
+ break;
+
+ default:
+ assert(false);
+ }
+
+ return res;
+}
+
+static size_t
+vmxnet3_txpkt_adopt_headers(Vmxnet3_TxPkt *pkt,
+ size_t data_len,
+ target_phys_addr_t data_pa,
+ bool needs_tso)
+{
+ /* Copy L2 header */
+ uint8_t *l2hdr = vmxnet3_txpkt_get_l2hdr(pkt);
+ uint8_t *l3hdr = vmxnet3_txpkt_get_l3hdr(pkt);
+ size_t l2hdr_len = 0;
+ size_t l3hdr_len = 0;
+
+ assert(data_len >= ETH_MAX_L2_HDR_LEN);
+ cpu_physical_memory_read(data_pa, l2hdr, ETH_MAX_L2_HDR_LEN);
+ l2hdr_len = eth_get_l2_hdr_length(l2hdr);
+ vmxnet3_txpkt_set_l2hdr_len(pkt, l2hdr_len);
+
+ /* If packet requires offload - copy L3 header */
+ if (needs_tso) {
+ switch (eth_get_l3_proto(l2hdr, l2hdr_len)) {
+ case ETH_P_IP: {
+ target_phys_addr_t ip_opt_pa;
+ struct ip_header *iphdr = (struct ip_header *) l3hdr;
+ assert(data_len >= l2hdr_len + sizeof(struct ip_header));
+ cpu_physical_memory_read(data_pa + l2hdr_len,
+ l3hdr, sizeof(struct ip_header));
+ l3hdr_len = IP_HDR_GET_LEN(l3hdr);
+ ip_opt_pa = data_pa + l2hdr_len + sizeof(struct ip_header);
+ cpu_physical_memory_read(ip_opt_pa,
+ l3hdr + sizeof(struct ip_header),
+ l3hdr_len - sizeof(struct ip_header));
+ pkt->max_payload_len =
+ IP_FRAG_ALIGN_SIZE(VMXNET3_MAX_IP_PLOAD_LEN);
+ pkt->fragmentation.orig_more_frags =
+ FLAG_IS_SET(be16_to_cpu(iphdr->ip_off), IP_MF);
+ }
+ break;
+
+ case ETH_P_IPV6: {
+ target_phys_addr_t l3hdr_pa = data_pa + l2hdr_len;
+ l3hdr_len = sizeof(struct ip6_header);
+ assert(data_len >= l2hdr_len + l3hdr_len);
+ cpu_physical_memory_read(l3hdr_pa, l3hdr, l3hdr_len);
+ pkt->max_payload_len = 0;
+ }
+ break;
+
+ default: {
+ l3hdr_len = 0;
+ pkt->max_payload_len = 0;
+ }
+ break;
+ }
+ }
+
+ vmxnet3_txpkt_set_l3hdr_len(pkt, l3hdr_len);
+
+ /* Return amount of data adopted */
+ return l2hdr_len + l3hdr_len;
+}
+
+static void
+vmxnet3_tx_retrieve_metadata(Vmxnet3_TxPkt *pkt,
+ const struct Vmxnet3_TxDesc *txd)
+{
+ struct eth_header *ehdr =
+ (struct eth_header *) vmxnet3_txpkt_get_l2hdr(pkt);
+
+ Vmxnet3_TxPktMdata *mdata = vmxnet3_txpkt_get_mdata(pkt);
+ mdata->offload_mode = txd->om;
+ mdata->cso_or_gso_size = txd->msscof;
+ mdata->hdr_length = txd->hlen;
+ mdata->packet_type = get_eth_packet_type(ehdr);
+
+ if (txd->ti) {
+ eth_setup_vlan_headers(ehdr, txd->tci);
+ }
+}
+
+static size_t
+vmxnet3_txpkt_adopt_data_fragment(Vmxnet3_TxPkt *pkt,
+ target_phys_addr_t data_pa,
+ size_t data_len,
+ uint32_t fragment_num)
+{
+ struct iovec *v = vmxnet3_txpkt_get_payload_frag(pkt, fragment_num);
+ size_t bytes_to_adopt = data_len;
+
+ if ((0 != pkt->max_payload_len) &&
+ (pkt->payload_len + data_len > pkt->max_payload_len)) {
+ bytes_to_adopt = pkt->max_payload_len - pkt->payload_len;
+ }
+
+ v->iov_base = (void *) (uint64_t) data_pa;
+ v->iov_len = bytes_to_adopt;
+ pkt->payload_len += bytes_to_adopt;
+ return bytes_to_adopt;
+}
+
+typedef enum {
+ VMXNET3_SUCCEEDED = 0xBEEFBEEF,
+ VMXNET3_OUT_OF_BUF,
+ VMXNET3_PKT_ERROR
+} Vmxnet3_PktStatus;
+
+static void
+vmxnet3_on_tx_done_update_stats(VMXNET3_State *s,
+ Vmxnet3_TxPkt *pkt,
+ int qidx,
+ Vmxnet3_PktStatus status)
+{
+ Vmxnet3_TxPktMdata *mdata = vmxnet3_txpkt_get_mdata(&s->curr_txpkt);
+ size_t tot_len = vmxnet3_txpkt_get_total_len(pkt);
+ struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats;
+
+ switch (status) {
+ case VMXNET3_SUCCEEDED: {
+ switch (mdata->packet_type) {
+ case VMXNET3_PKT_BCAST:
+ stats->bcastPktsTxOK++;
+ stats->bcastBytesTxOK += tot_len;
+ break;
+ case VMXNET3_PKT_MCAST:
+ stats->mcastPktsTxOK++;
+ stats->mcastBytesTxOK += tot_len;
+ break;
+ case VMXNET3_PKT_UCAST:
+ stats->ucastPktsTxOK++;
+ stats->ucastBytesTxOK += tot_len;
+ break;
+ default:
+ assert(false);
+ }
+
+ if (VMXNET3_OM_TSO == mdata->offload_mode) {
+ /* According to VMWARE headers this statistic is a number */
+ /* of packets after segmentation but since we don't have */
+ /* this information in QEMU model, the best we can do is to */
+ /* provide number of non-segmented packets */
+ stats->TSOPktsTxOK++;
+ stats->TSOBytesTxOK += tot_len;
+ }
+ }
+ break;
+
+ case VMXNET3_PKT_ERROR: {
+ stats->pktsTxDiscard++;
+ }
+ break;
+
+ case VMXNET3_OUT_OF_BUF: {
+ stats->pktsTxError++;
+ }
+ break;
+
+ default:
+ assert(false);
+ }
+}
+
+static void
+vmxnet3_on_rx_done_update_stats(VMXNET3_State *s,
+ Vmxnet3_RxPkt *pkt,
+ int qidx,
+ Vmxnet3_PktStatus status)
+{
+ Vmxnet3_RxPktMdata *mdata = vmxnet3_rxpkt_get_mdata(pkt);
+ struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats;
+
+ switch (status) {
+ case VMXNET3_OUT_OF_BUF:
+ stats->pktsRxOutOfBuf++;
+ break;
+
+ case VMXNET3_PKT_ERROR:
+ stats->pktsRxError++;
+ break;
+
+ case VMXNET3_SUCCEEDED:
+ switch (mdata->packet_type) {
+ case VMXNET3_PKT_BCAST:
+ stats->bcastPktsRxOK++;
+ stats->bcastBytesRxOK += mdata->tot_len;
+ break;
+ case VMXNET3_PKT_MCAST:
+ stats->mcastPktsRxOK++;
+ stats->mcastBytesRxOK += mdata->tot_len;
+ break;
+ case VMXNET3_PKT_UCAST:
+ stats->ucastPktsRxOK++;
+ stats->ucastBytesRxOK += mdata->tot_len;
+ break;
+ default:
+ assert(false);
+ }
+
+ if (mdata->tot_len > s->mtu) {
+ stats->LROPktsRxOK++;
+ stats->LROBytesRxOK += mdata->tot_len;
+ }
+ break;
+
+ default:
+ assert(false);
+ }
+}
+
+static inline bool
+vmxnet3_pop_next_tx_descr(VMXNET3_State *s,
+ int qidx,
+ struct Vmxnet3_TxDesc *txd,
+ uint32_t *descr_idx)
+{
+ Vmxnet3_Ring *ring = &s->txq_descr[qidx].tx_ring;
+
+ vmxnet3_ring_read_curr_cell(ring, txd);
+ if (txd->gen == vmxnet3_ring_curr_gen(ring)) {
+ vmxnet3_ring_dump(DRIPRINTF, "TX", qidx, ring);
+ *descr_idx = vmxnet3_ring_curr_cell_idx(ring);
+ vmxnet3_inc_tx_consumption_counter(s, qidx);
+ return true;
+ }
+
+ return false;
+}
+
+static bool
+vmxnet3_send_packet(VMXNET3_State *s, Vmxnet3_TxPkt *pkt, uint32_t qidx)
+{
+ uint32_t mapped_fragments;
+ void *mapped = vmxnet3_txpkt_map(&s->curr_txpkt, &mapped_fragments, false);
+ if (NULL != mapped) {
+ bool res;
+ if (vmxnet3_setup_tx_offloads(pkt)) {
+ vmxnet3_dump_virt_hdr(vmxnet3_txpkt_get_vhdr(pkt));
+ vmxnet3_txpkt_dump(pkt);
+ qemu_sendv_packet(&s->nic->nc, mapped, mapped_fragments);
+ vmxnet3_on_tx_done_update_stats(s, pkt, qidx, VMXNET3_SUCCEEDED);
+ res = true;
+ } else {
+ vmxnet3_on_tx_done_update_stats(s, pkt, qidx, VMXNET3_PKT_ERROR);
+ res = false;
+ }
+ vmxnet3_txpkt_unmap(pkt, false);
+ return res;
+ } else {
+ vmxnet3_on_tx_done_update_stats(s, &s->curr_txpkt,
+ qidx, VMXNET3_OUT_OF_BUF);
+ return false;
+ }
+}
+
+static void vmxnet3_process_tx_queue(VMXNET3_State *s, int qidx)
+{
+ struct Vmxnet3_TxDesc txd;
+ uint32_t txd_idx;
+ uint32_t data_len;
+ target_phys_addr_t data_pa;
+ size_t bytes_adopted;
+
+ for (;;) {
+ if (!vmxnet3_pop_next_tx_descr(s, qidx, &txd, &txd_idx)) {
+ break;
+ }
+
+ vmxnet3_dump_tx_descr(&txd);
+
+ if (!s->curr_txpkt_skip) {
+ data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE;
+ data_pa = le64_to_cpu(txd.addr);
+
+ if (!s->curr_txpkt_header_processed) {
+ bool needs_tso = (VMXNET3_OM_TSO == txd.om);
+ bytes_adopted = vmxnet3_txpkt_adopt_headers(&s->curr_txpkt,
+ data_len,
+ data_pa,
+ needs_tso);
+ vmxnet3_tx_retrieve_metadata(&s->curr_txpkt, &txd);
+ data_pa += bytes_adopted;
+ data_len -= bytes_adopted;
+ s->curr_txpkt_header_processed = true;
+ }
+
+ do {
+ if (0 != data_len) {
+ int frag_num = s->curr_txpkt_pl_frags++;
+ bytes_adopted =
+ vmxnet3_txpkt_adopt_data_fragment(&s->curr_txpkt,
+ data_pa,
+ data_len,
+ frag_num);
+ data_pa += bytes_adopted;
+ data_len -= bytes_adopted;
+ }
+
+ if ((0 != data_len) || txd.eop) {
+ size_t frag_off;
+
+ vmxnet3_txpkt_set_num_pl_frags(&s->curr_txpkt,
+ s->curr_txpkt_pl_frags);
+
+ vmxnet3_txpkt_set_more_frags(&s->curr_txpkt,
+ (0 != data_len));
+
+ s->curr_txpkt_skip =
+ !vmxnet3_send_packet(s, &s->curr_txpkt, qidx);
+
+ frag_off = vmxnet3_txpkt_get_payload_len(&s->curr_txpkt) /
+ IP_FRAG_UNIT_SIZE;
+
+ vmxnet3_txpkt_advance_frag_off(&s->curr_txpkt, frag_off);
+
+ vmxnet3_txpkt_reset_payload(&s->curr_txpkt);
+ s->curr_txpkt_pl_frags = 0;
+ }
+ } while (0 != data_len);
+ }
+
+ if (txd.eop) {
+ vmxnet3_complete_packet(s, qidx, txd_idx);
+ vmxnet3_txpkt_reset(&s->curr_txpkt);
+ s->curr_txpkt_skip = false;
+ s->curr_txpkt_header_processed = false;
+ }
+ }
+}
+
+static inline void
+vmxnet3_read_next_rx_descr(VMXNET3_State *s, int qidx, int ridx,
+ struct Vmxnet3_RxDesc *dbuf, uint32_t *didx)
+{
+ Vmxnet3_Ring *ring = &s->rxq_descr[qidx].rx_ring[ridx];
+ *didx = vmxnet3_ring_curr_cell_idx(ring);
+ vmxnet3_ring_read_curr_cell(ring, dbuf);
+}
+
+static inline uint8_t
+vmxnet3_get_rx_ring_gen(VMXNET3_State *s, int qidx, int ridx)
+{
+ return s->rxq_descr[qidx].rx_ring[ridx].gen;
+}
+
+static inline target_phys_addr_t
+vmxnet3_pop_rxc_descr(VMXNET3_State *s, int qidx, uint32_t *descr_gen)
+{
+ uint8_t ring_gen;
+ struct Vmxnet3_RxCompDesc rxcd;
+
+ target_phys_addr_t daddr =
+ vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring);
+
+ cpu_physical_memory_read(daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc));
+ ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring);
+
+ if (rxcd.gen != ring_gen) {
+ *descr_gen = ring_gen;
+ vmxnet3_inc_rx_completion_counter(s, qidx);
+ return daddr;
+ }
+
+ return 0;
+}
+
+static inline void
+vmxnet3_revert_rxc_descr(VMXNET3_State *s, int qidx)
+{
+ vmxnet3_dec_rx_completion_counter(s, qidx);
+}
+
+#define RXQ_IDX (0)
+#define RX_HEAD_BODY_RING (0)
+#define RX_BODY_ONLY_RING (1)
+
+static bool
+vmxnet3_get_next_head_rx_descr(VMXNET3_State *s,
+ struct Vmxnet3_RxDesc *descr_buf,
+ uint32_t *descr_idx,
+ uint32_t *ridx)
+{
+ for (;;) {
+ uint32_t ring_gen;
+ vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
+ descr_buf, descr_idx);
+
+ /* If no more free descriptors - return */
+ ring_gen = vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING);
+ if (descr_buf->gen != ring_gen) {
+ return false;
+ }
+
+ /* Mark current descriptor as used/skipped */
+ vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
+
+ /* If this is what we are looking for - return */
+ if (VMXNET3_RXD_BTYPE_HEAD == descr_buf->btype) {
+ *ridx = RX_HEAD_BODY_RING;
+ return true;
+ }
+ }
+}
+
+static bool
+vmxnet3_get_next_body_rx_descr(VMXNET3_State *s,
+ struct Vmxnet3_RxDesc *dbuf,
+ uint32_t *didx,
+ uint32_t *ridx)
+{
+ vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, dbuf, didx);
+
+ /* Try to find corresponding descriptor in head/body ring */
+ if ((dbuf->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING)) &&
+ (VMXNET3_RXD_BTYPE_BODY == dbuf->btype)) {
+ vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
+ *ridx = RX_HEAD_BODY_RING;
+ return true;
+ }
+
+ /* If there is no free descriptors on head/body ring or next free */
+ /* descriptor is a head descriptor switch to body only ring */
+ vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, dbuf, didx);
+
+ /* If no more free descriptors - return */
+ if (dbuf->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_BODY_ONLY_RING)) {
+ assert(VMXNET3_RXD_BTYPE_BODY == dbuf->btype);
+ *ridx = RX_BODY_ONLY_RING;
+ vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_BODY_ONLY_RING);
+ return true;
+ }
+
+ return false;
+}
+
+static inline bool
+vmxnet3_get_next_rx_descr(VMXNET3_State *s, bool is_head,
+ struct Vmxnet3_RxDesc *descr_buf,
+ uint32_t *descr_idx,
+ uint32_t *ridx)
+{
+ if (is_head || !s->rx_packets_compound) {
+ return vmxnet3_get_next_head_rx_descr(s, descr_buf, descr_idx, ridx);
+ } else {
+ return vmxnet3_get_next_body_rx_descr(s, descr_buf, descr_idx, ridx);
+ }
+}
+
+static void
+vmxnet3_rx_put_metadata_to_descr(Vmxnet3_RxPkt *pkt,
+ struct Vmxnet3_RxCompDesc *rxcd)
+{
+ int csum_correct, is_gso;
+ bool isip4, isip6, istcp, isudp;
+ uint8_t headers[ETH_MAX_L2_HDR_LEN + ETH_MAX_L3_HDR_LEN];
+ Vmxnet3_RxPktMdata *mdata = vmxnet3_rxpkt_get_mdata(pkt);
+ struct virtio_net_hdr *vhdr;
+ uint8_t offload_type;
+
+ if (mdata->vlan_stripped) {
+ rxcd->ts = 1;
+ rxcd->tci = mdata->vlan_tag;
+ }
+
+ if (!mdata->vhdr_valid) {
+ goto nocsum;
+ }
+
+ vhdr = vmxnet3_rxpkt_get_vhdr(pkt);
+ /* Checksum is valid when lower level tell so or when lower level */
+ /* requires checksum offload telling that packet produced/bridged */
+ /* locally and did travel over network after last checksum calculation */
+ /* or production */
+ csum_correct = FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_DATA_VALID) ||
+ FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM);
+
+ offload_type = vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
+ is_gso = (offload_type != VIRTIO_NET_HDR_GSO_NONE) ? 1 : 0;
+
+ if (!csum_correct && !is_gso) {
+ goto nocsum;
+ }
+
+ /* Make linear copy of L2-L3 headers and parse it */
+ iov_to_buf(vmxnet3_rxpkt_get_frag(pkt, 0),
+ vmxnet3_rxpkt_get_num_frags(pkt),
+ headers, 0, MIN(sizeof(headers), mdata->tot_len));
+
+ eth_get_protocols(headers, MIN(sizeof(headers), mdata->tot_len),
+ &isip4, &isip6, &isudp, &istcp);
+ if ((!istcp && !isudp) || (!isip4 && !isip6)) {
+ goto nocsum;
+ }
+
+ rxcd->cnc = 0;
+ rxcd->v4 = isip4 ? 1 : 0;
+ rxcd->v6 = isip6 ? 1 : 0;
+ rxcd->tcp = istcp ? 1 : 0;
+ rxcd->udp = isudp ? 1 : 0;
+ rxcd->fcs = rxcd->tuc = rxcd->ipc = 1;
+ return;
+
+nocsum:
+ rxcd->cnc = 1;
+ return;
+}
+
+static void
+vmxnet3_physical_memory_writev(const struct iovec *iov,
+ size_t start_iov_off,
+ target_phys_addr_t target_addr,
+ size_t bytes_to_copy)
+{
+ size_t curr_off = 0;
+ size_t copied = 0;
+
+ while (bytes_to_copy) {
+ if (start_iov_off < (curr_off + iov->iov_len)) {
+ size_t chunk_len =
+ MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy);
+
+ cpu_physical_memory_write(target_addr + copied,
+ iov->iov_base + start_iov_off - curr_off,
+ chunk_len);
+
+ copied += chunk_len;
+ start_iov_off += chunk_len;
+ curr_off = start_iov_off;
+ bytes_to_copy -= chunk_len;
+ } else {
+ curr_off += iov->iov_len;
+ }
+ iov++;
+ }
+}
+
+static bool
+vmxnet3_indicate_packet(VMXNET3_State *s, Vmxnet3_RxPkt *pkt)
+{
+ struct Vmxnet3_RxDesc rxd;
+ bool is_head = true;
+ uint32_t rxd_idx;
+ uint32_t rx_ridx;
+
+ struct Vmxnet3_RxCompDesc rxcd;
+ uint32_t new_rxcd_gen = VMXNET3_INIT_GEN;
+ target_phys_addr_t new_rxcd_pa = 0;
+ target_phys_addr_t ready_rxcd_pa = 0;
+ struct iovec *data = vmxnet3_rxpkt_get_frag(pkt, 0);
+ Vmxnet3_RxPktMdata *mdata = vmxnet3_rxpkt_get_mdata(pkt);
+ size_t bytes_copied = 0;
+ size_t bytes_left = mdata->tot_len;
+ uint16_t num_frags = 0;
+
+ vmxnet3_rxpkt_dump(pkt);
+
+ while ((num_frags < s->max_rx_frags) &&
+ (bytes_left > 0) &&
+ (new_rxcd_pa = vmxnet3_pop_rxc_descr(s, RXQ_IDX, &new_rxcd_gen)) &&
+ vmxnet3_get_next_rx_descr(s, is_head, &rxd, &rxd_idx, &rx_ridx)) {
+ size_t chunk_size = MIN(bytes_left, rxd.len);
+ vmxnet3_physical_memory_writev(data, bytes_copied,
+ le64_to_cpu(rxd.addr), chunk_size);
+ bytes_copied += chunk_size;
+ bytes_left -= chunk_size;
+
+ vmxnet3_dump_rx_descr(&rxd);
+
+ if (0 != ready_rxcd_pa) {
+ cpu_physical_memory_write(ready_rxcd_pa, &rxcd, sizeof(rxcd));
+ }
+
+ memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc));
+ rxcd.rxdIdx = rxd_idx;
+ rxcd.len = chunk_size;
+ rxcd.sop = is_head;
+ rxcd.gen = new_rxcd_gen;
+ rxcd.rqID = RXQ_IDX + rx_ridx*s->rxq_num;
+
+ if (0 == bytes_left) {
+ vmxnet3_rx_put_metadata_to_descr(pkt, &rxcd);
+ }
+
+ DRIPRINTF("RX Completion descriptor: rxRing: %lu rxIdx %lu len %lu "
+ "sop %d csum_correct %lu",
+ (unsigned long) rx_ridx,
+ (unsigned long) rxcd.rxdIdx,
+ (unsigned long) rxcd.len,
+ (int) rxcd.sop,
+ (unsigned long) rxcd.tuc);
+
+ is_head = false;
+ ready_rxcd_pa = new_rxcd_pa;
+ new_rxcd_pa = 0;
+ }
+
+ if (0 != ready_rxcd_pa) {
+ rxcd.eop = 1;
+ rxcd.err = (0 != bytes_left);
+ cpu_physical_memory_write(ready_rxcd_pa, &rxcd, sizeof(rxcd));
+ vmxnet3_flush_shmem_changes();
+ }
+
+ if (0 != new_rxcd_pa) {
+ vmxnet3_revert_rxc_descr(s, RXQ_IDX);
+ }
+
+ vmxnet3_trigger_interrupt(s, s->rxq_descr[RXQ_IDX].intr_idx);
+
+ if (bytes_left == 0) {
+ vmxnet3_on_rx_done_update_stats(s, pkt, RXQ_IDX, VMXNET3_SUCCEEDED);
+ return true;
+ } else if (num_frags == s->max_rx_frags) {
+ vmxnet3_on_rx_done_update_stats(s, pkt, RXQ_IDX, VMXNET3_PKT_ERROR);
+ return false;
+ } else {
+ vmxnet3_on_rx_done_update_stats(s, pkt, RXQ_IDX, VMXNET3_OUT_OF_BUF);
+ return false;
+ }
+}
+
+static void
+vmxnet3_io_bar0_write(void *opaque, target_phys_addr_t addr,
+ uint64_t val, unsigned size)
+{
+ VMXNET3_State *s = opaque;
+
+ if (IS_MULTIREG_ADDR(addr, VMXNET3_REG_TXPROD,
+ VMXNET3_DEVICE_MAX_TX_QUEUES, VMXNET3_REG_ALIGN)) {
+ int tx_queue_idx =
+ MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD, VMXNET3_REG_ALIGN);
+ assert(tx_queue_idx <= s->txq_num);
+ vmxnet3_process_tx_queue(s, tx_queue_idx);
+ return;
+ }
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wtype-limits"
+ if (IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
+ VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
+ int l = MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR, VMXNET3_REG_ALIGN);
+
+ DCBPRINTF("Interrupt mask for line %d written: 0x%" PRIx64, l, val);
+
+ vmxnet3_on_interrupt_mask_changed(s, l, val);
+ return;
+ }
+#pragma GCC diagnostic pop
+
+ if (IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD,
+ VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN) ||
+ IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD2,
+ VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN)) {
+ return;
+ }
+
+ DWRPRINTF("BAR0 unknown write [%" PRIx64 "] = %" PRIx64 ", size %d",
+ (uint64_t) addr, val, size);
+}
+
+static uint64_t
+vmxnet3_io_bar0_read(void *opaque, target_phys_addr_t addr, unsigned size)
+{
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wtype-limits"
+ if (IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
+ VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
+ assert(false);
+ }
+#pragma GCC diagnostic pop
+
+ DCBPRINTF("BAR0 unknown read [%" PRIx64 "], size %d",
+ (uint64_t) addr, size);
+ return 0;
+}
+
+static void vmxnet3_reset(VMXNET3_State *s)
+{
+ DCBPRINTF("Resetting vmxnet3...");
+
+ vmxnet3_txpkt_reset(&s->curr_txpkt);
+ s->curr_txpkt_pl_frags = 0;
+ s->curr_txpkt_skip = false;
+ s->curr_txpkt_header_processed = false;
+}
+
+static void vmxnet3_deactivate_device(VMXNET3_State *s)
+{
+ DCBPRINTF("Deactivating vmxnet3...");
+ s->device_active = false;
+}
+
+static void vmxnet3_update_rx_mode(VMXNET3_State *s)
+{
+ s->rx_mode = VMXNET3_READ_DRV_SHARED32(s->drv_shmem,
+ devRead.rxFilterConf.rxMode);
+ DCFPRINTF("RX mode: 0x%08X", s->rx_mode);
+}
+
+static void vmxnet3_update_vlan_filters(VMXNET3_State *s)
+{
+ int i;
+
+ /* Copy configuration from shared memory */
+ VMXNET3_READ_DRV_SHARED(s->drv_shmem,
+ devRead.rxFilterConf.vfTable,
+ s->vlan_table,
+ sizeof(s->vlan_table));
+
+ /* Invert byte order when needed */
+ for (i = 0; i < ARRAY_SIZE(s->vlan_table); i++) {
+ s->vlan_table[i] = le32_to_cpu(s->vlan_table[i]);
+ }
+
+ /* Dump configuration for debugging purposes */
+ DCFPRINTF("Configured VLANs:");
+ for (i = 0; i < sizeof(s->vlan_table) * 8; i++) {
+ if (VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, i)) {
+ DCFPRINTF("\tVLAN %d is present", i);
+ }
+ }
+}
+
+static void vmxnet3_update_mcast_filters(VMXNET3_State *s)
+{
+ uint16_t list_bytes =
+ VMXNET3_READ_DRV_SHARED16(s->drv_shmem,
+ devRead.rxFilterConf.mfTableLen);
+
+ s->mcast_list_len = list_bytes / sizeof(s->mcast_list[0]);
+
+ s->mcast_list = g_realloc(s->mcast_list, list_bytes);
+ if (NULL == s->mcast_list) {
+ if (0 == s->mcast_list_len) {
+ DCFPRINTF("Current multicast list is empty");
+ } else {
+ DERPRINTF("Failed to allocate multicast list of %d elements",
+ s->mcast_list_len);
+ }
+ s->mcast_list_len = 0;
+ } else {
+ int i;
+ target_phys_addr_t mcast_list_pa =
+ VMXNET3_READ_DRV_SHARED64(s->drv_shmem,
+ devRead.rxFilterConf.mfTablePA);
+
+ cpu_physical_memory_read(mcast_list_pa, s->mcast_list, list_bytes);
+ DCFPRINTF("Current multicast list len is %d:", s->mcast_list_len);
+ for (i = 0; i < s->mcast_list_len; i++) {
+ DCFPRINTF("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a));
+ }
+ }
+}
+
+static void vmxnet3_setup_rx_filtering(VMXNET3_State *s)
+{
+ vmxnet3_update_rx_mode(s);
+ vmxnet3_update_vlan_filters(s);
+ vmxnet3_update_mcast_filters(s);
+}
+
+static uint32_t vmxnet3_get_interrupt_config(VMXNET3_State *s)
+{
+ uint32_t interrupt_mode = VMXNET3_IT_AUTO | (VMXNET3_IMM_AUTO << 2);
+ DCFPRINTF("Interrupt config is 0x%X", interrupt_mode);
+ return interrupt_mode;
+}
+
+static void vmxnet3_fill_stats(VMXNET3_State *s)
+{
+ int i;
+ for (i = 0; i < s->txq_num; i++) {
+ cpu_physical_memory_write(s->txq_descr[i].tx_stats_pa,
+ &s->txq_descr[i].txq_stats,
+ sizeof(s->txq_descr[i].txq_stats));
+ }
+
+ for (i = 0; i < s->rxq_num; i++) {
+ cpu_physical_memory_write(s->rxq_descr[i].rx_stats_pa,
+ &s->rxq_descr[i].rxq_stats,
+ sizeof(s->rxq_descr[i].rxq_stats));
+ }
+}
+
+static void vmxnet3_adjust_by_guest_type(VMXNET3_State *s)
+{
+ struct Vmxnet3_GOSInfo gos;
+
+ VMXNET3_READ_DRV_SHARED(s->drv_shmem, devRead.misc.driverInfo.gos,
+ &gos, sizeof(gos));
+ s->rx_packets_compound =
+ (VMXNET3_GOS_TYPE_WIN == gos.gosType) ? false : true;
+
+ DCFPRINTF("Guest type specifics: RXCOMPOUND: %d", s->rx_packets_compound);
+}
+
+static void
+vmxnet3_dump_conf_descr(const char *name,
+ struct Vmxnet3_VariableLenConfDesc *pm_descr)
+{
+ DCFPRINTF("%s descriptor dump: Version %u, Length %u",
+ name, pm_descr->confVer, pm_descr->confLen);
+
+};
+
+static void vmxnet3_update_pm_state(VMXNET3_State *s)
+{
+ struct Vmxnet3_VariableLenConfDesc pm_descr;
+
+ pm_descr.confLen =
+ VMXNET3_READ_DRV_SHARED32(s->drv_shmem, devRead.pmConfDesc.confLen);
+ pm_descr.confVer =
+ VMXNET3_READ_DRV_SHARED32(s->drv_shmem, devRead.pmConfDesc.confVer);
+ pm_descr.confPA =
+ VMXNET3_READ_DRV_SHARED64(s->drv_shmem, devRead.pmConfDesc.confPA);
+
+ vmxnet3_dump_conf_descr("PM State", &pm_descr);
+}
+
+static void vmxnet3_update_features(VMXNET3_State *s)
+{
+ uint32_t guest_features;
+ int rxcsum_offload_supported;
+
+ guest_features = VMXNET3_READ_DRV_SHARED32(s->drv_shmem,
+ devRead.misc.uptFeatures);
+
+ rxcsum_offload_supported = FLAG_IS_SET(guest_features, UPT1_F_RXCSUM);
+ s->rx_vlan_stripping = FLAG_IS_SET(guest_features, UPT1_F_RXVLAN);
+ s->lro_supported = FLAG_IS_SET(guest_features, UPT1_F_LRO);
+
+ DCFPRINTF("Features configuration: LRO: %d, RXCSUM: %d, VLANSTRIP: %d",
+ s->lro_supported, rxcsum_offload_supported,
+ s->rx_vlan_stripping);
+
+ tap_set_offload(s->nic->nc.peer,
+ rxcsum_offload_supported,
+ s->lro_supported,
+ s->lro_supported,
+ 0,
+ 0);
+}
+
+static void vmxnet3_activate_device(VMXNET3_State *s)
+{
+ int i;
+ static const uint32_t VMXNET3_DEF_TX_THRESHOLD = 1;
+ target_phys_addr_t qdescr_table_pa;
+ uint64_t pa;
+ uint32_t size;
+
+ /* Verify configuration consistency */
+ if (!vmxnet3_verify_driver_magic(s->drv_shmem)) {
+ DERPRINTF("Device configuration received from driver is invalid");
+ return;
+ }
+
+ vmxnet3_adjust_by_guest_type(s);
+ vmxnet3_update_features(s);
+ vmxnet3_update_pm_state(s);
+ vmxnet3_setup_rx_filtering(s);
+ /* Cache fields from shared memory */
+ s->mtu = VMXNET3_READ_DRV_SHARED32(s->drv_shmem, devRead.misc.mtu);
+ DCFPRINTF("MTU is %u", s->mtu);
+
+ s->max_rx_frags =
+ VMXNET3_READ_DRV_SHARED16(s->drv_shmem, devRead.misc.maxNumRxSG);
+
+ DCFPRINTF("Max RX fragments is %u", s->max_rx_frags);
+
+ s->event_int_idx =
+ VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.intrConf.eventIntrIdx);
+ DCFPRINTF("Events interrupt line is %u", s->event_int_idx);
+
+ s->auto_int_masking =
+ VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.intrConf.autoMask);
+ DCFPRINTF("Automatic interrupt masking is %d", (int)s->auto_int_masking);
+
+ s->txq_num =
+ VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.misc.numTxQueues);
+ s->rxq_num =
+ VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.misc.numRxQueues);
+
+ DCFPRINTF("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num);
+ assert(s->txq_num <= VMXNET3_DEVICE_MAX_TX_QUEUES);
+
+ qdescr_table_pa =
+ VMXNET3_READ_DRV_SHARED64(s->drv_shmem, devRead.misc.queueDescPA);
+ DCFPRINTF("TX queues descriptors table is at 0x%" PRIx64,
+ (uint64_t) qdescr_table_pa);
+
+ /* Worst-case scenario is a packet that holds all TX rings space so */
+ /* we calculate total size of all TX rings for max TX fragments number */
+ s->max_tx_frags = 0;
+
+ /* TX queues */
+ for (i = 0; i < s->txq_num; i++) {
+ target_phys_addr_t qdescr_pa =
+ qdescr_table_pa + i*sizeof(struct Vmxnet3_TxQueueDesc);
+
+ /* Read interrupt number for this TX queue */
+ s->txq_descr[i].intr_idx =
+ VMXNET3_READ_TX_QUEUE_DESCR8(qdescr_pa, conf.intrIdx);
+
+ DCFPRINTF("TX Queue %d interrupt: %d", i, s->txq_descr[i].intr_idx);
+
+ /* Read rings memory locations for TX queues */
+ pa = VMXNET3_READ_TX_QUEUE_DESCR64(qdescr_pa, conf.txRingBasePA);
+ size = VMXNET3_READ_TX_QUEUE_DESCR32(qdescr_pa, conf.txRingSize);
+
+ vmxnet3_ring_init(&s->txq_descr[i].tx_ring, pa, size,
+ sizeof(struct Vmxnet3_TxDesc), false);
+ vmxnet3_ring_dump(DCFPRINTF, "TX", i, &s->txq_descr[i].tx_ring);
+
+ s->max_tx_frags += size;
+
+ /* TXC ring */
+ pa = VMXNET3_READ_TX_QUEUE_DESCR64(qdescr_pa, conf.compRingBasePA);
+ size = VMXNET3_READ_TX_QUEUE_DESCR32(qdescr_pa, conf.compRingSize);
+ vmxnet3_ring_init(&s->txq_descr[i].comp_ring, pa, size,
+ sizeof(struct Vmxnet3_TxCompDesc), true);
+ vmxnet3_ring_dump(DCFPRINTF, "TXC", i, &s->txq_descr[i].comp_ring);
+
+ s->txq_descr[i].tx_stats_pa =
+ qdescr_pa + offsetof(struct Vmxnet3_TxQueueDesc, stats);
+
+ memset(&s->txq_descr[i].txq_stats, 0,
+ sizeof(s->txq_descr[i].txq_stats));
+
+ /* Fill device-managed parameters for queues */
+ VMXNET3_WRITE_TX_QUEUE_DESCR32(qdescr_pa,
+ ctrl.txThreshold,
+ VMXNET3_DEF_TX_THRESHOLD);
+ }
+
+ /* Preallocate TX packet wrapper */
+ DCFPRINTF("Max TX fragments is %u", s->max_tx_frags);
+ if (!vmxnet3_txpkt_prealloc(&s->curr_txpkt, s->max_tx_frags,
+ s->peer_has_vhdr)) {
+ hw_error("TX rings configuration problem");
+ }
+
+ /* Read rings memory locations for RX queues */
+ for (i = 0; i < s->rxq_num; i++) {
+ int j;
+ target_phys_addr_t qd_pa =
+ qdescr_table_pa + s->txq_num*sizeof(struct Vmxnet3_TxQueueDesc) +
+ i*sizeof(struct Vmxnet3_RxQueueDesc);
+
+ /* Read interrupt number for this RX queue */
+ s->rxq_descr[i].intr_idx =
+ VMXNET3_READ_TX_QUEUE_DESCR8(qd_pa, conf.intrIdx);
+
+ DCFPRINTF("RX Queue %d interrupt: %d", i, s->rxq_descr[i].intr_idx);
+
+ /* Read rings memory locations */
+ for (j = 0; j < VMXNET3_RX_RINGS_PER_QUEUE; j++) {
+ /* RX rings */
+ pa = VMXNET3_READ_RX_QUEUE_DESCR64(qd_pa, conf.rxRingBasePA[j]);
+ size = VMXNET3_READ_RX_QUEUE_DESCR32(qd_pa, conf.rxRingSize[j]);
+ vmxnet3_ring_init(&s->rxq_descr[i].rx_ring[j], pa, size,
+ sizeof(struct Vmxnet3_RxDesc), false);
+ DCFPRINTF("RX queue %d:%d: Base: %" PRIx64 ", Size: %d",
+ i, j, pa, size);
+ }
+
+ /* RXC ring */
+ pa = VMXNET3_READ_RX_QUEUE_DESCR64(qd_pa, conf.compRingBasePA);
+ size = VMXNET3_READ_RX_QUEUE_DESCR32(qd_pa, conf.compRingSize);
+ vmxnet3_ring_init(&s->rxq_descr[i].comp_ring, pa, size,
+ sizeof(struct Vmxnet3_RxCompDesc), true);
+ DCFPRINTF("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size);
+
+ s->rxq_descr[i].rx_stats_pa =
+ qd_pa + offsetof(struct Vmxnet3_RxQueueDesc, stats);
+ memset(&s->rxq_descr[i].rxq_stats, 0,
+ sizeof(s->rxq_descr[i].rxq_stats));
+ }
+
+ vmxnet3_flush_shmem_changes();
+ s->device_active = true;
+}
+
+static void vmxnet3_handle_command(VMXNET3_State *s, uint64_t cmd)
+{
+ s->last_command = cmd;
+
+ switch (cmd) {
+ case VMXNET3_CMD_GET_PERM_MAC_HI:
+ DCBPRINTF("Set: Get upper part of permanent MAC");
+ break;
+
+ case VMXNET3_CMD_GET_PERM_MAC_LO:
+ DCBPRINTF("Set: Get lower part of permanent MAC");
+ break;
+
+ case VMXNET3_CMD_GET_STATS:
+ DCBPRINTF("Set: Get device statistics");
+ vmxnet3_fill_stats(s);
+ break;
+
+ case VMXNET3_CMD_ACTIVATE_DEV:
+ DCBPRINTF("Set: Activating vmxnet3 device");
+ vmxnet3_activate_device(s);
+ break;
+
+ case VMXNET3_CMD_UPDATE_RX_MODE:
+ DCBPRINTF("Set: Update rx mode");
+ vmxnet3_update_rx_mode(s);
+ break;
+
+ case VMXNET3_CMD_UPDATE_VLAN_FILTERS:
+ DCBPRINTF("Set: Update VLAN filters");
+ vmxnet3_update_vlan_filters(s);
+ break;
+
+ case VMXNET3_CMD_UPDATE_MAC_FILTERS:
+ DCBPRINTF("Set: Update MAC filters");
+ vmxnet3_update_mcast_filters(s);
+ break;
+
+ case VMXNET3_CMD_UPDATE_FEATURE:
+ DCBPRINTF("Set: Update features");
+ vmxnet3_update_features(s);
+ break;
+
+ case VMXNET3_CMD_UPDATE_PMCFG:
+ DCBPRINTF("Set: Update power management config");
+ vmxnet3_update_pm_state(s);
+ break;
+
+ case VMXNET3_CMD_GET_LINK:
+ DCBPRINTF("Set: Get link");
+ break;
+
+ case VMXNET3_CMD_RESET_DEV:
+ DCBPRINTF("Set: Reset device");
+ vmxnet3_reset(s);
+ break;
+
+ case VMXNET3_CMD_QUIESCE_DEV:
+ DCBPRINTF("Set: VMXNET3_CMD_QUIESCE_DEV - pause the device");
+ vmxnet3_deactivate_device(s);
+ break;
+
+ case VMXNET3_CMD_GET_CONF_INTR:
+ DCBPRINTF("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt configuration");
+ break;
+
+ default:
+ DCBPRINTF("Received unknown command: %" PRIx64, cmd);
+ break;
+ }
+}
+
+static uint64_t vmxnet3_get_command_status(VMXNET3_State *s)
+{
+ uint64_t ret;
+
+ switch (s->last_command) {
+ case VMXNET3_CMD_ACTIVATE_DEV:
+ ret = (s->device_active) ? 0 : -1;
+ DCFPRINTF("Device active: %" PRIx64, ret);
+ break;
+
+ case VMXNET3_CMD_GET_LINK:
+ ret = s->link_status_and_speed;
+ DCFPRINTF("Link and speed: %" PRIx64, ret);
+ break;
+
+ case VMXNET3_CMD_GET_PERM_MAC_LO:
+ ret = vmxnet3_get_mac_low(&s->perm_mac);
+ break;
+
+ case VMXNET3_CMD_GET_PERM_MAC_HI:
+ ret = vmxnet3_get_mac_high(&s->perm_mac);
+ break;
+
+ case VMXNET3_CMD_GET_CONF_INTR:
+ ret = vmxnet3_get_interrupt_config(s);
+ break;
+
+ default:
+ DWRPRINTF("Received request for unknown command: %x", s->last_command);
+ ret = -1;
+ break;
+ }
+
+ return ret;
+}
+
+static void vmxnet3_set_events(VMXNET3_State *s, uint32_t val)
+{
+ uint32_t events;
+
+ DCBPRINTF("Setting events: 0x%x", val);
+ events = VMXNET3_READ_DRV_SHARED32(s->drv_shmem, ecr) | val;
+ VMXNET3_WRITE_DRV_SHARED32(s->drv_shmem, ecr, events);
+}
+
+static void vmxnet3_ack_events(VMXNET3_State *s, uint32_t val)
+{
+ uint32_t events;
+
+ DCBPRINTF("Clearing events: 0x%x", val);
+ events = VMXNET3_READ_DRV_SHARED32(s->drv_shmem, ecr) & ~val;
+ VMXNET3_WRITE_DRV_SHARED32(s->drv_shmem, ecr, events);
+}
+
+static void
+vmxnet3_io_bar1_write(void *opaque,
+ target_phys_addr_t addr,
+ uint64_t val,
+ unsigned size)
+{
+ VMXNET3_State *s = opaque;
+
+ switch (addr) {
+ /* Vmxnet3 Revision Report Selection */
+ case VMXNET3_REG_VRRS:
+ DCBPRINTF("Write BAR1 [VMXNET3_REG_VRRS] = %" PRIx64 ", size %d",
+ val, size);
+ break;
+
+ /* UPT Version Report Selection */
+ case VMXNET3_REG_UVRS:
+ DCBPRINTF("Write BAR1 [VMXNET3_REG_UVRS] = %" PRIx64 ", size %d",
+ val, size);
+ break;
+
+ /* Driver Shared Address Low */
+ case VMXNET3_REG_DSAL:
+ DCBPRINTF("Write BAR1 [VMXNET3_REG_DSAL] = %" PRIx64 ", size %d",
+ val, size);
+ /* Guest driver will first write the low part of the shared */
+ /* memory address. We save it to temp variable and set the */
+ /* shared address only after we get the high part */
+ if (0 == val) {
+ s->device_active = false;
+ }
+ s->temp_shared_guest_driver_memory = val;
+ s->drv_shmem = 0;
+ break;
+
+ /* Driver Shared Address High */
+ case VMXNET3_REG_DSAH:
+ DCBPRINTF("Write BAR1 [VMXNET3_REG_DSAH] = %" PRIx64 ", size %d",
+ val, size);
+ /* Set the shared memory between guest driver and device. */
+ /* We already should have low address part. */
+ s->drv_shmem = s->temp_shared_guest_driver_memory | (val << 32);
+ break;
+
+ /* Command */
+ case VMXNET3_REG_CMD:
+ DCBPRINTF("Write BAR1 [VMXNET3_REG_CMD] = %" PRIx64 ", size %d",
+ val, size);
+ vmxnet3_handle_command(s, val);
+ break;
+
+ /* MAC Address Low */
+ case VMXNET3_REG_MACL:
+ DCBPRINTF("Write BAR1 [VMXNET3_REG_MACL] = %" PRIx64 ", size %d",
+ val, size);
+ s->temp_mac = val;
+ break;
+
+ /* MAC Address High */
+ case VMXNET3_REG_MACH:
+ DCBPRINTF("Write BAR1 [VMXNET3_REG_MACH] = %" PRIx64 ", size %d",
+ val, size);
+ vmxnet3_set_variable_mac(s, val, s->temp_mac);
+ break;
+
+ /* Interrupt Cause Register */
+ case VMXNET3_REG_ICR:
+ DCBPRINTF("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
+ val, size);
+ assert(false);
+ break;
+
+ /* Event Cause Register */
+ case VMXNET3_REG_ECR:
+ DCBPRINTF("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d",
+ val, size);
+ vmxnet3_ack_events(s, val);
+ break;
+
+ default:
+ DCBPRINTF("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ", size %d",
+ (uint64_t) addr, val, size);
+ break;
+ }
+}
+
+static uint64_t
+vmxnet3_io_bar1_read(void *opaque, target_phys_addr_t addr, unsigned size)
+{
+ VMXNET3_State *s = opaque;
+ uint64_t ret = 0;
+
+ switch (addr) {
+ /* Vmxnet3 Revision Report Selection */
+ case VMXNET3_REG_VRRS:
+ DCBPRINTF("Read BAR1 [VMXNET3_REG_VRRS], size %d", size);
+ ret = VMXNET3_DEVICE_REVISION;
+ break;
+
+ /* UPT Version Report Selection */
+ case VMXNET3_REG_UVRS:
+ DCBPRINTF("Read BAR1 [VMXNET3_REG_UVRS], size %d", size);
+ ret = VMXNET3_DEVICE_VERSION;
+ break;
+
+ /* Command */
+ case VMXNET3_REG_CMD:
+ DCBPRINTF("Read BAR1 [VMXNET3_REG_CMD], size %d", size);
+ ret = vmxnet3_get_command_status(s);
+ break;
+
+ /* MAC Address Low */
+ case VMXNET3_REG_MACL:
+ DCBPRINTF("Read BAR1 [VMXNET3_REG_MACL], size %d", size);
+ ret = vmxnet3_get_mac_low(&s->conf.macaddr);
+ break;
+
+ /* MAC Address High */
+ case VMXNET3_REG_MACH:
+ DCBPRINTF("Read BAR1 [VMXNET3_REG_MACH], size %d", size);
+ ret = vmxnet3_get_mac_high(&s->conf.macaddr);
+ break;
+
+ /* Interrupt Cause Register */
+ /* Used for legacy interrupts only so interrupt index always 0 */
+ case VMXNET3_REG_ICR:
+ DCBPRINTF("Read BAR1 [VMXNET3_REG_ICR], size %d", size);
+ if (vmxnet3_interrupt_asserted(s, 0)) {
+ vmxnet3_clear_interrupt(s, 0);
+ ret = true;
+ } else {
+ ret = false;
+ }
+ break;
+
+ default:
+ DCBPRINTF("Unknow read BAR1[%" PRIx64 "], %d bytes",
+ (uint64_t) addr, size);
+ break;
+ }
+
+ return ret;
+}
+
+static int
+vmxnet3_can_receive(VLANClientState *nc)
+{
+ VMXNET3_State *s = DO_UPCAST(NICState, nc, nc)->opaque;
+ return s->device_active &&
+ FLAG_IS_SET(s->link_status_and_speed, VMXNET3_LINK_STATUS_UP);
+}
+
+static inline bool
+vmxnet3_is_registered_vlan(VMXNET3_State *s, const void *data)
+{
+ uint16_t vlan_tag = eth_get_pkt_vlan_tag(data) & VLAN_VID_MASK;
+
+ if (IS_SPECIAL_VLAN_ID(vlan_tag)) {
+ return true;
+ }
+
+ return VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, vlan_tag);
+}
+
+static bool
+vmxnet3_is_allowed_mcast_group(VMXNET3_State *s, const uint8_t *group_mac)
+{
+ int i;
+ for (i = 0; i < s->mcast_list_len; i++) {
+ if (memcmp(group_mac, s->mcast_list[i].a, sizeof(s->mcast_list[i]))) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool
+vmxnet3_rx_filter_may_indicate(VMXNET3_State *s, const void *data,
+ size_t size, eth_pkt_types_e packet_type)
+{
+ struct eth_header *ehdr = PKT_GET_ETH_HDR(data);
+
+ if (FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_PROMISC)) {
+ return true;
+ }
+
+ switch (packet_type) {
+ case VMXNET3_PKT_UCAST:
+ if (!FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) {
+ return false;
+ }
+ if (memcmp(s->conf.macaddr.a, ehdr->h_dest, ETH_ALEN)) {
+ return false;
+ }
+ break;
+
+ case VMXNET3_PKT_BCAST:
+ if (!FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_BCAST)) {
+ return false;
+ }
+ break;
+
+ case VMXNET3_PKT_MCAST:
+ if (FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_ALL_MULTI)) {
+ return true;
+ }
+ if (!FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_MCAST)) {
+ return false;
+ }
+ if (!vmxnet3_is_allowed_mcast_group(s, ehdr->h_dest)) {
+ return false;
+ }
+ break;
+
+ default:
+ assert(false);
+ }
+
+ return vmxnet3_is_registered_vlan(s, data);
+}
+
+static void
+vmxnet3_rxpkt_attach_data(VMXNET3_State *s, Vmxnet3_RxPkt *pkt,
+ const void *data, size_t len)
+{
+ uint16_t vtag = 0;
+ uint16_t ploff;
+ Vmxnet3_RxPktMdata *mdata = vmxnet3_rxpkt_get_mdata(pkt);
+
+ if (s->rx_vlan_stripping) {
+ mdata->vlan_stripped =
+ eth_strip_vlan(&data, vmxnet3_rxpkt_get_ehdr(pkt), &ploff, &vtag);
+ } else {
+ mdata->vlan_stripped = false;
+ }
+
+ if (mdata->vlan_stripped) {
+ vmxnet3_rxpkt_attach_ehdr(pkt);
+ vmxnet3_rxpkt_get_frag(pkt, 1)->iov_base = (uint8_t *) data + ploff;
+ vmxnet3_rxpkt_get_frag(pkt, 1)->iov_len = len - ploff;
+ vmxnet3_rxpkt_set_num_frags(pkt, 2);
+ mdata->tot_len = len - ploff + sizeof(struct eth_header);
+ } else {
+ vmxnet3_rxpkt_get_frag(pkt, 0)->iov_base = (void *) data;
+ vmxnet3_rxpkt_get_frag(pkt, 0)->iov_len = len;
+ vmxnet3_rxpkt_set_num_frags(pkt, 1);
+ mdata->tot_len = len;
+ }
+
+ mdata->vlan_tag = vtag;
+}
+
+static ssize_t
+vmxnet3_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
+{
+ VMXNET3_State *s = DO_UPCAST(NICState, nc, nc)->opaque;
+ Vmxnet3_RxPkt pkt;
+ size_t bytes_indicated;
+ Vmxnet3_RxPktMdata *mdata;
+
+ if (!vmxnet3_can_receive(&s->nic->nc)) {
+ DPKPRINTF("Cannot receive now");
+ return -1;
+ }
+
+ vmxnet3_rxpkt_init(&pkt);
+ mdata = vmxnet3_rxpkt_get_mdata(&pkt);
+
+ if (s->peer_has_vhdr) {
+ buf += sizeof(struct virtio_net_hdr);
+ size -= sizeof(struct virtio_net_hdr);
+ *vmxnet3_rxpkt_get_vhdr(&pkt) = *(const struct virtio_net_hdr *) buf;
+ mdata->vhdr_valid = true;
+ } else {
+ mdata->vhdr_valid = false;
+ }
+
+ mdata->packet_type = get_eth_packet_type(PKT_GET_ETH_HDR(buf));
+
+ if (vmxnet3_rx_filter_may_indicate(s, buf, size, mdata->packet_type)) {
+ vmxnet3_rxpkt_attach_data(s, &pkt, buf, size);
+ bytes_indicated = vmxnet3_indicate_packet(s, &pkt) ? size : -1;
+ if (bytes_indicated < size) {
+ DPKPRINTF("RX: %lu of %lu bytes indicated", bytes_indicated, size);
+ }
+ } else {
+ DPKPRINTF("Packet dropped by RX filter");
+ bytes_indicated = size;
+ }
+
+ assert(size > 0);
+ assert(bytes_indicated != 0);
+ return bytes_indicated;
+}
+
+static void vmxnet3_cleanup(VLANClientState *nc)
+{
+ VMXNET3_State *s = DO_UPCAST(NICState, nc, nc)->opaque;
+ s->nic = NULL;
+}
+
+static void vmxnet3_set_link_status(VLANClientState *nc)
+{
+ VMXNET3_State *s = DO_UPCAST(NICState, nc, nc)->opaque;
+
+ if (nc->link_down) {
+ s->link_status_and_speed &= ~VMXNET3_LINK_STATUS_UP;
+ } else {
+ s->link_status_and_speed |= VMXNET3_LINK_STATUS_UP;
+ }
+
+ vmxnet3_set_events(s, VMXNET3_ECR_LINK);
+ vmxnet3_trigger_interrupt(s, s->event_int_idx);
+}
+
+static NetClientInfo net_vmxnet3_info = {
+ .type = NET_CLIENT_TYPE_NIC,
+ .size = sizeof(NICState),
+ .can_receive = vmxnet3_can_receive,
+ .receive = vmxnet3_receive,
+ .cleanup = vmxnet3_cleanup,
+ .link_status_changed = vmxnet3_set_link_status,
+};
+
+static bool vmxnet3_peer_has_vnet_hdr(VMXNET3_State *s)
+{
+ VLANClientState *peer = s->nic->nc.peer;
+
+ if ((NULL != peer) &&
+ (NET_CLIENT_TYPE_TAP == peer->info->type) &&
+ tap_has_vnet_hdr(peer)) {
+ return true;
+ }
+
+ DWRPRINTF("Peer has no virtio extension. Task offloads will not work.");
+ return false;
+}
+
+static void vmxnet3_net_uninit(VMXNET3_State *s)
+{
+ if (NULL != s->mcast_list) {
+ g_free(s->mcast_list);
+ }
+
+ vmxnet3_txpkt_cleanup(&s->curr_txpkt);
+}
+
+static void vmxnet3_net_init(VMXNET3_State *s)
+{
+ DCBPRINTF("vmxnet3_net_init called...");
+
+ vmxnet3_txpkt_init(&s->curr_txpkt);
+
+ qemu_macaddr_default_if_unset(&s->conf.macaddr);
+
+ /* Windows guest will query the address that was set on init */
+ memcpy(&s->perm_mac.a, &s->conf.macaddr.a, sizeof(s->perm_mac.a));
+
+ s->mcast_list = NULL;
+ s->mcast_list_len = 0;
+
+ s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP;
+
+ DCFPRINTF("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a));
+
+ s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf,
+ object_get_typename(OBJECT(s)),
+ s->dev.qdev.id, s);
+
+ s->peer_has_vhdr = vmxnet3_peer_has_vnet_hdr(s);
+ s->curr_txpkt_pl_frags = 0;
+ s->curr_txpkt_skip = false;
+ s->curr_txpkt_header_processed = false;
+
+ if (s->peer_has_vhdr) {
+ tap_set_vnet_hdr_len(s->nic->nc.peer, sizeof(struct virtio_net_hdr));
+ tap_using_vnet_hdr(s->nic->nc.peer, 1);
+ }
+
+ qemu_format_nic_info_str(&s->nic->nc, s->conf.macaddr.a);
+}
+
+#ifdef VMXNET3_ENABLE_MSIX
+
+static void
+vmxnet3_unuse_msix_vectors(VMXNET3_State *s, int num_vectors)
+{
+ int i;
+ for (i = 0; i < num_vectors; i++) {
+ msix_vector_unuse(&s->dev, i);
+ }
+}
+
+static bool
+vmxnet3_use_msix_vectors(VMXNET3_State *s, int num_vectors)
+{
+ int i;
+ for (i = 0; i < num_vectors; i++) {
+ int res = msix_vector_use(&s->dev, i);
+ if (0 > res) {
+ DWRPRINTF("Failed to use MSI-X vector %d, error %d", i, res);
+ vmxnet3_unuse_msix_vectors(s, i);
+ return false;
+ }
+ }
+ return true;
+}
+
+static bool
+vmxnet3_init_msix(VMXNET3_State *s)
+{
+ int res = msix_init(&s->dev, VMXNET3_MAX_INTRS,
+ &s->msix_bar, VMXNET3_MSIX_BAR_IDX, 0);
+ if (0 > res) {
+ DWRPRINTF("Failed to initialize MSI-X, error %d", res);
+ s->msix_used = false;
+ } else {
+ if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
+ DWRPRINTF("Failed to use MSI-X vectors, error %d", res);
+ msix_uninit(&s->dev, &s->msix_bar);
+ s->msix_used = false;
+ } else {
+ s->msix_used = true;
+ }
+ }
+ return s->msix_used;
+}
+
+static void
+vmxnet3_cleanup_msix(VMXNET3_State *s)
+{
+ if (s->msix_used) {
+ msix_vector_unuse(&s->dev, VMXNET3_MAX_INTRS);
+ msix_uninit(&s->dev, &s->msix_bar);
+ }
+}
+#endif
+
+#ifdef VMXNET3_ENABLE_MSI
+
+static bool
+vmxnet3_init_msi(VMXNET3_State *s)
+{
+#define VMXNET3_MSI_NUM_VECTORS (1)
+#define VMXNET3_MSI_OFFSET (0x50)
+#define VMXNET3_USE_64BIT (true)
+#define VMXNET3_PER_VECTOR_MASK (false)
+
+ int res;
+ res = msi_init(&s->dev, VMXNET3_MSI_OFFSET, VMXNET3_MSI_NUM_VECTORS,
+ VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK);
+ if (0 > res) {
+ DWRPRINTF("Failed to initialize MSI, error %d", res);
+ s->msi_used = false;
+ } else {
+ s->msi_used = true;
+ }
+
+ return s->msi_used;
+}
+
+static void
+vmxnet3_cleanup_msi(VMXNET3_State *s)
+{
+ if (s->msi_used) {
+ msi_uninit(&s->dev);
+ }
+}
+#endif
+
+static int vmxnet3_pci_init(PCIDevice *dev)
+{
+ static const MemoryRegionOps b0_ops = {
+ .read = vmxnet3_io_bar0_read,
+ .write = vmxnet3_io_bar0_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+ };
+
+ static const MemoryRegionOps b1_ops = {
+ .read = vmxnet3_io_bar1_read,
+ .write = vmxnet3_io_bar1_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+ .impl = {
+ .min_access_size = 4,
+ .max_access_size = 4,
+ },
+ };
+
+ VMXNET3_State *s = DO_UPCAST(VMXNET3_State, dev, dev);
+ int i;
+
+ DCBPRINTF("Starting init...");
+
+ memory_region_init_io(&s->bar0, &b0_ops, s,
+ "vmxnet3-b0", VMXNET3_PT_REG_SIZE);
+ pci_register_bar(&s->dev, VMXNET3_BAR0_IDX,
+ PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0);
+
+ memory_region_init_io(&s->bar1, &b1_ops, s,
+ "vmxnet3-b1", VMXNET3_VD_REG_SIZE);
+ pci_register_bar(&s->dev, VMXNET3_BAR1_IDX,
+ PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar1);
+
+ memory_region_init(&s->msix_bar, "vmxnet3-msix-bar",
+ VMXNET3_MSIX_BAR_SIZE);
+ pci_register_bar(&s->dev, VMXNET3_MSIX_BAR_IDX,
+ PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix_bar);
+
+ for (i = 0; i < ARRAY_SIZE(s->interrupt_states); i++) {
+ s->interrupt_states[i].is_asserted = false;
+ s->interrupt_states[i].is_pending = false;
+ s->interrupt_states[i].is_masked = true;
+ }
+
+ /* Interrupt pin A */
+ s->dev.config[PCI_INTERRUPT_PIN] = 0x01;
+
+#ifdef VMXNET3_ENABLE_MSIX
+ if (!vmxnet3_init_msix(s)) {
+ hw_error("Failed to initialize MSI-X, configuration is inconsistent.");
+ }
+#endif
+
+#ifdef VMXNET3_ENABLE_MSI
+ if (!vmxnet3_init_msi(s)) {
+ hw_error("Failed to initialize MSI, configuration is inconsistent.");
+ }
+#endif
+
+ vmxnet3_net_init(s);
+ add_boot_device_path(s->conf.bootindex, &dev->qdev, "/ethernet-phy@0");
+
+ return 0;
+}
+
+
+static int vmxnet3_pci_uninit(PCIDevice *dev)
+{
+ VMXNET3_State *s = DO_UPCAST(VMXNET3_State, dev, dev);
+
+ DCBPRINTF("Starting uninit...");
+
+ vmxnet3_net_uninit(s);
+
+#ifdef VMXNET3_ENABLE_MSIX
+ vmxnet3_cleanup_msix(s);
+#endif
+
+#ifdef VMXNET3_ENABLE_MSI
+ vmxnet3_cleanup_msi(s);
+#endif
+
+ memory_region_destroy(&s->bar0);
+ memory_region_destroy(&s->bar1);
+ memory_region_destroy(&s->msix_bar);
+
+ return 0;
+}
+
+static void vmxnet3_qdev_reset(DeviceState *dev)
+{
+ VMXNET3_State *s = DO_UPCAST(VMXNET3_State, dev.qdev, dev);
+ DCBPRINTF("Starting QDEV reset...");
+ vmxnet3_reset(s);
+}
+
+static const VMStateDescription vmstate_vmxnet3 = {
+ .name = "vmxnet3",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .minimum_version_id_old = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_PCI_DEVICE(dev, VMXNET3_State),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+#if defined(VMXNET3_ENABLE_MSI) || defined(VMXNET3_ENABLE_MSIX)
+static void
+vmxnet3_write_config(PCIDevice *pci, uint32_t addr, uint32_t val, int len)
+{
+ pci_default_write_config(pci, addr, val, len);
+#if defined(VMXNET3_ENABLE_MSIX)
+ msix_write_config(pci, addr, val, len);
+#endif
+#if defined(VMXNET3_ENABLE_MSI)
+ msi_write_config(pci, addr, val, len);
+#endif
+}
+#endif
+
+static Property vmxnet3_properties[] = {
+ DEFINE_NIC_PROPERTIES(VMXNET3_State, conf),
+ DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vmxnet3_class_init(ObjectClass *class, void *data)
+{
+ DeviceClass *dc = DEVICE_CLASS(class);
+ PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
+
+ c->init = vmxnet3_pci_init;
+ c->exit = vmxnet3_pci_uninit;
+ c->romfile = "pxe-e1000.rom";
+ c->vendor_id = PCI_VENDOR_ID_VMWARE;
+ c->device_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
+ c->revision = PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION;
+ c->class_id = PCI_CLASS_NETWORK_ETHERNET;
+ c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
+ c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
+#if defined(VMXNET3_ENABLE_MSI) || defined(VMXNET3_ENABLE_MSIX)
+ c->config_write = vmxnet3_write_config,
+#endif
+ dc->desc = "VMWare Paravirtualized Ethernet v3";
+ dc->reset = vmxnet3_qdev_reset;
+ dc->vmsd = &vmstate_vmxnet3;
+ dc->props = vmxnet3_properties;
+}
+
+static TypeInfo vmxnet3_info = {
+ .name = "vmxnet3",
+ .parent = TYPE_PCI_DEVICE,
+ .instance_size = sizeof(VMXNET3_State),
+ .class_init = vmxnet3_class_init,
+};
+
+static void vmxnet3_register_types(void)
+{
+ DCBPRINTF("vmxnet3_register_types called...");
+ type_register_static(&vmxnet3_info);
+}
+
+type_init(vmxnet3_register_types)
diff --git a/qemu/hw/vmxnet3.h b/qemu/hw/vmxnet3.h
new file mode 100644
index 0000000..6ec3fd5
--- /dev/null
+++ b/qemu/hw/vmxnet3.h
@@ -0,0 +1,727 @@
+/*
+ * QEMU VMWARE VMXNET3 paravirtual NIC
+ *
+ * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Dmitry Fleytman <dmitry@daynix.com>
+ * Yan Vugenfirer <yan@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef _QEMU_VMXNET3_H
+#define _QEMU_VMXNET3_H
+
+#define VMXNET3_DEVICE_MAX_TX_QUEUES 8
+#define VMXNET3_DEVICE_MAX_RX_QUEUES 8 /* Keep this value as a power of 2 */
+
+/* Defines needed to integrate VMWARE headers */
+#define u64 uint64_t
+#define u32 uint32_t
+#define u16 uint16_t
+#define u8 uint8_t
+#define __le16 uint16_t
+#define __le32 uint32_t
+#define __le64 uint64_t
+#define __packed QEMU_PACKED
+
+#if defined(HOST_WORDS_BIGENDIAN)
+#define const_cpu_to_le64(x) bswap_64(x)
+#define __BIG_ENDIAN_BITFIELD
+#else
+#define const_cpu_to_le64(x) (x)
+#endif
+
+/* Following is an interface definition for */
+/* VMXNET3 device as provided by VMWARE */
+/* Original file and copyright is available */
+/* in Linux kernel v3.2.8 at */
+/* drivers/net/vmxnet3/vmxnet3_defs.h */
+
+struct UPT1_TxStats {
+ u64 TSOPktsTxOK; /* TSO pkts post-segmentation */
+ u64 TSOBytesTxOK;
+ u64 ucastPktsTxOK;
+ u64 ucastBytesTxOK;
+ u64 mcastPktsTxOK;
+ u64 mcastBytesTxOK;
+ u64 bcastPktsTxOK;
+ u64 bcastBytesTxOK;
+ u64 pktsTxError;
+ u64 pktsTxDiscard;
+};
+
+struct UPT1_RxStats {
+ u64 LROPktsRxOK; /* LRO pkts */
+ u64 LROBytesRxOK; /* bytes from LRO pkts */
+ /* the following counters are for pkts from the wire, i.e., pre-LRO */
+ u64 ucastPktsRxOK;
+ u64 ucastBytesRxOK;
+ u64 mcastPktsRxOK;
+ u64 mcastBytesRxOK;
+ u64 bcastPktsRxOK;
+ u64 bcastBytesRxOK;
+ u64 pktsRxOutOfBuf;
+ u64 pktsRxError;
+};
+
+/* interrupt moderation level */
+enum {
+ UPT1_IML_NONE = 0, /* no interrupt moderation */
+ UPT1_IML_HIGHEST = 7, /* least intr generated */
+ UPT1_IML_ADAPTIVE = 8, /* adpative intr moderation */
+};
+/* values for UPT1_RSSConf.hashFunc */
+enum {
+ UPT1_RSS_HASH_TYPE_NONE = 0x0,
+ UPT1_RSS_HASH_TYPE_IPV4 = 0x01,
+ UPT1_RSS_HASH_TYPE_TCP_IPV4 = 0x02,
+ UPT1_RSS_HASH_TYPE_IPV6 = 0x04,
+ UPT1_RSS_HASH_TYPE_TCP_IPV6 = 0x08,
+};
+
+enum {
+ UPT1_RSS_HASH_FUNC_NONE = 0x0,
+ UPT1_RSS_HASH_FUNC_TOEPLITZ = 0x01,
+};
+
+#define UPT1_RSS_MAX_KEY_SIZE 40
+#define UPT1_RSS_MAX_IND_TABLE_SIZE 128
+
+struct UPT1_RSSConf {
+ u16 hashType;
+ u16 hashFunc;
+ u16 hashKeySize;
+ u16 indTableSize;
+ u8 hashKey[UPT1_RSS_MAX_KEY_SIZE];
+ u8 indTable[UPT1_RSS_MAX_IND_TABLE_SIZE];
+};
+
+/* features */
+enum {
+ UPT1_F_RXCSUM = const_cpu_to_le64(0x0001), /* rx csum verification */
+ UPT1_F_RSS = const_cpu_to_le64(0x0002),
+ UPT1_F_RXVLAN = const_cpu_to_le64(0x0004), /* VLAN tag stripping */
+ UPT1_F_LRO = const_cpu_to_le64(0x0008),
+};
+
+/* all registers are 32 bit wide */
+/* BAR 1 */
+enum {
+ VMXNET3_REG_VRRS = 0x0, /* Vmxnet3 Revision Report Selection */
+ VMXNET3_REG_UVRS = 0x8, /* UPT Version Report Selection */
+ VMXNET3_REG_DSAL = 0x10, /* Driver Shared Address Low */
+ VMXNET3_REG_DSAH = 0x18, /* Driver Shared Address High */
+ VMXNET3_REG_CMD = 0x20, /* Command */
+ VMXNET3_REG_MACL = 0x28, /* MAC Address Low */
+ VMXNET3_REG_MACH = 0x30, /* MAC Address High */
+ VMXNET3_REG_ICR = 0x38, /* Interrupt Cause Register */
+ VMXNET3_REG_ECR = 0x40 /* Event Cause Register */
+};
+
+/* BAR 0 */
+enum {
+ VMXNET3_REG_IMR = 0x0, /* Interrupt Mask Register */
+ VMXNET3_REG_TXPROD = 0x600, /* Tx Producer Index */
+ VMXNET3_REG_RXPROD = 0x800, /* Rx Producer Index for ring 1 */
+ VMXNET3_REG_RXPROD2 = 0xA00 /* Rx Producer Index for ring 2 */
+};
+
+#define VMXNET3_PT_REG_SIZE 4096 /* BAR 0 */
+#define VMXNET3_VD_REG_SIZE 4096 /* BAR 1 */
+
+#define VMXNET3_REG_ALIGN 8 /* All registers are 8-byte aligned. */
+#define VMXNET3_REG_ALIGN_MASK 0x7
+
+/* I/O Mapped access to registers */
+#define VMXNET3_IO_TYPE_PT 0
+#define VMXNET3_IO_TYPE_VD 1
+#define VMXNET3_IO_ADDR(type, reg) (((type) << 24) | ((reg) & 0xFFFFFF))
+#define VMXNET3_IO_TYPE(addr) ((addr) >> 24)
+#define VMXNET3_IO_REG(addr) ((addr) & 0xFFFFFF)
+
+enum {
+ VMXNET3_CMD_FIRST_SET = 0xCAFE0000,
+ VMXNET3_CMD_ACTIVATE_DEV = VMXNET3_CMD_FIRST_SET, /* 0xCAFE0000 */
+ VMXNET3_CMD_QUIESCE_DEV, /* 0xCAFE0001 */
+ VMXNET3_CMD_RESET_DEV, /* 0xCAFE0002 */
+ VMXNET3_CMD_UPDATE_RX_MODE, /* 0xCAFE0003 */
+ VMXNET3_CMD_UPDATE_MAC_FILTERS, /* 0xCAFE0004 */
+ VMXNET3_CMD_UPDATE_VLAN_FILTERS, /* 0xCAFE0005 */
+ VMXNET3_CMD_UPDATE_RSSIDT, /* 0xCAFE0006 */
+ VMXNET3_CMD_UPDATE_IML, /* 0xCAFE0007 */
+ VMXNET3_CMD_UPDATE_PMCFG, /* 0xCAFE0008 */
+ VMXNET3_CMD_UPDATE_FEATURE, /* 0xCAFE0009 */
+ VMXNET3_CMD_LOAD_PLUGIN, /* 0xCAFE000A */
+
+ VMXNET3_CMD_FIRST_GET = 0xF00D0000,
+ VMXNET3_CMD_GET_QUEUE_STATUS = VMXNET3_CMD_FIRST_GET, /* 0xF00D0000 */
+ VMXNET3_CMD_GET_STATS, /* 0xF00D0001 */
+ VMXNET3_CMD_GET_LINK, /* 0xF00D0002 */
+ VMXNET3_CMD_GET_PERM_MAC_LO, /* 0xF00D0003 */
+ VMXNET3_CMD_GET_PERM_MAC_HI, /* 0xF00D0004 */
+ VMXNET3_CMD_GET_DID_LO, /* 0xF00D0005 */
+ VMXNET3_CMD_GET_DID_HI, /* 0xF00D0006 */
+ VMXNET3_CMD_GET_DEV_EXTRA_INFO, /* 0xF00D0007 */
+ VMXNET3_CMD_GET_CONF_INTR /* 0xF00D0008 */
+};
+
+/*
+ * Little Endian layout of bitfields -
+ * Byte 0 : 7.....len.....0
+ * Byte 1 : rsvd gen 13.len.8
+ * Byte 2 : 5.msscof.0 ext1 dtype
+ * Byte 3 : 13...msscof...6
+ *
+ * Big Endian layout of bitfields -
+ * Byte 0: 13...msscof...6
+ * Byte 1 : 5.msscof.0 ext1 dtype
+ * Byte 2 : rsvd gen 13.len.8
+ * Byte 3 : 7.....len.....0
+ *
+ * Thus, le32_to_cpu on the dword will allow the big endian driver to read
+ * the bit fields correctly. And cpu_to_le32 will convert bitfields
+ * bit fields written by big endian driver to format required by device.
+ */
+
+struct Vmxnet3_TxDesc {
+ __le64 addr;
+
+#ifdef __BIG_ENDIAN_BITFIELD
+ u32 msscof:14; /* MSS, checksum offset, flags */
+ u32 ext1:1;
+ u32 dtype:1; /* descriptor type */
+ u32 rsvd:1;
+ u32 gen:1; /* generation bit */
+ u32 len:14;
+#else
+ u32 len:14;
+ u32 gen:1; /* generation bit */
+ u32 rsvd:1;
+ u32 dtype:1; /* descriptor type */
+ u32 ext1:1;
+ u32 msscof:14; /* MSS, checksum offset, flags */
+#endif /* __BIG_ENDIAN_BITFIELD */
+
+#ifdef __BIG_ENDIAN_BITFIELD
+ u32 tci:16; /* Tag to Insert */
+ u32 ti:1; /* VLAN Tag Insertion */
+ u32 ext2:1;
+ u32 cq:1; /* completion request */
+ u32 eop:1; /* End Of Packet */
+ u32 om:2; /* offload mode */
+ u32 hlen:10; /* header len */
+#else
+ u32 hlen:10; /* header len */
+ u32 om:2; /* offload mode */
+ u32 eop:1; /* End Of Packet */
+ u32 cq:1; /* completion request */
+ u32 ext2:1;
+ u32 ti:1; /* VLAN Tag Insertion */
+ u32 tci:16; /* Tag to Insert */
+#endif /* __BIG_ENDIAN_BITFIELD */
+};
+
+/* TxDesc.OM values */
+#define VMXNET3_OM_NONE 0
+#define VMXNET3_OM_CSUM 2
+#define VMXNET3_OM_TSO 3
+
+/* fields in TxDesc we access w/o using bit fields */
+#define VMXNET3_TXD_EOP_SHIFT 12
+#define VMXNET3_TXD_CQ_SHIFT 13
+#define VMXNET3_TXD_GEN_SHIFT 14
+#define VMXNET3_TXD_EOP_DWORD_SHIFT 3
+#define VMXNET3_TXD_GEN_DWORD_SHIFT 2
+
+#define VMXNET3_TXD_CQ (1 << VMXNET3_TXD_CQ_SHIFT)
+#define VMXNET3_TXD_EOP (1 << VMXNET3_TXD_EOP_SHIFT)
+#define VMXNET3_TXD_GEN (1 << VMXNET3_TXD_GEN_SHIFT)
+
+#define VMXNET3_HDR_COPY_SIZE 128
+
+
+struct Vmxnet3_TxDataDesc {
+ u8 data[VMXNET3_HDR_COPY_SIZE];
+};
+
+#define VMXNET3_TCD_GEN_SHIFT 31
+#define VMXNET3_TCD_GEN_SIZE 1
+#define VMXNET3_TCD_TXIDX_SHIFT 0
+#define VMXNET3_TCD_TXIDX_SIZE 12
+#define VMXNET3_TCD_GEN_DWORD_SHIFT 3
+
+struct Vmxnet3_TxCompDesc {
+ u32 txdIdx:12; /* Index of the EOP TxDesc */
+ u32 ext1:20;
+
+ __le32 ext2;
+ __le32 ext3;
+
+ u32 rsvd:24;
+ u32 type:7; /* completion type */
+ u32 gen:1; /* generation bit */
+};
+
+struct Vmxnet3_RxDesc {
+ __le64 addr;
+
+#ifdef __BIG_ENDIAN_BITFIELD
+ u32 gen:1; /* Generation bit */
+ u32 rsvd:15;
+ u32 dtype:1; /* Descriptor type */
+ u32 btype:1; /* Buffer Type */
+ u32 len:14;
+#else
+ u32 len:14;
+ u32 btype:1; /* Buffer Type */
+ u32 dtype:1; /* Descriptor type */
+ u32 rsvd:15;
+ u32 gen:1; /* Generation bit */
+#endif
+ u32 ext1;
+};
+
+/* values of RXD.BTYPE */
+#define VMXNET3_RXD_BTYPE_HEAD 0 /* head only */
+#define VMXNET3_RXD_BTYPE_BODY 1 /* body only */
+
+/* fields in RxDesc we access w/o using bit fields */
+#define VMXNET3_RXD_BTYPE_SHIFT 14
+#define VMXNET3_RXD_GEN_SHIFT 31
+
+struct Vmxnet3_RxCompDesc {
+#ifdef __BIG_ENDIAN_BITFIELD
+ u32 ext2:1;
+ u32 cnc:1; /* Checksum Not Calculated */
+ u32 rssType:4; /* RSS hash type used */
+ u32 rqID:10; /* rx queue/ring ID */
+ u32 sop:1; /* Start of Packet */
+ u32 eop:1; /* End of Packet */
+ u32 ext1:2;
+ u32 rxdIdx:12; /* Index of the RxDesc */
+#else
+ u32 rxdIdx:12; /* Index of the RxDesc */
+ u32 ext1:2;
+ u32 eop:1; /* End of Packet */
+ u32 sop:1; /* Start of Packet */
+ u32 rqID:10; /* rx queue/ring ID */
+ u32 rssType:4; /* RSS hash type used */
+ u32 cnc:1; /* Checksum Not Calculated */
+ u32 ext2:1;
+#endif /* __BIG_ENDIAN_BITFIELD */
+
+ __le32 rssHash; /* RSS hash value */
+
+#ifdef __BIG_ENDIAN_BITFIELD
+ u32 tci:16; /* Tag stripped */
+ u32 ts:1; /* Tag is stripped */
+ u32 err:1; /* Error */
+ u32 len:14; /* data length */
+#else
+ u32 len:14; /* data length */
+ u32 err:1; /* Error */
+ u32 ts:1; /* Tag is stripped */
+ u32 tci:16; /* Tag stripped */
+#endif /* __BIG_ENDIAN_BITFIELD */
+
+
+#ifdef __BIG_ENDIAN_BITFIELD
+ u32 gen:1; /* generation bit */
+ u32 type:7; /* completion type */
+ u32 fcs:1; /* Frame CRC correct */
+ u32 frg:1; /* IP Fragment */
+ u32 v4:1; /* IPv4 */
+ u32 v6:1; /* IPv6 */
+ u32 ipc:1; /* IP Checksum Correct */
+ u32 tcp:1; /* TCP packet */
+ u32 udp:1; /* UDP packet */
+ u32 tuc:1; /* TCP/UDP Checksum Correct */
+ u32 csum:16;
+#else
+ u32 csum:16;
+ u32 tuc:1; /* TCP/UDP Checksum Correct */
+ u32 udp:1; /* UDP packet */
+ u32 tcp:1; /* TCP packet */
+ u32 ipc:1; /* IP Checksum Correct */
+ u32 v6:1; /* IPv6 */
+ u32 v4:1; /* IPv4 */
+ u32 frg:1; /* IP Fragment */
+ u32 fcs:1; /* Frame CRC correct */
+ u32 type:7; /* completion type */
+ u32 gen:1; /* generation bit */
+#endif /* __BIG_ENDIAN_BITFIELD */
+};
+
+/* fields in RxCompDesc we access via Vmxnet3_GenericDesc.dword[3] */
+#define VMXNET3_RCD_TUC_SHIFT 16
+#define VMXNET3_RCD_IPC_SHIFT 19
+
+/* fields in RxCompDesc we access via Vmxnet3_GenericDesc.qword[1] */
+#define VMXNET3_RCD_TYPE_SHIFT 56
+#define VMXNET3_RCD_GEN_SHIFT 63
+
+/* csum OK for TCP/UDP pkts over IP */
+#define VMXNET3_RCD_CSUM_OK (1 << VMXNET3_RCD_TUC_SHIFT | \
+ 1 << VMXNET3_RCD_IPC_SHIFT)
+#define VMXNET3_TXD_GEN_SIZE 1
+#define VMXNET3_TXD_EOP_SIZE 1
+
+/* value of RxCompDesc.rssType */
+enum {
+ VMXNET3_RCD_RSS_TYPE_NONE = 0,
+ VMXNET3_RCD_RSS_TYPE_IPV4 = 1,
+ VMXNET3_RCD_RSS_TYPE_TCPIPV4 = 2,
+ VMXNET3_RCD_RSS_TYPE_IPV6 = 3,
+ VMXNET3_RCD_RSS_TYPE_TCPIPV6 = 4,
+};
+
+
+/* a union for accessing all cmd/completion descriptors */
+union Vmxnet3_GenericDesc {
+ __le64 qword[2];
+ __le32 dword[4];
+ __le16 word[8];
+ struct Vmxnet3_TxDesc txd;
+ struct Vmxnet3_RxDesc rxd;
+ struct Vmxnet3_TxCompDesc tcd;
+ struct Vmxnet3_RxCompDesc rcd;
+};
+
+#define VMXNET3_INIT_GEN 1
+
+/* Max size of a single tx buffer */
+#define VMXNET3_MAX_TX_BUF_SIZE (1 << 14)
+
+/* # of tx desc needed for a tx buffer size */
+#define VMXNET3_TXD_NEEDED(size) (((size) + VMXNET3_MAX_TX_BUF_SIZE - 1) / \
+ VMXNET3_MAX_TX_BUF_SIZE)
+
+/* max # of tx descs for a non-tso pkt */
+#define VMXNET3_MAX_TXD_PER_PKT 16
+
+/* Max size of a single rx buffer */
+#define VMXNET3_MAX_RX_BUF_SIZE ((1 << 14) - 1)
+/* Minimum size of a type 0 buffer */
+#define VMXNET3_MIN_T0_BUF_SIZE 128
+#define VMXNET3_MAX_CSUM_OFFSET 1024
+
+/* Ring base address alignment */
+#define VMXNET3_RING_BA_ALIGN 512
+#define VMXNET3_RING_BA_MASK (VMXNET3_RING_BA_ALIGN - 1)
+
+/* Ring size must be a multiple of 32 */
+#define VMXNET3_RING_SIZE_ALIGN 32
+#define VMXNET3_RING_SIZE_MASK (VMXNET3_RING_SIZE_ALIGN - 1)
+
+/* Max ring size */
+#define VMXNET3_TX_RING_MAX_SIZE 4096
+#define VMXNET3_TC_RING_MAX_SIZE 4096
+#define VMXNET3_RX_RING_MAX_SIZE 4096
+#define VMXNET3_RC_RING_MAX_SIZE 8192
+
+/* a list of reasons for queue stop */
+
+enum {
+ VMXNET3_ERR_NOEOP = 0x80000000, /* cannot find the EOP desc of a pkt */
+ VMXNET3_ERR_TXD_REUSE = 0x80000001, /* reuse TxDesc before tx completion */
+ VMXNET3_ERR_BIG_PKT = 0x80000002, /* too many TxDesc for a pkt */
+ VMXNET3_ERR_DESC_NOT_SPT = 0x80000003, /* descriptor type not supported */
+ VMXNET3_ERR_SMALL_BUF = 0x80000004, /* type 0 buffer too small */
+ VMXNET3_ERR_STRESS = 0x80000005, /* stress option firing in vmkernel */
+ VMXNET3_ERR_SWITCH = 0x80000006, /* mode switch failure */
+ VMXNET3_ERR_TXD_INVALID = 0x80000007, /* invalid TxDesc */
+};
+
+/* completion descriptor types */
+#define VMXNET3_CDTYPE_TXCOMP 0 /* Tx Completion Descriptor */
+#define VMXNET3_CDTYPE_RXCOMP 3 /* Rx Completion Descriptor */
+
+enum {
+ VMXNET3_GOS_BITS_UNK = 0, /* unknown */
+ VMXNET3_GOS_BITS_32 = 1,
+ VMXNET3_GOS_BITS_64 = 2,
+};
+
+#define VMXNET3_GOS_TYPE_UNK 0 /* unknown */
+#define VMXNET3_GOS_TYPE_LINUX 1
+#define VMXNET3_GOS_TYPE_WIN 2
+#define VMXNET3_GOS_TYPE_SOLARIS 3
+#define VMXNET3_GOS_TYPE_FREEBSD 4
+#define VMXNET3_GOS_TYPE_PXE 5
+
+struct Vmxnet3_GOSInfo {
+#ifdef __BIG_ENDIAN_BITFIELD
+ u32 gosMisc:10; /* other info about gos */
+ u32 gosVer:16; /* gos version */
+ u32 gosType:4; /* which guest */
+ u32 gosBits:2; /* 32-bit or 64-bit? */
+#else
+ u32 gosBits:2; /* 32-bit or 64-bit? */
+ u32 gosType:4; /* which guest */
+ u32 gosVer:16; /* gos version */
+ u32 gosMisc:10; /* other info about gos */
+#endif /* __BIG_ENDIAN_BITFIELD */
+};
+
+struct Vmxnet3_DriverInfo {
+ __le32 version;
+ struct Vmxnet3_GOSInfo gos;
+ __le32 vmxnet3RevSpt;
+ __le32 uptVerSpt;
+};
+
+
+#define VMXNET3_REV1_MAGIC 0xbabefee1
+
+/*
+ * QueueDescPA must be 128 bytes aligned. It points to an array of
+ * Vmxnet3_TxQueueDesc followed by an array of Vmxnet3_RxQueueDesc.
+ * The number of Vmxnet3_TxQueueDesc/Vmxnet3_RxQueueDesc are specified by
+ * Vmxnet3_MiscConf.numTxQueues/numRxQueues, respectively.
+ */
+#define VMXNET3_QUEUE_DESC_ALIGN 128
+
+
+struct Vmxnet3_MiscConf {
+ struct Vmxnet3_DriverInfo driverInfo;
+ __le64 uptFeatures;
+ __le64 ddPA; /* driver data PA */
+ __le64 queueDescPA; /* queue descriptor table PA */
+ __le32 ddLen; /* driver data len */
+ __le32 queueDescLen; /* queue desc. table len in bytes */
+ __le32 mtu;
+ __le16 maxNumRxSG;
+ u8 numTxQueues;
+ u8 numRxQueues;
+ __le32 reserved[4];
+};
+
+
+struct Vmxnet3_TxQueueConf {
+ __le64 txRingBasePA;
+ __le64 dataRingBasePA;
+ __le64 compRingBasePA;
+ __le64 ddPA; /* driver data */
+ __le64 reserved;
+ __le32 txRingSize; /* # of tx desc */
+ __le32 dataRingSize; /* # of data desc */
+ __le32 compRingSize; /* # of comp desc */
+ __le32 ddLen; /* size of driver data */
+ u8 intrIdx;
+ u8 _pad[7];
+};
+
+
+struct Vmxnet3_RxQueueConf {
+ __le64 rxRingBasePA[2];
+ __le64 compRingBasePA;
+ __le64 ddPA; /* driver data */
+ __le64 reserved;
+ __le32 rxRingSize[2]; /* # of rx desc */
+ __le32 compRingSize; /* # of rx comp desc */
+ __le32 ddLen; /* size of driver data */
+ u8 intrIdx;
+ u8 _pad[7];
+};
+
+
+enum vmxnet3_intr_mask_mode {
+ VMXNET3_IMM_AUTO = 0,
+ VMXNET3_IMM_ACTIVE = 1,
+ VMXNET3_IMM_LAZY = 2
+};
+
+enum vmxnet3_intr_type {
+ VMXNET3_IT_AUTO = 0,
+ VMXNET3_IT_INTX = 1,
+ VMXNET3_IT_MSI = 2,
+ VMXNET3_IT_MSIX = 3
+};
+
+#define VMXNET3_MAX_TX_QUEUES 8
+#define VMXNET3_MAX_RX_QUEUES 16
+/* addition 1 for events */
+#define VMXNET3_MAX_INTRS 25
+
+/* value of intrCtrl */
+#define VMXNET3_IC_DISABLE_ALL 0x1 /* bit 0 */
+
+
+struct Vmxnet3_IntrConf {
+ bool autoMask;
+ u8 numIntrs; /* # of interrupts */
+ u8 eventIntrIdx;
+ u8 modLevels[VMXNET3_MAX_INTRS]; /* moderation level for
+ * each intr */
+ __le32 intrCtrl;
+ __le32 reserved[2];
+};
+
+/* one bit per VLAN ID, the size is in the units of u32 */
+#define VMXNET3_VFT_SIZE (4096/(sizeof(uint32_t)*8))
+
+
+struct Vmxnet3_QueueStatus {
+ bool stopped;
+ u8 _pad[3];
+ __le32 error;
+};
+
+
+struct Vmxnet3_TxQueueCtrl {
+ __le32 txNumDeferred;
+ __le32 txThreshold;
+ __le64 reserved;
+};
+
+
+struct Vmxnet3_RxQueueCtrl {
+ bool updateRxProd;
+ u8 _pad[7];
+ __le64 reserved;
+};
+
+enum {
+ VMXNET3_RXM_UCAST = 0x01, /* unicast only */
+ VMXNET3_RXM_MCAST = 0x02, /* multicast passing the filters */
+ VMXNET3_RXM_BCAST = 0x04, /* broadcast only */
+ VMXNET3_RXM_ALL_MULTI = 0x08, /* all multicast */
+ VMXNET3_RXM_PROMISC = 0x10 /* promiscuous */
+};
+
+struct Vmxnet3_RxFilterConf {
+ __le32 rxMode; /* VMXNET3_RXM_xxx */
+ __le16 mfTableLen; /* size of the multicast filter table */
+ __le16 _pad1;
+ __le64 mfTablePA; /* PA of the multicast filters table */
+ __le32 vfTable[VMXNET3_VFT_SIZE]; /* vlan filter */
+};
+
+
+#define VMXNET3_PM_MAX_FILTERS 6
+#define VMXNET3_PM_MAX_PATTERN_SIZE 128
+#define VMXNET3_PM_MAX_MASK_SIZE (VMXNET3_PM_MAX_PATTERN_SIZE / 8)
+
+#define VMXNET3_PM_WAKEUP_MAGIC cpu_to_le16(0x01) /* wake up on magic pkts */
+#define VMXNET3_PM_WAKEUP_FILTER cpu_to_le16(0x02) /* wake up on pkts matching
+ * filters */
+
+
+struct Vmxnet3_PM_PktFilter {
+ u8 maskSize;
+ u8 patternSize;
+ u8 mask[VMXNET3_PM_MAX_MASK_SIZE];
+ u8 pattern[VMXNET3_PM_MAX_PATTERN_SIZE];
+ u8 pad[6];
+};
+
+
+struct Vmxnet3_PMConf {
+ __le16 wakeUpEvents; /* VMXNET3_PM_WAKEUP_xxx */
+ u8 numFilters;
+ u8 pad[5];
+ struct Vmxnet3_PM_PktFilter filters[VMXNET3_PM_MAX_FILTERS];
+};
+
+
+struct Vmxnet3_VariableLenConfDesc {
+ __le32 confVer;
+ __le32 confLen;
+ __le64 confPA;
+};
+
+
+struct Vmxnet3_TxQueueDesc {
+ struct Vmxnet3_TxQueueCtrl ctrl;
+ struct Vmxnet3_TxQueueConf conf;
+
+ /* Driver read after a GET command */
+ struct Vmxnet3_QueueStatus status;
+ struct UPT1_TxStats stats;
+ u8 _pad[88]; /* 128 aligned */
+};
+
+
+struct Vmxnet3_RxQueueDesc {
+ struct Vmxnet3_RxQueueCtrl ctrl;
+ struct Vmxnet3_RxQueueConf conf;
+ /* Driver read after a GET commad */
+ struct Vmxnet3_QueueStatus status;
+ struct UPT1_RxStats stats;
+ u8 __pad[88]; /* 128 aligned */
+};
+
+
+struct Vmxnet3_DSDevRead {
+ /* read-only region for device, read by dev in response to a SET cmd */
+ struct Vmxnet3_MiscConf misc;
+ struct Vmxnet3_IntrConf intrConf;
+ struct Vmxnet3_RxFilterConf rxFilterConf;
+ struct Vmxnet3_VariableLenConfDesc rssConfDesc;
+ struct Vmxnet3_VariableLenConfDesc pmConfDesc;
+ struct Vmxnet3_VariableLenConfDesc pluginConfDesc;
+};
+
+/* All structures in DriverShared are padded to multiples of 8 bytes */
+struct Vmxnet3_DriverShared {
+ __le32 magic;
+ /* make devRead start at 64bit boundaries */
+ __le32 pad;
+ struct Vmxnet3_DSDevRead devRead;
+ __le32 ecr;
+ __le32 reserved[5];
+};
+
+
+#define VMXNET3_ECR_RQERR (1 << 0)
+#define VMXNET3_ECR_TQERR (1 << 1)
+#define VMXNET3_ECR_LINK (1 << 2)
+#define VMXNET3_ECR_DIC (1 << 3)
+#define VMXNET3_ECR_DEBUG (1 << 4)
+
+/* flip the gen bit of a ring */
+#define VMXNET3_FLIP_RING_GEN(gen) ((gen) = (gen) ^ 0x1)
+
+/* only use this if moving the idx won't affect the gen bit */
+#define VMXNET3_INC_RING_IDX_ONLY(idx, ring_size) \
+ do {\
+ (idx)++;\
+ if (unlikely((idx) == (ring_size))) {\
+ (idx) = 0;\
+ } \
+ } while (0)
+
+#define VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid) \
+ (vfTable[vid >> 5] |= (1 << (vid & 31)))
+#define VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid) \
+ (vfTable[vid >> 5] &= ~(1 << (vid & 31)))
+
+#define VMXNET3_VFTABLE_ENTRY_IS_SET(vfTable, vid) \
+ ((vfTable[vid >> 5] & (1 << (vid & 31))) != 0)
+
+#define VMXNET3_MAX_MTU 9000
+#define VMXNET3_MIN_MTU 60
+
+#define VMXNET3_LINK_UP (10000 << 16 | 1) /* 10 Gbps, up */
+#define VMXNET3_LINK_DOWN 0
+
+#undef u64
+#undef u32
+#undef u16
+#undef u8
+#undef __le16
+#undef __le32
+#undef __le64
+#undef __packed
+#undef const_cpu_to_le64
+#if defined(HOST_WORDS_BIGENDIAN)
+#undef __BIG_ENDIAN_BITFIELD
+#endif
+
+#endif
diff --git a/qemu/hw/vmxnet3_debug.h b/qemu/hw/vmxnet3_debug.h
new file mode 100644
index 0000000..8383c22
--- /dev/null
+++ b/qemu/hw/vmxnet3_debug.h
@@ -0,0 +1,104 @@
+/*
+ * QEMU VMWARE VMXNET3 paravirtual NIC - debugging facilities
+ *
+ * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Dmitry Fleytman <dmitry@daynix.com>
+ * Yan Vugenfirer <yan@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef _QEMU_VMXNET3_DEBUG_H
+#define _QEMU_VMXNET3_DEBUG_H
+
+/* #define DEBUG_VMXNET3_CB */
+#define DEBUG_VMXNET3_WARNINGS
+#define DEBUG_VMXNET3_ERRORS
+/* #define DEBUG_VMXNET3_INTERRUPTS */
+/* #define DEBUG_VMXNET3_CONFIG */
+/* #define DEBUG_VMXNET3_SHMEM_ACCESS */
+/* #define DEBUG_VMXNET3_RINGS */
+/* #define DEBUG_VMXNET3_PACKETS */
+
+#ifdef DEBUG_VMXNET3_SHMEM_ACCESS
+#define DSHPRINTF(fmt, ...) \
+ do { \
+ printf("[vmxnet3][SH][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
+ } while (0)
+#else
+#define DSHPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#ifdef DEBUG_VMXNET3_CB
+#define DCBPRINTF(fmt, ...) \
+ do { \
+ printf("[vmxnet3][CB][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
+ } while (0)
+#else
+#define DCBPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#ifdef DEBUG_VMXNET3_PACKETS
+#define DPKPRINTF(fmt, ...) \
+ do { \
+ printf("[vmxnet3][PK][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
+ } while (0)
+#else
+#define DPKPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#ifdef DEBUG_VMXNET3_WARNINGS
+#define DWRPRINTF(fmt, ...) \
+ do { \
+ printf("[vmxnet3][WR][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
+ } while (0)
+#else
+#define DWRPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#ifdef DEBUG_VMXNET3_ERRORS
+#define DERPRINTF(fmt, ...) \
+ do { \
+ printf("[vmxnet3][ER][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
+ } while (0)
+#else
+#define DERPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#ifdef DEBUG_VMXNET3_INTERRUPTS
+#define DIRPRINTF(fmt, ...) \
+ do { \
+ printf("[vmxnet3][IR][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
+ } while (0)
+#else
+#define DIRPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#ifdef DEBUG_VMXNET3_CONFIG
+#define DCFPRINTF(fmt, ...) \
+ do { \
+ printf("[vmxnet3][CF][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
+ } while (0)
+#else
+#define DCFPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#ifdef DEBUG_VMXNET3_RINGS
+#define DRIPRINTF(fmt, ...) \
+ do { \
+ printf("[vmxnet3][RI][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
+ } while (0)
+#else
+#define DRIPRINTF(fmt, ...) do {} while (0)
+#endif
+
+#define MAC_FMT "%02X:%02X:%02X:%02X:%02X:%02X"
+#define MAC_ARG(a) (a)[0], (a)[1], (a)[2], (a)[3], (a)[4], (a)[5]
+
+#endif /* _QEMU_VMXNET3_DEBUG_H */
diff --git a/qemu/hw/vmxnet_utils.c b/qemu/hw/vmxnet_utils.c
new file mode 100644
index 0000000..e310828
--- /dev/null
+++ b/qemu/hw/vmxnet_utils.c
@@ -0,0 +1,172 @@
+/*
+ * QEMU VMWARE paravirtual devices - network auxiliary code
+ *
+ * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Dmitry Fleytman <dmitry@daynix.com>
+ * Yan Vugenfirer <yan@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "hw.h"
+#include "virtio-net.h"
+#include "vmxnet_utils.h"
+#include "net/checksum.h"
+
+void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag)
+{
+ switch (be16_to_cpu(ehdr->h_proto)) {
+ case ETH_P_VLAN:
+ case ETH_P_DVLAN: {
+ /* Header already present, just put proper VLAN tag */
+ struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
+ vhdr->h_tci = cpu_to_be16(vlan_tag);
+ }
+ default: {
+ /* No VLAN header, put a new one */
+ struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
+ vhdr->h_proto = ehdr->h_proto;
+ ehdr->h_proto = cpu_to_be16(ETH_P_VLAN);
+ vhdr->h_tci = cpu_to_be16(vlan_tag);
+ }
+ }
+}
+
+bool eth_setup_tx_offloads(uint8_t *l3hdr,
+ size_t l3hdr_len,
+ size_t l3hdr_off,
+ uint32_t l3payload_len,
+ struct virtio_net_hdr *vhdr,
+ bool more_frags,
+ uint16_t fragmentation_offset)
+{
+ uint16_t csum;
+
+ switch (vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
+ case VIRTIO_NET_HDR_GSO_TCPV4:
+ case VIRTIO_NET_HDR_GSO_UDP: {
+ struct ip_header *iphdr = (struct ip_header *) l3hdr;
+ uint16_t new_ip_off;
+
+ vhdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vhdr->csum_start = l3hdr_off;
+ vhdr->csum_offset = offsetof(struct ip_header, ip_sum);
+
+ if (l3payload_len + l3hdr_len > ETH_MAX_IP_DGRAM_LEN) {
+ /* This must never happen with fragmentation enabled */
+ assert(0 == more_frags);
+ assert(0 == fragmentation_offset);
+ return false;
+ }
+
+ iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len);
+ assert(0 == (~IP_OFFMASK & fragmentation_offset));
+
+ new_ip_off = fragmentation_offset | (more_frags ? IP_MF : 0) |
+ (be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF));
+
+ iphdr->ip_off = cpu_to_be16(new_ip_off);
+
+ /* Due to Linux bridge bugs/features IP header checksum */
+ /* must be calculated in order to make it process */
+ /* packet with segmentation requirements successfully */
+ eth_put_csum(l3hdr, vhdr->csum_offset, 0);
+ csum = net_raw_checksum(l3hdr, l3hdr_len);
+ eth_put_csum(l3hdr, vhdr->csum_offset, csum);
+ }
+ break;
+
+ case VIRTIO_NET_HDR_GSO_TCPV6:
+ default:
+ vhdr->flags = 0;
+ break;
+ }
+
+ return true;
+}
+
+uint8_t
+eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr)
+{
+ uint8_t ecn_state = 0;
+
+ if (ETH_P_IP == l3_proto) {
+ struct ip_header *iphdr = (struct ip_header *) l3_hdr;
+
+ if (IP_HEADER_VERSION_4 == IP_HEADER_VERSION(iphdr)) {
+ if (IPTOS_ECN_CE == IPTOS_ECN(iphdr->ip_tos)) {
+ ecn_state = VIRTIO_NET_HDR_GSO_ECN;
+ }
+ if (IP_PROTO_TCP == iphdr->ip_p) {
+ return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
+ } else if (IP_PROTO_UDP == iphdr->ip_p) {
+ return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
+ }
+ }
+ } else if (ETH_P_IPV6 == l3_proto) {
+ struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
+
+ if (IP6_ECN_CE == IP6_ECN(ip6hdr->ip6_ecn_acc)) {
+ ecn_state = VIRTIO_NET_HDR_GSO_ECN;
+ }
+
+ if (IP_PROTO_TCP == ip6hdr->ip6_nxt) {
+ return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
+ }
+ }
+
+ /* Unsupported offload */
+ assert(false);
+
+ return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
+}
+
+void eth_get_protocols(const uint8_t *headers,
+ uint32_t hdr_length,
+ bool *isip4, bool *isip6,
+ bool *isudp, bool *istcp)
+{
+ int proto;
+ size_t l2hdr_len = eth_get_l2_hdr_length(headers);
+ assert(hdr_length >= eth_get_l2_hdr_length(headers));
+ *isip4 = *isip6 = *isudp = *istcp = false;
+
+ proto = eth_get_l3_proto(headers, l2hdr_len);
+ if (ETH_P_IP == proto) {
+ *isip4 = true;
+
+ struct ip_header *iphdr;
+
+ assert(hdr_length >=
+ eth_get_l2_hdr_length(headers) + sizeof(struct ip_header));
+
+ iphdr = PKT_GET_IP_HDR(headers);
+
+ if (IP_HEADER_VERSION_4 == IP_HEADER_VERSION(iphdr)) {
+ if (IP_PROTO_TCP == iphdr->ip_p) {
+ *istcp = true;
+ } else if (IP_PROTO_UDP == iphdr->ip_p) {
+ *isudp = true;
+ }
+ }
+ } else if (ETH_P_IPV6 == proto) {
+ *isip6 = true;
+
+ struct ip6_header *ip6hdr;
+ assert(hdr_length >=
+ eth_get_l2_hdr_length(headers) + sizeof(struct ip6_header));
+ ip6hdr = PKT_GET_IP6_HDR(headers);
+
+ if (IP_PROTO_TCP == ip6hdr->ip6_nxt) {
+ *istcp = true;
+ } else if (IP_PROTO_UDP == ip6hdr->ip6_nxt) {
+ *isudp = true;
+ }
+ }
+}
diff --git a/qemu/hw/vmxnet_utils.h b/qemu/hw/vmxnet_utils.h
new file mode 100644
index 0000000..f5e79dd
--- /dev/null
+++ b/qemu/hw/vmxnet_utils.h
@@ -0,0 +1,242 @@
+/*
+ * QEMU VMWARE paravirtual devices - network auxiliary code
+ *
+ * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
+ *
+ * Developed by Daynix Computing LTD (http://www.daynix.com)
+ *
+ * Authors:
+ * Dmitry Fleytman <dmitry@daynix.com>
+ * Yan Vugenfirer <yan@daynix.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#define ETH_ALEN 6
+
+struct eth_header {
+ uint8_t h_dest[ETH_ALEN]; /* destination eth addr */
+ uint8_t h_source[ETH_ALEN]; /* source ether addr */
+ uint16_t h_proto; /* packet type ID field */
+};
+
+struct vlan_header {
+ uint16_t h_tci; /* priority and VLAN ID */
+ uint16_t h_proto; /* encapsulated protocol */
+};
+
+struct ip_header {
+ uint8_t ip_ver_len; /* version and header length */
+ uint8_t ip_tos; /* type of service */
+ uint16_t ip_len; /* total length */
+ uint16_t ip_id; /* identification */
+ uint16_t ip_off; /* fragment offset field */
+ uint8_t ip_ttl; /* time to live */
+ uint8_t ip_p; /* protocol */
+ uint16_t ip_sum; /* checksum */
+ uint32_t ip_src, ip_dst; /* source and dest address */
+};
+
+/* IPv6 address */
+struct in6_addr {
+ union {
+ uint8_t __u6_addr8[16];
+ } __in6_u;
+};
+
+struct ip6_header {
+ union {
+ struct ip6_hdrctl {
+ uint32_t ip6_un1_flow; /* 4 bits version, 8 bits TC,
+ 20 bits flow-ID */
+ uint16_t ip6_un1_plen; /* payload length */
+ uint8_t ip6_un1_nxt; /* next header */
+ uint8_t ip6_un1_hlim; /* hop limit */
+ } ip6_un1;
+ uint8_t ip6_un2_vfc; /* 4 bits version, top 4 bits tclass */
+ struct ip6_ecn_access {
+ uint8_t ip6_un3_vfc; /* 4 bits version, top 4 bits tclass */
+ uint8_t ip6_un3_ecn; /* 2 bits ECN, top 6 bits payload length */
+ } ip6_un3;
+ } ip6_ctlun;
+ struct in6_addr ip6_src; /* source address */
+ struct in6_addr ip6_dst; /* destination address */
+};
+#define ip6_nxt ip6_ctlun.ip6_un1.ip6_un1_nxt
+#define ip6_ecn_acc ip6_ctlun.ip6_un3.ip6_un3_ecn
+
+#define PKT_GET_ETH_HDR(p) \
+ ((struct eth_header *)(p))
+#define PKT_GET_VLAN_HDR(p) \
+ ((struct vlan_header *) (((uint8_t *)(p)) + sizeof(struct eth_header)))
+#define PKT_GET_IP_HDR(p) \
+ ((struct ip_header *)(((uint8_t *)(p)) + eth_get_l2_hdr_length(p)))
+#define IP_HDR_GET_LEN(p) \
+ ((((struct ip_header *)p)->ip_ver_len & 0x0F) << 2)
+#define PKT_GET_IP_HDR_LEN(p) \
+ (IP_HDR_GET_LEN(PKT_GET_IP_HDR(p)))
+#define PKT_GET_IP6_HDR(p) \
+ ((struct ip6_header *) (((uint8_t *)(p)) + eth_get_l2_hdr_length(p)))
+#define IP_HEADER_VERSION(ip) \
+ ((ip->ip_ver_len >> 4)&0xf)
+
+#define ETH_P_IP (0x0800)
+#define ETH_P_IPV6 (0x86dd)
+#define ETH_P_VLAN (0x8100)
+#define ETH_P_DVLAN (0x88a8)
+#define VLAN_VID_MASK 0x0fff
+#define IP_HEADER_VERSION_4 (4)
+#define IP_HEADER_VERSION_6 (6)
+#define IP_PROTO_TCP (6)
+#define IP_PROTO_UDP (17)
+#define IPTOS_ECN_MASK 0x03
+#define IPTOS_ECN(x) ((x) & IPTOS_ECN_MASK)
+#define IPTOS_ECN_CE 0x03
+#define IP6_ECN_MASK 0xC0
+#define IP6_ECN(x) ((x) & IP6_ECN_MASK)
+#define IP6_ECN_CE 0xC0
+#define IP4_DONT_FRAGMENT_FLAG (1 << 14)
+
+#define IS_SPECIAL_VLAN_ID(x) \
+ ((0 == (x)) || (0xFFF == (x)))
+
+#define ETH_MAX_L2_HDR_LEN \
+ (sizeof(struct eth_header) + 2*sizeof(struct vlan_header))
+
+#define ETH_MAX_IP4_HDR_LEN (60)
+#define ETH_MAX_IP6_HDR_LEN \
+ (sizeof(struct ip6_header))
+#define ETH_MAX_L3_HDR_LEN \
+ (MAX(ETH_MAX_IP4_HDR_LEN, ETH_MAX_IP6_HDR_LEN))
+#define ETH_MAX_IP_DGRAM_LEN (0xFFFF)
+#define ETH_MAX_IP_PLOAD_LEN \
+ (ETH_MAX_IP_DGRAM_LEN - ETH_MAX_IP4_HDR_LEN - ETH_MAX_L2_HDR_LEN)
+
+#define IP_FRAG_UNIT_SIZE (8)
+#define IP_FRAG_ALIGN_SIZE(x) ((x) & ~0x7)
+#define IP_RF 0x8000 /* reserved fragment flag */
+#define IP_DF 0x4000 /* don't fragment flag */
+#define IP_MF 0x2000 /* more fragments flag */
+#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */
+
+
+static inline int is_multicast_ether_addr(const uint8_t *addr)
+{
+ return 0x01 & addr[0];
+}
+
+static inline int is_broadcast_ether_addr(const uint8_t *addr)
+{
+ return (addr[0] & addr[1] & addr[2] & addr[3] & addr[4] & addr[5]) == 0xff;
+}
+
+static inline int is_unicast_ether_addr(const uint8_t *addr)
+{
+ return !is_multicast_ether_addr(addr);
+}
+
+typedef enum {
+ VMXNET3_PKT_UCAST = 0xAABBCC00,
+ VMXNET3_PKT_BCAST,
+ VMXNET3_PKT_MCAST
+} eth_pkt_types_e;
+
+static inline eth_pkt_types_e
+get_eth_packet_type(const struct eth_header *ehdr)
+{
+ if (is_broadcast_ether_addr(ehdr->h_dest)) {
+ return VMXNET3_PKT_BCAST;
+ } else if (is_multicast_ether_addr(ehdr->h_dest)) {
+ return VMXNET3_PKT_MCAST;
+ } else { /* unicast */
+ return VMXNET3_PKT_UCAST;
+ }
+}
+
+static inline uint32_t
+eth_get_l2_hdr_length(const void *p)
+{
+ uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto);
+ struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p);
+ switch (proto) {
+ case ETH_P_VLAN:
+ return sizeof(struct eth_header) + sizeof(struct vlan_header);
+ case ETH_P_DVLAN:
+ if (ETH_P_VLAN == hvlan->h_proto) {
+ return sizeof(struct eth_header) + 2*sizeof(struct vlan_header);
+ } else {
+ return sizeof(struct eth_header) + sizeof(struct vlan_header);
+ }
+ default:
+ return sizeof(struct eth_header);
+ }
+}
+
+static inline uint16_t
+eth_get_pkt_vlan_tag(const void *p)
+{
+ uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto);
+ struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p);
+ switch (proto) {
+ case ETH_P_VLAN:
+ case ETH_P_DVLAN:
+ return be16_to_cpu(hvlan->h_proto);
+ default:
+ return 0;
+ }
+}
+
+static inline bool
+eth_strip_vlan(const void *p, struct eth_header *new_ehdr,
+ uint16_t *payload_offset, uint16_t *vlan_tag)
+{
+ uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto);
+ struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p);
+
+ switch (proto) {
+ case ETH_P_VLAN:
+ case ETH_P_DVLAN:
+ memcpy(new_ehdr->h_source, PKT_GET_ETH_HDR(p)->h_source, ETH_ALEN);
+ memcpy(new_ehdr->h_dest, PKT_GET_ETH_HDR(p)->h_dest, ETH_ALEN);
+ new_ehdr->h_proto = hvlan->h_proto;
+ *vlan_tag = be16_to_cpu(hvlan->h_tci);
+ *payload_offset =
+ sizeof(struct eth_header) + sizeof(struct vlan_header);
+ return true;
+ default:
+ return false;
+ }
+}
+
+static inline uint16_t
+eth_get_l3_proto(const void *l2hdr, size_t l2hdr_len)
+{
+ uint8_t *proto_ptr = (uint8_t *) l2hdr + l2hdr_len - sizeof(uint16_t);
+ return be16_to_cpup((uint16_t *)proto_ptr);
+}
+
+static inline void
+eth_put_csum(uint8_t *buf, uint32_t cso, uint16_t csum)
+{
+ cpu_to_be16wu((uint16_t *)(buf + cso), csum);
+}
+
+void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag);
+
+
+bool eth_setup_tx_offloads(uint8_t *l3hdr,
+ size_t l3hdr_len,
+ size_t l3hdr_off,
+ uint32_t l3payload_len,
+ struct virtio_net_hdr *vhdr,
+ bool more_frags,
+ uint16_t fragmentation_offset);
+
+uint8_t eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr);
+
+void eth_get_protocols(const uint8_t *headers,
+ uint32_t hdr_length,
+ bool *isip4, bool *isip6,
+ bool *isudp, bool *istcp);
diff --git a/qemu/net.c b/qemu/net.c
index c34474f..e2f586c 100644
--- a/qemu/net.c
+++ b/qemu/net.c
@@ -857,7 +857,7 @@ static const struct {
}, {
.name = "model",
.type = QEMU_OPT_STRING,
- .help = "device model (e1000, rtl8139, virtio etc.)",
+ .help = "device model (e1000, rtl8139, virtio, vmxnet3 etc.)",
}, {
.name = "addr",
.type = QEMU_OPT_STRING,
diff --git a/qemu/net/checksum.h b/qemu/net/checksum.h
index 1f05298..5f42a02 100644
--- a/qemu/net/checksum.h
+++ b/qemu/net/checksum.h
@@ -26,4 +26,11 @@ uint16_t net_checksum_tcpudp(uint16_t length, uint16_t proto,
uint8_t *addrs, uint8_t *buf);
void net_checksum_calculate(uint8_t *data, int length);
+static inline uint16_t
+net_raw_checksum(uint8_t *data, int length)
+{
+ return net_checksum_finish(net_checksum_add(length, data));
+}
+
+
#endif /* QEMU_NET_CHECKSUM_H */
--
1.7.7.6
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [PATCH v2] VMXNET3 paravirtual NIC device implementation
2012-02-29 12:49 [Qemu-devel] [PATCH v2] VMXNET3 paravirtual NIC device implementation Dmitry Fleytman
@ 2012-03-01 11:48 ` Michael Tokarev
2012-03-01 13:31 ` Dmitry Fleytman
2012-03-03 16:55 ` Gerhard Wiesinger
1 sibling, 1 reply; 5+ messages in thread
From: Michael Tokarev @ 2012-03-01 11:48 UTC (permalink / raw)
To: Dmitry Fleytman
Cc: Alex Fishman, Michael S. Tsirkin, yvugenfi, Izik Eidus,
qemu-devel, Dmitry Fleytman, Yan Vugenfirer
On 29.02.2012 16:49, Dmitry Fleytman wrote:
> Implementation of VMWare VMXNET3 paravirtual NIC device.
> Supports of all the device features including offload capabilties,
> VLANs and etc.
[..]
Not a review or anything (I don't know qemu networking
internals much), just a question: does it support PXE
booting?
There appears to be an etherboot/iPXE boot rom for it, maybe
it is a good idea to enable pxe in this "anti-driver" too?
Thanks,
/mjt
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [PATCH v2] VMXNET3 paravirtual NIC device implementation
2012-03-01 11:48 ` Michael Tokarev
@ 2012-03-01 13:31 ` Dmitry Fleytman
0 siblings, 0 replies; 5+ messages in thread
From: Dmitry Fleytman @ 2012-03-01 13:31 UTC (permalink / raw)
To: Michael Tokarev
Cc: Alex Fishman, Michael S. Tsirkin, yvugenfi, Izik Eidus,
qemu-devel, Dmitry Fleytman, Yan Vugenfirer
On Thu, Mar 1, 2012 at 1:48 PM, Michael Tokarev <mjt@tls.msk.ru> wrote:
>
> On 29.02.2012 16:49, Dmitry Fleytman wrote:
>
> > Implementation of VMWare VMXNET3 paravirtual NIC device.
> > Supports of all the device features including offload capabilties,
> > VLANs and etc.
> [..]
>
> Not a review or anything (I don't know qemu networking
> internals much), just a question: does it support PXE
> booting?
>
> There appears to be an etherboot/iPXE boot rom for it, maybe
> it is a good idea to enable pxe in this "anti-driver" too?
Hello, Michael
Thanks for your suggestion.
Boot from this device is not supported in this version.
Indeed this could be useful, we'll consider adding this feature in the future.
>
> Thanks,
>
> /mjt
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [PATCH v2] VMXNET3 paravirtual NIC device implementation
2012-02-29 12:49 [Qemu-devel] [PATCH v2] VMXNET3 paravirtual NIC device implementation Dmitry Fleytman
2012-03-01 11:48 ` Michael Tokarev
@ 2012-03-03 16:55 ` Gerhard Wiesinger
2012-03-04 17:09 ` Dmitry Fleytman
1 sibling, 1 reply; 5+ messages in thread
From: Gerhard Wiesinger @ 2012-03-03 16:55 UTC (permalink / raw)
To: Dmitry Fleytman
Cc: Anthony Liguori, Alex Fishman, Michael S. Tsirkin, yvugenfi,
Izik Eidus, qemu-devel, Dmitry Fleytman, Yan Vugenfirer
Hello,
Tried V2 of VMXNET3 under Knoppix Live CD/Linux (-cdrom
ISO/KNOPPIX_V6.7.1CD-2011-09-14-DE.iso) but it cored:
#0 tap_set_offload (nc=0x0, csum=1, tso4=1, tso6=1, ecn=0, ufo=0) at net/tap.c:271
#1 0x00007fa60a9986f8 in vmxnet3_activate_device (s=0x7fa60cbf37f0) at /root/download/qemu/git/qemu-kvm/hw/vmxnet3.c:1672
#2 vmxnet3_handle_command (cmd=<optimized out>, s=0x7fa60cbf37f0) at /root/download/qemu/git/qemu-kvm/hw/vmxnet3.c:1817
#3 vmxnet3_io_bar1_write (opaque=0x7fa60cbf37f0, addr=<optimized out>, val=<optimized out>, size=<optimized out>) at /root/download/qemu/git/qemu-kvm/hw/vmxnet3.c:1971
#4 0x00007fa60aa4a280 in access_with_adjusted_size (addr=32, value=0x7fa603917c60, size=4, access_size_min=<optimized out>, access_size_max=<optimized out>, access=0x7fa60aa4a1a0 <memory_region_write_accessor>, opaque=0x7fa60cbf3d48) at /root/download/qemu/git/qemu-kvm/memory.c:304
#5 0x00007fa60aa4ec80 in memory_region_dispatch_write (size=4, data=3405643776, addr=32, mr=0x7fa60cbf3d48) at /root/download/qemu/git/qemu-kvm/memory.c:982
#6 io_mem_write (io_index=<optimized out>, addr=32, val=<optimized out>, size=4) at /root/download/qemu/git/qemu-kvm/memory.c:1564
#7 0x00007fa60aa21b82 in cpu_physical_memory_rw (addr=4273954848, buf=0x7fa60a844028 <Address 0x7fa60a844028 out of bounds>, len=4, is_write=1) at /root/download/qemu/git/qemu-kvm/exec.c:3584
#8 0x00007fa60aa3ec75 in kvm_cpu_exec (env=0x7fa60c308a60) at /root/download/qemu/git/qemu-kvm/kvm-all.c:1192
#9 0x00007fa60aa14ce1 in qemu_kvm_cpu_thread_fn (arg=0x7fa60c308a60) at /root/download/qemu/git/qemu-kvm/cpus.c:732
#10 0x00007fa608ce6d90 in start_thread () from /lib64/libpthread.so.0
#11 0x00007fa606f82f5d in clone () from /lib64/libc.so.6
Relevant command line:
-cdrom ISO/KNOPPIX_V6.7.1CD-2011-09-14-DE.iso
-device vmxnet3,mac=1a:46:0b:ca:bc:7e,vlan=1,romfile=
-net tap,ifname=tap1,script=no,downscript=no,vlan=1
Can you please try to reproduce it and fix it.
Thnx.
Ciao,
Gerhard
--
http://www.wiesinger.com/
On Wed, 29 Feb 2012, Dmitry Fleytman wrote:
> Changes in V2:
> License text changed accoring to community suggestions
> Standard license header from GPLv2+ - licensed QEMU files used
>
> Implementation of VMWare VMXNET3 paravirtual NIC device.
> Supports of all the device features including offload capabilties,
> VLANs and etc.
> The device is tested on different OSes:
> Fedora 15
> Ubuntu 10.4
> Centos 6.2
> Windows 2008R2
> Windows 2008 64bit
> Windows 2008 32bit
> Windows 2003 64bit
> Windows 2003 32bit
> Currently live migration is not supported.
>
> Signed-off-by: Dmitry Fleytman <dmitry@daynix.com>
> Signed-off-by: Yan Vugenfirer <yan@daynix.com>
> ---
> qemu/Makefile.objs | 1 +
> qemu/default-configs/pci.mak | 1 +
> qemu/hw/pci.c | 2 +
> qemu/hw/pci.h | 1 +
> qemu/hw/virtio-net.h | 13 +-
> qemu/hw/vmware_utils.h | 131 +++
> qemu/hw/vmxnet3.c | 2559 ++++++++++++++++++++++++++++++++++++++++++
> qemu/hw/vmxnet3.h | 727 ++++++++++++
> qemu/hw/vmxnet3_debug.h | 104 ++
> qemu/hw/vmxnet_utils.c | 172 +++
> qemu/hw/vmxnet_utils.h | 242 ++++
> qemu/net.c | 2 +-
> qemu/net/checksum.h | 7 +
> 13 files changed, 3955 insertions(+), 7 deletions(-)
> create mode 100644 qemu/hw/vmware_utils.h
> create mode 100644 qemu/hw/vmxnet3.c
> create mode 100644 qemu/hw/vmxnet3.h
> create mode 100644 qemu/hw/vmxnet3_debug.h
> create mode 100644 qemu/hw/vmxnet_utils.c
> create mode 100644 qemu/hw/vmxnet_utils.h
>
> diff --git a/qemu/Makefile.objs b/qemu/Makefile.objs
> index 808de6a..3f846a6 100644
> --- a/qemu/Makefile.objs
> +++ b/qemu/Makefile.objs
> @@ -264,6 +264,7 @@ hw-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o
> hw-obj-$(CONFIG_PCNET_COMMON) += pcnet.o
> hw-obj-$(CONFIG_E1000_PCI) += e1000.o
> hw-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o
> +hw-obj-$(CONFIG_VMXNET3_PCI) += vmxnet3.o vmxnet_utils.o
>
> hw-obj-$(CONFIG_SMC91C111) += smc91c111.o
> hw-obj-$(CONFIG_LAN9118) += lan9118.o
> diff --git a/qemu/default-configs/pci.mak b/qemu/default-configs/pci.mak
> index 21e4ccf..f8e6ee1 100644
> --- a/qemu/default-configs/pci.mak
> +++ b/qemu/default-configs/pci.mak
> @@ -13,6 +13,7 @@ CONFIG_PCNET_COMMON=y
> CONFIG_LSI_SCSI_PCI=y
> CONFIG_RTL8139_PCI=y
> CONFIG_E1000_PCI=y
> +CONFIG_VMXNET3_PCI=y
> CONFIG_IDE_CORE=y
> CONFIG_IDE_QDEV=y
> CONFIG_IDE_PCI=y
> diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
> index bf046bf..f0fb1ee 100644
> --- a/qemu/hw/pci.c
> +++ b/qemu/hw/pci.c
> @@ -1350,6 +1350,7 @@ static const char * const pci_nic_models[] = {
> "e1000",
> "pcnet",
> "virtio",
> + "vmxnet3",
> NULL
> };
>
> @@ -1362,6 +1363,7 @@ static const char * const pci_nic_names[] = {
> "e1000",
> "pcnet",
> "virtio-net-pci",
> + "vmxnet3",
> NULL
> };
>
> diff --git a/qemu/hw/pci.h b/qemu/hw/pci.h
> index 4f19fdb..fee8250 100644
> --- a/qemu/hw/pci.h
> +++ b/qemu/hw/pci.h
> @@ -60,6 +60,7 @@
> #define PCI_DEVICE_ID_VMWARE_NET 0x0720
> #define PCI_DEVICE_ID_VMWARE_SCSI 0x0730
> #define PCI_DEVICE_ID_VMWARE_IDE 0x1729
> +#define PCI_DEVICE_ID_VMWARE_VMXNET3 0x07B0
>
> /* Intel (0x8086) */
> #define PCI_DEVICE_ID_INTEL_82551IT 0x1209
> diff --git a/qemu/hw/virtio-net.h b/qemu/hw/virtio-net.h
> index 4468741..fa3c17b 100644
> --- a/qemu/hw/virtio-net.h
> +++ b/qemu/hw/virtio-net.h
> @@ -78,13 +78,14 @@ struct virtio_net_config
> * specify GSO or CSUM features, you can simply ignore the header. */
> struct virtio_net_hdr
> {
> -#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start, csum_offset
> +#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */
> +#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
> uint8_t flags;
> -#define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame
> -#define VIRTIO_NET_HDR_GSO_TCPV4 1 // GSO frame, IPv4 TCP (TSO)
> -#define VIRTIO_NET_HDR_GSO_UDP 3 // GSO frame, IPv4 UDP (UFO)
> -#define VIRTIO_NET_HDR_GSO_TCPV6 4 // GSO frame, IPv6 TCP
> -#define VIRTIO_NET_HDR_GSO_ECN 0x80 // TCP has ECN set
> +#define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
> +#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
> +#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */
> +#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */
> +#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */
> uint8_t gso_type;
> uint16_t hdr_len;
> uint16_t gso_size;
> diff --git a/qemu/hw/vmware_utils.h b/qemu/hw/vmware_utils.h
> new file mode 100644
> index 0000000..304bb48
> --- /dev/null
> +++ b/qemu/hw/vmware_utils.h
> @@ -0,0 +1,131 @@
> +/*
> + * QEMU VMWARE paravirtual devices - auxiliary code
> + *
> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
> + *
> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> + *
> + * Authors:
> + * Dmitry Fleytman <dmitry@daynix.com>
> + * Yan Vugenfirer <yan@daynix.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +/* Shared memory access functions with byte swap support */
> +static inline void
> +vmw_shmem_read(target_phys_addr_t addr, void *buf, int len)
> +{
> + DSHPRINTF("SHMEM r: %" PRIx64 ", len: %d to %p",
> + (uint64_t) addr, len, buf);
> + cpu_physical_memory_read(addr, buf, len);
> +}
> +
> +static inline void
> +vmw_shmem_write(target_phys_addr_t addr, void *buf, int len)
> +{
> + DSHPRINTF("SHMEM w: %" PRIx64 ", len: %d to %p",
> + (uint64_t) addr, len, buf);
> + cpu_physical_memory_write(addr, buf, len);
> +}
> +
> +static inline void
> +vmw_shmem_rw(target_phys_addr_t addr, void *buf, int len, int is_write)
> +{
> + DSHPRINTF("SHMEM r/w: %" PRIx64 ", len: %d (to %p), is write: %d",
> + (uint64_t) addr, len, buf, is_write);
> +
> + cpu_physical_memory_rw(addr, buf, len, is_write);
> +}
> +
> +static inline void
> +vmw_shmem_set(target_phys_addr_t addr, uint8 val, int len)
> +{
> + int i;
> + DSHPRINTF("SHMEM set: %" PRIx64 ", len: %d (value 0x%X)",
> + (uint64_t) addr, len, val);
> +
> + for (i = 0; i < len; i++) {
> + cpu_physical_memory_write(addr + i, &val, 1);
> + }
> +}
> +
> +static inline uint32_t
> +vmw_shmem_ld8(target_phys_addr_t addr)
> +{
> + uint8_t res = ldub_phys(addr);
> + DSHPRINTF("SHMEM load8: %" PRIx64 " (value 0x%X)",
> + (uint64_t) addr, res);
> + return res;
> +}
> +
> +static inline void
> +vmw_shmem_st8(target_phys_addr_t addr, uint8_t value)
> +{
> + DSHPRINTF("SHMEM store8: %" PRIx64 " (value 0x%X)",
> + (uint64_t) addr, value);
> + stb_phys(addr, value);
> +}
> +
> +static inline uint32_t
> +vmw_shmem_ld16(target_phys_addr_t addr)
> +{
> + uint16_t res = lduw_le_phys(addr);
> + DSHPRINTF("SHMEM load16: %" PRIx64 " (value 0x%X)",
> + (uint64_t) addr, res);
> + return res;
> +}
> +
> +static inline void
> +vmw_shmem_st16(target_phys_addr_t addr, uint16_t value)
> +{
> + DSHPRINTF("SHMEM store16: %" PRIx64 " (value 0x%X)",
> + (uint64_t) addr, value);
> + stw_le_phys(addr, value);
> +}
> +
> +static inline uint32_t
> +vmw_shmem_ld32(target_phys_addr_t addr)
> +{
> + uint32_t res = ldl_le_phys(addr);
> + DSHPRINTF("SHMEM load32: %" PRIx64 " (value 0x%X)",
> + (uint64_t) addr, res);
> + return res;
> +}
> +
> +static inline void
> +vmw_shmem_st32(target_phys_addr_t addr, uint32_t value)
> +{
> + DSHPRINTF("SHMEM store32: %" PRIx64 " (value 0x%X)",
> + (uint64_t) addr, value);
> + stl_le_phys(addr, value);
> +}
> +
> +static inline uint64_t
> +vmw_shmem_ld64(target_phys_addr_t addr)
> +{
> + uint64_t res = ldq_le_phys(addr);
> + DSHPRINTF("SHMEM load64: %" PRIx64 " (value %" PRIx64 ")",
> + (uint64_t) addr, res);
> + return res;
> +}
> +
> +static inline void
> +vmw_shmem_st64(target_phys_addr_t addr, uint64_t value)
> +{
> + DSHPRINTF("SHMEM store64: %" PRIx64 " (value %" PRIx64 ")",
> + (uint64_t) addr, value);
> + stq_le_phys(addr, value);
> +}
> +
> +/* MACROS for simplification of operations on array-style registers */
> +#define IS_MULTIREG_ADDR(addr, base, cnt, regsize) \
> + (((addr) >= (base)) && ((addr) < (base) + (cnt) * (regsize)))
> +
> +#define MULTIREG_IDX_BY_ADDR(addr, base, regsize) \
> + (((addr) - (base)) / (regsize))
> +
> +/* Bitfields */
> +#define FLAG_IS_SET(field, flag) (((field) & (flag)) == (flag))
> diff --git a/qemu/hw/vmxnet3.c b/qemu/hw/vmxnet3.c
> new file mode 100644
> index 0000000..112d3b9
> --- /dev/null
> +++ b/qemu/hw/vmxnet3.c
> @@ -0,0 +1,2559 @@
> +/*
> + * QEMU VMWARE VMXNET3 paravirtual NIC
> + *
> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
> + *
> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> + *
> + * Authors:
> + * Dmitry Fleytman <dmitry@daynix.com>
> + * Yan Vugenfirer <yan@daynix.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#define VMXNET3_ENABLE_MSIX
> +#define VMXNET3_ENABLE_MSI
> +
> +/* Define this constant to non-zero to enable IP4 */
> +/* fragmentation feature */
> +
> +/* #define VMXNET_MAX_IP_PLOAD_LEN ETH_MAX_IP_PLOAD_LEN */
> +#define VMXNET3_MAX_IP_PLOAD_LEN 0
> +
> +#include "hw.h"
> +#include "pci.h"
> +#include "net.h"
> +#include "virtio-net.h"
> +#include "net/tap.h"
> +#include "sysemu.h"
> +#include "iov.h"
> +#include "bswap.h"
> +#ifdef VMXNET3_ENABLE_MSIX
> +#include "msix.h"
> +#endif
> +#ifdef VMXNET3_ENABLE_MSI
> +#include "msi.h"
> +#endif
> +
> +#include "vmxnet3_debug.h"
> +#include "vmxnet3.h"
> +#include "vmware_utils.h"
> +#include "vmxnet_utils.h"
> +
> +#define PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION 0x1
> +#define VMXNET3_MSIX_BAR_SIZE 0x2000
> +
> +#define VMXNET3_BAR0_IDX (0)
> +#define VMXNET3_BAR1_IDX (1)
> +#define VMXNET3_MSIX_BAR_IDX (2)
> +
> +/* Link speed in Mbps should be shifted by 16 */
> +#define VMXNET3_LINK_SPEED (1000 << 16)
> +
> +/* Link status: 1 - up, 0 - down. */
> +#define VMXNET3_LINK_STATUS_UP 0x1
> +
> +/* Least significant bit should be set for revision and version */
> +#define VMXNET3_DEVICE_VERSION 0x1
> +#define VMXNET3_DEVICE_REVISION 0x1
> +
> +/* Macros for rings descriptors access */
> +#define VMXNET3_READ_TX_QUEUE_DESCR8(dpa, field) \
> + (vmw_shmem_ld8(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
> +
> +#define VMXNET3_WRITE_TX_QUEUE_DESCR8(dpa, field, value) \
> + (vmw_shmem_st8(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field, value)))
> +
> +#define VMXNET3_READ_TX_QUEUE_DESCR32(dpa, field) \
> + (vmw_shmem_ld32(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
> +
> +#define VMXNET3_WRITE_TX_QUEUE_DESCR32(dpa, field, value) \
> + (vmw_shmem_st32(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))
> +
> +#define VMXNET3_READ_TX_QUEUE_DESCR64(dpa, field) \
> + (vmw_shmem_ld64(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
> +
> +#define VMXNET3_WRITE_TX_QUEUE_DESCR64(dpa, field, value) \
> + (vmw_shmem_st64(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field), value))
> +
> +#define VMXNET3_READ_RX_QUEUE_DESCR64(dpa, field) \
> + (vmw_shmem_ld64(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))
> +
> +#define VMXNET3_READ_RX_QUEUE_DESCR32(dpa, field) \
> + (vmw_shmem_ld32(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))
> +
> +#define VMXNET3_WRITE_RX_QUEUE_DESCR64(dpa, field, value) \
> + (vmw_shmem_st64(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))
> +
> +#define VMXNET3_WRITE_RX_QUEUE_DESCR8(dpa, field, value) \
> + (vmw_shmem_st8(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field), value))
> +
> +/* Macros for guest driver shared area access */
> +#define VMXNET3_READ_DRV_SHARED64(shpa, field) \
> + (vmw_shmem_ld64(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
> +
> +#define VMXNET3_READ_DRV_SHARED32(shpa, field) \
> + (vmw_shmem_ld32(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
> +
> +#define VMXNET3_WRITE_DRV_SHARED32(shpa, field, val) \
> + (vmw_shmem_st32(shpa + offsetof(struct Vmxnet3_DriverShared, field), val))
> +
> +#define VMXNET3_READ_DRV_SHARED16(shpa, field) \
> + (vmw_shmem_ld16(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
> +
> +#define VMXNET3_READ_DRV_SHARED8(shpa, field) \
> + (vmw_shmem_ld8(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
> +
> +#define VMXNET3_READ_DRV_SHARED(shpa, field, b, l) \
> + (vmw_shmem_read(shpa + offsetof(struct Vmxnet3_DriverShared, field), b, l))
> +
> +/* TX/RX packets abstractions */
> +typedef struct Vmxnet3_TxPktMdata {
> + uint32_t offload_mode;
> + uint32_t cso_or_gso_size;
> + uint32_t hdr_length;
> + eth_pkt_types_e packet_type;
> +} Vmxnet3_TxPktMdata;
> +
> +typedef struct _Vmxnet3_RxPktMdata {
> + uint32_t tot_len;
> + uint16_t vlan_tag;
> + bool vlan_stripped;
> + bool vhdr_valid;
> + eth_pkt_types_e packet_type;
> +} Vmxnet3_RxPktMdata;
> +
> +#define VMXNET3_TXPKT_REBUILT_HDR_LEN (1024)
> +
> +typedef struct _Vmxnet3_TxPkt {
> + Vmxnet3_TxPktMdata mdata;
> + struct virtio_net_hdr virt_hdr;
> + bool has_virt_hdr;
> +
> + struct iovec *vec;
> +
> + uint8_t __l2_hdr[ETH_MAX_L2_HDR_LEN];
> + uint8_t __l3_hdr[ETH_MAX_L3_HDR_LEN];
> +
> + uint32_t payload_len;
> + uint32_t max_payload_len;
> +
> + uint32_t payload_frags;
> + uint32_t max_payload_frags;
> +
> + struct {
> + uint32_t offset;
> + bool more_frags;
> + bool orig_more_frags;
> + } fragmentation;
> +} Vmxnet3_TxPkt;
> +
> +#define VMXNET3_TXPKT_VHDR_FRAG (0)
> +#define VMXNET3_TXPKT_L2HDR_FRAG (1)
> +#define VMXNET3_TXPKT_L3HDR_FRAG (2)
> +#define VMXNET3_TXPKT_PL_START_FRAG (3)
> +
> +#define vmxnet3_txpkt_get_mdata(p) (&((p)->mdata))
> +#define vmxnet3_txpkt_get_vhdr(p) (&((p)->virt_hdr))
> +
> +#define vmxnet3_txpkt_get_l2hdr(p) \
> + ((p)->vec[VMXNET3_TXPKT_L2HDR_FRAG].iov_base)
> +#define vmxnet3_txpkt_get_l2hdr_len(p) \
> + ((p)->vec[VMXNET3_TXPKT_L2HDR_FRAG].iov_len)
> +#define vmxnet3_txpkt_set_l2hdr_len(p, l) \
> + ((p)->vec[VMXNET3_TXPKT_L2HDR_FRAG].iov_len = l)
> +#define vmxnet3_txpkt_get_l3hdr(p) \
> + ((p)->vec[VMXNET3_TXPKT_L3HDR_FRAG].iov_base)
> +#define vmxnet3_txpkt_get_l3hdr_len(p) \
> + ((p)->vec[VMXNET3_TXPKT_L3HDR_FRAG].iov_len)
> +#define vmxnet3_txpkt_set_l3hdr_len(p, l) \
> + ((p)->vec[VMXNET3_TXPKT_L3HDR_FRAG].iov_len = l)
> +#define vmxnet3_txpkt_get_payload_len(p) \
> + ((p)->payload_len)
> +
> +#define vmxnet3_txpkt_set_more_frags(p, mf) \
> + ((p)->fragmentation.more_frags = mf)
> +#define vmxnet3_txpkt_get_more_frags(p) \
> + ((p)->fragmentation.more_frags | \
> + (p)->fragmentation.orig_more_frags)
> +#define vmxnet3_txpkt_set_frag_off(p, off) \
> + ((p)->fragmentation.offset = off)
> +#define vmxnet3_txpkt_get_frag_off(p) \
> + ((p)->fragmentation.offset)
> +#define vmxnet3_txpkt_advance_frag_off(p, off) \
> + ((p)->fragmentation.offset += off)
> +
> +static inline size_t
> +vmxnet3_txpkt_get_total_len(const Vmxnet3_TxPkt *p)
> +{
> + return vmxnet3_txpkt_get_l2hdr_len(p) +
> + vmxnet3_txpkt_get_l3hdr_len(p) +
> + vmxnet3_txpkt_get_payload_len(p);
> +}
> +
> +static inline struct iovec*
> +vmxnet3_txpkt_get_payload_frag(Vmxnet3_TxPkt *p, uint32_t num)
> +{
> + assert(num < p->max_payload_frags);
> + return &p->vec[num + VMXNET3_TXPKT_PL_START_FRAG];
> +}
> +
> +static inline void
> +vmxnet3_txpkt_set_num_pl_frags(Vmxnet3_TxPkt *p, uint32_t num)
> +{
> + assert(num <= p->max_payload_frags);
> + p->payload_frags = num;
> +}
> +
> +static inline void
> +vmxnet3_txpkt_reset_payload(Vmxnet3_TxPkt *p)
> +{
> + p->payload_len = 0;
> +}
> +
> +static void vmxnet3_txpkt_reset(Vmxnet3_TxPkt *p)
> +{
> + memset(&p->mdata, 0, sizeof(p->mdata));
> + vmxnet3_txpkt_set_num_pl_frags(p, 0);
> + vmxnet3_txpkt_reset_payload(p);
> + vmxnet3_txpkt_set_more_frags(p, 0);
> + vmxnet3_txpkt_set_frag_off(p, 0);
> + p->max_payload_len = 0;
> +
> + if (NULL != p->vec) {
> + p->vec[VMXNET3_TXPKT_L2HDR_FRAG].iov_len = 0;
> + p->vec[VMXNET3_TXPKT_L3HDR_FRAG].iov_len = 0;
> + }
> +}
> +
> +static bool
> +vmxnet3_txpkt_prealloc(Vmxnet3_TxPkt *p, uint32_t max_frags, bool has_virt_hdr)
> +{
> + if (NULL != p->vec) {
> + g_free(p->vec);
> + }
> +
> + p->vec =
> + g_malloc(sizeof(*p->vec) * (max_frags + VMXNET3_TXPKT_PL_START_FRAG));
> + if (NULL == p->vec) {
> + return false;
> + }
> +
> + p->max_payload_frags = max_frags;
> + p->has_virt_hdr = has_virt_hdr;
> + p->vec[VMXNET3_TXPKT_VHDR_FRAG].iov_base = &p->virt_hdr;
> + p->vec[VMXNET3_TXPKT_VHDR_FRAG].iov_len =
> + p->has_virt_hdr ? sizeof(p->virt_hdr) : 0;
> + p->vec[VMXNET3_TXPKT_L2HDR_FRAG].iov_base = &p->__l2_hdr;
> + p->vec[VMXNET3_TXPKT_L3HDR_FRAG].iov_base = &p->__l3_hdr;
> + vmxnet3_txpkt_reset(p);
> + return true;
> +}
> +
> +static void vmxnet3_txpkt_init(Vmxnet3_TxPkt *p)
> +{
> + p->vec = NULL;
> +}
> +
> +static void vmxnet3_txpkt_cleanup(Vmxnet3_TxPkt *p)
> +{
> + g_free(p->vec);
> +}
> +
> +static void vmxnet3_txpkt_unmap(Vmxnet3_TxPkt *p, bool is_write)
> +{
> + int i;
> +
> + for (i = VMXNET3_TXPKT_PL_START_FRAG;
> + i < p->payload_frags + VMXNET3_TXPKT_PL_START_FRAG; i++) {
> + cpu_physical_memory_unmap(p->vec[i].iov_base, p->vec[i].iov_len,
> + is_write, p->vec[i].iov_len);
> + }
> +}
> +
> +static void*
> +vmxnet3_txpkt_map(Vmxnet3_TxPkt *p, uint32_t *mapped_fragments, bool is_write)
> +{
> + int i;
> +
> + for (i = VMXNET3_TXPKT_PL_START_FRAG;
> + i < p->payload_frags + VMXNET3_TXPKT_PL_START_FRAG; i++) {
> + target_phys_addr_t mapped_len = p->vec[i].iov_len;
> + size_t orig_len = p->vec[i].iov_len;
> + p->vec[i].iov_base =
> + cpu_physical_memory_map((uint64_t) p->vec[i].iov_base,
> + &mapped_len, is_write);
> + p->vec[i].iov_len = mapped_len;
> +
> + if ((NULL == p->vec[i].iov_base) || (orig_len != mapped_len)) {
> + p->payload_frags = i + !!p->vec[i].iov_base;
> + vmxnet3_txpkt_unmap(p, is_write);
> + return NULL;
> + }
> + }
> +
> + *mapped_fragments = VMXNET3_TXPKT_PL_START_FRAG + p->payload_frags;
> + return p->vec;
> +}
> +
> +static inline void
> +vmxnet3_txpkt_dump(Vmxnet3_TxPkt *p)
> +{
> +#ifdef DEBUG_VMXNET3_PACKETS
> + Vmxnet3_TxPktMdata *m = vmxnet3_txpkt_get_mdata(p);
> +#endif
> +
> + DPKPRINTF("TXPKT MDATA: om: %d, cso/gso_size: %d, hdr_len: %d, "
> + "pkt_type: 0x%X, l2hdr_len: %lu l3hdr_len: %lu, payload_len: %u",
> + m->offload_mode, m->cso_or_gso_size,
> + m->hdr_length, m->packet_type,
> + vmxnet3_txpkt_get_l2hdr_len(p),
> + vmxnet3_txpkt_get_l3hdr_len(p),
> + vmxnet3_txpkt_get_payload_len(p));
> +};
> +
> +/* RX packet may contain up to 2 fragments - rebuilt eth header */
> +/* in case of VLAN tag stripping */
> +/* and payload received from QEMU - in any case */
> +#define VMXNET3_MAX_RX_PACKET_FRAGMENTS (2)
> +
> +typedef struct _Vmxnet3_RxPkt {
> + Vmxnet3_RxPktMdata mdata;
> + struct virtio_net_hdr virt_hdr;
> + struct eth_header eth_hdr;
> + struct iovec vec[VMXNET3_MAX_RX_PACKET_FRAGMENTS];
> + uint16 vec_len;
> +} Vmxnet3_RxPkt;
> +
> +#define vmxnet3_rxpkt_get_mdata(p) (&((p)->mdata))
> +#define vmxnet3_rxpkt_get_ehdr(p) (&((p)->eth_hdr))
> +#define vmxnet3_rxpkt_get_vhdr(p) (&((p)->virt_hdr))
> +#define vmxnet3_rxpkt_get_frag(p, n) (&((p)->vec[(n)]))
> +#define vmxnet3_rxpkt_set_num_frags(p, n) ((p)->vec_len = (n))
> +#define vmxnet3_rxpkt_get_num_frags(p) ((p)->vec_len)
> +
> +static inline void vmxnet3_rxpkt_attach_ehdr(Vmxnet3_RxPkt *p)
> +{
> + vmxnet3_rxpkt_get_frag(p, 0)->iov_base = &p->eth_hdr;
> + vmxnet3_rxpkt_get_frag(p, 0)->iov_len = sizeof(p->eth_hdr);
> +}
> +
> +static inline void vmxnet3_rxpkt_reset(Vmxnet3_RxPkt *p)
> +{
> + memset(&p->mdata, 0, sizeof(p->mdata));
> + memset(&p->virt_hdr, 0, sizeof(p->virt_hdr));
> + vmxnet3_rxpkt_set_num_frags(p, 0);
> +}
> +
> +static void vmxnet3_rxpkt_init(Vmxnet3_RxPkt *p)
> +{
> + vmxnet3_rxpkt_reset(p);
> +}
> +
> +static inline void
> +vmxnet3_rxpkt_dump(Vmxnet3_RxPkt *p)
> +{
> +#ifdef DEBUG_VMXNET3_PACKETS
> + Vmxnet3_RxPktMdata *m = vmxnet3_rxpkt_get_mdata(p);
> +#endif
> +
> + DPKPRINTF("RXPKT MDATA: tot_len: %d, pkt_type: 0x%X, "
> + "vlan_stripped: %d, vlan_tag: %d, vhdr_valid: %d",
> + m->tot_len, m->packet_type,
> + m->vlan_stripped, m->vlan_tag, m->vhdr_valid);
> +};
> +
> +/* Cyclic ring abstraction */
> +typedef struct _Vmxnet3_Ring {
> + target_phys_addr_t pa;
> + size_t size;
> + size_t cell_size;
> + size_t next;
> + uint8_t gen;
> +} Vmxnet3_Ring;
> +
> +static inline void vmxnet3_ring_init(Vmxnet3_Ring *ring,
> + target_phys_addr_t pa,
> + size_t size,
> + size_t cell_size,
> + bool zero_region)
> +{
> + ring->pa = pa;
> + ring->size = size;
> + ring->cell_size = cell_size;
> + ring->gen = VMXNET3_INIT_GEN;
> + ring->next = 0;
> +
> + if (zero_region) {
> + vmw_shmem_set(pa, 0, size*cell_size);
> + }
> +}
> +
> +#define vmxnet3_ring_dump(macro, ring_name, ridx, r) \
> + macro("%s#%d: base %" PRIx64 " size %lu cell_size %lu gen %d next %lu", \
> + (ring_name), (ridx), (uint64_t) (r)->pa, \
> + (r)->size, (r)->cell_size, (r)->gen, (r)->next)
> +
> +static inline void vmxnet3_ring_inc(Vmxnet3_Ring *ring)
> +{
> + if (++ring->next >= ring->size) {
> + ring->next = 0;
> + ring->gen ^= 1;
> + }
> +}
> +
> +static inline void vmxnet3_ring_dec(Vmxnet3_Ring *ring)
> +{
> + if (0 == ring->next--) {
> + ring->next = ring->size - 1;
> + ring->gen ^= 1;
> + }
> +}
> +
> +static inline target_phys_addr_t vmxnet3_ring_curr_cell_pa(Vmxnet3_Ring *ring)
> +{
> + return ring->pa + ring->next * ring->cell_size;
> +}
> +
> +static inline void vmxnet3_ring_read_curr_cell(Vmxnet3_Ring *ring, void *buff)
> +{
> + vmw_shmem_read(vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
> +}
> +
> +static inline void vmxnet3_ring_write_curr_cell(Vmxnet3_Ring *ring, void *buff)
> +{
> + vmw_shmem_write(vmxnet3_ring_curr_cell_pa(ring), buff, ring->cell_size);
> +}
> +
> +static inline size_t vmxnet3_ring_curr_cell_idx(Vmxnet3_Ring *ring)
> +{
> + return ring->next;
> +}
> +
> +static inline uint8_t vmxnet3_ring_curr_gen(Vmxnet3_Ring *ring)
> +{
> + return ring->gen;
> +}
> +
> +/* Debug trace-related functions */
> +static inline void
> +vmxnet3_dump_tx_descr(struct Vmxnet3_TxDesc *descr)
> +{
> + DPKPRINTF("TX DESCR: "
> + "addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
> + "dtype: %d, ext1: %d, msscof: %d, hlen: %d, om: %d, "
> + "eop: %d, cq: %d, ext2: %d, ti: %d, tci: %d",
> + le64_to_cpu(descr->addr), descr->len, descr->gen, descr->rsvd,
> + descr->dtype, descr->ext1, descr->msscof, descr->hlen, descr->om,
> + descr->eop, descr->cq, descr->ext2, descr->ti, descr->tci);
> +}
> +
> +static inline void
> +vmxnet3_dump_virt_hdr(struct virtio_net_hdr *vhdr)
> +{
> + DPKPRINTF("VHDR: flags 0x%x, gso_type: 0x%x, hdr_len: %d, gso_size: %d, "
> + "csum_start: %d, csum_offset: %d",
> + vhdr->flags, vhdr->gso_type, vhdr->hdr_len, vhdr->gso_size,
> + vhdr->csum_start, vhdr->csum_offset);
> +}
> +
> +static inline void
> +vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr)
> +{
> + DPKPRINTF("RX DESCR: addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
> + "dtype: %d, ext1: %d, btype: %d",
> + le64_to_cpu(descr->addr), descr->len, descr->gen,
> + descr->rsvd, descr->dtype, descr->ext1, descr->btype);
> +}
> +
> +/* Device state and helper functions */
> +#define VMXNET3_RX_RINGS_PER_QUEUE (2)
> +
> +typedef struct _VMXNET3_State {
> + PCIDevice dev;
> + NICState *nic;
> + NICConf conf;
> + MemoryRegion bar0;
> + MemoryRegion bar1;
> + MemoryRegion msix_bar;
> +
> +#ifdef VMXNET3_ENABLE_MSIX
> + /* Whether MSI-X support was installed successfully */
> + uint8_t msix_used;
> +#endif
> +#ifdef VMXNET3_ENABLE_MSI
> + /* Whether MSI support was installed successfully */
> + uint8_t msi_used;
> +#endif
> +
> + target_phys_addr_t drv_shmem;
> + target_phys_addr_t temp_shared_guest_driver_memory;
> +
> + uint8_t txq_num;
> + struct {
> + Vmxnet3_Ring tx_ring;
> + Vmxnet3_Ring comp_ring;
> +
> + uint8_t intr_idx;
> + target_phys_addr_t tx_stats_pa;
> + struct UPT1_TxStats txq_stats;
> + } txq_descr[VMXNET3_DEVICE_MAX_TX_QUEUES];
> +
> + /* This boolean tells whether RX packet being indicated has to */
> + /* be split into head and body chunks from different RX rings */
> + bool rx_packets_compound;
> +
> + bool rx_vlan_stripping;
> + bool lro_supported;
> +
> + uint8_t rxq_num;
> + struct {
> + Vmxnet3_Ring rx_ring[VMXNET3_RX_RINGS_PER_QUEUE];
> + Vmxnet3_Ring comp_ring;
> + uint8_t intr_idx;
> + target_phys_addr_t rx_stats_pa;
> + struct UPT1_RxStats rxq_stats;
> + } rxq_descr[VMXNET3_DEVICE_MAX_RX_QUEUES];
> +
> + /* Network MTU */
> + uint32_t mtu;
> +
> + /* Maximum number of fragments for indicated TX packets */
> + uint32_t max_tx_frags;
> +
> + /* Maximum number of fragments for indicated RX packets */
> + uint16_t max_rx_frags;
> +
> + /* Index for events interrupt */
> + uint8_t event_int_idx;
> +
> + /* Whether automatic interrupts masking enabled */
> + uint8_t auto_int_masking;
> +
> + bool peer_has_vhdr;
> +
> + /* TX packets to QEMU interface */
> + Vmxnet3_TxPkt curr_txpkt;
> + size_t curr_txpkt_pl_frags;
> + bool curr_txpkt_skip;
> + bool curr_txpkt_header_processed;
> +
> + uint32_t device_active;
> + uint32_t last_command;
> +
> + uint32_t link_status_and_speed;
> +
> + struct {
> + bool is_masked;
> + bool is_pending;
> + bool is_asserted;
> + } interrupt_states[VMXNET3_MAX_INTRS];
> +
> + uint32_t temp_mac; /* To store the low part first */
> +
> + MACAddr perm_mac;
> + uint32_t vlan_table[VMXNET3_VFT_SIZE];
> + uint32_t rx_mode;
> + MACAddr *mcast_list;
> + uint16_t mcast_list_len;
> +} VMXNET3_State;
> +
> +/* Interrupt management */
> +
> +/* This function returns sign whether interrupt line is in asserted state */
> +/* This depends on the type of interrupt used. For INTX interrupt line will */
> +/* be asserted until explicit deassertion, for MSI(X) interrupt line will */
> +/* be deasserted automatically due to notifiction symantics of the MSI(X) */
> +/* interrupts */
> +static bool _vmxnet3_assert_interrupt_line(VMXNET3_State *s, uint32_t int_idx)
> +{
> +#ifdef VMXNET3_ENABLE_MSIX
> + if (s->msix_used && msix_enabled(&s->dev)) {
> + DIRPRINTF("Sending MSI-X notification for vector %u", int_idx);
> + msix_notify(&s->dev, int_idx);
> + return false;
> + }
> +#endif
> +#ifdef VMXNET3_ENABLE_MSI
> + if (s->msi_used && msi_enabled(&s->dev)) {
> + DIRPRINTF("Sending MSI notification for vector %u", int_idx);
> + msi_notify(&s->dev, int_idx);
> + return false;
> + }
> +#endif
> +
> + DIRPRINTF("Asserting line for interrupt %u", int_idx);
> + qemu_set_irq(s->dev.irq[int_idx], 1);
> + return true;
> +}
> +
> +static void _vmxnet3_deassert_interrupt_line(VMXNET3_State *s, int lidx)
> +{
> +#ifdef VMXNET3_ENABLE_MSIX
> + /* This function should never be called for MSI(X) interrupts */
> + /* because deassertion never required for message interrupts */
> + assert(!s->msix_used || !msix_enabled(&s->dev));
> +#endif
> +#ifdef VMXNET3_ENABLE_MSI
> + /* This function should never be called for MSI(X) interrupts */
> + /* because deassertion never required for message interrupts */
> + assert(!s->msi_used || !msi_enabled(&s->dev));
> +#endif
> +
> + DIRPRINTF("Deasserting line for interrupt %u", lidx);
> + qemu_set_irq(s->dev.irq[lidx], 0);
> +}
> +
> +static void vmxnet3_update_interrupt_line_state(VMXNET3_State *s, int lidx)
> +{
> + if (!s->interrupt_states[lidx].is_pending &&
> + s->interrupt_states[lidx].is_asserted) {
> + DIRPRINTF("New interrupt line state for index %d is DOWN", lidx);
> + _vmxnet3_deassert_interrupt_line(s, lidx);
> + s->interrupt_states[lidx].is_asserted = false;
> + return;
> + }
> +
> + if (s->interrupt_states[lidx].is_pending &&
> + !s->interrupt_states[lidx].is_masked &&
> + !s->interrupt_states[lidx].is_asserted) {
> + DIRPRINTF("New interrupt line state for index %d is UP", lidx);
> + s->interrupt_states[lidx].is_asserted =
> + _vmxnet3_assert_interrupt_line(s, lidx);
> + s->interrupt_states[lidx].is_pending = false;
> + return;
> + }
> +}
> +
> +static void vmxnet3_trigger_interrupt(VMXNET3_State *s, int lidx)
> +{
> + s->interrupt_states[lidx].is_pending = true;
> + vmxnet3_update_interrupt_line_state(s, lidx);
> +
> +#ifdef VMXNET3_ENABLE_MSIX
> + if (s->msix_used && msix_enabled(&s->dev) && s->auto_int_masking) {
> + goto do_automask;
> + }
> +#endif
> +
> +#ifdef VMXNET3_ENABLE_MSI
> + if (s->msi_used && msi_enabled(&s->dev) && s->auto_int_masking) {
> + goto do_automask;
> + }
> +#endif
> +
> + return;
> +
> +do_automask:
> + s->interrupt_states[lidx].is_masked = true;
> + vmxnet3_update_interrupt_line_state(s, lidx);
> +}
> +
> +static bool vmxnet3_interrupt_asserted(VMXNET3_State *s, int lidx)
> +{
> + return s->interrupt_states[lidx].is_asserted;
> +}
> +
> +static void vmxnet3_clear_interrupt(VMXNET3_State *s, int int_idx)
> +{
> + s->interrupt_states[int_idx].is_pending = false;
> + if (s->auto_int_masking) {
> + s->interrupt_states[int_idx].is_masked = true;
> + }
> + vmxnet3_update_interrupt_line_state(s, int_idx);
> +}
> +
> +static void
> +vmxnet3_on_interrupt_mask_changed(VMXNET3_State *s, int lidx, bool is_masked)
> +{
> + s->interrupt_states[lidx].is_masked = is_masked;
> + vmxnet3_update_interrupt_line_state(s, lidx);
> +}
> +
> +static bool vmxnet3_verify_driver_magic(target_phys_addr_t dshmem)
> +{
> + return (VMXNET3_REV1_MAGIC == VMXNET3_READ_DRV_SHARED32(dshmem, magic));
> +}
> +
> +#define _GET_BYTE(x, byte_num) (((x) >> (byte_num)*8) & 0xFF)
> +#define _MAKE_BYTE(byte_num, val) (((uint32_t)((val) & 0xFF)) << (byte_num)*8)
> +
> +static void vmxnet3_set_variable_mac(VMXNET3_State *s, uint32_t h, uint32_t l)
> +{
> + s->conf.macaddr.a[0] = _GET_BYTE(l, 0);
> + s->conf.macaddr.a[1] = _GET_BYTE(l, 1);
> + s->conf.macaddr.a[2] = _GET_BYTE(l, 2);
> + s->conf.macaddr.a[3] = _GET_BYTE(l, 3);
> + s->conf.macaddr.a[4] = _GET_BYTE(h, 0);
> + s->conf.macaddr.a[5] = _GET_BYTE(h, 1);
> +
> + DCFPRINTF("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
> +
> + qemu_format_nic_info_str(&s->nic->nc, s->conf.macaddr.a);
> +}
> +
> +static uint64_t vmxnet3_get_mac_low(MACAddr *addr)
> +{
> + return _MAKE_BYTE(0, addr->a[0]) |
> + _MAKE_BYTE(1, addr->a[1]) |
> + _MAKE_BYTE(2, addr->a[2]) |
> + _MAKE_BYTE(3, addr->a[3]);
> +}
> +
> +static uint64_t vmxnet3_get_mac_high(MACAddr *addr)
> +{
> + return _MAKE_BYTE(0, addr->a[4]) |
> + _MAKE_BYTE(1, addr->a[5]);
> +}
> +
> +static void
> +vmxnet3_inc_tx_consumption_counter(VMXNET3_State *s, int qidx)
> +{
> + vmxnet3_ring_inc(&s->txq_descr[qidx].tx_ring);
> +}
> +
> +static inline void
> +vmxnet3_inc_rx_consumption_counter(VMXNET3_State *s, int qidx, int ridx)
> +{
> + vmxnet3_ring_inc(&s->rxq_descr[qidx].rx_ring[ridx]);
> +}
> +
> +static inline void
> +vmxnet3_inc_tx_completion_counter(VMXNET3_State *s, int qidx)
> +{
> + vmxnet3_ring_inc(&s->txq_descr[qidx].comp_ring);
> +}
> +
> +static void
> +vmxnet3_inc_rx_completion_counter(VMXNET3_State *s, int qidx)
> +{
> + vmxnet3_ring_inc(&s->rxq_descr[qidx].comp_ring);
> +}
> +
> +static void
> +vmxnet3_dec_rx_completion_counter(VMXNET3_State *s, int qidx)
> +{
> + vmxnet3_ring_dec(&s->rxq_descr[qidx].comp_ring);
> +}
> +
> +static inline void vmxnet3_flush_shmem_changes(void)
> +{
> + /* Flush shared memory changes */
> + /* Needed before transferring comntrol to guest */
> + smp_wmb();
> +}
> +
> +static void vmxnet3_complete_packet(VMXNET3_State *s, int qidx, uint32 tx_ridx)
> +{
> + struct Vmxnet3_TxCompDesc txcq_descr;
> +
> + vmxnet3_ring_dump(DRIPRINTF, "TXC", qidx, &s->txq_descr[qidx].comp_ring);
> +
> + txcq_descr.txdIdx = tx_ridx;
> + txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_descr[qidx].comp_ring);
> +
> + vmxnet3_ring_write_curr_cell(&s->txq_descr[qidx].comp_ring, &txcq_descr);
> + vmxnet3_inc_tx_completion_counter(s, qidx);
> +
> + vmxnet3_flush_shmem_changes();
> + vmxnet3_trigger_interrupt(s, s->txq_descr[qidx].intr_idx);
> +}
> +
> +static bool
> +vmxnet3_setup_tx_offloads(Vmxnet3_TxPkt *pkt)
> +{
> + Vmxnet3_TxPktMdata *mdata = vmxnet3_txpkt_get_mdata(pkt);
> + struct virtio_net_hdr *vhdr = vmxnet3_txpkt_get_vhdr(pkt);
> + bool res = true;
> +
> + vhdr->hdr_len = mdata->hdr_length;
> +
> + switch (mdata->offload_mode) {
> + case VMXNET3_OM_NONE:
> + vhdr->flags = 0;
> + vhdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
> + vhdr->gso_size = 0;
> + break;
> +
> + case VMXNET3_OM_CSUM: {
> + DPKPRINTF("L4 CSO requested data_offset: %d, csoff: %d",
> + mdata->hdr_length, mdata->cso_or_gso_size);
> + vhdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
> + vhdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
> + vhdr->gso_size = 0;
> + vhdr->csum_start = mdata->hdr_length;
> + vhdr->csum_offset = mdata->cso_or_gso_size - mdata->hdr_length;
> + }
> + break;
> +
> + case VMXNET3_OM_TSO: {
> + uint8_t *l2hdr = vmxnet3_txpkt_get_l2hdr(pkt);
> + size_t l2hdr_len = vmxnet3_txpkt_get_l2hdr_len(pkt);
> + uint8_t *l3hdr = vmxnet3_txpkt_get_l3hdr(pkt);
> + size_t l3hdr_len = vmxnet3_txpkt_get_l3hdr_len(pkt);
> + uint16_t l3_proto = eth_get_l3_proto(l2hdr, l2hdr_len);
> + size_t payload_len = vmxnet3_txpkt_get_payload_len(pkt);
> +
> + vhdr->gso_type = eth_get_gso_type(l3_proto, l3hdr);
> + vhdr->gso_size = mdata->cso_or_gso_size;
> + res = eth_setup_tx_offloads(l3hdr, l3hdr_len, l2hdr_len,
> + payload_len, vhdr,
> + vmxnet3_txpkt_get_more_frags(pkt),
> + vmxnet3_txpkt_get_frag_off(pkt));
> + DPKPRINTF("GSO offload type %d requested.", vhdr->gso_type);
> + }
> + break;
> +
> + default:
> + assert(false);
> + }
> +
> + return res;
> +}
> +
> +static size_t
> +vmxnet3_txpkt_adopt_headers(Vmxnet3_TxPkt *pkt,
> + size_t data_len,
> + target_phys_addr_t data_pa,
> + bool needs_tso)
> +{
> + /* Copy L2 header */
> + uint8_t *l2hdr = vmxnet3_txpkt_get_l2hdr(pkt);
> + uint8_t *l3hdr = vmxnet3_txpkt_get_l3hdr(pkt);
> + size_t l2hdr_len = 0;
> + size_t l3hdr_len = 0;
> +
> + assert(data_len >= ETH_MAX_L2_HDR_LEN);
> + cpu_physical_memory_read(data_pa, l2hdr, ETH_MAX_L2_HDR_LEN);
> + l2hdr_len = eth_get_l2_hdr_length(l2hdr);
> + vmxnet3_txpkt_set_l2hdr_len(pkt, l2hdr_len);
> +
> + /* If packet requires offload - copy L3 header */
> + if (needs_tso) {
> + switch (eth_get_l3_proto(l2hdr, l2hdr_len)) {
> + case ETH_P_IP: {
> + target_phys_addr_t ip_opt_pa;
> + struct ip_header *iphdr = (struct ip_header *) l3hdr;
> + assert(data_len >= l2hdr_len + sizeof(struct ip_header));
> + cpu_physical_memory_read(data_pa + l2hdr_len,
> + l3hdr, sizeof(struct ip_header));
> + l3hdr_len = IP_HDR_GET_LEN(l3hdr);
> + ip_opt_pa = data_pa + l2hdr_len + sizeof(struct ip_header);
> + cpu_physical_memory_read(ip_opt_pa,
> + l3hdr + sizeof(struct ip_header),
> + l3hdr_len - sizeof(struct ip_header));
> + pkt->max_payload_len =
> + IP_FRAG_ALIGN_SIZE(VMXNET3_MAX_IP_PLOAD_LEN);
> + pkt->fragmentation.orig_more_frags =
> + FLAG_IS_SET(be16_to_cpu(iphdr->ip_off), IP_MF);
> + }
> + break;
> +
> + case ETH_P_IPV6: {
> + target_phys_addr_t l3hdr_pa = data_pa + l2hdr_len;
> + l3hdr_len = sizeof(struct ip6_header);
> + assert(data_len >= l2hdr_len + l3hdr_len);
> + cpu_physical_memory_read(l3hdr_pa, l3hdr, l3hdr_len);
> + pkt->max_payload_len = 0;
> + }
> + break;
> +
> + default: {
> + l3hdr_len = 0;
> + pkt->max_payload_len = 0;
> + }
> + break;
> + }
> + }
> +
> + vmxnet3_txpkt_set_l3hdr_len(pkt, l3hdr_len);
> +
> + /* Return amount of data adopted */
> + return l2hdr_len + l3hdr_len;
> +}
> +
> +static void
> +vmxnet3_tx_retrieve_metadata(Vmxnet3_TxPkt *pkt,
> + const struct Vmxnet3_TxDesc *txd)
> +{
> + struct eth_header *ehdr =
> + (struct eth_header *) vmxnet3_txpkt_get_l2hdr(pkt);
> +
> + Vmxnet3_TxPktMdata *mdata = vmxnet3_txpkt_get_mdata(pkt);
> + mdata->offload_mode = txd->om;
> + mdata->cso_or_gso_size = txd->msscof;
> + mdata->hdr_length = txd->hlen;
> + mdata->packet_type = get_eth_packet_type(ehdr);
> +
> + if (txd->ti) {
> + eth_setup_vlan_headers(ehdr, txd->tci);
> + }
> +}
> +
> +static size_t
> +vmxnet3_txpkt_adopt_data_fragment(Vmxnet3_TxPkt *pkt,
> + target_phys_addr_t data_pa,
> + size_t data_len,
> + uint32_t fragment_num)
> +{
> + struct iovec *v = vmxnet3_txpkt_get_payload_frag(pkt, fragment_num);
> + size_t bytes_to_adopt = data_len;
> +
> + if ((0 != pkt->max_payload_len) &&
> + (pkt->payload_len + data_len > pkt->max_payload_len)) {
> + bytes_to_adopt = pkt->max_payload_len - pkt->payload_len;
> + }
> +
> + v->iov_base = (void *) (uint64_t) data_pa;
> + v->iov_len = bytes_to_adopt;
> + pkt->payload_len += bytes_to_adopt;
> + return bytes_to_adopt;
> +}
> +
> +typedef enum {
> + VMXNET3_SUCCEEDED = 0xBEEFBEEF,
> + VMXNET3_OUT_OF_BUF,
> + VMXNET3_PKT_ERROR
> +} Vmxnet3_PktStatus;
> +
> +static void
> +vmxnet3_on_tx_done_update_stats(VMXNET3_State *s,
> + Vmxnet3_TxPkt *pkt,
> + int qidx,
> + Vmxnet3_PktStatus status)
> +{
> + Vmxnet3_TxPktMdata *mdata = vmxnet3_txpkt_get_mdata(&s->curr_txpkt);
> + size_t tot_len = vmxnet3_txpkt_get_total_len(pkt);
> + struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats;
> +
> + switch (status) {
> + case VMXNET3_SUCCEEDED: {
> + switch (mdata->packet_type) {
> + case VMXNET3_PKT_BCAST:
> + stats->bcastPktsTxOK++;
> + stats->bcastBytesTxOK += tot_len;
> + break;
> + case VMXNET3_PKT_MCAST:
> + stats->mcastPktsTxOK++;
> + stats->mcastBytesTxOK += tot_len;
> + break;
> + case VMXNET3_PKT_UCAST:
> + stats->ucastPktsTxOK++;
> + stats->ucastBytesTxOK += tot_len;
> + break;
> + default:
> + assert(false);
> + }
> +
> + if (VMXNET3_OM_TSO == mdata->offload_mode) {
> + /* According to VMWARE headers this statistic is a number */
> + /* of packets after segmentation but since we don't have */
> + /* this information in QEMU model, the best we can do is to */
> + /* provide number of non-segmented packets */
> + stats->TSOPktsTxOK++;
> + stats->TSOBytesTxOK += tot_len;
> + }
> + }
> + break;
> +
> + case VMXNET3_PKT_ERROR: {
> + stats->pktsTxDiscard++;
> + }
> + break;
> +
> + case VMXNET3_OUT_OF_BUF: {
> + stats->pktsTxError++;
> + }
> + break;
> +
> + default:
> + assert(false);
> + }
> +}
> +
> +static void
> +vmxnet3_on_rx_done_update_stats(VMXNET3_State *s,
> + Vmxnet3_RxPkt *pkt,
> + int qidx,
> + Vmxnet3_PktStatus status)
> +{
> + Vmxnet3_RxPktMdata *mdata = vmxnet3_rxpkt_get_mdata(pkt);
> + struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats;
> +
> + switch (status) {
> + case VMXNET3_OUT_OF_BUF:
> + stats->pktsRxOutOfBuf++;
> + break;
> +
> + case VMXNET3_PKT_ERROR:
> + stats->pktsRxError++;
> + break;
> +
> + case VMXNET3_SUCCEEDED:
> + switch (mdata->packet_type) {
> + case VMXNET3_PKT_BCAST:
> + stats->bcastPktsRxOK++;
> + stats->bcastBytesRxOK += mdata->tot_len;
> + break;
> + case VMXNET3_PKT_MCAST:
> + stats->mcastPktsRxOK++;
> + stats->mcastBytesRxOK += mdata->tot_len;
> + break;
> + case VMXNET3_PKT_UCAST:
> + stats->ucastPktsRxOK++;
> + stats->ucastBytesRxOK += mdata->tot_len;
> + break;
> + default:
> + assert(false);
> + }
> +
> + if (mdata->tot_len > s->mtu) {
> + stats->LROPktsRxOK++;
> + stats->LROBytesRxOK += mdata->tot_len;
> + }
> + break;
> +
> + default:
> + assert(false);
> + }
> +}
> +
> +static inline bool
> +vmxnet3_pop_next_tx_descr(VMXNET3_State *s,
> + int qidx,
> + struct Vmxnet3_TxDesc *txd,
> + uint32_t *descr_idx)
> +{
> + Vmxnet3_Ring *ring = &s->txq_descr[qidx].tx_ring;
> +
> + vmxnet3_ring_read_curr_cell(ring, txd);
> + if (txd->gen == vmxnet3_ring_curr_gen(ring)) {
> + vmxnet3_ring_dump(DRIPRINTF, "TX", qidx, ring);
> + *descr_idx = vmxnet3_ring_curr_cell_idx(ring);
> + vmxnet3_inc_tx_consumption_counter(s, qidx);
> + return true;
> + }
> +
> + return false;
> +}
> +
> +static bool
> +vmxnet3_send_packet(VMXNET3_State *s, Vmxnet3_TxPkt *pkt, uint32_t qidx)
> +{
> + uint32_t mapped_fragments;
> + void *mapped = vmxnet3_txpkt_map(&s->curr_txpkt, &mapped_fragments, false);
> + if (NULL != mapped) {
> + bool res;
> + if (vmxnet3_setup_tx_offloads(pkt)) {
> + vmxnet3_dump_virt_hdr(vmxnet3_txpkt_get_vhdr(pkt));
> + vmxnet3_txpkt_dump(pkt);
> + qemu_sendv_packet(&s->nic->nc, mapped, mapped_fragments);
> + vmxnet3_on_tx_done_update_stats(s, pkt, qidx, VMXNET3_SUCCEEDED);
> + res = true;
> + } else {
> + vmxnet3_on_tx_done_update_stats(s, pkt, qidx, VMXNET3_PKT_ERROR);
> + res = false;
> + }
> + vmxnet3_txpkt_unmap(pkt, false);
> + return res;
> + } else {
> + vmxnet3_on_tx_done_update_stats(s, &s->curr_txpkt,
> + qidx, VMXNET3_OUT_OF_BUF);
> + return false;
> + }
> +}
> +
> +static void vmxnet3_process_tx_queue(VMXNET3_State *s, int qidx)
> +{
> + struct Vmxnet3_TxDesc txd;
> + uint32_t txd_idx;
> + uint32_t data_len;
> + target_phys_addr_t data_pa;
> + size_t bytes_adopted;
> +
> + for (;;) {
> + if (!vmxnet3_pop_next_tx_descr(s, qidx, &txd, &txd_idx)) {
> + break;
> + }
> +
> + vmxnet3_dump_tx_descr(&txd);
> +
> + if (!s->curr_txpkt_skip) {
> + data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE;
> + data_pa = le64_to_cpu(txd.addr);
> +
> + if (!s->curr_txpkt_header_processed) {
> + bool needs_tso = (VMXNET3_OM_TSO == txd.om);
> + bytes_adopted = vmxnet3_txpkt_adopt_headers(&s->curr_txpkt,
> + data_len,
> + data_pa,
> + needs_tso);
> + vmxnet3_tx_retrieve_metadata(&s->curr_txpkt, &txd);
> + data_pa += bytes_adopted;
> + data_len -= bytes_adopted;
> + s->curr_txpkt_header_processed = true;
> + }
> +
> + do {
> + if (0 != data_len) {
> + int frag_num = s->curr_txpkt_pl_frags++;
> + bytes_adopted =
> + vmxnet3_txpkt_adopt_data_fragment(&s->curr_txpkt,
> + data_pa,
> + data_len,
> + frag_num);
> + data_pa += bytes_adopted;
> + data_len -= bytes_adopted;
> + }
> +
> + if ((0 != data_len) || txd.eop) {
> + size_t frag_off;
> +
> + vmxnet3_txpkt_set_num_pl_frags(&s->curr_txpkt,
> + s->curr_txpkt_pl_frags);
> +
> + vmxnet3_txpkt_set_more_frags(&s->curr_txpkt,
> + (0 != data_len));
> +
> + s->curr_txpkt_skip =
> + !vmxnet3_send_packet(s, &s->curr_txpkt, qidx);
> +
> + frag_off = vmxnet3_txpkt_get_payload_len(&s->curr_txpkt) /
> + IP_FRAG_UNIT_SIZE;
> +
> + vmxnet3_txpkt_advance_frag_off(&s->curr_txpkt, frag_off);
> +
> + vmxnet3_txpkt_reset_payload(&s->curr_txpkt);
> + s->curr_txpkt_pl_frags = 0;
> + }
> + } while (0 != data_len);
> + }
> +
> + if (txd.eop) {
> + vmxnet3_complete_packet(s, qidx, txd_idx);
> + vmxnet3_txpkt_reset(&s->curr_txpkt);
> + s->curr_txpkt_skip = false;
> + s->curr_txpkt_header_processed = false;
> + }
> + }
> +}
> +
> +static inline void
> +vmxnet3_read_next_rx_descr(VMXNET3_State *s, int qidx, int ridx,
> + struct Vmxnet3_RxDesc *dbuf, uint32_t *didx)
> +{
> + Vmxnet3_Ring *ring = &s->rxq_descr[qidx].rx_ring[ridx];
> + *didx = vmxnet3_ring_curr_cell_idx(ring);
> + vmxnet3_ring_read_curr_cell(ring, dbuf);
> +}
> +
> +static inline uint8_t
> +vmxnet3_get_rx_ring_gen(VMXNET3_State *s, int qidx, int ridx)
> +{
> + return s->rxq_descr[qidx].rx_ring[ridx].gen;
> +}
> +
> +static inline target_phys_addr_t
> +vmxnet3_pop_rxc_descr(VMXNET3_State *s, int qidx, uint32_t *descr_gen)
> +{
> + uint8_t ring_gen;
> + struct Vmxnet3_RxCompDesc rxcd;
> +
> + target_phys_addr_t daddr =
> + vmxnet3_ring_curr_cell_pa(&s->rxq_descr[qidx].comp_ring);
> +
> + cpu_physical_memory_read(daddr, &rxcd, sizeof(struct Vmxnet3_RxCompDesc));
> + ring_gen = vmxnet3_ring_curr_gen(&s->rxq_descr[qidx].comp_ring);
> +
> + if (rxcd.gen != ring_gen) {
> + *descr_gen = ring_gen;
> + vmxnet3_inc_rx_completion_counter(s, qidx);
> + return daddr;
> + }
> +
> + return 0;
> +}
> +
> +static inline void
> +vmxnet3_revert_rxc_descr(VMXNET3_State *s, int qidx)
> +{
> + vmxnet3_dec_rx_completion_counter(s, qidx);
> +}
> +
> +#define RXQ_IDX (0)
> +#define RX_HEAD_BODY_RING (0)
> +#define RX_BODY_ONLY_RING (1)
> +
> +static bool
> +vmxnet3_get_next_head_rx_descr(VMXNET3_State *s,
> + struct Vmxnet3_RxDesc *descr_buf,
> + uint32_t *descr_idx,
> + uint32_t *ridx)
> +{
> + for (;;) {
> + uint32_t ring_gen;
> + vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
> + descr_buf, descr_idx);
> +
> + /* If no more free descriptors - return */
> + ring_gen = vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING);
> + if (descr_buf->gen != ring_gen) {
> + return false;
> + }
> +
> + /* Mark current descriptor as used/skipped */
> + vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
> +
> + /* If this is what we are looking for - return */
> + if (VMXNET3_RXD_BTYPE_HEAD == descr_buf->btype) {
> + *ridx = RX_HEAD_BODY_RING;
> + return true;
> + }
> + }
> +}
> +
> +static bool
> +vmxnet3_get_next_body_rx_descr(VMXNET3_State *s,
> + struct Vmxnet3_RxDesc *dbuf,
> + uint32_t *didx,
> + uint32_t *ridx)
> +{
> + vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, dbuf, didx);
> +
> + /* Try to find corresponding descriptor in head/body ring */
> + if ((dbuf->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_HEAD_BODY_RING)) &&
> + (VMXNET3_RXD_BTYPE_BODY == dbuf->btype)) {
> + vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_HEAD_BODY_RING);
> + *ridx = RX_HEAD_BODY_RING;
> + return true;
> + }
> +
> + /* If there is no free descriptors on head/body ring or next free */
> + /* descriptor is a head descriptor switch to body only ring */
> + vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, dbuf, didx);
> +
> + /* If no more free descriptors - return */
> + if (dbuf->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX, RX_BODY_ONLY_RING)) {
> + assert(VMXNET3_RXD_BTYPE_BODY == dbuf->btype);
> + *ridx = RX_BODY_ONLY_RING;
> + vmxnet3_inc_rx_consumption_counter(s, RXQ_IDX, RX_BODY_ONLY_RING);
> + return true;
> + }
> +
> + return false;
> +}
> +
> +static inline bool
> +vmxnet3_get_next_rx_descr(VMXNET3_State *s, bool is_head,
> + struct Vmxnet3_RxDesc *descr_buf,
> + uint32_t *descr_idx,
> + uint32_t *ridx)
> +{
> + if (is_head || !s->rx_packets_compound) {
> + return vmxnet3_get_next_head_rx_descr(s, descr_buf, descr_idx, ridx);
> + } else {
> + return vmxnet3_get_next_body_rx_descr(s, descr_buf, descr_idx, ridx);
> + }
> +}
> +
> +static void
> +vmxnet3_rx_put_metadata_to_descr(Vmxnet3_RxPkt *pkt,
> + struct Vmxnet3_RxCompDesc *rxcd)
> +{
> + int csum_correct, is_gso;
> + bool isip4, isip6, istcp, isudp;
> + uint8_t headers[ETH_MAX_L2_HDR_LEN + ETH_MAX_L3_HDR_LEN];
> + Vmxnet3_RxPktMdata *mdata = vmxnet3_rxpkt_get_mdata(pkt);
> + struct virtio_net_hdr *vhdr;
> + uint8_t offload_type;
> +
> + if (mdata->vlan_stripped) {
> + rxcd->ts = 1;
> + rxcd->tci = mdata->vlan_tag;
> + }
> +
> + if (!mdata->vhdr_valid) {
> + goto nocsum;
> + }
> +
> + vhdr = vmxnet3_rxpkt_get_vhdr(pkt);
> + /* Checksum is valid when lower level tell so or when lower level */
> + /* requires checksum offload telling that packet produced/bridged */
> + /* locally and did travel over network after last checksum calculation */
> + /* or production */
> + csum_correct = FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_DATA_VALID) ||
> + FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM);
> +
> + offload_type = vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
> + is_gso = (offload_type != VIRTIO_NET_HDR_GSO_NONE) ? 1 : 0;
> +
> + if (!csum_correct && !is_gso) {
> + goto nocsum;
> + }
> +
> + /* Make linear copy of L2-L3 headers and parse it */
> + iov_to_buf(vmxnet3_rxpkt_get_frag(pkt, 0),
> + vmxnet3_rxpkt_get_num_frags(pkt),
> + headers, 0, MIN(sizeof(headers), mdata->tot_len));
> +
> + eth_get_protocols(headers, MIN(sizeof(headers), mdata->tot_len),
> + &isip4, &isip6, &isudp, &istcp);
> + if ((!istcp && !isudp) || (!isip4 && !isip6)) {
> + goto nocsum;
> + }
> +
> + rxcd->cnc = 0;
> + rxcd->v4 = isip4 ? 1 : 0;
> + rxcd->v6 = isip6 ? 1 : 0;
> + rxcd->tcp = istcp ? 1 : 0;
> + rxcd->udp = isudp ? 1 : 0;
> + rxcd->fcs = rxcd->tuc = rxcd->ipc = 1;
> + return;
> +
> +nocsum:
> + rxcd->cnc = 1;
> + return;
> +}
> +
> +static void
> +vmxnet3_physical_memory_writev(const struct iovec *iov,
> + size_t start_iov_off,
> + target_phys_addr_t target_addr,
> + size_t bytes_to_copy)
> +{
> + size_t curr_off = 0;
> + size_t copied = 0;
> +
> + while (bytes_to_copy) {
> + if (start_iov_off < (curr_off + iov->iov_len)) {
> + size_t chunk_len =
> + MIN((curr_off + iov->iov_len) - start_iov_off, bytes_to_copy);
> +
> + cpu_physical_memory_write(target_addr + copied,
> + iov->iov_base + start_iov_off - curr_off,
> + chunk_len);
> +
> + copied += chunk_len;
> + start_iov_off += chunk_len;
> + curr_off = start_iov_off;
> + bytes_to_copy -= chunk_len;
> + } else {
> + curr_off += iov->iov_len;
> + }
> + iov++;
> + }
> +}
> +
> +static bool
> +vmxnet3_indicate_packet(VMXNET3_State *s, Vmxnet3_RxPkt *pkt)
> +{
> + struct Vmxnet3_RxDesc rxd;
> + bool is_head = true;
> + uint32_t rxd_idx;
> + uint32_t rx_ridx;
> +
> + struct Vmxnet3_RxCompDesc rxcd;
> + uint32_t new_rxcd_gen = VMXNET3_INIT_GEN;
> + target_phys_addr_t new_rxcd_pa = 0;
> + target_phys_addr_t ready_rxcd_pa = 0;
> + struct iovec *data = vmxnet3_rxpkt_get_frag(pkt, 0);
> + Vmxnet3_RxPktMdata *mdata = vmxnet3_rxpkt_get_mdata(pkt);
> + size_t bytes_copied = 0;
> + size_t bytes_left = mdata->tot_len;
> + uint16_t num_frags = 0;
> +
> + vmxnet3_rxpkt_dump(pkt);
> +
> + while ((num_frags < s->max_rx_frags) &&
> + (bytes_left > 0) &&
> + (new_rxcd_pa = vmxnet3_pop_rxc_descr(s, RXQ_IDX, &new_rxcd_gen)) &&
> + vmxnet3_get_next_rx_descr(s, is_head, &rxd, &rxd_idx, &rx_ridx)) {
> + size_t chunk_size = MIN(bytes_left, rxd.len);
> + vmxnet3_physical_memory_writev(data, bytes_copied,
> + le64_to_cpu(rxd.addr), chunk_size);
> + bytes_copied += chunk_size;
> + bytes_left -= chunk_size;
> +
> + vmxnet3_dump_rx_descr(&rxd);
> +
> + if (0 != ready_rxcd_pa) {
> + cpu_physical_memory_write(ready_rxcd_pa, &rxcd, sizeof(rxcd));
> + }
> +
> + memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc));
> + rxcd.rxdIdx = rxd_idx;
> + rxcd.len = chunk_size;
> + rxcd.sop = is_head;
> + rxcd.gen = new_rxcd_gen;
> + rxcd.rqID = RXQ_IDX + rx_ridx*s->rxq_num;
> +
> + if (0 == bytes_left) {
> + vmxnet3_rx_put_metadata_to_descr(pkt, &rxcd);
> + }
> +
> + DRIPRINTF("RX Completion descriptor: rxRing: %lu rxIdx %lu len %lu "
> + "sop %d csum_correct %lu",
> + (unsigned long) rx_ridx,
> + (unsigned long) rxcd.rxdIdx,
> + (unsigned long) rxcd.len,
> + (int) rxcd.sop,
> + (unsigned long) rxcd.tuc);
> +
> + is_head = false;
> + ready_rxcd_pa = new_rxcd_pa;
> + new_rxcd_pa = 0;
> + }
> +
> + if (0 != ready_rxcd_pa) {
> + rxcd.eop = 1;
> + rxcd.err = (0 != bytes_left);
> + cpu_physical_memory_write(ready_rxcd_pa, &rxcd, sizeof(rxcd));
> + vmxnet3_flush_shmem_changes();
> + }
> +
> + if (0 != new_rxcd_pa) {
> + vmxnet3_revert_rxc_descr(s, RXQ_IDX);
> + }
> +
> + vmxnet3_trigger_interrupt(s, s->rxq_descr[RXQ_IDX].intr_idx);
> +
> + if (bytes_left == 0) {
> + vmxnet3_on_rx_done_update_stats(s, pkt, RXQ_IDX, VMXNET3_SUCCEEDED);
> + return true;
> + } else if (num_frags == s->max_rx_frags) {
> + vmxnet3_on_rx_done_update_stats(s, pkt, RXQ_IDX, VMXNET3_PKT_ERROR);
> + return false;
> + } else {
> + vmxnet3_on_rx_done_update_stats(s, pkt, RXQ_IDX, VMXNET3_OUT_OF_BUF);
> + return false;
> + }
> +}
> +
> +static void
> +vmxnet3_io_bar0_write(void *opaque, target_phys_addr_t addr,
> + uint64_t val, unsigned size)
> +{
> + VMXNET3_State *s = opaque;
> +
> + if (IS_MULTIREG_ADDR(addr, VMXNET3_REG_TXPROD,
> + VMXNET3_DEVICE_MAX_TX_QUEUES, VMXNET3_REG_ALIGN)) {
> + int tx_queue_idx =
> + MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD, VMXNET3_REG_ALIGN);
> + assert(tx_queue_idx <= s->txq_num);
> + vmxnet3_process_tx_queue(s, tx_queue_idx);
> + return;
> + }
> +
> +#pragma GCC diagnostic push
> +#pragma GCC diagnostic ignored "-Wtype-limits"
> + if (IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
> + VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
> + int l = MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR, VMXNET3_REG_ALIGN);
> +
> + DCBPRINTF("Interrupt mask for line %d written: 0x%" PRIx64, l, val);
> +
> + vmxnet3_on_interrupt_mask_changed(s, l, val);
> + return;
> + }
> +#pragma GCC diagnostic pop
> +
> + if (IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD,
> + VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN) ||
> + IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD2,
> + VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN)) {
> + return;
> + }
> +
> + DWRPRINTF("BAR0 unknown write [%" PRIx64 "] = %" PRIx64 ", size %d",
> + (uint64_t) addr, val, size);
> +}
> +
> +static uint64_t
> +vmxnet3_io_bar0_read(void *opaque, target_phys_addr_t addr, unsigned size)
> +{
> +#pragma GCC diagnostic push
> +#pragma GCC diagnostic ignored "-Wtype-limits"
> + if (IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
> + VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
> + assert(false);
> + }
> +#pragma GCC diagnostic pop
> +
> + DCBPRINTF("BAR0 unknown read [%" PRIx64 "], size %d",
> + (uint64_t) addr, size);
> + return 0;
> +}
> +
> +static void vmxnet3_reset(VMXNET3_State *s)
> +{
> + DCBPRINTF("Resetting vmxnet3...");
> +
> + vmxnet3_txpkt_reset(&s->curr_txpkt);
> + s->curr_txpkt_pl_frags = 0;
> + s->curr_txpkt_skip = false;
> + s->curr_txpkt_header_processed = false;
> +}
> +
> +static void vmxnet3_deactivate_device(VMXNET3_State *s)
> +{
> + DCBPRINTF("Deactivating vmxnet3...");
> + s->device_active = false;
> +}
> +
> +static void vmxnet3_update_rx_mode(VMXNET3_State *s)
> +{
> + s->rx_mode = VMXNET3_READ_DRV_SHARED32(s->drv_shmem,
> + devRead.rxFilterConf.rxMode);
> + DCFPRINTF("RX mode: 0x%08X", s->rx_mode);
> +}
> +
> +static void vmxnet3_update_vlan_filters(VMXNET3_State *s)
> +{
> + int i;
> +
> + /* Copy configuration from shared memory */
> + VMXNET3_READ_DRV_SHARED(s->drv_shmem,
> + devRead.rxFilterConf.vfTable,
> + s->vlan_table,
> + sizeof(s->vlan_table));
> +
> + /* Invert byte order when needed */
> + for (i = 0; i < ARRAY_SIZE(s->vlan_table); i++) {
> + s->vlan_table[i] = le32_to_cpu(s->vlan_table[i]);
> + }
> +
> + /* Dump configuration for debugging purposes */
> + DCFPRINTF("Configured VLANs:");
> + for (i = 0; i < sizeof(s->vlan_table) * 8; i++) {
> + if (VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, i)) {
> + DCFPRINTF("\tVLAN %d is present", i);
> + }
> + }
> +}
> +
> +static void vmxnet3_update_mcast_filters(VMXNET3_State *s)
> +{
> + uint16_t list_bytes =
> + VMXNET3_READ_DRV_SHARED16(s->drv_shmem,
> + devRead.rxFilterConf.mfTableLen);
> +
> + s->mcast_list_len = list_bytes / sizeof(s->mcast_list[0]);
> +
> + s->mcast_list = g_realloc(s->mcast_list, list_bytes);
> + if (NULL == s->mcast_list) {
> + if (0 == s->mcast_list_len) {
> + DCFPRINTF("Current multicast list is empty");
> + } else {
> + DERPRINTF("Failed to allocate multicast list of %d elements",
> + s->mcast_list_len);
> + }
> + s->mcast_list_len = 0;
> + } else {
> + int i;
> + target_phys_addr_t mcast_list_pa =
> + VMXNET3_READ_DRV_SHARED64(s->drv_shmem,
> + devRead.rxFilterConf.mfTablePA);
> +
> + cpu_physical_memory_read(mcast_list_pa, s->mcast_list, list_bytes);
> + DCFPRINTF("Current multicast list len is %d:", s->mcast_list_len);
> + for (i = 0; i < s->mcast_list_len; i++) {
> + DCFPRINTF("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a));
> + }
> + }
> +}
> +
> +static void vmxnet3_setup_rx_filtering(VMXNET3_State *s)
> +{
> + vmxnet3_update_rx_mode(s);
> + vmxnet3_update_vlan_filters(s);
> + vmxnet3_update_mcast_filters(s);
> +}
> +
> +static uint32_t vmxnet3_get_interrupt_config(VMXNET3_State *s)
> +{
> + uint32_t interrupt_mode = VMXNET3_IT_AUTO | (VMXNET3_IMM_AUTO << 2);
> + DCFPRINTF("Interrupt config is 0x%X", interrupt_mode);
> + return interrupt_mode;
> +}
> +
> +static void vmxnet3_fill_stats(VMXNET3_State *s)
> +{
> + int i;
> + for (i = 0; i < s->txq_num; i++) {
> + cpu_physical_memory_write(s->txq_descr[i].tx_stats_pa,
> + &s->txq_descr[i].txq_stats,
> + sizeof(s->txq_descr[i].txq_stats));
> + }
> +
> + for (i = 0; i < s->rxq_num; i++) {
> + cpu_physical_memory_write(s->rxq_descr[i].rx_stats_pa,
> + &s->rxq_descr[i].rxq_stats,
> + sizeof(s->rxq_descr[i].rxq_stats));
> + }
> +}
> +
> +static void vmxnet3_adjust_by_guest_type(VMXNET3_State *s)
> +{
> + struct Vmxnet3_GOSInfo gos;
> +
> + VMXNET3_READ_DRV_SHARED(s->drv_shmem, devRead.misc.driverInfo.gos,
> + &gos, sizeof(gos));
> + s->rx_packets_compound =
> + (VMXNET3_GOS_TYPE_WIN == gos.gosType) ? false : true;
> +
> + DCFPRINTF("Guest type specifics: RXCOMPOUND: %d", s->rx_packets_compound);
> +}
> +
> +static void
> +vmxnet3_dump_conf_descr(const char *name,
> + struct Vmxnet3_VariableLenConfDesc *pm_descr)
> +{
> + DCFPRINTF("%s descriptor dump: Version %u, Length %u",
> + name, pm_descr->confVer, pm_descr->confLen);
> +
> +};
> +
> +static void vmxnet3_update_pm_state(VMXNET3_State *s)
> +{
> + struct Vmxnet3_VariableLenConfDesc pm_descr;
> +
> + pm_descr.confLen =
> + VMXNET3_READ_DRV_SHARED32(s->drv_shmem, devRead.pmConfDesc.confLen);
> + pm_descr.confVer =
> + VMXNET3_READ_DRV_SHARED32(s->drv_shmem, devRead.pmConfDesc.confVer);
> + pm_descr.confPA =
> + VMXNET3_READ_DRV_SHARED64(s->drv_shmem, devRead.pmConfDesc.confPA);
> +
> + vmxnet3_dump_conf_descr("PM State", &pm_descr);
> +}
> +
> +static void vmxnet3_update_features(VMXNET3_State *s)
> +{
> + uint32_t guest_features;
> + int rxcsum_offload_supported;
> +
> + guest_features = VMXNET3_READ_DRV_SHARED32(s->drv_shmem,
> + devRead.misc.uptFeatures);
> +
> + rxcsum_offload_supported = FLAG_IS_SET(guest_features, UPT1_F_RXCSUM);
> + s->rx_vlan_stripping = FLAG_IS_SET(guest_features, UPT1_F_RXVLAN);
> + s->lro_supported = FLAG_IS_SET(guest_features, UPT1_F_LRO);
> +
> + DCFPRINTF("Features configuration: LRO: %d, RXCSUM: %d, VLANSTRIP: %d",
> + s->lro_supported, rxcsum_offload_supported,
> + s->rx_vlan_stripping);
> +
> + tap_set_offload(s->nic->nc.peer,
> + rxcsum_offload_supported,
> + s->lro_supported,
> + s->lro_supported,
> + 0,
> + 0);
> +}
> +
> +static void vmxnet3_activate_device(VMXNET3_State *s)
> +{
> + int i;
> + static const uint32_t VMXNET3_DEF_TX_THRESHOLD = 1;
> + target_phys_addr_t qdescr_table_pa;
> + uint64_t pa;
> + uint32_t size;
> +
> + /* Verify configuration consistency */
> + if (!vmxnet3_verify_driver_magic(s->drv_shmem)) {
> + DERPRINTF("Device configuration received from driver is invalid");
> + return;
> + }
> +
> + vmxnet3_adjust_by_guest_type(s);
> + vmxnet3_update_features(s);
> + vmxnet3_update_pm_state(s);
> + vmxnet3_setup_rx_filtering(s);
> + /* Cache fields from shared memory */
> + s->mtu = VMXNET3_READ_DRV_SHARED32(s->drv_shmem, devRead.misc.mtu);
> + DCFPRINTF("MTU is %u", s->mtu);
> +
> + s->max_rx_frags =
> + VMXNET3_READ_DRV_SHARED16(s->drv_shmem, devRead.misc.maxNumRxSG);
> +
> + DCFPRINTF("Max RX fragments is %u", s->max_rx_frags);
> +
> + s->event_int_idx =
> + VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.intrConf.eventIntrIdx);
> + DCFPRINTF("Events interrupt line is %u", s->event_int_idx);
> +
> + s->auto_int_masking =
> + VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.intrConf.autoMask);
> + DCFPRINTF("Automatic interrupt masking is %d", (int)s->auto_int_masking);
> +
> + s->txq_num =
> + VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.misc.numTxQueues);
> + s->rxq_num =
> + VMXNET3_READ_DRV_SHARED8(s->drv_shmem, devRead.misc.numRxQueues);
> +
> + DCFPRINTF("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num);
> + assert(s->txq_num <= VMXNET3_DEVICE_MAX_TX_QUEUES);
> +
> + qdescr_table_pa =
> + VMXNET3_READ_DRV_SHARED64(s->drv_shmem, devRead.misc.queueDescPA);
> + DCFPRINTF("TX queues descriptors table is at 0x%" PRIx64,
> + (uint64_t) qdescr_table_pa);
> +
> + /* Worst-case scenario is a packet that holds all TX rings space so */
> + /* we calculate total size of all TX rings for max TX fragments number */
> + s->max_tx_frags = 0;
> +
> + /* TX queues */
> + for (i = 0; i < s->txq_num; i++) {
> + target_phys_addr_t qdescr_pa =
> + qdescr_table_pa + i*sizeof(struct Vmxnet3_TxQueueDesc);
> +
> + /* Read interrupt number for this TX queue */
> + s->txq_descr[i].intr_idx =
> + VMXNET3_READ_TX_QUEUE_DESCR8(qdescr_pa, conf.intrIdx);
> +
> + DCFPRINTF("TX Queue %d interrupt: %d", i, s->txq_descr[i].intr_idx);
> +
> + /* Read rings memory locations for TX queues */
> + pa = VMXNET3_READ_TX_QUEUE_DESCR64(qdescr_pa, conf.txRingBasePA);
> + size = VMXNET3_READ_TX_QUEUE_DESCR32(qdescr_pa, conf.txRingSize);
> +
> + vmxnet3_ring_init(&s->txq_descr[i].tx_ring, pa, size,
> + sizeof(struct Vmxnet3_TxDesc), false);
> + vmxnet3_ring_dump(DCFPRINTF, "TX", i, &s->txq_descr[i].tx_ring);
> +
> + s->max_tx_frags += size;
> +
> + /* TXC ring */
> + pa = VMXNET3_READ_TX_QUEUE_DESCR64(qdescr_pa, conf.compRingBasePA);
> + size = VMXNET3_READ_TX_QUEUE_DESCR32(qdescr_pa, conf.compRingSize);
> + vmxnet3_ring_init(&s->txq_descr[i].comp_ring, pa, size,
> + sizeof(struct Vmxnet3_TxCompDesc), true);
> + vmxnet3_ring_dump(DCFPRINTF, "TXC", i, &s->txq_descr[i].comp_ring);
> +
> + s->txq_descr[i].tx_stats_pa =
> + qdescr_pa + offsetof(struct Vmxnet3_TxQueueDesc, stats);
> +
> + memset(&s->txq_descr[i].txq_stats, 0,
> + sizeof(s->txq_descr[i].txq_stats));
> +
> + /* Fill device-managed parameters for queues */
> + VMXNET3_WRITE_TX_QUEUE_DESCR32(qdescr_pa,
> + ctrl.txThreshold,
> + VMXNET3_DEF_TX_THRESHOLD);
> + }
> +
> + /* Preallocate TX packet wrapper */
> + DCFPRINTF("Max TX fragments is %u", s->max_tx_frags);
> + if (!vmxnet3_txpkt_prealloc(&s->curr_txpkt, s->max_tx_frags,
> + s->peer_has_vhdr)) {
> + hw_error("TX rings configuration problem");
> + }
> +
> + /* Read rings memory locations for RX queues */
> + for (i = 0; i < s->rxq_num; i++) {
> + int j;
> + target_phys_addr_t qd_pa =
> + qdescr_table_pa + s->txq_num*sizeof(struct Vmxnet3_TxQueueDesc) +
> + i*sizeof(struct Vmxnet3_RxQueueDesc);
> +
> + /* Read interrupt number for this RX queue */
> + s->rxq_descr[i].intr_idx =
> + VMXNET3_READ_TX_QUEUE_DESCR8(qd_pa, conf.intrIdx);
> +
> + DCFPRINTF("RX Queue %d interrupt: %d", i, s->rxq_descr[i].intr_idx);
> +
> + /* Read rings memory locations */
> + for (j = 0; j < VMXNET3_RX_RINGS_PER_QUEUE; j++) {
> + /* RX rings */
> + pa = VMXNET3_READ_RX_QUEUE_DESCR64(qd_pa, conf.rxRingBasePA[j]);
> + size = VMXNET3_READ_RX_QUEUE_DESCR32(qd_pa, conf.rxRingSize[j]);
> + vmxnet3_ring_init(&s->rxq_descr[i].rx_ring[j], pa, size,
> + sizeof(struct Vmxnet3_RxDesc), false);
> + DCFPRINTF("RX queue %d:%d: Base: %" PRIx64 ", Size: %d",
> + i, j, pa, size);
> + }
> +
> + /* RXC ring */
> + pa = VMXNET3_READ_RX_QUEUE_DESCR64(qd_pa, conf.compRingBasePA);
> + size = VMXNET3_READ_RX_QUEUE_DESCR32(qd_pa, conf.compRingSize);
> + vmxnet3_ring_init(&s->rxq_descr[i].comp_ring, pa, size,
> + sizeof(struct Vmxnet3_RxCompDesc), true);
> + DCFPRINTF("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa, size);
> +
> + s->rxq_descr[i].rx_stats_pa =
> + qd_pa + offsetof(struct Vmxnet3_RxQueueDesc, stats);
> + memset(&s->rxq_descr[i].rxq_stats, 0,
> + sizeof(s->rxq_descr[i].rxq_stats));
> + }
> +
> + vmxnet3_flush_shmem_changes();
> + s->device_active = true;
> +}
> +
> +static void vmxnet3_handle_command(VMXNET3_State *s, uint64_t cmd)
> +{
> + s->last_command = cmd;
> +
> + switch (cmd) {
> + case VMXNET3_CMD_GET_PERM_MAC_HI:
> + DCBPRINTF("Set: Get upper part of permanent MAC");
> + break;
> +
> + case VMXNET3_CMD_GET_PERM_MAC_LO:
> + DCBPRINTF("Set: Get lower part of permanent MAC");
> + break;
> +
> + case VMXNET3_CMD_GET_STATS:
> + DCBPRINTF("Set: Get device statistics");
> + vmxnet3_fill_stats(s);
> + break;
> +
> + case VMXNET3_CMD_ACTIVATE_DEV:
> + DCBPRINTF("Set: Activating vmxnet3 device");
> + vmxnet3_activate_device(s);
> + break;
> +
> + case VMXNET3_CMD_UPDATE_RX_MODE:
> + DCBPRINTF("Set: Update rx mode");
> + vmxnet3_update_rx_mode(s);
> + break;
> +
> + case VMXNET3_CMD_UPDATE_VLAN_FILTERS:
> + DCBPRINTF("Set: Update VLAN filters");
> + vmxnet3_update_vlan_filters(s);
> + break;
> +
> + case VMXNET3_CMD_UPDATE_MAC_FILTERS:
> + DCBPRINTF("Set: Update MAC filters");
> + vmxnet3_update_mcast_filters(s);
> + break;
> +
> + case VMXNET3_CMD_UPDATE_FEATURE:
> + DCBPRINTF("Set: Update features");
> + vmxnet3_update_features(s);
> + break;
> +
> + case VMXNET3_CMD_UPDATE_PMCFG:
> + DCBPRINTF("Set: Update power management config");
> + vmxnet3_update_pm_state(s);
> + break;
> +
> + case VMXNET3_CMD_GET_LINK:
> + DCBPRINTF("Set: Get link");
> + break;
> +
> + case VMXNET3_CMD_RESET_DEV:
> + DCBPRINTF("Set: Reset device");
> + vmxnet3_reset(s);
> + break;
> +
> + case VMXNET3_CMD_QUIESCE_DEV:
> + DCBPRINTF("Set: VMXNET3_CMD_QUIESCE_DEV - pause the device");
> + vmxnet3_deactivate_device(s);
> + break;
> +
> + case VMXNET3_CMD_GET_CONF_INTR:
> + DCBPRINTF("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt configuration");
> + break;
> +
> + default:
> + DCBPRINTF("Received unknown command: %" PRIx64, cmd);
> + break;
> + }
> +}
> +
> +static uint64_t vmxnet3_get_command_status(VMXNET3_State *s)
> +{
> + uint64_t ret;
> +
> + switch (s->last_command) {
> + case VMXNET3_CMD_ACTIVATE_DEV:
> + ret = (s->device_active) ? 0 : -1;
> + DCFPRINTF("Device active: %" PRIx64, ret);
> + break;
> +
> + case VMXNET3_CMD_GET_LINK:
> + ret = s->link_status_and_speed;
> + DCFPRINTF("Link and speed: %" PRIx64, ret);
> + break;
> +
> + case VMXNET3_CMD_GET_PERM_MAC_LO:
> + ret = vmxnet3_get_mac_low(&s->perm_mac);
> + break;
> +
> + case VMXNET3_CMD_GET_PERM_MAC_HI:
> + ret = vmxnet3_get_mac_high(&s->perm_mac);
> + break;
> +
> + case VMXNET3_CMD_GET_CONF_INTR:
> + ret = vmxnet3_get_interrupt_config(s);
> + break;
> +
> + default:
> + DWRPRINTF("Received request for unknown command: %x", s->last_command);
> + ret = -1;
> + break;
> + }
> +
> + return ret;
> +}
> +
> +static void vmxnet3_set_events(VMXNET3_State *s, uint32_t val)
> +{
> + uint32_t events;
> +
> + DCBPRINTF("Setting events: 0x%x", val);
> + events = VMXNET3_READ_DRV_SHARED32(s->drv_shmem, ecr) | val;
> + VMXNET3_WRITE_DRV_SHARED32(s->drv_shmem, ecr, events);
> +}
> +
> +static void vmxnet3_ack_events(VMXNET3_State *s, uint32_t val)
> +{
> + uint32_t events;
> +
> + DCBPRINTF("Clearing events: 0x%x", val);
> + events = VMXNET3_READ_DRV_SHARED32(s->drv_shmem, ecr) & ~val;
> + VMXNET3_WRITE_DRV_SHARED32(s->drv_shmem, ecr, events);
> +}
> +
> +static void
> +vmxnet3_io_bar1_write(void *opaque,
> + target_phys_addr_t addr,
> + uint64_t val,
> + unsigned size)
> +{
> + VMXNET3_State *s = opaque;
> +
> + switch (addr) {
> + /* Vmxnet3 Revision Report Selection */
> + case VMXNET3_REG_VRRS:
> + DCBPRINTF("Write BAR1 [VMXNET3_REG_VRRS] = %" PRIx64 ", size %d",
> + val, size);
> + break;
> +
> + /* UPT Version Report Selection */
> + case VMXNET3_REG_UVRS:
> + DCBPRINTF("Write BAR1 [VMXNET3_REG_UVRS] = %" PRIx64 ", size %d",
> + val, size);
> + break;
> +
> + /* Driver Shared Address Low */
> + case VMXNET3_REG_DSAL:
> + DCBPRINTF("Write BAR1 [VMXNET3_REG_DSAL] = %" PRIx64 ", size %d",
> + val, size);
> + /* Guest driver will first write the low part of the shared */
> + /* memory address. We save it to temp variable and set the */
> + /* shared address only after we get the high part */
> + if (0 == val) {
> + s->device_active = false;
> + }
> + s->temp_shared_guest_driver_memory = val;
> + s->drv_shmem = 0;
> + break;
> +
> + /* Driver Shared Address High */
> + case VMXNET3_REG_DSAH:
> + DCBPRINTF("Write BAR1 [VMXNET3_REG_DSAH] = %" PRIx64 ", size %d",
> + val, size);
> + /* Set the shared memory between guest driver and device. */
> + /* We already should have low address part. */
> + s->drv_shmem = s->temp_shared_guest_driver_memory | (val << 32);
> + break;
> +
> + /* Command */
> + case VMXNET3_REG_CMD:
> + DCBPRINTF("Write BAR1 [VMXNET3_REG_CMD] = %" PRIx64 ", size %d",
> + val, size);
> + vmxnet3_handle_command(s, val);
> + break;
> +
> + /* MAC Address Low */
> + case VMXNET3_REG_MACL:
> + DCBPRINTF("Write BAR1 [VMXNET3_REG_MACL] = %" PRIx64 ", size %d",
> + val, size);
> + s->temp_mac = val;
> + break;
> +
> + /* MAC Address High */
> + case VMXNET3_REG_MACH:
> + DCBPRINTF("Write BAR1 [VMXNET3_REG_MACH] = %" PRIx64 ", size %d",
> + val, size);
> + vmxnet3_set_variable_mac(s, val, s->temp_mac);
> + break;
> +
> + /* Interrupt Cause Register */
> + case VMXNET3_REG_ICR:
> + DCBPRINTF("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
> + val, size);
> + assert(false);
> + break;
> +
> + /* Event Cause Register */
> + case VMXNET3_REG_ECR:
> + DCBPRINTF("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d",
> + val, size);
> + vmxnet3_ack_events(s, val);
> + break;
> +
> + default:
> + DCBPRINTF("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ", size %d",
> + (uint64_t) addr, val, size);
> + break;
> + }
> +}
> +
> +static uint64_t
> +vmxnet3_io_bar1_read(void *opaque, target_phys_addr_t addr, unsigned size)
> +{
> + VMXNET3_State *s = opaque;
> + uint64_t ret = 0;
> +
> + switch (addr) {
> + /* Vmxnet3 Revision Report Selection */
> + case VMXNET3_REG_VRRS:
> + DCBPRINTF("Read BAR1 [VMXNET3_REG_VRRS], size %d", size);
> + ret = VMXNET3_DEVICE_REVISION;
> + break;
> +
> + /* UPT Version Report Selection */
> + case VMXNET3_REG_UVRS:
> + DCBPRINTF("Read BAR1 [VMXNET3_REG_UVRS], size %d", size);
> + ret = VMXNET3_DEVICE_VERSION;
> + break;
> +
> + /* Command */
> + case VMXNET3_REG_CMD:
> + DCBPRINTF("Read BAR1 [VMXNET3_REG_CMD], size %d", size);
> + ret = vmxnet3_get_command_status(s);
> + break;
> +
> + /* MAC Address Low */
> + case VMXNET3_REG_MACL:
> + DCBPRINTF("Read BAR1 [VMXNET3_REG_MACL], size %d", size);
> + ret = vmxnet3_get_mac_low(&s->conf.macaddr);
> + break;
> +
> + /* MAC Address High */
> + case VMXNET3_REG_MACH:
> + DCBPRINTF("Read BAR1 [VMXNET3_REG_MACH], size %d", size);
> + ret = vmxnet3_get_mac_high(&s->conf.macaddr);
> + break;
> +
> + /* Interrupt Cause Register */
> + /* Used for legacy interrupts only so interrupt index always 0 */
> + case VMXNET3_REG_ICR:
> + DCBPRINTF("Read BAR1 [VMXNET3_REG_ICR], size %d", size);
> + if (vmxnet3_interrupt_asserted(s, 0)) {
> + vmxnet3_clear_interrupt(s, 0);
> + ret = true;
> + } else {
> + ret = false;
> + }
> + break;
> +
> + default:
> + DCBPRINTF("Unknow read BAR1[%" PRIx64 "], %d bytes",
> + (uint64_t) addr, size);
> + break;
> + }
> +
> + return ret;
> +}
> +
> +static int
> +vmxnet3_can_receive(VLANClientState *nc)
> +{
> + VMXNET3_State *s = DO_UPCAST(NICState, nc, nc)->opaque;
> + return s->device_active &&
> + FLAG_IS_SET(s->link_status_and_speed, VMXNET3_LINK_STATUS_UP);
> +}
> +
> +static inline bool
> +vmxnet3_is_registered_vlan(VMXNET3_State *s, const void *data)
> +{
> + uint16_t vlan_tag = eth_get_pkt_vlan_tag(data) & VLAN_VID_MASK;
> +
> + if (IS_SPECIAL_VLAN_ID(vlan_tag)) {
> + return true;
> + }
> +
> + return VMXNET3_VFTABLE_ENTRY_IS_SET(s->vlan_table, vlan_tag);
> +}
> +
> +static bool
> +vmxnet3_is_allowed_mcast_group(VMXNET3_State *s, const uint8_t *group_mac)
> +{
> + int i;
> + for (i = 0; i < s->mcast_list_len; i++) {
> + if (memcmp(group_mac, s->mcast_list[i].a, sizeof(s->mcast_list[i]))) {
> + return true;
> + }
> + }
> + return false;
> +}
> +
> +static bool
> +vmxnet3_rx_filter_may_indicate(VMXNET3_State *s, const void *data,
> + size_t size, eth_pkt_types_e packet_type)
> +{
> + struct eth_header *ehdr = PKT_GET_ETH_HDR(data);
> +
> + if (FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_PROMISC)) {
> + return true;
> + }
> +
> + switch (packet_type) {
> + case VMXNET3_PKT_UCAST:
> + if (!FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) {
> + return false;
> + }
> + if (memcmp(s->conf.macaddr.a, ehdr->h_dest, ETH_ALEN)) {
> + return false;
> + }
> + break;
> +
> + case VMXNET3_PKT_BCAST:
> + if (!FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_BCAST)) {
> + return false;
> + }
> + break;
> +
> + case VMXNET3_PKT_MCAST:
> + if (FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_ALL_MULTI)) {
> + return true;
> + }
> + if (!FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_MCAST)) {
> + return false;
> + }
> + if (!vmxnet3_is_allowed_mcast_group(s, ehdr->h_dest)) {
> + return false;
> + }
> + break;
> +
> + default:
> + assert(false);
> + }
> +
> + return vmxnet3_is_registered_vlan(s, data);
> +}
> +
> +static void
> +vmxnet3_rxpkt_attach_data(VMXNET3_State *s, Vmxnet3_RxPkt *pkt,
> + const void *data, size_t len)
> +{
> + uint16_t vtag = 0;
> + uint16_t ploff;
> + Vmxnet3_RxPktMdata *mdata = vmxnet3_rxpkt_get_mdata(pkt);
> +
> + if (s->rx_vlan_stripping) {
> + mdata->vlan_stripped =
> + eth_strip_vlan(&data, vmxnet3_rxpkt_get_ehdr(pkt), &ploff, &vtag);
> + } else {
> + mdata->vlan_stripped = false;
> + }
> +
> + if (mdata->vlan_stripped) {
> + vmxnet3_rxpkt_attach_ehdr(pkt);
> + vmxnet3_rxpkt_get_frag(pkt, 1)->iov_base = (uint8_t *) data + ploff;
> + vmxnet3_rxpkt_get_frag(pkt, 1)->iov_len = len - ploff;
> + vmxnet3_rxpkt_set_num_frags(pkt, 2);
> + mdata->tot_len = len - ploff + sizeof(struct eth_header);
> + } else {
> + vmxnet3_rxpkt_get_frag(pkt, 0)->iov_base = (void *) data;
> + vmxnet3_rxpkt_get_frag(pkt, 0)->iov_len = len;
> + vmxnet3_rxpkt_set_num_frags(pkt, 1);
> + mdata->tot_len = len;
> + }
> +
> + mdata->vlan_tag = vtag;
> +}
> +
> +static ssize_t
> +vmxnet3_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
> +{
> + VMXNET3_State *s = DO_UPCAST(NICState, nc, nc)->opaque;
> + Vmxnet3_RxPkt pkt;
> + size_t bytes_indicated;
> + Vmxnet3_RxPktMdata *mdata;
> +
> + if (!vmxnet3_can_receive(&s->nic->nc)) {
> + DPKPRINTF("Cannot receive now");
> + return -1;
> + }
> +
> + vmxnet3_rxpkt_init(&pkt);
> + mdata = vmxnet3_rxpkt_get_mdata(&pkt);
> +
> + if (s->peer_has_vhdr) {
> + buf += sizeof(struct virtio_net_hdr);
> + size -= sizeof(struct virtio_net_hdr);
> + *vmxnet3_rxpkt_get_vhdr(&pkt) = *(const struct virtio_net_hdr *) buf;
> + mdata->vhdr_valid = true;
> + } else {
> + mdata->vhdr_valid = false;
> + }
> +
> + mdata->packet_type = get_eth_packet_type(PKT_GET_ETH_HDR(buf));
> +
> + if (vmxnet3_rx_filter_may_indicate(s, buf, size, mdata->packet_type)) {
> + vmxnet3_rxpkt_attach_data(s, &pkt, buf, size);
> + bytes_indicated = vmxnet3_indicate_packet(s, &pkt) ? size : -1;
> + if (bytes_indicated < size) {
> + DPKPRINTF("RX: %lu of %lu bytes indicated", bytes_indicated, size);
> + }
> + } else {
> + DPKPRINTF("Packet dropped by RX filter");
> + bytes_indicated = size;
> + }
> +
> + assert(size > 0);
> + assert(bytes_indicated != 0);
> + return bytes_indicated;
> +}
> +
> +static void vmxnet3_cleanup(VLANClientState *nc)
> +{
> + VMXNET3_State *s = DO_UPCAST(NICState, nc, nc)->opaque;
> + s->nic = NULL;
> +}
> +
> +static void vmxnet3_set_link_status(VLANClientState *nc)
> +{
> + VMXNET3_State *s = DO_UPCAST(NICState, nc, nc)->opaque;
> +
> + if (nc->link_down) {
> + s->link_status_and_speed &= ~VMXNET3_LINK_STATUS_UP;
> + } else {
> + s->link_status_and_speed |= VMXNET3_LINK_STATUS_UP;
> + }
> +
> + vmxnet3_set_events(s, VMXNET3_ECR_LINK);
> + vmxnet3_trigger_interrupt(s, s->event_int_idx);
> +}
> +
> +static NetClientInfo net_vmxnet3_info = {
> + .type = NET_CLIENT_TYPE_NIC,
> + .size = sizeof(NICState),
> + .can_receive = vmxnet3_can_receive,
> + .receive = vmxnet3_receive,
> + .cleanup = vmxnet3_cleanup,
> + .link_status_changed = vmxnet3_set_link_status,
> +};
> +
> +static bool vmxnet3_peer_has_vnet_hdr(VMXNET3_State *s)
> +{
> + VLANClientState *peer = s->nic->nc.peer;
> +
> + if ((NULL != peer) &&
> + (NET_CLIENT_TYPE_TAP == peer->info->type) &&
> + tap_has_vnet_hdr(peer)) {
> + return true;
> + }
> +
> + DWRPRINTF("Peer has no virtio extension. Task offloads will not work.");
> + return false;
> +}
> +
> +static void vmxnet3_net_uninit(VMXNET3_State *s)
> +{
> + if (NULL != s->mcast_list) {
> + g_free(s->mcast_list);
> + }
> +
> + vmxnet3_txpkt_cleanup(&s->curr_txpkt);
> +}
> +
> +static void vmxnet3_net_init(VMXNET3_State *s)
> +{
> + DCBPRINTF("vmxnet3_net_init called...");
> +
> + vmxnet3_txpkt_init(&s->curr_txpkt);
> +
> + qemu_macaddr_default_if_unset(&s->conf.macaddr);
> +
> + /* Windows guest will query the address that was set on init */
> + memcpy(&s->perm_mac.a, &s->conf.macaddr.a, sizeof(s->perm_mac.a));
> +
> + s->mcast_list = NULL;
> + s->mcast_list_len = 0;
> +
> + s->link_status_and_speed = VMXNET3_LINK_SPEED | VMXNET3_LINK_STATUS_UP;
> +
> + DCFPRINTF("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a));
> +
> + s->nic = qemu_new_nic(&net_vmxnet3_info, &s->conf,
> + object_get_typename(OBJECT(s)),
> + s->dev.qdev.id, s);
> +
> + s->peer_has_vhdr = vmxnet3_peer_has_vnet_hdr(s);
> + s->curr_txpkt_pl_frags = 0;
> + s->curr_txpkt_skip = false;
> + s->curr_txpkt_header_processed = false;
> +
> + if (s->peer_has_vhdr) {
> + tap_set_vnet_hdr_len(s->nic->nc.peer, sizeof(struct virtio_net_hdr));
> + tap_using_vnet_hdr(s->nic->nc.peer, 1);
> + }
> +
> + qemu_format_nic_info_str(&s->nic->nc, s->conf.macaddr.a);
> +}
> +
> +#ifdef VMXNET3_ENABLE_MSIX
> +
> +static void
> +vmxnet3_unuse_msix_vectors(VMXNET3_State *s, int num_vectors)
> +{
> + int i;
> + for (i = 0; i < num_vectors; i++) {
> + msix_vector_unuse(&s->dev, i);
> + }
> +}
> +
> +static bool
> +vmxnet3_use_msix_vectors(VMXNET3_State *s, int num_vectors)
> +{
> + int i;
> + for (i = 0; i < num_vectors; i++) {
> + int res = msix_vector_use(&s->dev, i);
> + if (0 > res) {
> + DWRPRINTF("Failed to use MSI-X vector %d, error %d", i, res);
> + vmxnet3_unuse_msix_vectors(s, i);
> + return false;
> + }
> + }
> + return true;
> +}
> +
> +static bool
> +vmxnet3_init_msix(VMXNET3_State *s)
> +{
> + int res = msix_init(&s->dev, VMXNET3_MAX_INTRS,
> + &s->msix_bar, VMXNET3_MSIX_BAR_IDX, 0);
> + if (0 > res) {
> + DWRPRINTF("Failed to initialize MSI-X, error %d", res);
> + s->msix_used = false;
> + } else {
> + if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
> + DWRPRINTF("Failed to use MSI-X vectors, error %d", res);
> + msix_uninit(&s->dev, &s->msix_bar);
> + s->msix_used = false;
> + } else {
> + s->msix_used = true;
> + }
> + }
> + return s->msix_used;
> +}
> +
> +static void
> +vmxnet3_cleanup_msix(VMXNET3_State *s)
> +{
> + if (s->msix_used) {
> + msix_vector_unuse(&s->dev, VMXNET3_MAX_INTRS);
> + msix_uninit(&s->dev, &s->msix_bar);
> + }
> +}
> +#endif
> +
> +#ifdef VMXNET3_ENABLE_MSI
> +
> +static bool
> +vmxnet3_init_msi(VMXNET3_State *s)
> +{
> +#define VMXNET3_MSI_NUM_VECTORS (1)
> +#define VMXNET3_MSI_OFFSET (0x50)
> +#define VMXNET3_USE_64BIT (true)
> +#define VMXNET3_PER_VECTOR_MASK (false)
> +
> + int res;
> + res = msi_init(&s->dev, VMXNET3_MSI_OFFSET, VMXNET3_MSI_NUM_VECTORS,
> + VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK);
> + if (0 > res) {
> + DWRPRINTF("Failed to initialize MSI, error %d", res);
> + s->msi_used = false;
> + } else {
> + s->msi_used = true;
> + }
> +
> + return s->msi_used;
> +}
> +
> +static void
> +vmxnet3_cleanup_msi(VMXNET3_State *s)
> +{
> + if (s->msi_used) {
> + msi_uninit(&s->dev);
> + }
> +}
> +#endif
> +
> +static int vmxnet3_pci_init(PCIDevice *dev)
> +{
> + static const MemoryRegionOps b0_ops = {
> + .read = vmxnet3_io_bar0_read,
> + .write = vmxnet3_io_bar0_write,
> + .endianness = DEVICE_LITTLE_ENDIAN,
> + .impl = {
> + .min_access_size = 4,
> + .max_access_size = 4,
> + },
> + };
> +
> + static const MemoryRegionOps b1_ops = {
> + .read = vmxnet3_io_bar1_read,
> + .write = vmxnet3_io_bar1_write,
> + .endianness = DEVICE_LITTLE_ENDIAN,
> + .impl = {
> + .min_access_size = 4,
> + .max_access_size = 4,
> + },
> + };
> +
> + VMXNET3_State *s = DO_UPCAST(VMXNET3_State, dev, dev);
> + int i;
> +
> + DCBPRINTF("Starting init...");
> +
> + memory_region_init_io(&s->bar0, &b0_ops, s,
> + "vmxnet3-b0", VMXNET3_PT_REG_SIZE);
> + pci_register_bar(&s->dev, VMXNET3_BAR0_IDX,
> + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0);
> +
> + memory_region_init_io(&s->bar1, &b1_ops, s,
> + "vmxnet3-b1", VMXNET3_VD_REG_SIZE);
> + pci_register_bar(&s->dev, VMXNET3_BAR1_IDX,
> + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar1);
> +
> + memory_region_init(&s->msix_bar, "vmxnet3-msix-bar",
> + VMXNET3_MSIX_BAR_SIZE);
> + pci_register_bar(&s->dev, VMXNET3_MSIX_BAR_IDX,
> + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix_bar);
> +
> + for (i = 0; i < ARRAY_SIZE(s->interrupt_states); i++) {
> + s->interrupt_states[i].is_asserted = false;
> + s->interrupt_states[i].is_pending = false;
> + s->interrupt_states[i].is_masked = true;
> + }
> +
> + /* Interrupt pin A */
> + s->dev.config[PCI_INTERRUPT_PIN] = 0x01;
> +
> +#ifdef VMXNET3_ENABLE_MSIX
> + if (!vmxnet3_init_msix(s)) {
> + hw_error("Failed to initialize MSI-X, configuration is inconsistent.");
> + }
> +#endif
> +
> +#ifdef VMXNET3_ENABLE_MSI
> + if (!vmxnet3_init_msi(s)) {
> + hw_error("Failed to initialize MSI, configuration is inconsistent.");
> + }
> +#endif
> +
> + vmxnet3_net_init(s);
> + add_boot_device_path(s->conf.bootindex, &dev->qdev, "/ethernet-phy@0");
> +
> + return 0;
> +}
> +
> +
> +static int vmxnet3_pci_uninit(PCIDevice *dev)
> +{
> + VMXNET3_State *s = DO_UPCAST(VMXNET3_State, dev, dev);
> +
> + DCBPRINTF("Starting uninit...");
> +
> + vmxnet3_net_uninit(s);
> +
> +#ifdef VMXNET3_ENABLE_MSIX
> + vmxnet3_cleanup_msix(s);
> +#endif
> +
> +#ifdef VMXNET3_ENABLE_MSI
> + vmxnet3_cleanup_msi(s);
> +#endif
> +
> + memory_region_destroy(&s->bar0);
> + memory_region_destroy(&s->bar1);
> + memory_region_destroy(&s->msix_bar);
> +
> + return 0;
> +}
> +
> +static void vmxnet3_qdev_reset(DeviceState *dev)
> +{
> + VMXNET3_State *s = DO_UPCAST(VMXNET3_State, dev.qdev, dev);
> + DCBPRINTF("Starting QDEV reset...");
> + vmxnet3_reset(s);
> +}
> +
> +static const VMStateDescription vmstate_vmxnet3 = {
> + .name = "vmxnet3",
> + .version_id = 1,
> + .minimum_version_id = 1,
> + .minimum_version_id_old = 1,
> + .fields = (VMStateField[]) {
> + VMSTATE_PCI_DEVICE(dev, VMXNET3_State),
> + VMSTATE_END_OF_LIST()
> + }
> +};
> +
> +#if defined(VMXNET3_ENABLE_MSI) || defined(VMXNET3_ENABLE_MSIX)
> +static void
> +vmxnet3_write_config(PCIDevice *pci, uint32_t addr, uint32_t val, int len)
> +{
> + pci_default_write_config(pci, addr, val, len);
> +#if defined(VMXNET3_ENABLE_MSIX)
> + msix_write_config(pci, addr, val, len);
> +#endif
> +#if defined(VMXNET3_ENABLE_MSI)
> + msi_write_config(pci, addr, val, len);
> +#endif
> +}
> +#endif
> +
> +static Property vmxnet3_properties[] = {
> + DEFINE_NIC_PROPERTIES(VMXNET3_State, conf),
> + DEFINE_PROP_END_OF_LIST(),
> +};
> +
> +static void vmxnet3_class_init(ObjectClass *class, void *data)
> +{
> + DeviceClass *dc = DEVICE_CLASS(class);
> + PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
> +
> + c->init = vmxnet3_pci_init;
> + c->exit = vmxnet3_pci_uninit;
> + c->romfile = "pxe-e1000.rom";
> + c->vendor_id = PCI_VENDOR_ID_VMWARE;
> + c->device_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
> + c->revision = PCI_DEVICE_ID_VMWARE_VMXNET3_REVISION;
> + c->class_id = PCI_CLASS_NETWORK_ETHERNET;
> + c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
> + c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
> +#if defined(VMXNET3_ENABLE_MSI) || defined(VMXNET3_ENABLE_MSIX)
> + c->config_write = vmxnet3_write_config,
> +#endif
> + dc->desc = "VMWare Paravirtualized Ethernet v3";
> + dc->reset = vmxnet3_qdev_reset;
> + dc->vmsd = &vmstate_vmxnet3;
> + dc->props = vmxnet3_properties;
> +}
> +
> +static TypeInfo vmxnet3_info = {
> + .name = "vmxnet3",
> + .parent = TYPE_PCI_DEVICE,
> + .instance_size = sizeof(VMXNET3_State),
> + .class_init = vmxnet3_class_init,
> +};
> +
> +static void vmxnet3_register_types(void)
> +{
> + DCBPRINTF("vmxnet3_register_types called...");
> + type_register_static(&vmxnet3_info);
> +}
> +
> +type_init(vmxnet3_register_types)
> diff --git a/qemu/hw/vmxnet3.h b/qemu/hw/vmxnet3.h
> new file mode 100644
> index 0000000..6ec3fd5
> --- /dev/null
> +++ b/qemu/hw/vmxnet3.h
> @@ -0,0 +1,727 @@
> +/*
> + * QEMU VMWARE VMXNET3 paravirtual NIC
> + *
> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
> + *
> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> + *
> + * Authors:
> + * Dmitry Fleytman <dmitry@daynix.com>
> + * Yan Vugenfirer <yan@daynix.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef _QEMU_VMXNET3_H
> +#define _QEMU_VMXNET3_H
> +
> +#define VMXNET3_DEVICE_MAX_TX_QUEUES 8
> +#define VMXNET3_DEVICE_MAX_RX_QUEUES 8 /* Keep this value as a power of 2 */
> +
> +/* Defines needed to integrate VMWARE headers */
> +#define u64 uint64_t
> +#define u32 uint32_t
> +#define u16 uint16_t
> +#define u8 uint8_t
> +#define __le16 uint16_t
> +#define __le32 uint32_t
> +#define __le64 uint64_t
> +#define __packed QEMU_PACKED
> +
> +#if defined(HOST_WORDS_BIGENDIAN)
> +#define const_cpu_to_le64(x) bswap_64(x)
> +#define __BIG_ENDIAN_BITFIELD
> +#else
> +#define const_cpu_to_le64(x) (x)
> +#endif
> +
> +/* Following is an interface definition for */
> +/* VMXNET3 device as provided by VMWARE */
> +/* Original file and copyright is available */
> +/* in Linux kernel v3.2.8 at */
> +/* drivers/net/vmxnet3/vmxnet3_defs.h */
> +
> +struct UPT1_TxStats {
> + u64 TSOPktsTxOK; /* TSO pkts post-segmentation */
> + u64 TSOBytesTxOK;
> + u64 ucastPktsTxOK;
> + u64 ucastBytesTxOK;
> + u64 mcastPktsTxOK;
> + u64 mcastBytesTxOK;
> + u64 bcastPktsTxOK;
> + u64 bcastBytesTxOK;
> + u64 pktsTxError;
> + u64 pktsTxDiscard;
> +};
> +
> +struct UPT1_RxStats {
> + u64 LROPktsRxOK; /* LRO pkts */
> + u64 LROBytesRxOK; /* bytes from LRO pkts */
> + /* the following counters are for pkts from the wire, i.e., pre-LRO */
> + u64 ucastPktsRxOK;
> + u64 ucastBytesRxOK;
> + u64 mcastPktsRxOK;
> + u64 mcastBytesRxOK;
> + u64 bcastPktsRxOK;
> + u64 bcastBytesRxOK;
> + u64 pktsRxOutOfBuf;
> + u64 pktsRxError;
> +};
> +
> +/* interrupt moderation level */
> +enum {
> + UPT1_IML_NONE = 0, /* no interrupt moderation */
> + UPT1_IML_HIGHEST = 7, /* least intr generated */
> + UPT1_IML_ADAPTIVE = 8, /* adpative intr moderation */
> +};
> +/* values for UPT1_RSSConf.hashFunc */
> +enum {
> + UPT1_RSS_HASH_TYPE_NONE = 0x0,
> + UPT1_RSS_HASH_TYPE_IPV4 = 0x01,
> + UPT1_RSS_HASH_TYPE_TCP_IPV4 = 0x02,
> + UPT1_RSS_HASH_TYPE_IPV6 = 0x04,
> + UPT1_RSS_HASH_TYPE_TCP_IPV6 = 0x08,
> +};
> +
> +enum {
> + UPT1_RSS_HASH_FUNC_NONE = 0x0,
> + UPT1_RSS_HASH_FUNC_TOEPLITZ = 0x01,
> +};
> +
> +#define UPT1_RSS_MAX_KEY_SIZE 40
> +#define UPT1_RSS_MAX_IND_TABLE_SIZE 128
> +
> +struct UPT1_RSSConf {
> + u16 hashType;
> + u16 hashFunc;
> + u16 hashKeySize;
> + u16 indTableSize;
> + u8 hashKey[UPT1_RSS_MAX_KEY_SIZE];
> + u8 indTable[UPT1_RSS_MAX_IND_TABLE_SIZE];
> +};
> +
> +/* features */
> +enum {
> + UPT1_F_RXCSUM = const_cpu_to_le64(0x0001), /* rx csum verification */
> + UPT1_F_RSS = const_cpu_to_le64(0x0002),
> + UPT1_F_RXVLAN = const_cpu_to_le64(0x0004), /* VLAN tag stripping */
> + UPT1_F_LRO = const_cpu_to_le64(0x0008),
> +};
> +
> +/* all registers are 32 bit wide */
> +/* BAR 1 */
> +enum {
> + VMXNET3_REG_VRRS = 0x0, /* Vmxnet3 Revision Report Selection */
> + VMXNET3_REG_UVRS = 0x8, /* UPT Version Report Selection */
> + VMXNET3_REG_DSAL = 0x10, /* Driver Shared Address Low */
> + VMXNET3_REG_DSAH = 0x18, /* Driver Shared Address High */
> + VMXNET3_REG_CMD = 0x20, /* Command */
> + VMXNET3_REG_MACL = 0x28, /* MAC Address Low */
> + VMXNET3_REG_MACH = 0x30, /* MAC Address High */
> + VMXNET3_REG_ICR = 0x38, /* Interrupt Cause Register */
> + VMXNET3_REG_ECR = 0x40 /* Event Cause Register */
> +};
> +
> +/* BAR 0 */
> +enum {
> + VMXNET3_REG_IMR = 0x0, /* Interrupt Mask Register */
> + VMXNET3_REG_TXPROD = 0x600, /* Tx Producer Index */
> + VMXNET3_REG_RXPROD = 0x800, /* Rx Producer Index for ring 1 */
> + VMXNET3_REG_RXPROD2 = 0xA00 /* Rx Producer Index for ring 2 */
> +};
> +
> +#define VMXNET3_PT_REG_SIZE 4096 /* BAR 0 */
> +#define VMXNET3_VD_REG_SIZE 4096 /* BAR 1 */
> +
> +#define VMXNET3_REG_ALIGN 8 /* All registers are 8-byte aligned. */
> +#define VMXNET3_REG_ALIGN_MASK 0x7
> +
> +/* I/O Mapped access to registers */
> +#define VMXNET3_IO_TYPE_PT 0
> +#define VMXNET3_IO_TYPE_VD 1
> +#define VMXNET3_IO_ADDR(type, reg) (((type) << 24) | ((reg) & 0xFFFFFF))
> +#define VMXNET3_IO_TYPE(addr) ((addr) >> 24)
> +#define VMXNET3_IO_REG(addr) ((addr) & 0xFFFFFF)
> +
> +enum {
> + VMXNET3_CMD_FIRST_SET = 0xCAFE0000,
> + VMXNET3_CMD_ACTIVATE_DEV = VMXNET3_CMD_FIRST_SET, /* 0xCAFE0000 */
> + VMXNET3_CMD_QUIESCE_DEV, /* 0xCAFE0001 */
> + VMXNET3_CMD_RESET_DEV, /* 0xCAFE0002 */
> + VMXNET3_CMD_UPDATE_RX_MODE, /* 0xCAFE0003 */
> + VMXNET3_CMD_UPDATE_MAC_FILTERS, /* 0xCAFE0004 */
> + VMXNET3_CMD_UPDATE_VLAN_FILTERS, /* 0xCAFE0005 */
> + VMXNET3_CMD_UPDATE_RSSIDT, /* 0xCAFE0006 */
> + VMXNET3_CMD_UPDATE_IML, /* 0xCAFE0007 */
> + VMXNET3_CMD_UPDATE_PMCFG, /* 0xCAFE0008 */
> + VMXNET3_CMD_UPDATE_FEATURE, /* 0xCAFE0009 */
> + VMXNET3_CMD_LOAD_PLUGIN, /* 0xCAFE000A */
> +
> + VMXNET3_CMD_FIRST_GET = 0xF00D0000,
> + VMXNET3_CMD_GET_QUEUE_STATUS = VMXNET3_CMD_FIRST_GET, /* 0xF00D0000 */
> + VMXNET3_CMD_GET_STATS, /* 0xF00D0001 */
> + VMXNET3_CMD_GET_LINK, /* 0xF00D0002 */
> + VMXNET3_CMD_GET_PERM_MAC_LO, /* 0xF00D0003 */
> + VMXNET3_CMD_GET_PERM_MAC_HI, /* 0xF00D0004 */
> + VMXNET3_CMD_GET_DID_LO, /* 0xF00D0005 */
> + VMXNET3_CMD_GET_DID_HI, /* 0xF00D0006 */
> + VMXNET3_CMD_GET_DEV_EXTRA_INFO, /* 0xF00D0007 */
> + VMXNET3_CMD_GET_CONF_INTR /* 0xF00D0008 */
> +};
> +
> +/*
> + * Little Endian layout of bitfields -
> + * Byte 0 : 7.....len.....0
> + * Byte 1 : rsvd gen 13.len.8
> + * Byte 2 : 5.msscof.0 ext1 dtype
> + * Byte 3 : 13...msscof...6
> + *
> + * Big Endian layout of bitfields -
> + * Byte 0: 13...msscof...6
> + * Byte 1 : 5.msscof.0 ext1 dtype
> + * Byte 2 : rsvd gen 13.len.8
> + * Byte 3 : 7.....len.....0
> + *
> + * Thus, le32_to_cpu on the dword will allow the big endian driver to read
> + * the bit fields correctly. And cpu_to_le32 will convert bitfields
> + * bit fields written by big endian driver to format required by device.
> + */
> +
> +struct Vmxnet3_TxDesc {
> + __le64 addr;
> +
> +#ifdef __BIG_ENDIAN_BITFIELD
> + u32 msscof:14; /* MSS, checksum offset, flags */
> + u32 ext1:1;
> + u32 dtype:1; /* descriptor type */
> + u32 rsvd:1;
> + u32 gen:1; /* generation bit */
> + u32 len:14;
> +#else
> + u32 len:14;
> + u32 gen:1; /* generation bit */
> + u32 rsvd:1;
> + u32 dtype:1; /* descriptor type */
> + u32 ext1:1;
> + u32 msscof:14; /* MSS, checksum offset, flags */
> +#endif /* __BIG_ENDIAN_BITFIELD */
> +
> +#ifdef __BIG_ENDIAN_BITFIELD
> + u32 tci:16; /* Tag to Insert */
> + u32 ti:1; /* VLAN Tag Insertion */
> + u32 ext2:1;
> + u32 cq:1; /* completion request */
> + u32 eop:1; /* End Of Packet */
> + u32 om:2; /* offload mode */
> + u32 hlen:10; /* header len */
> +#else
> + u32 hlen:10; /* header len */
> + u32 om:2; /* offload mode */
> + u32 eop:1; /* End Of Packet */
> + u32 cq:1; /* completion request */
> + u32 ext2:1;
> + u32 ti:1; /* VLAN Tag Insertion */
> + u32 tci:16; /* Tag to Insert */
> +#endif /* __BIG_ENDIAN_BITFIELD */
> +};
> +
> +/* TxDesc.OM values */
> +#define VMXNET3_OM_NONE 0
> +#define VMXNET3_OM_CSUM 2
> +#define VMXNET3_OM_TSO 3
> +
> +/* fields in TxDesc we access w/o using bit fields */
> +#define VMXNET3_TXD_EOP_SHIFT 12
> +#define VMXNET3_TXD_CQ_SHIFT 13
> +#define VMXNET3_TXD_GEN_SHIFT 14
> +#define VMXNET3_TXD_EOP_DWORD_SHIFT 3
> +#define VMXNET3_TXD_GEN_DWORD_SHIFT 2
> +
> +#define VMXNET3_TXD_CQ (1 << VMXNET3_TXD_CQ_SHIFT)
> +#define VMXNET3_TXD_EOP (1 << VMXNET3_TXD_EOP_SHIFT)
> +#define VMXNET3_TXD_GEN (1 << VMXNET3_TXD_GEN_SHIFT)
> +
> +#define VMXNET3_HDR_COPY_SIZE 128
> +
> +
> +struct Vmxnet3_TxDataDesc {
> + u8 data[VMXNET3_HDR_COPY_SIZE];
> +};
> +
> +#define VMXNET3_TCD_GEN_SHIFT 31
> +#define VMXNET3_TCD_GEN_SIZE 1
> +#define VMXNET3_TCD_TXIDX_SHIFT 0
> +#define VMXNET3_TCD_TXIDX_SIZE 12
> +#define VMXNET3_TCD_GEN_DWORD_SHIFT 3
> +
> +struct Vmxnet3_TxCompDesc {
> + u32 txdIdx:12; /* Index of the EOP TxDesc */
> + u32 ext1:20;
> +
> + __le32 ext2;
> + __le32 ext3;
> +
> + u32 rsvd:24;
> + u32 type:7; /* completion type */
> + u32 gen:1; /* generation bit */
> +};
> +
> +struct Vmxnet3_RxDesc {
> + __le64 addr;
> +
> +#ifdef __BIG_ENDIAN_BITFIELD
> + u32 gen:1; /* Generation bit */
> + u32 rsvd:15;
> + u32 dtype:1; /* Descriptor type */
> + u32 btype:1; /* Buffer Type */
> + u32 len:14;
> +#else
> + u32 len:14;
> + u32 btype:1; /* Buffer Type */
> + u32 dtype:1; /* Descriptor type */
> + u32 rsvd:15;
> + u32 gen:1; /* Generation bit */
> +#endif
> + u32 ext1;
> +};
> +
> +/* values of RXD.BTYPE */
> +#define VMXNET3_RXD_BTYPE_HEAD 0 /* head only */
> +#define VMXNET3_RXD_BTYPE_BODY 1 /* body only */
> +
> +/* fields in RxDesc we access w/o using bit fields */
> +#define VMXNET3_RXD_BTYPE_SHIFT 14
> +#define VMXNET3_RXD_GEN_SHIFT 31
> +
> +struct Vmxnet3_RxCompDesc {
> +#ifdef __BIG_ENDIAN_BITFIELD
> + u32 ext2:1;
> + u32 cnc:1; /* Checksum Not Calculated */
> + u32 rssType:4; /* RSS hash type used */
> + u32 rqID:10; /* rx queue/ring ID */
> + u32 sop:1; /* Start of Packet */
> + u32 eop:1; /* End of Packet */
> + u32 ext1:2;
> + u32 rxdIdx:12; /* Index of the RxDesc */
> +#else
> + u32 rxdIdx:12; /* Index of the RxDesc */
> + u32 ext1:2;
> + u32 eop:1; /* End of Packet */
> + u32 sop:1; /* Start of Packet */
> + u32 rqID:10; /* rx queue/ring ID */
> + u32 rssType:4; /* RSS hash type used */
> + u32 cnc:1; /* Checksum Not Calculated */
> + u32 ext2:1;
> +#endif /* __BIG_ENDIAN_BITFIELD */
> +
> + __le32 rssHash; /* RSS hash value */
> +
> +#ifdef __BIG_ENDIAN_BITFIELD
> + u32 tci:16; /* Tag stripped */
> + u32 ts:1; /* Tag is stripped */
> + u32 err:1; /* Error */
> + u32 len:14; /* data length */
> +#else
> + u32 len:14; /* data length */
> + u32 err:1; /* Error */
> + u32 ts:1; /* Tag is stripped */
> + u32 tci:16; /* Tag stripped */
> +#endif /* __BIG_ENDIAN_BITFIELD */
> +
> +
> +#ifdef __BIG_ENDIAN_BITFIELD
> + u32 gen:1; /* generation bit */
> + u32 type:7; /* completion type */
> + u32 fcs:1; /* Frame CRC correct */
> + u32 frg:1; /* IP Fragment */
> + u32 v4:1; /* IPv4 */
> + u32 v6:1; /* IPv6 */
> + u32 ipc:1; /* IP Checksum Correct */
> + u32 tcp:1; /* TCP packet */
> + u32 udp:1; /* UDP packet */
> + u32 tuc:1; /* TCP/UDP Checksum Correct */
> + u32 csum:16;
> +#else
> + u32 csum:16;
> + u32 tuc:1; /* TCP/UDP Checksum Correct */
> + u32 udp:1; /* UDP packet */
> + u32 tcp:1; /* TCP packet */
> + u32 ipc:1; /* IP Checksum Correct */
> + u32 v6:1; /* IPv6 */
> + u32 v4:1; /* IPv4 */
> + u32 frg:1; /* IP Fragment */
> + u32 fcs:1; /* Frame CRC correct */
> + u32 type:7; /* completion type */
> + u32 gen:1; /* generation bit */
> +#endif /* __BIG_ENDIAN_BITFIELD */
> +};
> +
> +/* fields in RxCompDesc we access via Vmxnet3_GenericDesc.dword[3] */
> +#define VMXNET3_RCD_TUC_SHIFT 16
> +#define VMXNET3_RCD_IPC_SHIFT 19
> +
> +/* fields in RxCompDesc we access via Vmxnet3_GenericDesc.qword[1] */
> +#define VMXNET3_RCD_TYPE_SHIFT 56
> +#define VMXNET3_RCD_GEN_SHIFT 63
> +
> +/* csum OK for TCP/UDP pkts over IP */
> +#define VMXNET3_RCD_CSUM_OK (1 << VMXNET3_RCD_TUC_SHIFT | \
> + 1 << VMXNET3_RCD_IPC_SHIFT)
> +#define VMXNET3_TXD_GEN_SIZE 1
> +#define VMXNET3_TXD_EOP_SIZE 1
> +
> +/* value of RxCompDesc.rssType */
> +enum {
> + VMXNET3_RCD_RSS_TYPE_NONE = 0,
> + VMXNET3_RCD_RSS_TYPE_IPV4 = 1,
> + VMXNET3_RCD_RSS_TYPE_TCPIPV4 = 2,
> + VMXNET3_RCD_RSS_TYPE_IPV6 = 3,
> + VMXNET3_RCD_RSS_TYPE_TCPIPV6 = 4,
> +};
> +
> +
> +/* a union for accessing all cmd/completion descriptors */
> +union Vmxnet3_GenericDesc {
> + __le64 qword[2];
> + __le32 dword[4];
> + __le16 word[8];
> + struct Vmxnet3_TxDesc txd;
> + struct Vmxnet3_RxDesc rxd;
> + struct Vmxnet3_TxCompDesc tcd;
> + struct Vmxnet3_RxCompDesc rcd;
> +};
> +
> +#define VMXNET3_INIT_GEN 1
> +
> +/* Max size of a single tx buffer */
> +#define VMXNET3_MAX_TX_BUF_SIZE (1 << 14)
> +
> +/* # of tx desc needed for a tx buffer size */
> +#define VMXNET3_TXD_NEEDED(size) (((size) + VMXNET3_MAX_TX_BUF_SIZE - 1) / \
> + VMXNET3_MAX_TX_BUF_SIZE)
> +
> +/* max # of tx descs for a non-tso pkt */
> +#define VMXNET3_MAX_TXD_PER_PKT 16
> +
> +/* Max size of a single rx buffer */
> +#define VMXNET3_MAX_RX_BUF_SIZE ((1 << 14) - 1)
> +/* Minimum size of a type 0 buffer */
> +#define VMXNET3_MIN_T0_BUF_SIZE 128
> +#define VMXNET3_MAX_CSUM_OFFSET 1024
> +
> +/* Ring base address alignment */
> +#define VMXNET3_RING_BA_ALIGN 512
> +#define VMXNET3_RING_BA_MASK (VMXNET3_RING_BA_ALIGN - 1)
> +
> +/* Ring size must be a multiple of 32 */
> +#define VMXNET3_RING_SIZE_ALIGN 32
> +#define VMXNET3_RING_SIZE_MASK (VMXNET3_RING_SIZE_ALIGN - 1)
> +
> +/* Max ring size */
> +#define VMXNET3_TX_RING_MAX_SIZE 4096
> +#define VMXNET3_TC_RING_MAX_SIZE 4096
> +#define VMXNET3_RX_RING_MAX_SIZE 4096
> +#define VMXNET3_RC_RING_MAX_SIZE 8192
> +
> +/* a list of reasons for queue stop */
> +
> +enum {
> + VMXNET3_ERR_NOEOP = 0x80000000, /* cannot find the EOP desc of a pkt */
> + VMXNET3_ERR_TXD_REUSE = 0x80000001, /* reuse TxDesc before tx completion */
> + VMXNET3_ERR_BIG_PKT = 0x80000002, /* too many TxDesc for a pkt */
> + VMXNET3_ERR_DESC_NOT_SPT = 0x80000003, /* descriptor type not supported */
> + VMXNET3_ERR_SMALL_BUF = 0x80000004, /* type 0 buffer too small */
> + VMXNET3_ERR_STRESS = 0x80000005, /* stress option firing in vmkernel */
> + VMXNET3_ERR_SWITCH = 0x80000006, /* mode switch failure */
> + VMXNET3_ERR_TXD_INVALID = 0x80000007, /* invalid TxDesc */
> +};
> +
> +/* completion descriptor types */
> +#define VMXNET3_CDTYPE_TXCOMP 0 /* Tx Completion Descriptor */
> +#define VMXNET3_CDTYPE_RXCOMP 3 /* Rx Completion Descriptor */
> +
> +enum {
> + VMXNET3_GOS_BITS_UNK = 0, /* unknown */
> + VMXNET3_GOS_BITS_32 = 1,
> + VMXNET3_GOS_BITS_64 = 2,
> +};
> +
> +#define VMXNET3_GOS_TYPE_UNK 0 /* unknown */
> +#define VMXNET3_GOS_TYPE_LINUX 1
> +#define VMXNET3_GOS_TYPE_WIN 2
> +#define VMXNET3_GOS_TYPE_SOLARIS 3
> +#define VMXNET3_GOS_TYPE_FREEBSD 4
> +#define VMXNET3_GOS_TYPE_PXE 5
> +
> +struct Vmxnet3_GOSInfo {
> +#ifdef __BIG_ENDIAN_BITFIELD
> + u32 gosMisc:10; /* other info about gos */
> + u32 gosVer:16; /* gos version */
> + u32 gosType:4; /* which guest */
> + u32 gosBits:2; /* 32-bit or 64-bit? */
> +#else
> + u32 gosBits:2; /* 32-bit or 64-bit? */
> + u32 gosType:4; /* which guest */
> + u32 gosVer:16; /* gos version */
> + u32 gosMisc:10; /* other info about gos */
> +#endif /* __BIG_ENDIAN_BITFIELD */
> +};
> +
> +struct Vmxnet3_DriverInfo {
> + __le32 version;
> + struct Vmxnet3_GOSInfo gos;
> + __le32 vmxnet3RevSpt;
> + __le32 uptVerSpt;
> +};
> +
> +
> +#define VMXNET3_REV1_MAGIC 0xbabefee1
> +
> +/*
> + * QueueDescPA must be 128 bytes aligned. It points to an array of
> + * Vmxnet3_TxQueueDesc followed by an array of Vmxnet3_RxQueueDesc.
> + * The number of Vmxnet3_TxQueueDesc/Vmxnet3_RxQueueDesc are specified by
> + * Vmxnet3_MiscConf.numTxQueues/numRxQueues, respectively.
> + */
> +#define VMXNET3_QUEUE_DESC_ALIGN 128
> +
> +
> +struct Vmxnet3_MiscConf {
> + struct Vmxnet3_DriverInfo driverInfo;
> + __le64 uptFeatures;
> + __le64 ddPA; /* driver data PA */
> + __le64 queueDescPA; /* queue descriptor table PA */
> + __le32 ddLen; /* driver data len */
> + __le32 queueDescLen; /* queue desc. table len in bytes */
> + __le32 mtu;
> + __le16 maxNumRxSG;
> + u8 numTxQueues;
> + u8 numRxQueues;
> + __le32 reserved[4];
> +};
> +
> +
> +struct Vmxnet3_TxQueueConf {
> + __le64 txRingBasePA;
> + __le64 dataRingBasePA;
> + __le64 compRingBasePA;
> + __le64 ddPA; /* driver data */
> + __le64 reserved;
> + __le32 txRingSize; /* # of tx desc */
> + __le32 dataRingSize; /* # of data desc */
> + __le32 compRingSize; /* # of comp desc */
> + __le32 ddLen; /* size of driver data */
> + u8 intrIdx;
> + u8 _pad[7];
> +};
> +
> +
> +struct Vmxnet3_RxQueueConf {
> + __le64 rxRingBasePA[2];
> + __le64 compRingBasePA;
> + __le64 ddPA; /* driver data */
> + __le64 reserved;
> + __le32 rxRingSize[2]; /* # of rx desc */
> + __le32 compRingSize; /* # of rx comp desc */
> + __le32 ddLen; /* size of driver data */
> + u8 intrIdx;
> + u8 _pad[7];
> +};
> +
> +
> +enum vmxnet3_intr_mask_mode {
> + VMXNET3_IMM_AUTO = 0,
> + VMXNET3_IMM_ACTIVE = 1,
> + VMXNET3_IMM_LAZY = 2
> +};
> +
> +enum vmxnet3_intr_type {
> + VMXNET3_IT_AUTO = 0,
> + VMXNET3_IT_INTX = 1,
> + VMXNET3_IT_MSI = 2,
> + VMXNET3_IT_MSIX = 3
> +};
> +
> +#define VMXNET3_MAX_TX_QUEUES 8
> +#define VMXNET3_MAX_RX_QUEUES 16
> +/* addition 1 for events */
> +#define VMXNET3_MAX_INTRS 25
> +
> +/* value of intrCtrl */
> +#define VMXNET3_IC_DISABLE_ALL 0x1 /* bit 0 */
> +
> +
> +struct Vmxnet3_IntrConf {
> + bool autoMask;
> + u8 numIntrs; /* # of interrupts */
> + u8 eventIntrIdx;
> + u8 modLevels[VMXNET3_MAX_INTRS]; /* moderation level for
> + * each intr */
> + __le32 intrCtrl;
> + __le32 reserved[2];
> +};
> +
> +/* one bit per VLAN ID, the size is in the units of u32 */
> +#define VMXNET3_VFT_SIZE (4096/(sizeof(uint32_t)*8))
> +
> +
> +struct Vmxnet3_QueueStatus {
> + bool stopped;
> + u8 _pad[3];
> + __le32 error;
> +};
> +
> +
> +struct Vmxnet3_TxQueueCtrl {
> + __le32 txNumDeferred;
> + __le32 txThreshold;
> + __le64 reserved;
> +};
> +
> +
> +struct Vmxnet3_RxQueueCtrl {
> + bool updateRxProd;
> + u8 _pad[7];
> + __le64 reserved;
> +};
> +
> +enum {
> + VMXNET3_RXM_UCAST = 0x01, /* unicast only */
> + VMXNET3_RXM_MCAST = 0x02, /* multicast passing the filters */
> + VMXNET3_RXM_BCAST = 0x04, /* broadcast only */
> + VMXNET3_RXM_ALL_MULTI = 0x08, /* all multicast */
> + VMXNET3_RXM_PROMISC = 0x10 /* promiscuous */
> +};
> +
> +struct Vmxnet3_RxFilterConf {
> + __le32 rxMode; /* VMXNET3_RXM_xxx */
> + __le16 mfTableLen; /* size of the multicast filter table */
> + __le16 _pad1;
> + __le64 mfTablePA; /* PA of the multicast filters table */
> + __le32 vfTable[VMXNET3_VFT_SIZE]; /* vlan filter */
> +};
> +
> +
> +#define VMXNET3_PM_MAX_FILTERS 6
> +#define VMXNET3_PM_MAX_PATTERN_SIZE 128
> +#define VMXNET3_PM_MAX_MASK_SIZE (VMXNET3_PM_MAX_PATTERN_SIZE / 8)
> +
> +#define VMXNET3_PM_WAKEUP_MAGIC cpu_to_le16(0x01) /* wake up on magic pkts */
> +#define VMXNET3_PM_WAKEUP_FILTER cpu_to_le16(0x02) /* wake up on pkts matching
> + * filters */
> +
> +
> +struct Vmxnet3_PM_PktFilter {
> + u8 maskSize;
> + u8 patternSize;
> + u8 mask[VMXNET3_PM_MAX_MASK_SIZE];
> + u8 pattern[VMXNET3_PM_MAX_PATTERN_SIZE];
> + u8 pad[6];
> +};
> +
> +
> +struct Vmxnet3_PMConf {
> + __le16 wakeUpEvents; /* VMXNET3_PM_WAKEUP_xxx */
> + u8 numFilters;
> + u8 pad[5];
> + struct Vmxnet3_PM_PktFilter filters[VMXNET3_PM_MAX_FILTERS];
> +};
> +
> +
> +struct Vmxnet3_VariableLenConfDesc {
> + __le32 confVer;
> + __le32 confLen;
> + __le64 confPA;
> +};
> +
> +
> +struct Vmxnet3_TxQueueDesc {
> + struct Vmxnet3_TxQueueCtrl ctrl;
> + struct Vmxnet3_TxQueueConf conf;
> +
> + /* Driver read after a GET command */
> + struct Vmxnet3_QueueStatus status;
> + struct UPT1_TxStats stats;
> + u8 _pad[88]; /* 128 aligned */
> +};
> +
> +
> +struct Vmxnet3_RxQueueDesc {
> + struct Vmxnet3_RxQueueCtrl ctrl;
> + struct Vmxnet3_RxQueueConf conf;
> + /* Driver read after a GET commad */
> + struct Vmxnet3_QueueStatus status;
> + struct UPT1_RxStats stats;
> + u8 __pad[88]; /* 128 aligned */
> +};
> +
> +
> +struct Vmxnet3_DSDevRead {
> + /* read-only region for device, read by dev in response to a SET cmd */
> + struct Vmxnet3_MiscConf misc;
> + struct Vmxnet3_IntrConf intrConf;
> + struct Vmxnet3_RxFilterConf rxFilterConf;
> + struct Vmxnet3_VariableLenConfDesc rssConfDesc;
> + struct Vmxnet3_VariableLenConfDesc pmConfDesc;
> + struct Vmxnet3_VariableLenConfDesc pluginConfDesc;
> +};
> +
> +/* All structures in DriverShared are padded to multiples of 8 bytes */
> +struct Vmxnet3_DriverShared {
> + __le32 magic;
> + /* make devRead start at 64bit boundaries */
> + __le32 pad;
> + struct Vmxnet3_DSDevRead devRead;
> + __le32 ecr;
> + __le32 reserved[5];
> +};
> +
> +
> +#define VMXNET3_ECR_RQERR (1 << 0)
> +#define VMXNET3_ECR_TQERR (1 << 1)
> +#define VMXNET3_ECR_LINK (1 << 2)
> +#define VMXNET3_ECR_DIC (1 << 3)
> +#define VMXNET3_ECR_DEBUG (1 << 4)
> +
> +/* flip the gen bit of a ring */
> +#define VMXNET3_FLIP_RING_GEN(gen) ((gen) = (gen) ^ 0x1)
> +
> +/* only use this if moving the idx won't affect the gen bit */
> +#define VMXNET3_INC_RING_IDX_ONLY(idx, ring_size) \
> + do {\
> + (idx)++;\
> + if (unlikely((idx) == (ring_size))) {\
> + (idx) = 0;\
> + } \
> + } while (0)
> +
> +#define VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid) \
> + (vfTable[vid >> 5] |= (1 << (vid & 31)))
> +#define VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid) \
> + (vfTable[vid >> 5] &= ~(1 << (vid & 31)))
> +
> +#define VMXNET3_VFTABLE_ENTRY_IS_SET(vfTable, vid) \
> + ((vfTable[vid >> 5] & (1 << (vid & 31))) != 0)
> +
> +#define VMXNET3_MAX_MTU 9000
> +#define VMXNET3_MIN_MTU 60
> +
> +#define VMXNET3_LINK_UP (10000 << 16 | 1) /* 10 Gbps, up */
> +#define VMXNET3_LINK_DOWN 0
> +
> +#undef u64
> +#undef u32
> +#undef u16
> +#undef u8
> +#undef __le16
> +#undef __le32
> +#undef __le64
> +#undef __packed
> +#undef const_cpu_to_le64
> +#if defined(HOST_WORDS_BIGENDIAN)
> +#undef __BIG_ENDIAN_BITFIELD
> +#endif
> +
> +#endif
> diff --git a/qemu/hw/vmxnet3_debug.h b/qemu/hw/vmxnet3_debug.h
> new file mode 100644
> index 0000000..8383c22
> --- /dev/null
> +++ b/qemu/hw/vmxnet3_debug.h
> @@ -0,0 +1,104 @@
> +/*
> + * QEMU VMWARE VMXNET3 paravirtual NIC - debugging facilities
> + *
> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
> + *
> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> + *
> + * Authors:
> + * Dmitry Fleytman <dmitry@daynix.com>
> + * Yan Vugenfirer <yan@daynix.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef _QEMU_VMXNET3_DEBUG_H
> +#define _QEMU_VMXNET3_DEBUG_H
> +
> +/* #define DEBUG_VMXNET3_CB */
> +#define DEBUG_VMXNET3_WARNINGS
> +#define DEBUG_VMXNET3_ERRORS
> +/* #define DEBUG_VMXNET3_INTERRUPTS */
> +/* #define DEBUG_VMXNET3_CONFIG */
> +/* #define DEBUG_VMXNET3_SHMEM_ACCESS */
> +/* #define DEBUG_VMXNET3_RINGS */
> +/* #define DEBUG_VMXNET3_PACKETS */
> +
> +#ifdef DEBUG_VMXNET3_SHMEM_ACCESS
> +#define DSHPRINTF(fmt, ...) \
> + do { \
> + printf("[vmxnet3][SH][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
> + } while (0)
> +#else
> +#define DSHPRINTF(fmt, ...) do {} while (0)
> +#endif
> +
> +#ifdef DEBUG_VMXNET3_CB
> +#define DCBPRINTF(fmt, ...) \
> + do { \
> + printf("[vmxnet3][CB][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
> + } while (0)
> +#else
> +#define DCBPRINTF(fmt, ...) do {} while (0)
> +#endif
> +
> +#ifdef DEBUG_VMXNET3_PACKETS
> +#define DPKPRINTF(fmt, ...) \
> + do { \
> + printf("[vmxnet3][PK][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
> + } while (0)
> +#else
> +#define DPKPRINTF(fmt, ...) do {} while (0)
> +#endif
> +
> +#ifdef DEBUG_VMXNET3_WARNINGS
> +#define DWRPRINTF(fmt, ...) \
> + do { \
> + printf("[vmxnet3][WR][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
> + } while (0)
> +#else
> +#define DWRPRINTF(fmt, ...) do {} while (0)
> +#endif
> +
> +#ifdef DEBUG_VMXNET3_ERRORS
> +#define DERPRINTF(fmt, ...) \
> + do { \
> + printf("[vmxnet3][ER][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
> + } while (0)
> +#else
> +#define DERPRINTF(fmt, ...) do {} while (0)
> +#endif
> +
> +#ifdef DEBUG_VMXNET3_INTERRUPTS
> +#define DIRPRINTF(fmt, ...) \
> + do { \
> + printf("[vmxnet3][IR][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
> + } while (0)
> +#else
> +#define DIRPRINTF(fmt, ...) do {} while (0)
> +#endif
> +
> +#ifdef DEBUG_VMXNET3_CONFIG
> +#define DCFPRINTF(fmt, ...) \
> + do { \
> + printf("[vmxnet3][CF][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
> + } while (0)
> +#else
> +#define DCFPRINTF(fmt, ...) do {} while (0)
> +#endif
> +
> +#ifdef DEBUG_VMXNET3_RINGS
> +#define DRIPRINTF(fmt, ...) \
> + do { \
> + printf("[vmxnet3][RI][%s]: " fmt "\n", __func__, ## __VA_ARGS__); \
> + } while (0)
> +#else
> +#define DRIPRINTF(fmt, ...) do {} while (0)
> +#endif
> +
> +#define MAC_FMT "%02X:%02X:%02X:%02X:%02X:%02X"
> +#define MAC_ARG(a) (a)[0], (a)[1], (a)[2], (a)[3], (a)[4], (a)[5]
> +
> +#endif /* _QEMU_VMXNET3_DEBUG_H */
> diff --git a/qemu/hw/vmxnet_utils.c b/qemu/hw/vmxnet_utils.c
> new file mode 100644
> index 0000000..e310828
> --- /dev/null
> +++ b/qemu/hw/vmxnet_utils.c
> @@ -0,0 +1,172 @@
> +/*
> + * QEMU VMWARE paravirtual devices - network auxiliary code
> + *
> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
> + *
> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> + *
> + * Authors:
> + * Dmitry Fleytman <dmitry@daynix.com>
> + * Yan Vugenfirer <yan@daynix.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "hw.h"
> +#include "virtio-net.h"
> +#include "vmxnet_utils.h"
> +#include "net/checksum.h"
> +
> +void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag)
> +{
> + switch (be16_to_cpu(ehdr->h_proto)) {
> + case ETH_P_VLAN:
> + case ETH_P_DVLAN: {
> + /* Header already present, just put proper VLAN tag */
> + struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
> + vhdr->h_tci = cpu_to_be16(vlan_tag);
> + }
> + default: {
> + /* No VLAN header, put a new one */
> + struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
> + vhdr->h_proto = ehdr->h_proto;
> + ehdr->h_proto = cpu_to_be16(ETH_P_VLAN);
> + vhdr->h_tci = cpu_to_be16(vlan_tag);
> + }
> + }
> +}
> +
> +bool eth_setup_tx_offloads(uint8_t *l3hdr,
> + size_t l3hdr_len,
> + size_t l3hdr_off,
> + uint32_t l3payload_len,
> + struct virtio_net_hdr *vhdr,
> + bool more_frags,
> + uint16_t fragmentation_offset)
> +{
> + uint16_t csum;
> +
> + switch (vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
> + case VIRTIO_NET_HDR_GSO_TCPV4:
> + case VIRTIO_NET_HDR_GSO_UDP: {
> + struct ip_header *iphdr = (struct ip_header *) l3hdr;
> + uint16_t new_ip_off;
> +
> + vhdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
> + vhdr->csum_start = l3hdr_off;
> + vhdr->csum_offset = offsetof(struct ip_header, ip_sum);
> +
> + if (l3payload_len + l3hdr_len > ETH_MAX_IP_DGRAM_LEN) {
> + /* This must never happen with fragmentation enabled */
> + assert(0 == more_frags);
> + assert(0 == fragmentation_offset);
> + return false;
> + }
> +
> + iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len);
> + assert(0 == (~IP_OFFMASK & fragmentation_offset));
> +
> + new_ip_off = fragmentation_offset | (more_frags ? IP_MF : 0) |
> + (be16_to_cpu(iphdr->ip_off) & ~(IP_OFFMASK|IP_MF));
> +
> + iphdr->ip_off = cpu_to_be16(new_ip_off);
> +
> + /* Due to Linux bridge bugs/features IP header checksum */
> + /* must be calculated in order to make it process */
> + /* packet with segmentation requirements successfully */
> + eth_put_csum(l3hdr, vhdr->csum_offset, 0);
> + csum = net_raw_checksum(l3hdr, l3hdr_len);
> + eth_put_csum(l3hdr, vhdr->csum_offset, csum);
> + }
> + break;
> +
> + case VIRTIO_NET_HDR_GSO_TCPV6:
> + default:
> + vhdr->flags = 0;
> + break;
> + }
> +
> + return true;
> +}
> +
> +uint8_t
> +eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr)
> +{
> + uint8_t ecn_state = 0;
> +
> + if (ETH_P_IP == l3_proto) {
> + struct ip_header *iphdr = (struct ip_header *) l3_hdr;
> +
> + if (IP_HEADER_VERSION_4 == IP_HEADER_VERSION(iphdr)) {
> + if (IPTOS_ECN_CE == IPTOS_ECN(iphdr->ip_tos)) {
> + ecn_state = VIRTIO_NET_HDR_GSO_ECN;
> + }
> + if (IP_PROTO_TCP == iphdr->ip_p) {
> + return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
> + } else if (IP_PROTO_UDP == iphdr->ip_p) {
> + return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
> + }
> + }
> + } else if (ETH_P_IPV6 == l3_proto) {
> + struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
> +
> + if (IP6_ECN_CE == IP6_ECN(ip6hdr->ip6_ecn_acc)) {
> + ecn_state = VIRTIO_NET_HDR_GSO_ECN;
> + }
> +
> + if (IP_PROTO_TCP == ip6hdr->ip6_nxt) {
> + return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
> + }
> + }
> +
> + /* Unsupported offload */
> + assert(false);
> +
> + return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
> +}
> +
> +void eth_get_protocols(const uint8_t *headers,
> + uint32_t hdr_length,
> + bool *isip4, bool *isip6,
> + bool *isudp, bool *istcp)
> +{
> + int proto;
> + size_t l2hdr_len = eth_get_l2_hdr_length(headers);
> + assert(hdr_length >= eth_get_l2_hdr_length(headers));
> + *isip4 = *isip6 = *isudp = *istcp = false;
> +
> + proto = eth_get_l3_proto(headers, l2hdr_len);
> + if (ETH_P_IP == proto) {
> + *isip4 = true;
> +
> + struct ip_header *iphdr;
> +
> + assert(hdr_length >=
> + eth_get_l2_hdr_length(headers) + sizeof(struct ip_header));
> +
> + iphdr = PKT_GET_IP_HDR(headers);
> +
> + if (IP_HEADER_VERSION_4 == IP_HEADER_VERSION(iphdr)) {
> + if (IP_PROTO_TCP == iphdr->ip_p) {
> + *istcp = true;
> + } else if (IP_PROTO_UDP == iphdr->ip_p) {
> + *isudp = true;
> + }
> + }
> + } else if (ETH_P_IPV6 == proto) {
> + *isip6 = true;
> +
> + struct ip6_header *ip6hdr;
> + assert(hdr_length >=
> + eth_get_l2_hdr_length(headers) + sizeof(struct ip6_header));
> + ip6hdr = PKT_GET_IP6_HDR(headers);
> +
> + if (IP_PROTO_TCP == ip6hdr->ip6_nxt) {
> + *istcp = true;
> + } else if (IP_PROTO_UDP == ip6hdr->ip6_nxt) {
> + *isudp = true;
> + }
> + }
> +}
> diff --git a/qemu/hw/vmxnet_utils.h b/qemu/hw/vmxnet_utils.h
> new file mode 100644
> index 0000000..f5e79dd
> --- /dev/null
> +++ b/qemu/hw/vmxnet_utils.h
> @@ -0,0 +1,242 @@
> +/*
> + * QEMU VMWARE paravirtual devices - network auxiliary code
> + *
> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
> + *
> + * Developed by Daynix Computing LTD (http://www.daynix.com)
> + *
> + * Authors:
> + * Dmitry Fleytman <dmitry@daynix.com>
> + * Yan Vugenfirer <yan@daynix.com>
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#define ETH_ALEN 6
> +
> +struct eth_header {
> + uint8_t h_dest[ETH_ALEN]; /* destination eth addr */
> + uint8_t h_source[ETH_ALEN]; /* source ether addr */
> + uint16_t h_proto; /* packet type ID field */
> +};
> +
> +struct vlan_header {
> + uint16_t h_tci; /* priority and VLAN ID */
> + uint16_t h_proto; /* encapsulated protocol */
> +};
> +
> +struct ip_header {
> + uint8_t ip_ver_len; /* version and header length */
> + uint8_t ip_tos; /* type of service */
> + uint16_t ip_len; /* total length */
> + uint16_t ip_id; /* identification */
> + uint16_t ip_off; /* fragment offset field */
> + uint8_t ip_ttl; /* time to live */
> + uint8_t ip_p; /* protocol */
> + uint16_t ip_sum; /* checksum */
> + uint32_t ip_src, ip_dst; /* source and dest address */
> +};
> +
> +/* IPv6 address */
> +struct in6_addr {
> + union {
> + uint8_t __u6_addr8[16];
> + } __in6_u;
> +};
> +
> +struct ip6_header {
> + union {
> + struct ip6_hdrctl {
> + uint32_t ip6_un1_flow; /* 4 bits version, 8 bits TC,
> + 20 bits flow-ID */
> + uint16_t ip6_un1_plen; /* payload length */
> + uint8_t ip6_un1_nxt; /* next header */
> + uint8_t ip6_un1_hlim; /* hop limit */
> + } ip6_un1;
> + uint8_t ip6_un2_vfc; /* 4 bits version, top 4 bits tclass */
> + struct ip6_ecn_access {
> + uint8_t ip6_un3_vfc; /* 4 bits version, top 4 bits tclass */
> + uint8_t ip6_un3_ecn; /* 2 bits ECN, top 6 bits payload length */
> + } ip6_un3;
> + } ip6_ctlun;
> + struct in6_addr ip6_src; /* source address */
> + struct in6_addr ip6_dst; /* destination address */
> +};
> +#define ip6_nxt ip6_ctlun.ip6_un1.ip6_un1_nxt
> +#define ip6_ecn_acc ip6_ctlun.ip6_un3.ip6_un3_ecn
> +
> +#define PKT_GET_ETH_HDR(p) \
> + ((struct eth_header *)(p))
> +#define PKT_GET_VLAN_HDR(p) \
> + ((struct vlan_header *) (((uint8_t *)(p)) + sizeof(struct eth_header)))
> +#define PKT_GET_IP_HDR(p) \
> + ((struct ip_header *)(((uint8_t *)(p)) + eth_get_l2_hdr_length(p)))
> +#define IP_HDR_GET_LEN(p) \
> + ((((struct ip_header *)p)->ip_ver_len & 0x0F) << 2)
> +#define PKT_GET_IP_HDR_LEN(p) \
> + (IP_HDR_GET_LEN(PKT_GET_IP_HDR(p)))
> +#define PKT_GET_IP6_HDR(p) \
> + ((struct ip6_header *) (((uint8_t *)(p)) + eth_get_l2_hdr_length(p)))
> +#define IP_HEADER_VERSION(ip) \
> + ((ip->ip_ver_len >> 4)&0xf)
> +
> +#define ETH_P_IP (0x0800)
> +#define ETH_P_IPV6 (0x86dd)
> +#define ETH_P_VLAN (0x8100)
> +#define ETH_P_DVLAN (0x88a8)
> +#define VLAN_VID_MASK 0x0fff
> +#define IP_HEADER_VERSION_4 (4)
> +#define IP_HEADER_VERSION_6 (6)
> +#define IP_PROTO_TCP (6)
> +#define IP_PROTO_UDP (17)
> +#define IPTOS_ECN_MASK 0x03
> +#define IPTOS_ECN(x) ((x) & IPTOS_ECN_MASK)
> +#define IPTOS_ECN_CE 0x03
> +#define IP6_ECN_MASK 0xC0
> +#define IP6_ECN(x) ((x) & IP6_ECN_MASK)
> +#define IP6_ECN_CE 0xC0
> +#define IP4_DONT_FRAGMENT_FLAG (1 << 14)
> +
> +#define IS_SPECIAL_VLAN_ID(x) \
> + ((0 == (x)) || (0xFFF == (x)))
> +
> +#define ETH_MAX_L2_HDR_LEN \
> + (sizeof(struct eth_header) + 2*sizeof(struct vlan_header))
> +
> +#define ETH_MAX_IP4_HDR_LEN (60)
> +#define ETH_MAX_IP6_HDR_LEN \
> + (sizeof(struct ip6_header))
> +#define ETH_MAX_L3_HDR_LEN \
> + (MAX(ETH_MAX_IP4_HDR_LEN, ETH_MAX_IP6_HDR_LEN))
> +#define ETH_MAX_IP_DGRAM_LEN (0xFFFF)
> +#define ETH_MAX_IP_PLOAD_LEN \
> + (ETH_MAX_IP_DGRAM_LEN - ETH_MAX_IP4_HDR_LEN - ETH_MAX_L2_HDR_LEN)
> +
> +#define IP_FRAG_UNIT_SIZE (8)
> +#define IP_FRAG_ALIGN_SIZE(x) ((x) & ~0x7)
> +#define IP_RF 0x8000 /* reserved fragment flag */
> +#define IP_DF 0x4000 /* don't fragment flag */
> +#define IP_MF 0x2000 /* more fragments flag */
> +#define IP_OFFMASK 0x1fff /* mask for fragmenting bits */
> +
> +
> +static inline int is_multicast_ether_addr(const uint8_t *addr)
> +{
> + return 0x01 & addr[0];
> +}
> +
> +static inline int is_broadcast_ether_addr(const uint8_t *addr)
> +{
> + return (addr[0] & addr[1] & addr[2] & addr[3] & addr[4] & addr[5]) == 0xff;
> +}
> +
> +static inline int is_unicast_ether_addr(const uint8_t *addr)
> +{
> + return !is_multicast_ether_addr(addr);
> +}
> +
> +typedef enum {
> + VMXNET3_PKT_UCAST = 0xAABBCC00,
> + VMXNET3_PKT_BCAST,
> + VMXNET3_PKT_MCAST
> +} eth_pkt_types_e;
> +
> +static inline eth_pkt_types_e
> +get_eth_packet_type(const struct eth_header *ehdr)
> +{
> + if (is_broadcast_ether_addr(ehdr->h_dest)) {
> + return VMXNET3_PKT_BCAST;
> + } else if (is_multicast_ether_addr(ehdr->h_dest)) {
> + return VMXNET3_PKT_MCAST;
> + } else { /* unicast */
> + return VMXNET3_PKT_UCAST;
> + }
> +}
> +
> +static inline uint32_t
> +eth_get_l2_hdr_length(const void *p)
> +{
> + uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto);
> + struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p);
> + switch (proto) {
> + case ETH_P_VLAN:
> + return sizeof(struct eth_header) + sizeof(struct vlan_header);
> + case ETH_P_DVLAN:
> + if (ETH_P_VLAN == hvlan->h_proto) {
> + return sizeof(struct eth_header) + 2*sizeof(struct vlan_header);
> + } else {
> + return sizeof(struct eth_header) + sizeof(struct vlan_header);
> + }
> + default:
> + return sizeof(struct eth_header);
> + }
> +}
> +
> +static inline uint16_t
> +eth_get_pkt_vlan_tag(const void *p)
> +{
> + uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto);
> + struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p);
> + switch (proto) {
> + case ETH_P_VLAN:
> + case ETH_P_DVLAN:
> + return be16_to_cpu(hvlan->h_proto);
> + default:
> + return 0;
> + }
> +}
> +
> +static inline bool
> +eth_strip_vlan(const void *p, struct eth_header *new_ehdr,
> + uint16_t *payload_offset, uint16_t *vlan_tag)
> +{
> + uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto);
> + struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p);
> +
> + switch (proto) {
> + case ETH_P_VLAN:
> + case ETH_P_DVLAN:
> + memcpy(new_ehdr->h_source, PKT_GET_ETH_HDR(p)->h_source, ETH_ALEN);
> + memcpy(new_ehdr->h_dest, PKT_GET_ETH_HDR(p)->h_dest, ETH_ALEN);
> + new_ehdr->h_proto = hvlan->h_proto;
> + *vlan_tag = be16_to_cpu(hvlan->h_tci);
> + *payload_offset =
> + sizeof(struct eth_header) + sizeof(struct vlan_header);
> + return true;
> + default:
> + return false;
> + }
> +}
> +
> +static inline uint16_t
> +eth_get_l3_proto(const void *l2hdr, size_t l2hdr_len)
> +{
> + uint8_t *proto_ptr = (uint8_t *) l2hdr + l2hdr_len - sizeof(uint16_t);
> + return be16_to_cpup((uint16_t *)proto_ptr);
> +}
> +
> +static inline void
> +eth_put_csum(uint8_t *buf, uint32_t cso, uint16_t csum)
> +{
> + cpu_to_be16wu((uint16_t *)(buf + cso), csum);
> +}
> +
> +void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag);
> +
> +
> +bool eth_setup_tx_offloads(uint8_t *l3hdr,
> + size_t l3hdr_len,
> + size_t l3hdr_off,
> + uint32_t l3payload_len,
> + struct virtio_net_hdr *vhdr,
> + bool more_frags,
> + uint16_t fragmentation_offset);
> +
> +uint8_t eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr);
> +
> +void eth_get_protocols(const uint8_t *headers,
> + uint32_t hdr_length,
> + bool *isip4, bool *isip6,
> + bool *isudp, bool *istcp);
> diff --git a/qemu/net.c b/qemu/net.c
> index c34474f..e2f586c 100644
> --- a/qemu/net.c
> +++ b/qemu/net.c
> @@ -857,7 +857,7 @@ static const struct {
> }, {
> .name = "model",
> .type = QEMU_OPT_STRING,
> - .help = "device model (e1000, rtl8139, virtio etc.)",
> + .help = "device model (e1000, rtl8139, virtio, vmxnet3 etc.)",
> }, {
> .name = "addr",
> .type = QEMU_OPT_STRING,
> diff --git a/qemu/net/checksum.h b/qemu/net/checksum.h
> index 1f05298..5f42a02 100644
> --- a/qemu/net/checksum.h
> +++ b/qemu/net/checksum.h
> @@ -26,4 +26,11 @@ uint16_t net_checksum_tcpudp(uint16_t length, uint16_t proto,
> uint8_t *addrs, uint8_t *buf);
> void net_checksum_calculate(uint8_t *data, int length);
>
> +static inline uint16_t
> +net_raw_checksum(uint8_t *data, int length)
> +{
> + return net_checksum_finish(net_checksum_add(length, data));
> +}
> +
> +
> #endif /* QEMU_NET_CHECKSUM_H */
> --
> 1.7.7.6
>
>
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [Qemu-devel] [PATCH v2] VMXNET3 paravirtual NIC device implementation
2012-03-03 16:55 ` Gerhard Wiesinger
@ 2012-03-04 17:09 ` Dmitry Fleytman
0 siblings, 0 replies; 5+ messages in thread
From: Dmitry Fleytman @ 2012-03-04 17:09 UTC (permalink / raw)
To: Gerhard Wiesinger
Cc: Anthony Liguori, Alex Fishman, Dmitry Fleytman, yvugenfi,
Izik Eidus, qemu-devel, Michael S. Tsirkin, Yan Vugenfirer
[-- Attachment #1: Type: text/plain, Size: 148485 bytes --]
Hello, Gerhard
Thanks for your input.
We've reproduced the problem, it happens when tap virtio support is not
configured.
We'll prepare a patch that fixes this soon.
Dmitry.
On Sat, Mar 3, 2012 at 6:55 PM, Gerhard Wiesinger <lists@wiesinger.com>wrote:
> Hello,
>
> Tried V2 of VMXNET3 under Knoppix Live CD/Linux (-cdrom
> ISO/KNOPPIX_V6.7.1CD-2011-09-**14-DE.iso) but it cored:
> #0 tap_set_offload (nc=0x0, csum=1, tso4=1, tso6=1, ecn=0, ufo=0) at
> net/tap.c:271
> #1 0x00007fa60a9986f8 in vmxnet3_activate_device (s=0x7fa60cbf37f0) at
> /root/download/qemu/git/qemu-**kvm/hw/vmxnet3.c:1672
> #2 vmxnet3_handle_command (cmd=<optimized out>, s=0x7fa60cbf37f0) at
> /root/download/qemu/git/qemu-**kvm/hw/vmxnet3.c:1817
> #3 vmxnet3_io_bar1_write (opaque=0x7fa60cbf37f0, addr=<optimized out>,
> val=<optimized out>, size=<optimized out>) at
> /root/download/qemu/git/qemu-**kvm/hw/vmxnet3.c:1971
> #4 0x00007fa60aa4a280 in access_with_adjusted_size (addr=32,
> value=0x7fa603917c60, size=4, access_size_min=<optimized out>,
> access_size_max=<optimized out>, access=0x7fa60aa4a1a0
> <memory_region_write_accessor>**, opaque=0x7fa60cbf3d48) at
> /root/download/qemu/git/qemu-**kvm/memory.c:304
> #5 0x00007fa60aa4ec80 in memory_region_dispatch_write (size=4,
> data=3405643776, addr=32, mr=0x7fa60cbf3d48) at
> /root/download/qemu/git/qemu-**kvm/memory.c:982
> #6 io_mem_write (io_index=<optimized out>, addr=32, val=<optimized out>,
> size=4) at /root/download/qemu/git/qemu-**kvm/memory.c:1564
> #7 0x00007fa60aa21b82 in cpu_physical_memory_rw (addr=4273954848,
> buf=0x7fa60a844028 <Address 0x7fa60a844028 out of bounds>, len=4,
> is_write=1) at /root/download/qemu/git/qemu-**kvm/exec.c:3584
> #8 0x00007fa60aa3ec75 in kvm_cpu_exec (env=0x7fa60c308a60) at
> /root/download/qemu/git/qemu-**kvm/kvm-all.c:1192
> #9 0x00007fa60aa14ce1 in qemu_kvm_cpu_thread_fn (arg=0x7fa60c308a60) at
> /root/download/qemu/git/qemu-**kvm/cpus.c:732
> #10 0x00007fa608ce6d90 in start_thread () from /lib64/libpthread.so.0
> #11 0x00007fa606f82f5d in clone () from /lib64/libc.so.6
>
> Relevant command line:
> -cdrom ISO/KNOPPIX_V6.7.1CD-2011-09-**14-DE.iso
> -device vmxnet3,mac=1a:46:0b:ca:bc:7e,**vlan=1,romfile=
> -net tap,ifname=tap1,script=no,**downscript=no,vlan=1
>
> Can you please try to reproduce it and fix it.
>
> Thnx.
>
> Ciao,
> Gerhard
>
> --
> http://www.wiesinger.com/
>
>
> On Wed, 29 Feb 2012, Dmitry Fleytman wrote:
>
> Changes in V2:
>> License text changed accoring to community suggestions
>> Standard license header from GPLv2+ - licensed QEMU files used
>>
>> Implementation of VMWare VMXNET3 paravirtual NIC device.
>> Supports of all the device features including offload capabilties,
>> VLANs and etc.
>> The device is tested on different OSes:
>> Fedora 15
>> Ubuntu 10.4
>> Centos 6.2
>> Windows 2008R2
>> Windows 2008 64bit
>> Windows 2008 32bit
>> Windows 2003 64bit
>> Windows 2003 32bit
>> Currently live migration is not supported.
>>
>> Signed-off-by: Dmitry Fleytman <dmitry@daynix.com>
>> Signed-off-by: Yan Vugenfirer <yan@daynix.com>
>> ---
>> qemu/Makefile.objs | 1 +
>> qemu/default-configs/pci.mak | 1 +
>> qemu/hw/pci.c | 2 +
>> qemu/hw/pci.h | 1 +
>> qemu/hw/virtio-net.h | 13 +-
>> qemu/hw/vmware_utils.h | 131 +++
>> qemu/hw/vmxnet3.c | 2559 ++++++++++++++++++++++++++++++**
>> ++++++++++++
>> qemu/hw/vmxnet3.h | 727 ++++++++++++
>> qemu/hw/vmxnet3_debug.h | 104 ++
>> qemu/hw/vmxnet_utils.c | 172 +++
>> qemu/hw/vmxnet_utils.h | 242 ++++
>> qemu/net.c | 2 +-
>> qemu/net/checksum.h | 7 +
>> 13 files changed, 3955 insertions(+), 7 deletions(-)
>> create mode 100644 qemu/hw/vmware_utils.h
>> create mode 100644 qemu/hw/vmxnet3.c
>> create mode 100644 qemu/hw/vmxnet3.h
>> create mode 100644 qemu/hw/vmxnet3_debug.h
>> create mode 100644 qemu/hw/vmxnet_utils.c
>> create mode 100644 qemu/hw/vmxnet_utils.h
>>
>> diff --git a/qemu/Makefile.objs b/qemu/Makefile.objs
>> index 808de6a..3f846a6 100644
>> --- a/qemu/Makefile.objs
>> +++ b/qemu/Makefile.objs
>> @@ -264,6 +264,7 @@ hw-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o
>> hw-obj-$(CONFIG_PCNET_COMMON) += pcnet.o
>> hw-obj-$(CONFIG_E1000_PCI) += e1000.o
>> hw-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o
>> +hw-obj-$(CONFIG_VMXNET3_PCI) += vmxnet3.o vmxnet_utils.o
>>
>> hw-obj-$(CONFIG_SMC91C111) += smc91c111.o
>> hw-obj-$(CONFIG_LAN9118) += lan9118.o
>> diff --git a/qemu/default-configs/pci.mak b/qemu/default-configs/pci.mak
>> index 21e4ccf..f8e6ee1 100644
>> --- a/qemu/default-configs/pci.mak
>> +++ b/qemu/default-configs/pci.mak
>> @@ -13,6 +13,7 @@ CONFIG_PCNET_COMMON=y
>> CONFIG_LSI_SCSI_PCI=y
>> CONFIG_RTL8139_PCI=y
>> CONFIG_E1000_PCI=y
>> +CONFIG_VMXNET3_PCI=y
>> CONFIG_IDE_CORE=y
>> CONFIG_IDE_QDEV=y
>> CONFIG_IDE_PCI=y
>> diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
>> index bf046bf..f0fb1ee 100644
>> --- a/qemu/hw/pci.c
>> +++ b/qemu/hw/pci.c
>> @@ -1350,6 +1350,7 @@ static const char * const pci_nic_models[] = {
>> "e1000",
>> "pcnet",
>> "virtio",
>> + "vmxnet3",
>> NULL
>> };
>>
>> @@ -1362,6 +1363,7 @@ static const char * const pci_nic_names[] = {
>> "e1000",
>> "pcnet",
>> "virtio-net-pci",
>> + "vmxnet3",
>> NULL
>> };
>>
>> diff --git a/qemu/hw/pci.h b/qemu/hw/pci.h
>> index 4f19fdb..fee8250 100644
>> --- a/qemu/hw/pci.h
>> +++ b/qemu/hw/pci.h
>> @@ -60,6 +60,7 @@
>> #define PCI_DEVICE_ID_VMWARE_NET 0x0720
>> #define PCI_DEVICE_ID_VMWARE_SCSI 0x0730
>> #define PCI_DEVICE_ID_VMWARE_IDE 0x1729
>> +#define PCI_DEVICE_ID_VMWARE_VMXNET3 0x07B0
>>
>> /* Intel (0x8086) */
>> #define PCI_DEVICE_ID_INTEL_82551IT 0x1209
>> diff --git a/qemu/hw/virtio-net.h b/qemu/hw/virtio-net.h
>> index 4468741..fa3c17b 100644
>> --- a/qemu/hw/virtio-net.h
>> +++ b/qemu/hw/virtio-net.h
>> @@ -78,13 +78,14 @@ struct virtio_net_config
>> * specify GSO or CSUM features, you can simply ignore the header. */
>> struct virtio_net_hdr
>> {
>> -#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 // Use csum_start,
>> csum_offset
>> +#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start,
>> csum_offset */
>> +#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid
>> */
>> uint8_t flags;
>> -#define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame
>> -#define VIRTIO_NET_HDR_GSO_TCPV4 1 // GSO frame, IPv4 TCP
>> (TSO)
>> -#define VIRTIO_NET_HDR_GSO_UDP 3 // GSO frame, IPv4 UDP
>> (UFO)
>> -#define VIRTIO_NET_HDR_GSO_TCPV6 4 // GSO frame, IPv6 TCP
>> -#define VIRTIO_NET_HDR_GSO_ECN 0x80 // TCP has ECN set
>> +#define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame
>> */
>> +#define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP
>> (TSO) */
>> +#define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP
>> (UFO) */
>> +#define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP
>> */
>> +#define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set
>> */
>> uint8_t gso_type;
>> uint16_t hdr_len;
>> uint16_t gso_size;
>> diff --git a/qemu/hw/vmware_utils.h b/qemu/hw/vmware_utils.h
>> new file mode 100644
>> index 0000000..304bb48
>> --- /dev/null
>> +++ b/qemu/hw/vmware_utils.h
>> @@ -0,0 +1,131 @@
>> +/*
>> + * QEMU VMWARE paravirtual devices - auxiliary code
>> + *
>> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
>> + *
>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
>> + *
>> + * Authors:
>> + * Dmitry Fleytman <dmitry@daynix.com>
>> + * Yan Vugenfirer <yan@daynix.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +/* Shared memory access functions with byte swap support */
>> +static inline void
>> +vmw_shmem_read(target_phys_**addr_t addr, void *buf, int len)
>> +{
>> + DSHPRINTF("SHMEM r: %" PRIx64 ", len: %d to %p",
>> + (uint64_t) addr, len, buf);
>> + cpu_physical_memory_read(addr, buf, len);
>> +}
>> +
>> +static inline void
>> +vmw_shmem_write(target_phys_**addr_t addr, void *buf, int len)
>> +{
>> + DSHPRINTF("SHMEM w: %" PRIx64 ", len: %d to %p",
>> + (uint64_t) addr, len, buf);
>> + cpu_physical_memory_write(**addr, buf, len);
>> +}
>> +
>> +static inline void
>> +vmw_shmem_rw(target_phys_**addr_t addr, void *buf, int len, int
>> is_write)
>> +{
>> + DSHPRINTF("SHMEM r/w: %" PRIx64 ", len: %d (to %p), is write: %d",
>> + (uint64_t) addr, len, buf, is_write);
>> +
>> + cpu_physical_memory_rw(addr, buf, len, is_write);
>> +}
>> +
>> +static inline void
>> +vmw_shmem_set(target_phys_**addr_t addr, uint8 val, int len)
>> +{
>> + int i;
>> + DSHPRINTF("SHMEM set: %" PRIx64 ", len: %d (value 0x%X)",
>> + (uint64_t) addr, len, val);
>> +
>> + for (i = 0; i < len; i++) {
>> + cpu_physical_memory_write(addr + i, &val, 1);
>> + }
>> +}
>> +
>> +static inline uint32_t
>> +vmw_shmem_ld8(target_phys_**addr_t addr)
>> +{
>> + uint8_t res = ldub_phys(addr);
>> + DSHPRINTF("SHMEM load8: %" PRIx64 " (value 0x%X)",
>> + (uint64_t) addr, res);
>> + return res;
>> +}
>> +
>> +static inline void
>> +vmw_shmem_st8(target_phys_**addr_t addr, uint8_t value)
>> +{
>> + DSHPRINTF("SHMEM store8: %" PRIx64 " (value 0x%X)",
>> + (uint64_t) addr, value);
>> + stb_phys(addr, value);
>> +}
>> +
>> +static inline uint32_t
>> +vmw_shmem_ld16(target_phys_**addr_t addr)
>> +{
>> + uint16_t res = lduw_le_phys(addr);
>> + DSHPRINTF("SHMEM load16: %" PRIx64 " (value 0x%X)",
>> + (uint64_t) addr, res);
>> + return res;
>> +}
>> +
>> +static inline void
>> +vmw_shmem_st16(target_phys_**addr_t addr, uint16_t value)
>> +{
>> + DSHPRINTF("SHMEM store16: %" PRIx64 " (value 0x%X)",
>> + (uint64_t) addr, value);
>> + stw_le_phys(addr, value);
>> +}
>> +
>> +static inline uint32_t
>> +vmw_shmem_ld32(target_phys_**addr_t addr)
>> +{
>> + uint32_t res = ldl_le_phys(addr);
>> + DSHPRINTF("SHMEM load32: %" PRIx64 " (value 0x%X)",
>> + (uint64_t) addr, res);
>> + return res;
>> +}
>> +
>> +static inline void
>> +vmw_shmem_st32(target_phys_**addr_t addr, uint32_t value)
>> +{
>> + DSHPRINTF("SHMEM store32: %" PRIx64 " (value 0x%X)",
>> + (uint64_t) addr, value);
>> + stl_le_phys(addr, value);
>> +}
>> +
>> +static inline uint64_t
>> +vmw_shmem_ld64(target_phys_**addr_t addr)
>> +{
>> + uint64_t res = ldq_le_phys(addr);
>> + DSHPRINTF("SHMEM load64: %" PRIx64 " (value %" PRIx64 ")",
>> + (uint64_t) addr, res);
>> + return res;
>> +}
>> +
>> +static inline void
>> +vmw_shmem_st64(target_phys_**addr_t addr, uint64_t value)
>> +{
>> + DSHPRINTF("SHMEM store64: %" PRIx64 " (value %" PRIx64 ")",
>> + (uint64_t) addr, value);
>> + stq_le_phys(addr, value);
>> +}
>> +
>> +/* MACROS for simplification of operations on array-style registers */
>> +#define IS_MULTIREG_ADDR(addr, base, cnt, regsize) \
>> + (((addr) >= (base)) && ((addr) < (base) + (cnt) * (regsize)))
>> +
>> +#define MULTIREG_IDX_BY_ADDR(addr, base, regsize) \
>> + (((addr) - (base)) / (regsize))
>> +
>> +/* Bitfields */
>> +#define FLAG_IS_SET(field, flag) (((field) & (flag)) == (flag))
>> diff --git a/qemu/hw/vmxnet3.c b/qemu/hw/vmxnet3.c
>> new file mode 100644
>> index 0000000..112d3b9
>> --- /dev/null
>> +++ b/qemu/hw/vmxnet3.c
>> @@ -0,0 +1,2559 @@
>> +/*
>> + * QEMU VMWARE VMXNET3 paravirtual NIC
>> + *
>> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
>> + *
>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
>> + *
>> + * Authors:
>> + * Dmitry Fleytman <dmitry@daynix.com>
>> + * Yan Vugenfirer <yan@daynix.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#define VMXNET3_ENABLE_MSIX
>> +#define VMXNET3_ENABLE_MSI
>> +
>> +/* Define this constant to non-zero to enable IP4 */
>> +/* fragmentation feature */
>> +
>> +/* #define VMXNET_MAX_IP_PLOAD_LEN ETH_MAX_IP_PLOAD_LEN */
>> +#define VMXNET3_MAX_IP_PLOAD_LEN 0
>> +
>> +#include "hw.h"
>> +#include "pci.h"
>> +#include "net.h"
>> +#include "virtio-net.h"
>> +#include "net/tap.h"
>> +#include "sysemu.h"
>> +#include "iov.h"
>> +#include "bswap.h"
>> +#ifdef VMXNET3_ENABLE_MSIX
>> +#include "msix.h"
>> +#endif
>> +#ifdef VMXNET3_ENABLE_MSI
>> +#include "msi.h"
>> +#endif
>> +
>> +#include "vmxnet3_debug.h"
>> +#include "vmxnet3.h"
>> +#include "vmware_utils.h"
>> +#include "vmxnet_utils.h"
>> +
>> +#define PCI_DEVICE_ID_VMWARE_VMXNET3_**REVISION 0x1
>> +#define VMXNET3_MSIX_BAR_SIZE 0x2000
>> +
>> +#define VMXNET3_BAR0_IDX (0)
>> +#define VMXNET3_BAR1_IDX (1)
>> +#define VMXNET3_MSIX_BAR_IDX (2)
>> +
>> +/* Link speed in Mbps should be shifted by 16 */
>> +#define VMXNET3_LINK_SPEED (1000 << 16)
>> +
>> +/* Link status: 1 - up, 0 - down. */
>> +#define VMXNET3_LINK_STATUS_UP 0x1
>> +
>> +/* Least significant bit should be set for revision and version */
>> +#define VMXNET3_DEVICE_VERSION 0x1
>> +#define VMXNET3_DEVICE_REVISION 0x1
>> +
>> +/* Macros for rings descriptors access */
>> +#define VMXNET3_READ_TX_QUEUE_DESCR8(**dpa, field) \
>> + (vmw_shmem_ld8(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
>> +
>> +#define VMXNET3_WRITE_TX_QUEUE_DESCR8(**dpa, field, value) \
>> + (vmw_shmem_st8(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field,
>> value)))
>> +
>> +#define VMXNET3_READ_TX_QUEUE_DESCR32(**dpa, field) \
>> + (vmw_shmem_ld32(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
>> +
>> +#define VMXNET3_WRITE_TX_QUEUE_**DESCR32(dpa, field, value) \
>> + (vmw_shmem_st32(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field),
>> value))
>> +
>> +#define VMXNET3_READ_TX_QUEUE_DESCR64(**dpa, field) \
>> + (vmw_shmem_ld64(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field)))
>> +
>> +#define VMXNET3_WRITE_TX_QUEUE_**DESCR64(dpa, field, value) \
>> + (vmw_shmem_st64(dpa + offsetof(struct Vmxnet3_TxQueueDesc, field),
>> value))
>> +
>> +#define VMXNET3_READ_RX_QUEUE_DESCR64(**dpa, field) \
>> + (vmw_shmem_ld64(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))
>> +
>> +#define VMXNET3_READ_RX_QUEUE_DESCR32(**dpa, field) \
>> + (vmw_shmem_ld32(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field)))
>> +
>> +#define VMXNET3_WRITE_RX_QUEUE_**DESCR64(dpa, field, value) \
>> + (vmw_shmem_st64(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field),
>> value))
>> +
>> +#define VMXNET3_WRITE_RX_QUEUE_DESCR8(**dpa, field, value) \
>> + (vmw_shmem_st8(dpa + offsetof(struct Vmxnet3_RxQueueDesc, field),
>> value))
>> +
>> +/* Macros for guest driver shared area access */
>> +#define VMXNET3_READ_DRV_SHARED64(**shpa, field) \
>> + (vmw_shmem_ld64(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
>> +
>> +#define VMXNET3_READ_DRV_SHARED32(**shpa, field) \
>> + (vmw_shmem_ld32(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
>> +
>> +#define VMXNET3_WRITE_DRV_SHARED32(**shpa, field, val) \
>> + (vmw_shmem_st32(shpa + offsetof(struct Vmxnet3_DriverShared, field),
>> val))
>> +
>> +#define VMXNET3_READ_DRV_SHARED16(**shpa, field) \
>> + (vmw_shmem_ld16(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
>> +
>> +#define VMXNET3_READ_DRV_SHARED8(shpa, field) \
>> + (vmw_shmem_ld8(shpa + offsetof(struct Vmxnet3_DriverShared, field)))
>> +
>> +#define VMXNET3_READ_DRV_SHARED(shpa, field, b, l) \
>> + (vmw_shmem_read(shpa + offsetof(struct Vmxnet3_DriverShared, field),
>> b, l))
>> +
>> +/* TX/RX packets abstractions */
>> +typedef struct Vmxnet3_TxPktMdata {
>> + uint32_t offload_mode;
>> + uint32_t cso_or_gso_size;
>> + uint32_t hdr_length;
>> + eth_pkt_types_e packet_type;
>> +} Vmxnet3_TxPktMdata;
>> +
>> +typedef struct _Vmxnet3_RxPktMdata {
>> + uint32_t tot_len;
>> + uint16_t vlan_tag;
>> + bool vlan_stripped;
>> + bool vhdr_valid;
>> + eth_pkt_types_e packet_type;
>> +} Vmxnet3_RxPktMdata;
>> +
>> +#define VMXNET3_TXPKT_REBUILT_HDR_LEN (1024)
>> +
>> +typedef struct _Vmxnet3_TxPkt {
>> + Vmxnet3_TxPktMdata mdata;
>> + struct virtio_net_hdr virt_hdr;
>> + bool has_virt_hdr;
>> +
>> + struct iovec *vec;
>> +
>> + uint8_t __l2_hdr[ETH_MAX_L2_HDR_LEN];
>> + uint8_t __l3_hdr[ETH_MAX_L3_HDR_LEN];
>> +
>> + uint32_t payload_len;
>> + uint32_t max_payload_len;
>> +
>> + uint32_t payload_frags;
>> + uint32_t max_payload_frags;
>> +
>> + struct {
>> + uint32_t offset;
>> + bool more_frags;
>> + bool orig_more_frags;
>> + } fragmentation;
>> +} Vmxnet3_TxPkt;
>> +
>> +#define VMXNET3_TXPKT_VHDR_FRAG (0)
>> +#define VMXNET3_TXPKT_L2HDR_FRAG (1)
>> +#define VMXNET3_TXPKT_L3HDR_FRAG (2)
>> +#define VMXNET3_TXPKT_PL_START_FRAG (3)
>> +
>> +#define vmxnet3_txpkt_get_mdata(p) (&((p)->mdata))
>> +#define vmxnet3_txpkt_get_vhdr(p) (&((p)->virt_hdr))
>> +
>> +#define vmxnet3_txpkt_get_l2hdr(p) \
>> + ((p)->vec[VMXNET3_TXPKT_L2HDR_**FRAG].iov_base)
>> +#define vmxnet3_txpkt_get_l2hdr_len(p) \
>> + ((p)->vec[VMXNET3_TXPKT_L2HDR_**FRAG].iov_len)
>> +#define vmxnet3_txpkt_set_l2hdr_len(p, l) \
>> + ((p)->vec[VMXNET3_TXPKT_L2HDR_**FRAG].iov_len = l)
>> +#define vmxnet3_txpkt_get_l3hdr(p) \
>> + ((p)->vec[VMXNET3_TXPKT_L3HDR_**FRAG].iov_base)
>> +#define vmxnet3_txpkt_get_l3hdr_len(p) \
>> + ((p)->vec[VMXNET3_TXPKT_L3HDR_**FRAG].iov_len)
>> +#define vmxnet3_txpkt_set_l3hdr_len(p, l) \
>> + ((p)->vec[VMXNET3_TXPKT_L3HDR_**FRAG].iov_len = l)
>> +#define vmxnet3_txpkt_get_payload_len(**p) \
>> + ((p)->payload_len)
>> +
>> +#define vmxnet3_txpkt_set_more_frags(**p, mf) \
>> + ((p)->fragmentation.more_frags = mf)
>> +#define vmxnet3_txpkt_get_more_frags(**p) \
>> + ((p)->fragmentation.more_frags | \
>> + (p)->fragmentation.orig_more_**frags)
>> +#define vmxnet3_txpkt_set_frag_off(p, off) \
>> + ((p)->fragmentation.offset = off)
>> +#define vmxnet3_txpkt_get_frag_off(p) \
>> + ((p)->fragmentation.offset)
>> +#define vmxnet3_txpkt_advance_frag_**off(p, off) \
>> + ((p)->fragmentation.offset += off)
>> +
>> +static inline size_t
>> +vmxnet3_txpkt_get_total_len(**const Vmxnet3_TxPkt *p)
>> +{
>> + return vmxnet3_txpkt_get_l2hdr_len(p) +
>> + vmxnet3_txpkt_get_l3hdr_len(p) +
>> + vmxnet3_txpkt_get_payload_len(**p);
>> +}
>> +
>> +static inline struct iovec*
>> +vmxnet3_txpkt_get_payload_**frag(Vmxnet3_TxPkt *p, uint32_t num)
>> +{
>> + assert(num < p->max_payload_frags);
>> + return &p->vec[num + VMXNET3_TXPKT_PL_START_FRAG];
>> +}
>> +
>> +static inline void
>> +vmxnet3_txpkt_set_num_pl_**frags(Vmxnet3_TxPkt *p, uint32_t num)
>> +{
>> + assert(num <= p->max_payload_frags);
>> + p->payload_frags = num;
>> +}
>> +
>> +static inline void
>> +vmxnet3_txpkt_reset_payload(**Vmxnet3_TxPkt *p)
>> +{
>> + p->payload_len = 0;
>> +}
>> +
>> +static void vmxnet3_txpkt_reset(Vmxnet3_**TxPkt *p)
>> +{
>> + memset(&p->mdata, 0, sizeof(p->mdata));
>> + vmxnet3_txpkt_set_num_pl_**frags(p, 0);
>> + vmxnet3_txpkt_reset_payload(p)**;
>> + vmxnet3_txpkt_set_more_frags(**p, 0);
>> + vmxnet3_txpkt_set_frag_off(p, 0);
>> + p->max_payload_len = 0;
>> +
>> + if (NULL != p->vec) {
>> + p->vec[VMXNET3_TXPKT_L2HDR_**FRAG].iov_len = 0;
>> + p->vec[VMXNET3_TXPKT_L3HDR_**FRAG].iov_len = 0;
>> + }
>> +}
>> +
>> +static bool
>> +vmxnet3_txpkt_prealloc(**Vmxnet3_TxPkt *p, uint32_t max_frags, bool
>> has_virt_hdr)
>> +{
>> + if (NULL != p->vec) {
>> + g_free(p->vec);
>> + }
>> +
>> + p->vec =
>> + g_malloc(sizeof(*p->vec) * (max_frags +
>> VMXNET3_TXPKT_PL_START_FRAG));
>> + if (NULL == p->vec) {
>> + return false;
>> + }
>> +
>> + p->max_payload_frags = max_frags;
>> + p->has_virt_hdr = has_virt_hdr;
>> + p->vec[VMXNET3_TXPKT_VHDR_**FRAG].iov_base = &p->virt_hdr;
>> + p->vec[VMXNET3_TXPKT_VHDR_**FRAG].iov_len =
>> + p->has_virt_hdr ? sizeof(p->virt_hdr) : 0;
>> + p->vec[VMXNET3_TXPKT_L2HDR_**FRAG].iov_base = &p->__l2_hdr;
>> + p->vec[VMXNET3_TXPKT_L3HDR_**FRAG].iov_base = &p->__l3_hdr;
>> + vmxnet3_txpkt_reset(p);
>> + return true;
>> +}
>> +
>> +static void vmxnet3_txpkt_init(Vmxnet3_**TxPkt *p)
>> +{
>> + p->vec = NULL;
>> +}
>> +
>> +static void vmxnet3_txpkt_cleanup(Vmxnet3_**TxPkt *p)
>> +{
>> + g_free(p->vec);
>> +}
>> +
>> +static void vmxnet3_txpkt_unmap(Vmxnet3_**TxPkt *p, bool is_write)
>> +{
>> + int i;
>> +
>> + for (i = VMXNET3_TXPKT_PL_START_FRAG;
>> + i < p->payload_frags + VMXNET3_TXPKT_PL_START_FRAG; i++) {
>> + cpu_physical_memory_unmap(p->**vec[i].iov_base,
>> p->vec[i].iov_len,
>> + is_write, p->vec[i].iov_len);
>> + }
>> +}
>> +
>> +static void*
>> +vmxnet3_txpkt_map(Vmxnet3_**TxPkt *p, uint32_t *mapped_fragments, bool
>> is_write)
>> +{
>> + int i;
>> +
>> + for (i = VMXNET3_TXPKT_PL_START_FRAG;
>> + i < p->payload_frags + VMXNET3_TXPKT_PL_START_FRAG; i++) {
>> + target_phys_addr_t mapped_len = p->vec[i].iov_len;
>> + size_t orig_len = p->vec[i].iov_len;
>> + p->vec[i].iov_base =
>> + cpu_physical_memory_map((**uint64_t) p->vec[i].iov_base,
>> + &mapped_len, is_write);
>> + p->vec[i].iov_len = mapped_len;
>> +
>> + if ((NULL == p->vec[i].iov_base) || (orig_len != mapped_len)) {
>> + p->payload_frags = i + !!p->vec[i].iov_base;
>> + vmxnet3_txpkt_unmap(p, is_write);
>> + return NULL;
>> + }
>> + }
>> +
>> + *mapped_fragments = VMXNET3_TXPKT_PL_START_FRAG + p->payload_frags;
>> + return p->vec;
>> +}
>> +
>> +static inline void
>> +vmxnet3_txpkt_dump(Vmxnet3_**TxPkt *p)
>> +{
>> +#ifdef DEBUG_VMXNET3_PACKETS
>> + Vmxnet3_TxPktMdata *m = vmxnet3_txpkt_get_mdata(p);
>> +#endif
>> +
>> + DPKPRINTF("TXPKT MDATA: om: %d, cso/gso_size: %d, hdr_len: %d, "
>> + "pkt_type: 0x%X, l2hdr_len: %lu l3hdr_len: %lu,
>> payload_len: %u",
>> + m->offload_mode, m->cso_or_gso_size,
>> + m->hdr_length, m->packet_type,
>> + vmxnet3_txpkt_get_l2hdr_len(p)**,
>> + vmxnet3_txpkt_get_l3hdr_len(p)**,
>> + vmxnet3_txpkt_get_payload_len(**p));
>> +};
>> +
>> +/* RX packet may contain up to 2 fragments - rebuilt eth header */
>> +/* in case of VLAN tag stripping */
>> +/* and payload received from QEMU - in any case */
>> +#define VMXNET3_MAX_RX_PACKET_**FRAGMENTS (2)
>> +
>> +typedef struct _Vmxnet3_RxPkt {
>> + Vmxnet3_RxPktMdata mdata;
>> + struct virtio_net_hdr virt_hdr;
>> + struct eth_header eth_hdr;
>> + struct iovec vec[VMXNET3_MAX_RX_PACKET_**FRAGMENTS];
>> + uint16 vec_len;
>> +} Vmxnet3_RxPkt;
>> +
>> +#define vmxnet3_rxpkt_get_mdata(p) (&((p)->mdata))
>> +#define vmxnet3_rxpkt_get_ehdr(p) (&((p)->eth_hdr))
>> +#define vmxnet3_rxpkt_get_vhdr(p) (&((p)->virt_hdr))
>> +#define vmxnet3_rxpkt_get_frag(p, n) (&((p)->vec[(n)]))
>> +#define vmxnet3_rxpkt_set_num_frags(p, n) ((p)->vec_len = (n))
>> +#define vmxnet3_rxpkt_get_num_frags(p) ((p)->vec_len)
>> +
>> +static inline void vmxnet3_rxpkt_attach_ehdr(**Vmxnet3_RxPkt *p)
>> +{
>> + vmxnet3_rxpkt_get_frag(p, 0)->iov_base = &p->eth_hdr;
>> + vmxnet3_rxpkt_get_frag(p, 0)->iov_len = sizeof(p->eth_hdr);
>> +}
>> +
>> +static inline void vmxnet3_rxpkt_reset(Vmxnet3_**RxPkt *p)
>> +{
>> + memset(&p->mdata, 0, sizeof(p->mdata));
>> + memset(&p->virt_hdr, 0, sizeof(p->virt_hdr));
>> + vmxnet3_rxpkt_set_num_frags(p, 0);
>> +}
>> +
>> +static void vmxnet3_rxpkt_init(Vmxnet3_**RxPkt *p)
>> +{
>> + vmxnet3_rxpkt_reset(p);
>> +}
>> +
>> +static inline void
>> +vmxnet3_rxpkt_dump(Vmxnet3_**RxPkt *p)
>> +{
>> +#ifdef DEBUG_VMXNET3_PACKETS
>> + Vmxnet3_RxPktMdata *m = vmxnet3_rxpkt_get_mdata(p);
>> +#endif
>> +
>> + DPKPRINTF("RXPKT MDATA: tot_len: %d, pkt_type: 0x%X, "
>> + "vlan_stripped: %d, vlan_tag: %d, vhdr_valid: %d",
>> + m->tot_len, m->packet_type,
>> + m->vlan_stripped, m->vlan_tag, m->vhdr_valid);
>> +};
>> +
>> +/* Cyclic ring abstraction */
>> +typedef struct _Vmxnet3_Ring {
>> + target_phys_addr_t pa;
>> + size_t size;
>> + size_t cell_size;
>> + size_t next;
>> + uint8_t gen;
>> +} Vmxnet3_Ring;
>> +
>> +static inline void vmxnet3_ring_init(Vmxnet3_Ring *ring,
>> + target_phys_addr_t pa,
>> + size_t size,
>> + size_t cell_size,
>> + bool zero_region)
>> +{
>> + ring->pa = pa;
>> + ring->size = size;
>> + ring->cell_size = cell_size;
>> + ring->gen = VMXNET3_INIT_GEN;
>> + ring->next = 0;
>> +
>> + if (zero_region) {
>> + vmw_shmem_set(pa, 0, size*cell_size);
>> + }
>> +}
>> +
>> +#define vmxnet3_ring_dump(macro, ring_name, ridx, r)
>> \
>> + macro("%s#%d: base %" PRIx64 " size %lu cell_size %lu gen %d next
>> %lu", \
>> + (ring_name), (ridx), (uint64_t) (r)->pa,
>> \
>> + (r)->size, (r)->cell_size, (r)->gen, (r)->next)
>> +
>> +static inline void vmxnet3_ring_inc(Vmxnet3_Ring *ring)
>> +{
>> + if (++ring->next >= ring->size) {
>> + ring->next = 0;
>> + ring->gen ^= 1;
>> + }
>> +}
>> +
>> +static inline void vmxnet3_ring_dec(Vmxnet3_Ring *ring)
>> +{
>> + if (0 == ring->next--) {
>> + ring->next = ring->size - 1;
>> + ring->gen ^= 1;
>> + }
>> +}
>> +
>> +static inline target_phys_addr_t vmxnet3_ring_curr_cell_pa(**Vmxnet3_Ring
>> *ring)
>> +{
>> + return ring->pa + ring->next * ring->cell_size;
>> +}
>> +
>> +static inline void vmxnet3_ring_read_curr_cell(**Vmxnet3_Ring *ring,
>> void *buff)
>> +{
>> + vmw_shmem_read(vmxnet3_ring_**curr_cell_pa(ring), buff,
>> ring->cell_size);
>> +}
>> +
>> +static inline void vmxnet3_ring_write_curr_cell(**Vmxnet3_Ring *ring,
>> void *buff)
>> +{
>> + vmw_shmem_write(vmxnet3_ring_**curr_cell_pa(ring), buff,
>> ring->cell_size);
>> +}
>> +
>> +static inline size_t vmxnet3_ring_curr_cell_idx(**Vmxnet3_Ring *ring)
>> +{
>> + return ring->next;
>> +}
>> +
>> +static inline uint8_t vmxnet3_ring_curr_gen(Vmxnet3_**Ring *ring)
>> +{
>> + return ring->gen;
>> +}
>> +
>> +/* Debug trace-related functions */
>> +static inline void
>> +vmxnet3_dump_tx_descr(struct Vmxnet3_TxDesc *descr)
>> +{
>> + DPKPRINTF("TX DESCR: "
>> + "addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
>> + "dtype: %d, ext1: %d, msscof: %d, hlen: %d, om: %d, "
>> + "eop: %d, cq: %d, ext2: %d, ti: %d, tci: %d",
>> + le64_to_cpu(descr->addr), descr->len, descr->gen,
>> descr->rsvd,
>> + descr->dtype, descr->ext1, descr->msscof, descr->hlen,
>> descr->om,
>> + descr->eop, descr->cq, descr->ext2, descr->ti, descr->tci);
>> +}
>> +
>> +static inline void
>> +vmxnet3_dump_virt_hdr(struct virtio_net_hdr *vhdr)
>> +{
>> + DPKPRINTF("VHDR: flags 0x%x, gso_type: 0x%x, hdr_len: %d, gso_size:
>> %d, "
>> + "csum_start: %d, csum_offset: %d",
>> + vhdr->flags, vhdr->gso_type, vhdr->hdr_len, vhdr->gso_size,
>> + vhdr->csum_start, vhdr->csum_offset);
>> +}
>> +
>> +static inline void
>> +vmxnet3_dump_rx_descr(struct Vmxnet3_RxDesc *descr)
>> +{
>> + DPKPRINTF("RX DESCR: addr %" PRIx64 ", len: %d, gen: %d, rsvd: %d, "
>> + "dtype: %d, ext1: %d, btype: %d",
>> + le64_to_cpu(descr->addr), descr->len, descr->gen,
>> + descr->rsvd, descr->dtype, descr->ext1, descr->btype);
>> +}
>> +
>> +/* Device state and helper functions */
>> +#define VMXNET3_RX_RINGS_PER_QUEUE (2)
>> +
>> +typedef struct _VMXNET3_State {
>> + PCIDevice dev;
>> + NICState *nic;
>> + NICConf conf;
>> + MemoryRegion bar0;
>> + MemoryRegion bar1;
>> + MemoryRegion msix_bar;
>> +
>> +#ifdef VMXNET3_ENABLE_MSIX
>> + /* Whether MSI-X support was installed successfully */
>> + uint8_t msix_used;
>> +#endif
>> +#ifdef VMXNET3_ENABLE_MSI
>> + /* Whether MSI support was installed successfully */
>> + uint8_t msi_used;
>> +#endif
>> +
>> + target_phys_addr_t drv_shmem;
>> + target_phys_addr_t temp_shared_guest_driver_**memory;
>> +
>> + uint8_t txq_num;
>> + struct {
>> + Vmxnet3_Ring tx_ring;
>> + Vmxnet3_Ring comp_ring;
>> +
>> + uint8_t intr_idx;
>> + target_phys_addr_t tx_stats_pa;
>> + struct UPT1_TxStats txq_stats;
>> + } txq_descr[VMXNET3_DEVICE_MAX_**TX_QUEUES];
>> +
>> + /* This boolean tells whether RX packet being indicated has to */
>> + /* be split into head and body chunks from different RX rings */
>> + bool rx_packets_compound;
>> +
>> + bool rx_vlan_stripping;
>> + bool lro_supported;
>> +
>> + uint8_t rxq_num;
>> + struct {
>> + Vmxnet3_Ring rx_ring[VMXNET3_RX_RINGS_PER_**QUEUE];
>> + Vmxnet3_Ring comp_ring;
>> + uint8_t intr_idx;
>> + target_phys_addr_t rx_stats_pa;
>> + struct UPT1_RxStats rxq_stats;
>> + } rxq_descr[VMXNET3_DEVICE_MAX_**RX_QUEUES];
>> +
>> + /* Network MTU */
>> + uint32_t mtu;
>> +
>> + /* Maximum number of fragments for indicated TX packets */
>> + uint32_t max_tx_frags;
>> +
>> + /* Maximum number of fragments for indicated RX packets */
>> + uint16_t max_rx_frags;
>> +
>> + /* Index for events interrupt */
>> + uint8_t event_int_idx;
>> +
>> + /* Whether automatic interrupts masking enabled */
>> + uint8_t auto_int_masking;
>> +
>> + bool peer_has_vhdr;
>> +
>> + /* TX packets to QEMU interface */
>> + Vmxnet3_TxPkt curr_txpkt;
>> + size_t curr_txpkt_pl_frags;
>> + bool curr_txpkt_skip;
>> + bool curr_txpkt_header_processed;
>> +
>> + uint32_t device_active;
>> + uint32_t last_command;
>> +
>> + uint32_t link_status_and_speed;
>> +
>> + struct {
>> + bool is_masked;
>> + bool is_pending;
>> + bool is_asserted;
>> + } interrupt_states[VMXNET3_MAX_**INTRS];
>> +
>> + uint32_t temp_mac; /* To store the low part first */
>> +
>> + MACAddr perm_mac;
>> + uint32_t vlan_table[VMXNET3_VFT_SIZE];
>> + uint32_t rx_mode;
>> + MACAddr *mcast_list;
>> + uint16_t mcast_list_len;
>> +} VMXNET3_State;
>> +
>> +/* Interrupt management */
>> +
>> +/* This function returns sign whether interrupt line is in asserted
>> state */
>> +/* This depends on the type of interrupt used. For INTX interrupt line
>> will */
>> +/* be asserted until explicit deassertion, for MSI(X) interrupt line
>> will */
>> +/* be deasserted automatically due to notifiction symantics of the
>> MSI(X) */
>> +/* interrupts
>> */
>> +static bool _vmxnet3_assert_interrupt_**line(VMXNET3_State *s, uint32_t
>> int_idx)
>> +{
>> +#ifdef VMXNET3_ENABLE_MSIX
>> + if (s->msix_used && msix_enabled(&s->dev)) {
>> + DIRPRINTF("Sending MSI-X notification for vector %u", int_idx);
>> + msix_notify(&s->dev, int_idx);
>> + return false;
>> + }
>> +#endif
>> +#ifdef VMXNET3_ENABLE_MSI
>> + if (s->msi_used && msi_enabled(&s->dev)) {
>> + DIRPRINTF("Sending MSI notification for vector %u", int_idx);
>> + msi_notify(&s->dev, int_idx);
>> + return false;
>> + }
>> +#endif
>> +
>> + DIRPRINTF("Asserting line for interrupt %u", int_idx);
>> + qemu_set_irq(s->dev.irq[int_**idx], 1);
>> + return true;
>> +}
>> +
>> +static void _vmxnet3_deassert_interrupt_**line(VMXNET3_State *s, int
>> lidx)
>> +{
>> +#ifdef VMXNET3_ENABLE_MSIX
>> + /* This function should never be called for MSI(X) interrupts */
>> + /* because deassertion never required for message interrupts */
>> + assert(!s->msix_used || !msix_enabled(&s->dev));
>> +#endif
>> +#ifdef VMXNET3_ENABLE_MSI
>> + /* This function should never be called for MSI(X) interrupts */
>> + /* because deassertion never required for message interrupts */
>> + assert(!s->msi_used || !msi_enabled(&s->dev));
>> +#endif
>> +
>> + DIRPRINTF("Deasserting line for interrupt %u", lidx);
>> + qemu_set_irq(s->dev.irq[lidx], 0);
>> +}
>> +
>> +static void vmxnet3_update_interrupt_line_**state(VMXNET3_State *s, int
>> lidx)
>> +{
>> + if (!s->interrupt_states[lidx].**is_pending &&
>> + s->interrupt_states[lidx].is_**asserted) {
>> + DIRPRINTF("New interrupt line state for index %d is DOWN", lidx);
>> + _vmxnet3_deassert_interrupt_**line(s, lidx);
>> + s->interrupt_states[lidx].is_**asserted = false;
>> + return;
>> + }
>> +
>> + if (s->interrupt_states[lidx].is_**pending &&
>> + !s->interrupt_states[lidx].is_**masked &&
>> + !s->interrupt_states[lidx].is_**asserted) {
>> + DIRPRINTF("New interrupt line state for index %d is UP", lidx);
>> + s->interrupt_states[lidx].is_**asserted =
>> + _vmxnet3_assert_interrupt_**line(s, lidx);
>> + s->interrupt_states[lidx].is_**pending = false;
>> + return;
>> + }
>> +}
>> +
>> +static void vmxnet3_trigger_interrupt(**VMXNET3_State *s, int lidx)
>> +{
>> + s->interrupt_states[lidx].is_**pending = true;
>> + vmxnet3_update_interrupt_line_**state(s, lidx);
>> +
>> +#ifdef VMXNET3_ENABLE_MSIX
>> + if (s->msix_used && msix_enabled(&s->dev) && s->auto_int_masking) {
>> + goto do_automask;
>> + }
>> +#endif
>> +
>> +#ifdef VMXNET3_ENABLE_MSI
>> + if (s->msi_used && msi_enabled(&s->dev) && s->auto_int_masking) {
>> + goto do_automask;
>> + }
>> +#endif
>> +
>> + return;
>> +
>> +do_automask:
>> + s->interrupt_states[lidx].is_**masked = true;
>> + vmxnet3_update_interrupt_line_**state(s, lidx);
>> +}
>> +
>> +static bool vmxnet3_interrupt_asserted(**VMXNET3_State *s, int lidx)
>> +{
>> + return s->interrupt_states[lidx].is_**asserted;
>> +}
>> +
>> +static void vmxnet3_clear_interrupt(**VMXNET3_State *s, int int_idx)
>> +{
>> + s->interrupt_states[int_idx].**is_pending = false;
>> + if (s->auto_int_masking) {
>> + s->interrupt_states[int_idx].**is_masked = true;
>> + }
>> + vmxnet3_update_interrupt_line_**state(s, int_idx);
>> +}
>> +
>> +static void
>> +vmxnet3_on_interrupt_mask_**changed(VMXNET3_State *s, int lidx, bool
>> is_masked)
>> +{
>> + s->interrupt_states[lidx].is_**masked = is_masked;
>> + vmxnet3_update_interrupt_line_**state(s, lidx);
>> +}
>> +
>> +static bool vmxnet3_verify_driver_magic(**target_phys_addr_t dshmem)
>> +{
>> + return (VMXNET3_REV1_MAGIC == VMXNET3_READ_DRV_SHARED32(**dshmem,
>> magic));
>> +}
>> +
>> +#define _GET_BYTE(x, byte_num) (((x) >> (byte_num)*8) & 0xFF)
>> +#define _MAKE_BYTE(byte_num, val) (((uint32_t)((val) & 0xFF)) <<
>> (byte_num)*8)
>> +
>> +static void vmxnet3_set_variable_mac(**VMXNET3_State *s, uint32_t h,
>> uint32_t l)
>> +{
>> + s->conf.macaddr.a[0] = _GET_BYTE(l, 0);
>> + s->conf.macaddr.a[1] = _GET_BYTE(l, 1);
>> + s->conf.macaddr.a[2] = _GET_BYTE(l, 2);
>> + s->conf.macaddr.a[3] = _GET_BYTE(l, 3);
>> + s->conf.macaddr.a[4] = _GET_BYTE(h, 0);
>> + s->conf.macaddr.a[5] = _GET_BYTE(h, 1);
>> +
>> + DCFPRINTF("Variable MAC: " MAC_FMT, MAC_ARG(s->conf.macaddr.a));
>> +
>> + qemu_format_nic_info_str(&s->**nic->nc, s->conf.macaddr.a);
>> +}
>> +
>> +static uint64_t vmxnet3_get_mac_low(MACAddr *addr)
>> +{
>> + return _MAKE_BYTE(0, addr->a[0]) |
>> + _MAKE_BYTE(1, addr->a[1]) |
>> + _MAKE_BYTE(2, addr->a[2]) |
>> + _MAKE_BYTE(3, addr->a[3]);
>> +}
>> +
>> +static uint64_t vmxnet3_get_mac_high(MACAddr *addr)
>> +{
>> + return _MAKE_BYTE(0, addr->a[4]) |
>> + _MAKE_BYTE(1, addr->a[5]);
>> +}
>> +
>> +static void
>> +vmxnet3_inc_tx_consumption_**counter(VMXNET3_State *s, int qidx)
>> +{
>> + vmxnet3_ring_inc(&s->txq_**descr[qidx].tx_ring);
>> +}
>> +
>> +static inline void
>> +vmxnet3_inc_rx_consumption_**counter(VMXNET3_State *s, int qidx, int
>> ridx)
>> +{
>> + vmxnet3_ring_inc(&s->rxq_**descr[qidx].rx_ring[ridx]);
>> +}
>> +
>> +static inline void
>> +vmxnet3_inc_tx_completion_**counter(VMXNET3_State *s, int qidx)
>> +{
>> + vmxnet3_ring_inc(&s->txq_**descr[qidx].comp_ring);
>> +}
>> +
>> +static void
>> +vmxnet3_inc_rx_completion_**counter(VMXNET3_State *s, int qidx)
>> +{
>> + vmxnet3_ring_inc(&s->rxq_**descr[qidx].comp_ring);
>> +}
>> +
>> +static void
>> +vmxnet3_dec_rx_completion_**counter(VMXNET3_State *s, int qidx)
>> +{
>> + vmxnet3_ring_dec(&s->rxq_**descr[qidx].comp_ring);
>> +}
>> +
>> +static inline void vmxnet3_flush_shmem_changes(**void)
>> +{
>> + /* Flush shared memory changes */
>> + /* Needed before transferring comntrol to guest */
>> + smp_wmb();
>> +}
>> +
>> +static void vmxnet3_complete_packet(**VMXNET3_State *s, int qidx,
>> uint32 tx_ridx)
>> +{
>> + struct Vmxnet3_TxCompDesc txcq_descr;
>> +
>> + vmxnet3_ring_dump(DRIPRINTF, "TXC", qidx,
>> &s->txq_descr[qidx].comp_ring)**;
>> +
>> + txcq_descr.txdIdx = tx_ridx;
>> + txcq_descr.gen = vmxnet3_ring_curr_gen(&s->txq_**
>> descr[qidx].comp_ring);
>> +
>> + vmxnet3_ring_write_curr_cell(&**s->txq_descr[qidx].comp_ring,
>> &txcq_descr);
>> + vmxnet3_inc_tx_completion_**counter(s, qidx);
>> +
>> + vmxnet3_flush_shmem_changes();
>> + vmxnet3_trigger_interrupt(s, s->txq_descr[qidx].intr_idx);
>> +}
>> +
>> +static bool
>> +vmxnet3_setup_tx_offloads(**Vmxnet3_TxPkt *pkt)
>> +{
>> + Vmxnet3_TxPktMdata *mdata = vmxnet3_txpkt_get_mdata(pkt);
>> + struct virtio_net_hdr *vhdr = vmxnet3_txpkt_get_vhdr(pkt);
>> + bool res = true;
>> +
>> + vhdr->hdr_len = mdata->hdr_length;
>> +
>> + switch (mdata->offload_mode) {
>> + case VMXNET3_OM_NONE:
>> + vhdr->flags = 0;
>> + vhdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
>> + vhdr->gso_size = 0;
>> + break;
>> +
>> + case VMXNET3_OM_CSUM: {
>> + DPKPRINTF("L4 CSO requested data_offset: %d, csoff: %d",
>> + mdata->hdr_length, mdata->cso_or_gso_size);
>> + vhdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
>> + vhdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
>> + vhdr->gso_size = 0;
>> + vhdr->csum_start = mdata->hdr_length;
>> + vhdr->csum_offset = mdata->cso_or_gso_size -
>> mdata->hdr_length;
>> + }
>> + break;
>> +
>> + case VMXNET3_OM_TSO: {
>> + uint8_t *l2hdr = vmxnet3_txpkt_get_l2hdr(pkt);
>> + size_t l2hdr_len = vmxnet3_txpkt_get_l2hdr_len(**pkt);
>> + uint8_t *l3hdr = vmxnet3_txpkt_get_l3hdr(pkt);
>> + size_t l3hdr_len = vmxnet3_txpkt_get_l3hdr_len(**pkt);
>> + uint16_t l3_proto = eth_get_l3_proto(l2hdr, l2hdr_len);
>> + size_t payload_len = vmxnet3_txpkt_get_payload_len(**
>> pkt);
>> +
>> + vhdr->gso_type = eth_get_gso_type(l3_proto, l3hdr);
>> + vhdr->gso_size = mdata->cso_or_gso_size;
>> + res = eth_setup_tx_offloads(l3hdr, l3hdr_len, l2hdr_len,
>> + payload_len, vhdr,
>> + vmxnet3_txpkt_get_more_frags(**
>> pkt),
>> + vmxnet3_txpkt_get_frag_off(**
>> pkt));
>> + DPKPRINTF("GSO offload type %d requested.", vhdr->gso_type);
>> + }
>> + break;
>> +
>> + default:
>> + assert(false);
>> + }
>> +
>> + return res;
>> +}
>> +
>> +static size_t
>> +vmxnet3_txpkt_adopt_headers(**Vmxnet3_TxPkt *pkt,
>> + size_t data_len,
>> + target_phys_addr_t data_pa,
>> + bool needs_tso)
>> +{
>> + /* Copy L2 header */
>> + uint8_t *l2hdr = vmxnet3_txpkt_get_l2hdr(pkt);
>> + uint8_t *l3hdr = vmxnet3_txpkt_get_l3hdr(pkt);
>> + size_t l2hdr_len = 0;
>> + size_t l3hdr_len = 0;
>> +
>> + assert(data_len >= ETH_MAX_L2_HDR_LEN);
>> + cpu_physical_memory_read(data_**pa, l2hdr, ETH_MAX_L2_HDR_LEN);
>> + l2hdr_len = eth_get_l2_hdr_length(l2hdr);
>> + vmxnet3_txpkt_set_l2hdr_len(**pkt, l2hdr_len);
>> +
>> + /* If packet requires offload - copy L3 header */
>> + if (needs_tso) {
>> + switch (eth_get_l3_proto(l2hdr, l2hdr_len)) {
>> + case ETH_P_IP: {
>> + target_phys_addr_t ip_opt_pa;
>> + struct ip_header *iphdr = (struct ip_header *) l3hdr;
>> + assert(data_len >= l2hdr_len + sizeof(struct ip_header));
>> + cpu_physical_memory_read(data_**pa + l2hdr_len,
>> + l3hdr, sizeof(struct
>> ip_header));
>> + l3hdr_len = IP_HDR_GET_LEN(l3hdr);
>> + ip_opt_pa = data_pa + l2hdr_len + sizeof(struct
>> ip_header);
>> + cpu_physical_memory_read(ip_**opt_pa,
>> + l3hdr + sizeof(struct
>> ip_header),
>> + l3hdr_len - sizeof(struct
>> ip_header));
>> + pkt->max_payload_len =
>> + IP_FRAG_ALIGN_SIZE(VMXNET3_**MAX_IP_PLOAD_LEN);
>> + pkt->fragmentation.orig_more_**frags =
>> + FLAG_IS_SET(be16_to_cpu(iphdr-**>ip_off), IP_MF);
>> + }
>> + break;
>> +
>> + case ETH_P_IPV6: {
>> + target_phys_addr_t l3hdr_pa = data_pa + l2hdr_len;
>> + l3hdr_len = sizeof(struct ip6_header);
>> + assert(data_len >= l2hdr_len + l3hdr_len);
>> + cpu_physical_memory_read(**l3hdr_pa, l3hdr, l3hdr_len);
>> + pkt->max_payload_len = 0;
>> + }
>> + break;
>> +
>> + default: {
>> + l3hdr_len = 0;
>> + pkt->max_payload_len = 0;
>> + }
>> + break;
>> + }
>> + }
>> +
>> + vmxnet3_txpkt_set_l3hdr_len(**pkt, l3hdr_len);
>> +
>> + /* Return amount of data adopted */
>> + return l2hdr_len + l3hdr_len;
>> +}
>> +
>> +static void
>> +vmxnet3_tx_retrieve_metadata(**Vmxnet3_TxPkt *pkt,
>> + const struct Vmxnet3_TxDesc *txd)
>> +{
>> + struct eth_header *ehdr =
>> + (struct eth_header *) vmxnet3_txpkt_get_l2hdr(pkt);
>> +
>> + Vmxnet3_TxPktMdata *mdata = vmxnet3_txpkt_get_mdata(pkt);
>> + mdata->offload_mode = txd->om;
>> + mdata->cso_or_gso_size = txd->msscof;
>> + mdata->hdr_length = txd->hlen;
>> + mdata->packet_type = get_eth_packet_type(ehdr);
>> +
>> + if (txd->ti) {
>> + eth_setup_vlan_headers(ehdr, txd->tci);
>> + }
>> +}
>> +
>> +static size_t
>> +vmxnet3_txpkt_adopt_data_**fragment(Vmxnet3_TxPkt *pkt,
>> + target_phys_addr_t data_pa,
>> + size_t data_len,
>> + uint32_t fragment_num)
>> +{
>> + struct iovec *v = vmxnet3_txpkt_get_payload_**frag(pkt,
>> fragment_num);
>> + size_t bytes_to_adopt = data_len;
>> +
>> + if ((0 != pkt->max_payload_len) &&
>> + (pkt->payload_len + data_len > pkt->max_payload_len)) {
>> + bytes_to_adopt = pkt->max_payload_len - pkt->payload_len;
>> + }
>> +
>> + v->iov_base = (void *) (uint64_t) data_pa;
>> + v->iov_len = bytes_to_adopt;
>> + pkt->payload_len += bytes_to_adopt;
>> + return bytes_to_adopt;
>> +}
>> +
>> +typedef enum {
>> + VMXNET3_SUCCEEDED = 0xBEEFBEEF,
>> + VMXNET3_OUT_OF_BUF,
>> + VMXNET3_PKT_ERROR
>> +} Vmxnet3_PktStatus;
>> +
>> +static void
>> +vmxnet3_on_tx_done_update_**stats(VMXNET3_State *s,
>> + Vmxnet3_TxPkt *pkt,
>> + int qidx,
>> + Vmxnet3_PktStatus status)
>> +{
>> + Vmxnet3_TxPktMdata *mdata = vmxnet3_txpkt_get_mdata(&s->**
>> curr_txpkt);
>> + size_t tot_len = vmxnet3_txpkt_get_total_len(**pkt);
>> + struct UPT1_TxStats *stats = &s->txq_descr[qidx].txq_stats;
>> +
>> + switch (status) {
>> + case VMXNET3_SUCCEEDED: {
>> + switch (mdata->packet_type) {
>> + case VMXNET3_PKT_BCAST:
>> + stats->bcastPktsTxOK++;
>> + stats->bcastBytesTxOK += tot_len;
>> + break;
>> + case VMXNET3_PKT_MCAST:
>> + stats->mcastPktsTxOK++;
>> + stats->mcastBytesTxOK += tot_len;
>> + break;
>> + case VMXNET3_PKT_UCAST:
>> + stats->ucastPktsTxOK++;
>> + stats->ucastBytesTxOK += tot_len;
>> + break;
>> + default:
>> + assert(false);
>> + }
>> +
>> + if (VMXNET3_OM_TSO == mdata->offload_mode) {
>> + /* According to VMWARE headers this statistic is a number
>> */
>> + /* of packets after segmentation but since we don't have
>> */
>> + /* this information in QEMU model, the best we can do is to
>> */
>> + /* provide number of non-segmented packets
>> */
>> + stats->TSOPktsTxOK++;
>> + stats->TSOBytesTxOK += tot_len;
>> + }
>> + }
>> + break;
>> +
>> + case VMXNET3_PKT_ERROR: {
>> + stats->pktsTxDiscard++;
>> + }
>> + break;
>> +
>> + case VMXNET3_OUT_OF_BUF: {
>> + stats->pktsTxError++;
>> + }
>> + break;
>> +
>> + default:
>> + assert(false);
>> + }
>> +}
>> +
>> +static void
>> +vmxnet3_on_rx_done_update_**stats(VMXNET3_State *s,
>> + Vmxnet3_RxPkt *pkt,
>> + int qidx,
>> + Vmxnet3_PktStatus status)
>> +{
>> + Vmxnet3_RxPktMdata *mdata = vmxnet3_rxpkt_get_mdata(pkt);
>> + struct UPT1_RxStats *stats = &s->rxq_descr[qidx].rxq_stats;
>> +
>> + switch (status) {
>> + case VMXNET3_OUT_OF_BUF:
>> + stats->pktsRxOutOfBuf++;
>> + break;
>> +
>> + case VMXNET3_PKT_ERROR:
>> + stats->pktsRxError++;
>> + break;
>> +
>> + case VMXNET3_SUCCEEDED:
>> + switch (mdata->packet_type) {
>> + case VMXNET3_PKT_BCAST:
>> + stats->bcastPktsRxOK++;
>> + stats->bcastBytesRxOK += mdata->tot_len;
>> + break;
>> + case VMXNET3_PKT_MCAST:
>> + stats->mcastPktsRxOK++;
>> + stats->mcastBytesRxOK += mdata->tot_len;
>> + break;
>> + case VMXNET3_PKT_UCAST:
>> + stats->ucastPktsRxOK++;
>> + stats->ucastBytesRxOK += mdata->tot_len;
>> + break;
>> + default:
>> + assert(false);
>> + }
>> +
>> + if (mdata->tot_len > s->mtu) {
>> + stats->LROPktsRxOK++;
>> + stats->LROBytesRxOK += mdata->tot_len;
>> + }
>> + break;
>> +
>> + default:
>> + assert(false);
>> + }
>> +}
>> +
>> +static inline bool
>> +vmxnet3_pop_next_tx_descr(**VMXNET3_State *s,
>> + int qidx,
>> + struct Vmxnet3_TxDesc *txd,
>> + uint32_t *descr_idx)
>> +{
>> + Vmxnet3_Ring *ring = &s->txq_descr[qidx].tx_ring;
>> +
>> + vmxnet3_ring_read_curr_cell(**ring, txd);
>> + if (txd->gen == vmxnet3_ring_curr_gen(ring)) {
>> + vmxnet3_ring_dump(DRIPRINTF, "TX", qidx, ring);
>> + *descr_idx = vmxnet3_ring_curr_cell_idx(**ring);
>> + vmxnet3_inc_tx_consumption_**counter(s, qidx);
>> + return true;
>> + }
>> +
>> + return false;
>> +}
>> +
>> +static bool
>> +vmxnet3_send_packet(VMXNET3_**State *s, Vmxnet3_TxPkt *pkt, uint32_t
>> qidx)
>> +{
>> + uint32_t mapped_fragments;
>> + void *mapped = vmxnet3_txpkt_map(&s->curr_**txpkt,
>> &mapped_fragments, false);
>> + if (NULL != mapped) {
>> + bool res;
>> + if (vmxnet3_setup_tx_offloads(**pkt)) {
>> + vmxnet3_dump_virt_hdr(vmxnet3_**txpkt_get_vhdr(pkt));
>> + vmxnet3_txpkt_dump(pkt);
>> + qemu_sendv_packet(&s->nic->nc, mapped, mapped_fragments);
>> + vmxnet3_on_tx_done_update_**stats(s, pkt, qidx,
>> VMXNET3_SUCCEEDED);
>> + res = true;
>> + } else {
>> + vmxnet3_on_tx_done_update_**stats(s, pkt, qidx,
>> VMXNET3_PKT_ERROR);
>> + res = false;
>> + }
>> + vmxnet3_txpkt_unmap(pkt, false);
>> + return res;
>> + } else {
>> + vmxnet3_on_tx_done_update_**stats(s, &s->curr_txpkt,
>> + qidx, VMXNET3_OUT_OF_BUF);
>> + return false;
>> + }
>> +}
>> +
>> +static void vmxnet3_process_tx_queue(**VMXNET3_State *s, int qidx)
>> +{
>> + struct Vmxnet3_TxDesc txd;
>> + uint32_t txd_idx;
>> + uint32_t data_len;
>> + target_phys_addr_t data_pa;
>> + size_t bytes_adopted;
>> +
>> + for (;;) {
>> + if (!vmxnet3_pop_next_tx_descr(s, qidx, &txd, &txd_idx)) {
>> + break;
>> + }
>> +
>> + vmxnet3_dump_tx_descr(&txd);
>> +
>> + if (!s->curr_txpkt_skip) {
>> + data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE;
>> + data_pa = le64_to_cpu(txd.addr);
>> +
>> + if (!s->curr_txpkt_header_**processed) {
>> + bool needs_tso = (VMXNET3_OM_TSO == txd.om);
>> + bytes_adopted = vmxnet3_txpkt_adopt_headers(&**
>> s->curr_txpkt,
>> + data_len,
>> + data_pa,
>> + needs_tso);
>> + vmxnet3_tx_retrieve_metadata(&**s->curr_txpkt, &txd);
>> + data_pa += bytes_adopted;
>> + data_len -= bytes_adopted;
>> + s->curr_txpkt_header_processed = true;
>> + }
>> +
>> + do {
>> + if (0 != data_len) {
>> + int frag_num = s->curr_txpkt_pl_frags++;
>> + bytes_adopted =
>> + vmxnet3_txpkt_adopt_data_**
>> fragment(&s->curr_txpkt,
>> + data_pa,
>> + data_len,
>> + frag_num);
>> + data_pa += bytes_adopted;
>> + data_len -= bytes_adopted;
>> + }
>> +
>> + if ((0 != data_len) || txd.eop) {
>> + size_t frag_off;
>> +
>> + vmxnet3_txpkt_set_num_pl_**frags(&s->curr_txpkt,
>> +
>> s->curr_txpkt_pl_frags);
>> +
>> + vmxnet3_txpkt_set_more_frags(&**s->curr_txpkt,
>> + (0 != data_len));
>> +
>> + s->curr_txpkt_skip =
>> + !vmxnet3_send_packet(s, &s->curr_txpkt, qidx);
>> +
>> + frag_off = vmxnet3_txpkt_get_payload_len(**&s->curr_txpkt)
>> /
>> + IP_FRAG_UNIT_SIZE;
>> +
>> + vmxnet3_txpkt_advance_frag_**off(&s->curr_txpkt,
>> frag_off);
>> +
>> + vmxnet3_txpkt_reset_payload(&**s->curr_txpkt);
>> + s->curr_txpkt_pl_frags = 0;
>> + }
>> + } while (0 != data_len);
>> + }
>> +
>> + if (txd.eop) {
>> + vmxnet3_complete_packet(s, qidx, txd_idx);
>> + vmxnet3_txpkt_reset(&s->curr_**txpkt);
>> + s->curr_txpkt_skip = false;
>> + s->curr_txpkt_header_processed = false;
>> + }
>> + }
>> +}
>> +
>> +static inline void
>> +vmxnet3_read_next_rx_descr(**VMXNET3_State *s, int qidx, int ridx,
>> + struct Vmxnet3_RxDesc *dbuf, uint32_t *didx)
>> +{
>> + Vmxnet3_Ring *ring = &s->rxq_descr[qidx].rx_ring[**ridx];
>> + *didx = vmxnet3_ring_curr_cell_idx(**ring);
>> + vmxnet3_ring_read_curr_cell(**ring, dbuf);
>> +}
>> +
>> +static inline uint8_t
>> +vmxnet3_get_rx_ring_gen(**VMXNET3_State *s, int qidx, int ridx)
>> +{
>> + return s->rxq_descr[qidx].rx_ring[**ridx].gen;
>> +}
>> +
>> +static inline target_phys_addr_t
>> +vmxnet3_pop_rxc_descr(**VMXNET3_State *s, int qidx, uint32_t *descr_gen)
>> +{
>> + uint8_t ring_gen;
>> + struct Vmxnet3_RxCompDesc rxcd;
>> +
>> + target_phys_addr_t daddr =
>> + vmxnet3_ring_curr_cell_pa(&s->**rxq_descr[qidx].comp_ring);
>> +
>> + cpu_physical_memory_read(**daddr, &rxcd, sizeof(struct
>> Vmxnet3_RxCompDesc));
>> + ring_gen = vmxnet3_ring_curr_gen(&s->rxq_**descr[qidx].comp_ring);
>> +
>> + if (rxcd.gen != ring_gen) {
>> + *descr_gen = ring_gen;
>> + vmxnet3_inc_rx_completion_**counter(s, qidx);
>> + return daddr;
>> + }
>> +
>> + return 0;
>> +}
>> +
>> +static inline void
>> +vmxnet3_revert_rxc_descr(**VMXNET3_State *s, int qidx)
>> +{
>> + vmxnet3_dec_rx_completion_**counter(s, qidx);
>> +}
>> +
>> +#define RXQ_IDX (0)
>> +#define RX_HEAD_BODY_RING (0)
>> +#define RX_BODY_ONLY_RING (1)
>> +
>> +static bool
>> +vmxnet3_get_next_head_rx_**descr(VMXNET3_State *s,
>> + struct Vmxnet3_RxDesc *descr_buf,
>> + uint32_t *descr_idx,
>> + uint32_t *ridx)
>> +{
>> + for (;;) {
>> + uint32_t ring_gen;
>> + vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING,
>> + descr_buf, descr_idx);
>> +
>> + /* If no more free descriptors - return */
>> + ring_gen = vmxnet3_get_rx_ring_gen(s, RXQ_IDX,
>> RX_HEAD_BODY_RING);
>> + if (descr_buf->gen != ring_gen) {
>> + return false;
>> + }
>> +
>> + /* Mark current descriptor as used/skipped */
>> + vmxnet3_inc_rx_consumption_**counter(s, RXQ_IDX,
>> RX_HEAD_BODY_RING);
>> +
>> + /* If this is what we are looking for - return */
>> + if (VMXNET3_RXD_BTYPE_HEAD == descr_buf->btype) {
>> + *ridx = RX_HEAD_BODY_RING;
>> + return true;
>> + }
>> + }
>> +}
>> +
>> +static bool
>> +vmxnet3_get_next_body_rx_**descr(VMXNET3_State *s,
>> + struct Vmxnet3_RxDesc *dbuf,
>> + uint32_t *didx,
>> + uint32_t *ridx)
>> +{
>> + vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_HEAD_BODY_RING, dbuf,
>> didx);
>> +
>> + /* Try to find corresponding descriptor in head/body ring */
>> + if ((dbuf->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX,
>> RX_HEAD_BODY_RING)) &&
>> + (VMXNET3_RXD_BTYPE_BODY == dbuf->btype)) {
>> + vmxnet3_inc_rx_consumption_**counter(s, RXQ_IDX,
>> RX_HEAD_BODY_RING);
>> + *ridx = RX_HEAD_BODY_RING;
>> + return true;
>> + }
>> +
>> + /* If there is no free descriptors on head/body ring or next free */
>> + /* descriptor is a head descriptor switch to body only ring */
>> + vmxnet3_read_next_rx_descr(s, RXQ_IDX, RX_BODY_ONLY_RING, dbuf,
>> didx);
>> +
>> + /* If no more free descriptors - return */
>> + if (dbuf->gen == vmxnet3_get_rx_ring_gen(s, RXQ_IDX,
>> RX_BODY_ONLY_RING)) {
>> + assert(VMXNET3_RXD_BTYPE_BODY == dbuf->btype);
>> + *ridx = RX_BODY_ONLY_RING;
>> + vmxnet3_inc_rx_consumption_**counter(s, RXQ_IDX,
>> RX_BODY_ONLY_RING);
>> + return true;
>> + }
>> +
>> + return false;
>> +}
>> +
>> +static inline bool
>> +vmxnet3_get_next_rx_descr(**VMXNET3_State *s, bool is_head,
>> + struct Vmxnet3_RxDesc *descr_buf,
>> + uint32_t *descr_idx,
>> + uint32_t *ridx)
>> +{
>> + if (is_head || !s->rx_packets_compound) {
>> + return vmxnet3_get_next_head_rx_**descr(s, descr_buf,
>> descr_idx, ridx);
>> + } else {
>> + return vmxnet3_get_next_body_rx_**descr(s, descr_buf,
>> descr_idx, ridx);
>> + }
>> +}
>> +
>> +static void
>> +vmxnet3_rx_put_metadata_to_**descr(Vmxnet3_RxPkt *pkt,
>> + struct Vmxnet3_RxCompDesc *rxcd)
>> +{
>> + int csum_correct, is_gso;
>> + bool isip4, isip6, istcp, isudp;
>> + uint8_t headers[ETH_MAX_L2_HDR_LEN + ETH_MAX_L3_HDR_LEN];
>> + Vmxnet3_RxPktMdata *mdata = vmxnet3_rxpkt_get_mdata(pkt);
>> + struct virtio_net_hdr *vhdr;
>> + uint8_t offload_type;
>> +
>> + if (mdata->vlan_stripped) {
>> + rxcd->ts = 1;
>> + rxcd->tci = mdata->vlan_tag;
>> + }
>> +
>> + if (!mdata->vhdr_valid) {
>> + goto nocsum;
>> + }
>> +
>> + vhdr = vmxnet3_rxpkt_get_vhdr(pkt);
>> + /* Checksum is valid when lower level tell so or when lower level
>> */
>> + /* requires checksum offload telling that packet produced/bridged
>> */
>> + /* locally and did travel over network after last checksum
>> calculation */
>> + /* or production
>> */
>> + csum_correct = FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_DATA_VALID)
>> ||
>> + FLAG_IS_SET(vhdr->flags, VIRTIO_NET_HDR_F_NEEDS_CSUM);
>> +
>> + offload_type = vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
>> + is_gso = (offload_type != VIRTIO_NET_HDR_GSO_NONE) ? 1 : 0;
>> +
>> + if (!csum_correct && !is_gso) {
>> + goto nocsum;
>> + }
>> +
>> + /* Make linear copy of L2-L3 headers and parse it */
>> + iov_to_buf(vmxnet3_rxpkt_get_**frag(pkt, 0),
>> + vmxnet3_rxpkt_get_num_frags(**pkt),
>> + headers, 0, MIN(sizeof(headers), mdata->tot_len));
>> +
>> + eth_get_protocols(headers, MIN(sizeof(headers), mdata->tot_len),
>> + &isip4, &isip6, &isudp, &istcp);
>> + if ((!istcp && !isudp) || (!isip4 && !isip6)) {
>> + goto nocsum;
>> + }
>> +
>> + rxcd->cnc = 0;
>> + rxcd->v4 = isip4 ? 1 : 0;
>> + rxcd->v6 = isip6 ? 1 : 0;
>> + rxcd->tcp = istcp ? 1 : 0;
>> + rxcd->udp = isudp ? 1 : 0;
>> + rxcd->fcs = rxcd->tuc = rxcd->ipc = 1;
>> + return;
>> +
>> +nocsum:
>> + rxcd->cnc = 1;
>> + return;
>> +}
>> +
>> +static void
>> +vmxnet3_physical_memory_**writev(const struct iovec *iov,
>> + size_t start_iov_off,
>> + target_phys_addr_t target_addr,
>> + size_t bytes_to_copy)
>> +{
>> + size_t curr_off = 0;
>> + size_t copied = 0;
>> +
>> + while (bytes_to_copy) {
>> + if (start_iov_off < (curr_off + iov->iov_len)) {
>> + size_t chunk_len =
>> + MIN((curr_off + iov->iov_len) - start_iov_off,
>> bytes_to_copy);
>> +
>> + cpu_physical_memory_write(**target_addr + copied,
>> + iov->iov_base + start_iov_off -
>> curr_off,
>> + chunk_len);
>> +
>> + copied += chunk_len;
>> + start_iov_off += chunk_len;
>> + curr_off = start_iov_off;
>> + bytes_to_copy -= chunk_len;
>> + } else {
>> + curr_off += iov->iov_len;
>> + }
>> + iov++;
>> + }
>> +}
>> +
>> +static bool
>> +vmxnet3_indicate_packet(**VMXNET3_State *s, Vmxnet3_RxPkt *pkt)
>> +{
>> + struct Vmxnet3_RxDesc rxd;
>> + bool is_head = true;
>> + uint32_t rxd_idx;
>> + uint32_t rx_ridx;
>> +
>> + struct Vmxnet3_RxCompDesc rxcd;
>> + uint32_t new_rxcd_gen = VMXNET3_INIT_GEN;
>> + target_phys_addr_t new_rxcd_pa = 0;
>> + target_phys_addr_t ready_rxcd_pa = 0;
>> + struct iovec *data = vmxnet3_rxpkt_get_frag(pkt, 0);
>> + Vmxnet3_RxPktMdata *mdata = vmxnet3_rxpkt_get_mdata(pkt);
>> + size_t bytes_copied = 0;
>> + size_t bytes_left = mdata->tot_len;
>> + uint16_t num_frags = 0;
>> +
>> + vmxnet3_rxpkt_dump(pkt);
>> +
>> + while ((num_frags < s->max_rx_frags) &&
>> + (bytes_left > 0) &&
>> + (new_rxcd_pa = vmxnet3_pop_rxc_descr(s, RXQ_IDX,
>> &new_rxcd_gen)) &&
>> + vmxnet3_get_next_rx_descr(s, is_head, &rxd, &rxd_idx,
>> &rx_ridx)) {
>> + size_t chunk_size = MIN(bytes_left, rxd.len);
>> + vmxnet3_physical_memory_**writev(data, bytes_copied,
>> + le64_to_cpu(rxd.addr),
>> chunk_size);
>> + bytes_copied += chunk_size;
>> + bytes_left -= chunk_size;
>> +
>> + vmxnet3_dump_rx_descr(&rxd);
>> +
>> + if (0 != ready_rxcd_pa) {
>> + cpu_physical_memory_write(**ready_rxcd_pa, &rxcd,
>> sizeof(rxcd));
>> + }
>> +
>> + memset(&rxcd, 0, sizeof(struct Vmxnet3_RxCompDesc));
>> + rxcd.rxdIdx = rxd_idx;
>> + rxcd.len = chunk_size;
>> + rxcd.sop = is_head;
>> + rxcd.gen = new_rxcd_gen;
>> + rxcd.rqID = RXQ_IDX + rx_ridx*s->rxq_num;
>> +
>> + if (0 == bytes_left) {
>> + vmxnet3_rx_put_metadata_to_**descr(pkt, &rxcd);
>> + }
>> +
>> + DRIPRINTF("RX Completion descriptor: rxRing: %lu rxIdx %lu len
>> %lu "
>> + "sop %d csum_correct %lu",
>> + (unsigned long) rx_ridx,
>> + (unsigned long) rxcd.rxdIdx,
>> + (unsigned long) rxcd.len,
>> + (int) rxcd.sop,
>> + (unsigned long) rxcd.tuc);
>> +
>> + is_head = false;
>> + ready_rxcd_pa = new_rxcd_pa;
>> + new_rxcd_pa = 0;
>> + }
>> +
>> + if (0 != ready_rxcd_pa) {
>> + rxcd.eop = 1;
>> + rxcd.err = (0 != bytes_left);
>> + cpu_physical_memory_write(**ready_rxcd_pa, &rxcd, sizeof(rxcd));
>> + vmxnet3_flush_shmem_changes();
>> + }
>> +
>> + if (0 != new_rxcd_pa) {
>> + vmxnet3_revert_rxc_descr(s, RXQ_IDX);
>> + }
>> +
>> + vmxnet3_trigger_interrupt(s, s->rxq_descr[RXQ_IDX].intr_**idx);
>> +
>> + if (bytes_left == 0) {
>> + vmxnet3_on_rx_done_update_**stats(s, pkt, RXQ_IDX,
>> VMXNET3_SUCCEEDED);
>> + return true;
>> + } else if (num_frags == s->max_rx_frags) {
>> + vmxnet3_on_rx_done_update_**stats(s, pkt, RXQ_IDX,
>> VMXNET3_PKT_ERROR);
>> + return false;
>> + } else {
>> + vmxnet3_on_rx_done_update_**stats(s, pkt, RXQ_IDX,
>> VMXNET3_OUT_OF_BUF);
>> + return false;
>> + }
>> +}
>> +
>> +static void
>> +vmxnet3_io_bar0_write(void *opaque, target_phys_addr_t addr,
>> + uint64_t val, unsigned size)
>> +{
>> + VMXNET3_State *s = opaque;
>> +
>> + if (IS_MULTIREG_ADDR(addr, VMXNET3_REG_TXPROD,
>> + VMXNET3_DEVICE_MAX_TX_QUEUES,
>> VMXNET3_REG_ALIGN)) {
>> + int tx_queue_idx =
>> + MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_TXPROD,
>> VMXNET3_REG_ALIGN);
>> + assert(tx_queue_idx <= s->txq_num);
>> + vmxnet3_process_tx_queue(s, tx_queue_idx);
>> + return;
>> + }
>> +
>> +#pragma GCC diagnostic push
>> +#pragma GCC diagnostic ignored "-Wtype-limits"
>> + if (IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
>> + VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
>> + int l = MULTIREG_IDX_BY_ADDR(addr, VMXNET3_REG_IMR,
>> VMXNET3_REG_ALIGN);
>> +
>> + DCBPRINTF("Interrupt mask for line %d written: 0x%" PRIx64, l,
>> val);
>> +
>> + vmxnet3_on_interrupt_mask_**changed(s, l, val);
>> + return;
>> + }
>> +#pragma GCC diagnostic pop
>> +
>> + if (IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD,
>> + VMXNET3_DEVICE_MAX_RX_QUEUES, VMXNET3_REG_ALIGN)
>> ||
>> + IS_MULTIREG_ADDR(addr, VMXNET3_REG_RXPROD2,
>> + VMXNET3_DEVICE_MAX_RX_QUEUES,
>> VMXNET3_REG_ALIGN)) {
>> + return;
>> + }
>> +
>> + DWRPRINTF("BAR0 unknown write [%" PRIx64 "] = %" PRIx64 ", size %d",
>> + (uint64_t) addr, val, size);
>> +}
>> +
>> +static uint64_t
>> +vmxnet3_io_bar0_read(void *opaque, target_phys_addr_t addr, unsigned
>> size)
>> +{
>> +#pragma GCC diagnostic push
>> +#pragma GCC diagnostic ignored "-Wtype-limits"
>> + if (IS_MULTIREG_ADDR(addr, VMXNET3_REG_IMR,
>> + VMXNET3_MAX_INTRS, VMXNET3_REG_ALIGN)) {
>> + assert(false);
>> + }
>> +#pragma GCC diagnostic pop
>> +
>> + DCBPRINTF("BAR0 unknown read [%" PRIx64 "], size %d",
>> + (uint64_t) addr, size);
>> + return 0;
>> +}
>> +
>> +static void vmxnet3_reset(VMXNET3_State *s)
>> +{
>> + DCBPRINTF("Resetting vmxnet3...");
>> +
>> + vmxnet3_txpkt_reset(&s->curr_**txpkt);
>> + s->curr_txpkt_pl_frags = 0;
>> + s->curr_txpkt_skip = false;
>> + s->curr_txpkt_header_processed = false;
>> +}
>> +
>> +static void vmxnet3_deactivate_device(**VMXNET3_State *s)
>> +{
>> + DCBPRINTF("Deactivating vmxnet3...");
>> + s->device_active = false;
>> +}
>> +
>> +static void vmxnet3_update_rx_mode(**VMXNET3_State *s)
>> +{
>> + s->rx_mode = VMXNET3_READ_DRV_SHARED32(s->**drv_shmem,
>> + devRead.rxFilterConf.rxMode);
>> + DCFPRINTF("RX mode: 0x%08X", s->rx_mode);
>> +}
>> +
>> +static void vmxnet3_update_vlan_filters(**VMXNET3_State *s)
>> +{
>> + int i;
>> +
>> + /* Copy configuration from shared memory */
>> + VMXNET3_READ_DRV_SHARED(s->**drv_shmem,
>> + devRead.rxFilterConf.vfTable,
>> + s->vlan_table,
>> + sizeof(s->vlan_table));
>> +
>> + /* Invert byte order when needed */
>> + for (i = 0; i < ARRAY_SIZE(s->vlan_table); i++) {
>> + s->vlan_table[i] = le32_to_cpu(s->vlan_table[i]);
>> + }
>> +
>> + /* Dump configuration for debugging purposes */
>> + DCFPRINTF("Configured VLANs:");
>> + for (i = 0; i < sizeof(s->vlan_table) * 8; i++) {
>> + if (VMXNET3_VFTABLE_ENTRY_IS_SET(**s->vlan_table, i)) {
>> + DCFPRINTF("\tVLAN %d is present", i);
>> + }
>> + }
>> +}
>> +
>> +static void vmxnet3_update_mcast_filters(**VMXNET3_State *s)
>> +{
>> + uint16_t list_bytes =
>> + VMXNET3_READ_DRV_SHARED16(s->**drv_shmem,
>> + devRead.rxFilterConf.**mfTableLen);
>> +
>> + s->mcast_list_len = list_bytes / sizeof(s->mcast_list[0]);
>> +
>> + s->mcast_list = g_realloc(s->mcast_list, list_bytes);
>> + if (NULL == s->mcast_list) {
>> + if (0 == s->mcast_list_len) {
>> + DCFPRINTF("Current multicast list is empty");
>> + } else {
>> + DERPRINTF("Failed to allocate multicast list of %d elements",
>> + s->mcast_list_len);
>> + }
>> + s->mcast_list_len = 0;
>> + } else {
>> + int i;
>> + target_phys_addr_t mcast_list_pa =
>> + VMXNET3_READ_DRV_SHARED64(s->**drv_shmem,
>> + devRead.rxFilterConf.**mfTablePA);
>> +
>> + cpu_physical_memory_read(**mcast_list_pa, s->mcast_list,
>> list_bytes);
>> + DCFPRINTF("Current multicast list len is %d:",
>> s->mcast_list_len);
>> + for (i = 0; i < s->mcast_list_len; i++) {
>> + DCFPRINTF("\t" MAC_FMT, MAC_ARG(s->mcast_list[i].a));
>> + }
>> + }
>> +}
>> +
>> +static void vmxnet3_setup_rx_filtering(**VMXNET3_State *s)
>> +{
>> + vmxnet3_update_rx_mode(s);
>> + vmxnet3_update_vlan_filters(s)**;
>> + vmxnet3_update_mcast_filters(**s);
>> +}
>> +
>> +static uint32_t vmxnet3_get_interrupt_config(**VMXNET3_State *s)
>> +{
>> + uint32_t interrupt_mode = VMXNET3_IT_AUTO | (VMXNET3_IMM_AUTO << 2);
>> + DCFPRINTF("Interrupt config is 0x%X", interrupt_mode);
>> + return interrupt_mode;
>> +}
>> +
>> +static void vmxnet3_fill_stats(VMXNET3_**State *s)
>> +{
>> + int i;
>> + for (i = 0; i < s->txq_num; i++) {
>> + cpu_physical_memory_write(s->**txq_descr[i].tx_stats_pa,
>> + &s->txq_descr[i].txq_stats,
>> + sizeof(s->txq_descr[i].txq_**stats));
>> + }
>> +
>> + for (i = 0; i < s->rxq_num; i++) {
>> + cpu_physical_memory_write(s->**rxq_descr[i].rx_stats_pa,
>> + &s->rxq_descr[i].rxq_stats,
>> + sizeof(s->rxq_descr[i].rxq_**stats));
>> + }
>> +}
>> +
>> +static void vmxnet3_adjust_by_guest_type(**VMXNET3_State *s)
>> +{
>> + struct Vmxnet3_GOSInfo gos;
>> +
>> + VMXNET3_READ_DRV_SHARED(s->**drv_shmem, devRead.misc.driverInfo.gos,
>> + &gos, sizeof(gos));
>> + s->rx_packets_compound =
>> + (VMXNET3_GOS_TYPE_WIN == gos.gosType) ? false : true;
>> +
>> + DCFPRINTF("Guest type specifics: RXCOMPOUND: %d",
>> s->rx_packets_compound);
>> +}
>> +
>> +static void
>> +vmxnet3_dump_conf_descr(const char *name,
>> + struct Vmxnet3_VariableLenConfDesc *pm_descr)
>> +{
>> + DCFPRINTF("%s descriptor dump: Version %u, Length %u",
>> + name, pm_descr->confVer, pm_descr->confLen);
>> +
>> +};
>> +
>> +static void vmxnet3_update_pm_state(**VMXNET3_State *s)
>> +{
>> + struct Vmxnet3_VariableLenConfDesc pm_descr;
>> +
>> + pm_descr.confLen =
>> + VMXNET3_READ_DRV_SHARED32(s->**drv_shmem,
>> devRead.pmConfDesc.confLen);
>> + pm_descr.confVer =
>> + VMXNET3_READ_DRV_SHARED32(s->**drv_shmem,
>> devRead.pmConfDesc.confVer);
>> + pm_descr.confPA =
>> + VMXNET3_READ_DRV_SHARED64(s->**drv_shmem,
>> devRead.pmConfDesc.confPA);
>> +
>> + vmxnet3_dump_conf_descr("PM State", &pm_descr);
>> +}
>> +
>> +static void vmxnet3_update_features(**VMXNET3_State *s)
>> +{
>> + uint32_t guest_features;
>> + int rxcsum_offload_supported;
>> +
>> + guest_features = VMXNET3_READ_DRV_SHARED32(s->**drv_shmem,
>> + devRead.misc.uptFeatures);
>> +
>> + rxcsum_offload_supported = FLAG_IS_SET(guest_features,
>> UPT1_F_RXCSUM);
>> + s->rx_vlan_stripping = FLAG_IS_SET(guest_features, UPT1_F_RXVLAN);
>> + s->lro_supported = FLAG_IS_SET(guest_features, UPT1_F_LRO);
>> +
>> + DCFPRINTF("Features configuration: LRO: %d, RXCSUM: %d, VLANSTRIP:
>> %d",
>> + s->lro_supported, rxcsum_offload_supported,
>> + s->rx_vlan_stripping);
>> +
>> + tap_set_offload(s->nic->nc.**peer,
>> + rxcsum_offload_supported,
>> + s->lro_supported,
>> + s->lro_supported,
>> + 0,
>> + 0);
>> +}
>> +
>> +static void vmxnet3_activate_device(**VMXNET3_State *s)
>> +{
>> + int i;
>> + static const uint32_t VMXNET3_DEF_TX_THRESHOLD = 1;
>> + target_phys_addr_t qdescr_table_pa;
>> + uint64_t pa;
>> + uint32_t size;
>> +
>> + /* Verify configuration consistency */
>> + if (!vmxnet3_verify_driver_magic(**s->drv_shmem)) {
>> + DERPRINTF("Device configuration received from driver is
>> invalid");
>> + return;
>> + }
>> +
>> + vmxnet3_adjust_by_guest_type(**s);
>> + vmxnet3_update_features(s);
>> + vmxnet3_update_pm_state(s);
>> + vmxnet3_setup_rx_filtering(s);
>> + /* Cache fields from shared memory */
>> + s->mtu = VMXNET3_READ_DRV_SHARED32(s->**drv_shmem,
>> devRead.misc.mtu);
>> + DCFPRINTF("MTU is %u", s->mtu);
>> +
>> + s->max_rx_frags =
>> + VMXNET3_READ_DRV_SHARED16(s->**drv_shmem,
>> devRead.misc.maxNumRxSG);
>> +
>> + DCFPRINTF("Max RX fragments is %u", s->max_rx_frags);
>> +
>> + s->event_int_idx =
>> + VMXNET3_READ_DRV_SHARED8(s->**drv_shmem,
>> devRead.intrConf.eventIntrIdx)**;
>> + DCFPRINTF("Events interrupt line is %u", s->event_int_idx);
>> +
>> + s->auto_int_masking =
>> + VMXNET3_READ_DRV_SHARED8(s->**drv_shmem,
>> devRead.intrConf.autoMask);
>> + DCFPRINTF("Automatic interrupt masking is %d",
>> (int)s->auto_int_masking);
>> +
>> + s->txq_num =
>> + VMXNET3_READ_DRV_SHARED8(s->**drv_shmem,
>> devRead.misc.numTxQueues);
>> + s->rxq_num =
>> + VMXNET3_READ_DRV_SHARED8(s->**drv_shmem,
>> devRead.misc.numRxQueues);
>> +
>> + DCFPRINTF("Number of TX/RX queues %u/%u", s->txq_num, s->rxq_num);
>> + assert(s->txq_num <= VMXNET3_DEVICE_MAX_TX_QUEUES);
>> +
>> + qdescr_table_pa =
>> + VMXNET3_READ_DRV_SHARED64(s->**drv_shmem,
>> devRead.misc.queueDescPA);
>> + DCFPRINTF("TX queues descriptors table is at 0x%" PRIx64,
>> + (uint64_t) qdescr_table_pa);
>> +
>> + /* Worst-case scenario is a packet that holds all TX rings space so
>> */
>> + /* we calculate total size of all TX rings for max TX fragments
>> number */
>> + s->max_tx_frags = 0;
>> +
>> + /* TX queues */
>> + for (i = 0; i < s->txq_num; i++) {
>> + target_phys_addr_t qdescr_pa =
>> + qdescr_table_pa + i*sizeof(struct Vmxnet3_TxQueueDesc);
>> +
>> + /* Read interrupt number for this TX queue */
>> + s->txq_descr[i].intr_idx =
>> + VMXNET3_READ_TX_QUEUE_DESCR8(**qdescr_pa, conf.intrIdx);
>> +
>> + DCFPRINTF("TX Queue %d interrupt: %d", i,
>> s->txq_descr[i].intr_idx);
>> +
>> + /* Read rings memory locations for TX queues */
>> + pa = VMXNET3_READ_TX_QUEUE_DESCR64(**qdescr_pa,
>> conf.txRingBasePA);
>> + size = VMXNET3_READ_TX_QUEUE_DESCR32(**qdescr_pa,
>> conf.txRingSize);
>> +
>> + vmxnet3_ring_init(&s->txq_**descr[i].tx_ring, pa, size,
>> + sizeof(struct Vmxnet3_TxDesc), false);
>> + vmxnet3_ring_dump(DCFPRINTF, "TX", i, &s->txq_descr[i].tx_ring);
>> +
>> + s->max_tx_frags += size;
>> +
>> + /* TXC ring */
>> + pa = VMXNET3_READ_TX_QUEUE_DESCR64(**qdescr_pa,
>> conf.compRingBasePA);
>> + size = VMXNET3_READ_TX_QUEUE_DESCR32(**qdescr_pa,
>> conf.compRingSize);
>> + vmxnet3_ring_init(&s->txq_**descr[i].comp_ring, pa, size,
>> + sizeof(struct Vmxnet3_TxCompDesc), true);
>> + vmxnet3_ring_dump(DCFPRINTF, "TXC", i,
>> &s->txq_descr[i].comp_ring);
>> +
>> + s->txq_descr[i].tx_stats_pa =
>> + qdescr_pa + offsetof(struct Vmxnet3_TxQueueDesc, stats);
>> +
>> + memset(&s->txq_descr[i].txq_**stats, 0,
>> + sizeof(s->txq_descr[i].txq_**stats));
>> +
>> + /* Fill device-managed parameters for queues */
>> + VMXNET3_WRITE_TX_QUEUE_**DESCR32(qdescr_pa,
>> + ctrl.txThreshold,
>> + VMXNET3_DEF_TX_THRESHOLD);
>> + }
>> +
>> + /* Preallocate TX packet wrapper */
>> + DCFPRINTF("Max TX fragments is %u", s->max_tx_frags);
>> + if (!vmxnet3_txpkt_prealloc(&s->**curr_txpkt, s->max_tx_frags,
>> + s->peer_has_vhdr)) {
>> + hw_error("TX rings configuration problem");
>> + }
>> +
>> + /* Read rings memory locations for RX queues */
>> + for (i = 0; i < s->rxq_num; i++) {
>> + int j;
>> + target_phys_addr_t qd_pa =
>> + qdescr_table_pa + s->txq_num*sizeof(struct
>> Vmxnet3_TxQueueDesc) +
>> + i*sizeof(struct Vmxnet3_RxQueueDesc);
>> +
>> + /* Read interrupt number for this RX queue */
>> + s->rxq_descr[i].intr_idx =
>> + VMXNET3_READ_TX_QUEUE_DESCR8(**qd_pa, conf.intrIdx);
>> +
>> + DCFPRINTF("RX Queue %d interrupt: %d", i,
>> s->rxq_descr[i].intr_idx);
>> +
>> + /* Read rings memory locations */
>> + for (j = 0; j < VMXNET3_RX_RINGS_PER_QUEUE; j++) {
>> + /* RX rings */
>> + pa = VMXNET3_READ_RX_QUEUE_DESCR64(**qd_pa,
>> conf.rxRingBasePA[j]);
>> + size = VMXNET3_READ_RX_QUEUE_DESCR32(**qd_pa,
>> conf.rxRingSize[j]);
>> + vmxnet3_ring_init(&s->rxq_**descr[i].rx_ring[j], pa, size,
>> + sizeof(struct Vmxnet3_RxDesc), false);
>> + DCFPRINTF("RX queue %d:%d: Base: %" PRIx64 ", Size: %d",
>> + i, j, pa, size);
>> + }
>> +
>> + /* RXC ring */
>> + pa = VMXNET3_READ_RX_QUEUE_DESCR64(**qd_pa,
>> conf.compRingBasePA);
>> + size = VMXNET3_READ_RX_QUEUE_DESCR32(**qd_pa,
>> conf.compRingSize);
>> + vmxnet3_ring_init(&s->rxq_**descr[i].comp_ring, pa, size,
>> + sizeof(struct Vmxnet3_RxCompDesc), true);
>> + DCFPRINTF("RXC queue %d: Base: %" PRIx64 ", Size: %d", i, pa,
>> size);
>> +
>> + s->rxq_descr[i].rx_stats_pa =
>> + qd_pa + offsetof(struct Vmxnet3_RxQueueDesc, stats);
>> + memset(&s->rxq_descr[i].rxq_**stats, 0,
>> + sizeof(s->rxq_descr[i].rxq_**stats));
>> + }
>> +
>> + vmxnet3_flush_shmem_changes();
>> + s->device_active = true;
>> +}
>> +
>> +static void vmxnet3_handle_command(**VMXNET3_State *s, uint64_t cmd)
>> +{
>> + s->last_command = cmd;
>> +
>> + switch (cmd) {
>> + case VMXNET3_CMD_GET_PERM_MAC_HI:
>> + DCBPRINTF("Set: Get upper part of permanent MAC");
>> + break;
>> +
>> + case VMXNET3_CMD_GET_PERM_MAC_LO:
>> + DCBPRINTF("Set: Get lower part of permanent MAC");
>> + break;
>> +
>> + case VMXNET3_CMD_GET_STATS:
>> + DCBPRINTF("Set: Get device statistics");
>> + vmxnet3_fill_stats(s);
>> + break;
>> +
>> + case VMXNET3_CMD_ACTIVATE_DEV:
>> + DCBPRINTF("Set: Activating vmxnet3 device");
>> + vmxnet3_activate_device(s);
>> + break;
>> +
>> + case VMXNET3_CMD_UPDATE_RX_MODE:
>> + DCBPRINTF("Set: Update rx mode");
>> + vmxnet3_update_rx_mode(s);
>> + break;
>> +
>> + case VMXNET3_CMD_UPDATE_VLAN_**FILTERS:
>> + DCBPRINTF("Set: Update VLAN filters");
>> + vmxnet3_update_vlan_filters(s)**;
>> + break;
>> +
>> + case VMXNET3_CMD_UPDATE_MAC_**FILTERS:
>> + DCBPRINTF("Set: Update MAC filters");
>> + vmxnet3_update_mcast_filters(**s);
>> + break;
>> +
>> + case VMXNET3_CMD_UPDATE_FEATURE:
>> + DCBPRINTF("Set: Update features");
>> + vmxnet3_update_features(s);
>> + break;
>> +
>> + case VMXNET3_CMD_UPDATE_PMCFG:
>> + DCBPRINTF("Set: Update power management config");
>> + vmxnet3_update_pm_state(s);
>> + break;
>> +
>> + case VMXNET3_CMD_GET_LINK:
>> + DCBPRINTF("Set: Get link");
>> + break;
>> +
>> + case VMXNET3_CMD_RESET_DEV:
>> + DCBPRINTF("Set: Reset device");
>> + vmxnet3_reset(s);
>> + break;
>> +
>> + case VMXNET3_CMD_QUIESCE_DEV:
>> + DCBPRINTF("Set: VMXNET3_CMD_QUIESCE_DEV - pause the device");
>> + vmxnet3_deactivate_device(s);
>> + break;
>> +
>> + case VMXNET3_CMD_GET_CONF_INTR:
>> + DCBPRINTF("Set: VMXNET3_CMD_GET_CONF_INTR - interrupt
>> configuration");
>> + break;
>> +
>> + default:
>> + DCBPRINTF("Received unknown command: %" PRIx64, cmd);
>> + break;
>> + }
>> +}
>> +
>> +static uint64_t vmxnet3_get_command_status(**VMXNET3_State *s)
>> +{
>> + uint64_t ret;
>> +
>> + switch (s->last_command) {
>> + case VMXNET3_CMD_ACTIVATE_DEV:
>> + ret = (s->device_active) ? 0 : -1;
>> + DCFPRINTF("Device active: %" PRIx64, ret);
>> + break;
>> +
>> + case VMXNET3_CMD_GET_LINK:
>> + ret = s->link_status_and_speed;
>> + DCFPRINTF("Link and speed: %" PRIx64, ret);
>> + break;
>> +
>> + case VMXNET3_CMD_GET_PERM_MAC_LO:
>> + ret = vmxnet3_get_mac_low(&s->perm_**mac);
>> + break;
>> +
>> + case VMXNET3_CMD_GET_PERM_MAC_HI:
>> + ret = vmxnet3_get_mac_high(&s->perm_**mac);
>> + break;
>> +
>> + case VMXNET3_CMD_GET_CONF_INTR:
>> + ret = vmxnet3_get_interrupt_config(**s);
>> + break;
>> +
>> + default:
>> + DWRPRINTF("Received request for unknown command: %x",
>> s->last_command);
>> + ret = -1;
>> + break;
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static void vmxnet3_set_events(VMXNET3_**State *s, uint32_t val)
>> +{
>> + uint32_t events;
>> +
>> + DCBPRINTF("Setting events: 0x%x", val);
>> + events = VMXNET3_READ_DRV_SHARED32(s->**drv_shmem, ecr) | val;
>> + VMXNET3_WRITE_DRV_SHARED32(s->**drv_shmem, ecr, events);
>> +}
>> +
>> +static void vmxnet3_ack_events(VMXNET3_**State *s, uint32_t val)
>> +{
>> + uint32_t events;
>> +
>> + DCBPRINTF("Clearing events: 0x%x", val);
>> + events = VMXNET3_READ_DRV_SHARED32(s->**drv_shmem, ecr) & ~val;
>> + VMXNET3_WRITE_DRV_SHARED32(s->**drv_shmem, ecr, events);
>> +}
>> +
>> +static void
>> +vmxnet3_io_bar1_write(void *opaque,
>> + target_phys_addr_t addr,
>> + uint64_t val,
>> + unsigned size)
>> +{
>> + VMXNET3_State *s = opaque;
>> +
>> + switch (addr) {
>> + /* Vmxnet3 Revision Report Selection */
>> + case VMXNET3_REG_VRRS:
>> + DCBPRINTF("Write BAR1 [VMXNET3_REG_VRRS] = %" PRIx64 ", size %d",
>> + val, size);
>> + break;
>> +
>> + /* UPT Version Report Selection */
>> + case VMXNET3_REG_UVRS:
>> + DCBPRINTF("Write BAR1 [VMXNET3_REG_UVRS] = %" PRIx64 ", size %d",
>> + val, size);
>> + break;
>> +
>> + /* Driver Shared Address Low */
>> + case VMXNET3_REG_DSAL:
>> + DCBPRINTF("Write BAR1 [VMXNET3_REG_DSAL] = %" PRIx64 ", size %d",
>> + val, size);
>> + /* Guest driver will first write the low part of the shared */
>> + /* memory address. We save it to temp variable and set the */
>> + /* shared address only after we get the high part */
>> + if (0 == val) {
>> + s->device_active = false;
>> + }
>> + s->temp_shared_guest_driver_**memory = val;
>> + s->drv_shmem = 0;
>> + break;
>> +
>> + /* Driver Shared Address High */
>> + case VMXNET3_REG_DSAH:
>> + DCBPRINTF("Write BAR1 [VMXNET3_REG_DSAH] = %" PRIx64 ", size %d",
>> + val, size);
>> + /* Set the shared memory between guest driver and device. */
>> + /* We already should have low address part. */
>> + s->drv_shmem = s->temp_shared_guest_driver_**memory | (val <<
>> 32);
>> + break;
>> +
>> + /* Command */
>> + case VMXNET3_REG_CMD:
>> + DCBPRINTF("Write BAR1 [VMXNET3_REG_CMD] = %" PRIx64 ", size %d",
>> + val, size);
>> + vmxnet3_handle_command(s, val);
>> + break;
>> +
>> + /* MAC Address Low */
>> + case VMXNET3_REG_MACL:
>> + DCBPRINTF("Write BAR1 [VMXNET3_REG_MACL] = %" PRIx64 ", size %d",
>> + val, size);
>> + s->temp_mac = val;
>> + break;
>> +
>> + /* MAC Address High */
>> + case VMXNET3_REG_MACH:
>> + DCBPRINTF("Write BAR1 [VMXNET3_REG_MACH] = %" PRIx64 ", size %d",
>> + val, size);
>> + vmxnet3_set_variable_mac(s, val, s->temp_mac);
>> + break;
>> +
>> + /* Interrupt Cause Register */
>> + case VMXNET3_REG_ICR:
>> + DCBPRINTF("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
>> + val, size);
>> + assert(false);
>> + break;
>> +
>> + /* Event Cause Register */
>> + case VMXNET3_REG_ECR:
>> + DCBPRINTF("Write BAR1 [VMXNET3_REG_ECR] = %" PRIx64 ", size %d",
>> + val, size);
>> + vmxnet3_ack_events(s, val);
>> + break;
>> +
>> + default:
>> + DCBPRINTF("Unknown Write to BAR1 [%" PRIx64 "] = %" PRIx64 ",
>> size %d",
>> + (uint64_t) addr, val, size);
>> + break;
>> + }
>> +}
>> +
>> +static uint64_t
>> +vmxnet3_io_bar1_read(void *opaque, target_phys_addr_t addr, unsigned
>> size)
>> +{
>> + VMXNET3_State *s = opaque;
>> + uint64_t ret = 0;
>> +
>> + switch (addr) {
>> + /* Vmxnet3 Revision Report Selection */
>> + case VMXNET3_REG_VRRS:
>> + DCBPRINTF("Read BAR1 [VMXNET3_REG_VRRS], size %d", size);
>> + ret = VMXNET3_DEVICE_REVISION;
>> + break;
>> +
>> + /* UPT Version Report Selection */
>> + case VMXNET3_REG_UVRS:
>> + DCBPRINTF("Read BAR1 [VMXNET3_REG_UVRS], size %d", size);
>> + ret = VMXNET3_DEVICE_VERSION;
>> + break;
>> +
>> + /* Command */
>> + case VMXNET3_REG_CMD:
>> + DCBPRINTF("Read BAR1 [VMXNET3_REG_CMD], size %d", size);
>> + ret = vmxnet3_get_command_status(s);
>> + break;
>> +
>> + /* MAC Address Low */
>> + case VMXNET3_REG_MACL:
>> + DCBPRINTF("Read BAR1 [VMXNET3_REG_MACL], size %d", size);
>> + ret = vmxnet3_get_mac_low(&s->conf.**macaddr);
>> + break;
>> +
>> + /* MAC Address High */
>> + case VMXNET3_REG_MACH:
>> + DCBPRINTF("Read BAR1 [VMXNET3_REG_MACH], size %d", size);
>> + ret = vmxnet3_get_mac_high(&s->conf.**macaddr);
>> + break;
>> +
>> + /* Interrupt Cause Register */
>> + /* Used for legacy interrupts only so interrupt index always 0 */
>> + case VMXNET3_REG_ICR:
>> + DCBPRINTF("Read BAR1 [VMXNET3_REG_ICR], size %d", size);
>> + if (vmxnet3_interrupt_asserted(s, 0)) {
>> + vmxnet3_clear_interrupt(s, 0);
>> + ret = true;
>> + } else {
>> + ret = false;
>> + }
>> + break;
>> +
>> + default:
>> + DCBPRINTF("Unknow read BAR1[%" PRIx64 "], %d bytes",
>> + (uint64_t) addr, size);
>> + break;
>> + }
>> +
>> + return ret;
>> +}
>> +
>> +static int
>> +vmxnet3_can_receive(**VLANClientState *nc)
>> +{
>> + VMXNET3_State *s = DO_UPCAST(NICState, nc, nc)->opaque;
>> + return s->device_active &&
>> + FLAG_IS_SET(s->link_status_**and_speed,
>> VMXNET3_LINK_STATUS_UP);
>> +}
>> +
>> +static inline bool
>> +vmxnet3_is_registered_vlan(**VMXNET3_State *s, const void *data)
>> +{
>> + uint16_t vlan_tag = eth_get_pkt_vlan_tag(data) & VLAN_VID_MASK;
>> +
>> + if (IS_SPECIAL_VLAN_ID(vlan_tag)) {
>> + return true;
>> + }
>> +
>> + return VMXNET3_VFTABLE_ENTRY_IS_SET(**s->vlan_table, vlan_tag);
>> +}
>> +
>> +static bool
>> +vmxnet3_is_allowed_mcast_**group(VMXNET3_State *s, const uint8_t
>> *group_mac)
>> +{
>> + int i;
>> + for (i = 0; i < s->mcast_list_len; i++) {
>> + if (memcmp(group_mac, s->mcast_list[i].a,
>> sizeof(s->mcast_list[i]))) {
>> + return true;
>> + }
>> + }
>> + return false;
>> +}
>> +
>> +static bool
>> +vmxnet3_rx_filter_may_**indicate(VMXNET3_State *s, const void *data,
>> + size_t size, eth_pkt_types_e packet_type)
>> +{
>> + struct eth_header *ehdr = PKT_GET_ETH_HDR(data);
>> +
>> + if (FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_PROMISC)) {
>> + return true;
>> + }
>> +
>> + switch (packet_type) {
>> + case VMXNET3_PKT_UCAST:
>> + if (!FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_UCAST)) {
>> + return false;
>> + }
>> + if (memcmp(s->conf.macaddr.a, ehdr->h_dest, ETH_ALEN)) {
>> + return false;
>> + }
>> + break;
>> +
>> + case VMXNET3_PKT_BCAST:
>> + if (!FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_BCAST)) {
>> + return false;
>> + }
>> + break;
>> +
>> + case VMXNET3_PKT_MCAST:
>> + if (FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_ALL_MULTI)) {
>> + return true;
>> + }
>> + if (!FLAG_IS_SET(s->rx_mode, VMXNET3_RXM_MCAST)) {
>> + return false;
>> + }
>> + if (!vmxnet3_is_allowed_mcast_**group(s, ehdr->h_dest)) {
>> + return false;
>> + }
>> + break;
>> +
>> + default:
>> + assert(false);
>> + }
>> +
>> + return vmxnet3_is_registered_vlan(s, data);
>> +}
>> +
>> +static void
>> +vmxnet3_rxpkt_attach_data(**VMXNET3_State *s, Vmxnet3_RxPkt *pkt,
>> + const void *data, size_t len)
>> +{
>> + uint16_t vtag = 0;
>> + uint16_t ploff;
>> + Vmxnet3_RxPktMdata *mdata = vmxnet3_rxpkt_get_mdata(pkt);
>> +
>> + if (s->rx_vlan_stripping) {
>> + mdata->vlan_stripped =
>> + eth_strip_vlan(&data, vmxnet3_rxpkt_get_ehdr(pkt), &ploff,
>> &vtag);
>> + } else {
>> + mdata->vlan_stripped = false;
>> + }
>> +
>> + if (mdata->vlan_stripped) {
>> + vmxnet3_rxpkt_attach_ehdr(pkt)**;
>> + vmxnet3_rxpkt_get_frag(pkt, 1)->iov_base = (uint8_t *) data +
>> ploff;
>> + vmxnet3_rxpkt_get_frag(pkt, 1)->iov_len = len - ploff;
>> + vmxnet3_rxpkt_set_num_frags(**pkt, 2);
>> + mdata->tot_len = len - ploff + sizeof(struct eth_header);
>> + } else {
>> + vmxnet3_rxpkt_get_frag(pkt, 0)->iov_base = (void *) data;
>> + vmxnet3_rxpkt_get_frag(pkt, 0)->iov_len = len;
>> + vmxnet3_rxpkt_set_num_frags(**pkt, 1);
>> + mdata->tot_len = len;
>> + }
>> +
>> + mdata->vlan_tag = vtag;
>> +}
>> +
>> +static ssize_t
>> +vmxnet3_receive(**VLANClientState *nc, const uint8_t *buf, size_t size)
>> +{
>> + VMXNET3_State *s = DO_UPCAST(NICState, nc, nc)->opaque;
>> + Vmxnet3_RxPkt pkt;
>> + size_t bytes_indicated;
>> + Vmxnet3_RxPktMdata *mdata;
>> +
>> + if (!vmxnet3_can_receive(&s->nic-**>nc)) {
>> + DPKPRINTF("Cannot receive now");
>> + return -1;
>> + }
>> +
>> + vmxnet3_rxpkt_init(&pkt);
>> + mdata = vmxnet3_rxpkt_get_mdata(&pkt);
>> +
>> + if (s->peer_has_vhdr) {
>> + buf += sizeof(struct virtio_net_hdr);
>> + size -= sizeof(struct virtio_net_hdr);
>> + *vmxnet3_rxpkt_get_vhdr(&pkt) = *(const struct virtio_net_hdr *)
>> buf;
>> + mdata->vhdr_valid = true;
>> + } else {
>> + mdata->vhdr_valid = false;
>> + }
>> +
>> + mdata->packet_type = get_eth_packet_type(PKT_GET_**ETH_HDR(buf));
>> +
>> + if (vmxnet3_rx_filter_may_**indicate(s, buf, size,
>> mdata->packet_type)) {
>> + vmxnet3_rxpkt_attach_data(s, &pkt, buf, size);
>> + bytes_indicated = vmxnet3_indicate_packet(s, &pkt) ? size : -1;
>> + if (bytes_indicated < size) {
>> + DPKPRINTF("RX: %lu of %lu bytes indicated", bytes_indicated,
>> size);
>> + }
>> + } else {
>> + DPKPRINTF("Packet dropped by RX filter");
>> + bytes_indicated = size;
>> + }
>> +
>> + assert(size > 0);
>> + assert(bytes_indicated != 0);
>> + return bytes_indicated;
>> +}
>> +
>> +static void vmxnet3_cleanup(**VLANClientState *nc)
>> +{
>> + VMXNET3_State *s = DO_UPCAST(NICState, nc, nc)->opaque;
>> + s->nic = NULL;
>> +}
>> +
>> +static void vmxnet3_set_link_status(**VLANClientState *nc)
>> +{
>> + VMXNET3_State *s = DO_UPCAST(NICState, nc, nc)->opaque;
>> +
>> + if (nc->link_down) {
>> + s->link_status_and_speed &= ~VMXNET3_LINK_STATUS_UP;
>> + } else {
>> + s->link_status_and_speed |= VMXNET3_LINK_STATUS_UP;
>> + }
>> +
>> + vmxnet3_set_events(s, VMXNET3_ECR_LINK);
>> + vmxnet3_trigger_interrupt(s, s->event_int_idx);
>> +}
>> +
>> +static NetClientInfo net_vmxnet3_info = {
>> + .type = NET_CLIENT_TYPE_NIC,
>> + .size = sizeof(NICState),
>> + .can_receive = vmxnet3_can_receive,
>> + .receive = vmxnet3_receive,
>> + .cleanup = vmxnet3_cleanup,
>> + .link_status_changed = vmxnet3_set_link_status,
>> +};
>> +
>> +static bool vmxnet3_peer_has_vnet_hdr(**VMXNET3_State *s)
>> +{
>> + VLANClientState *peer = s->nic->nc.peer;
>> +
>> + if ((NULL != peer) &&
>> + (NET_CLIENT_TYPE_TAP == peer->info->type) &&
>> + tap_has_vnet_hdr(peer)) {
>> + return true;
>> + }
>> +
>> + DWRPRINTF("Peer has no virtio extension. Task offloads will not
>> work.");
>> + return false;
>> +}
>> +
>> +static void vmxnet3_net_uninit(VMXNET3_**State *s)
>> +{
>> + if (NULL != s->mcast_list) {
>> + g_free(s->mcast_list);
>> + }
>> +
>> + vmxnet3_txpkt_cleanup(&s->**curr_txpkt);
>> +}
>> +
>> +static void vmxnet3_net_init(VMXNET3_State *s)
>> +{
>> + DCBPRINTF("vmxnet3_net_init called...");
>> +
>> + vmxnet3_txpkt_init(&s->curr_**txpkt);
>> +
>> + qemu_macaddr_default_if_unset(**&s->conf.macaddr);
>> +
>> + /* Windows guest will query the address that was set on init */
>> + memcpy(&s->perm_mac.a, &s->conf.macaddr.a, sizeof(s->perm_mac.a));
>> +
>> + s->mcast_list = NULL;
>> + s->mcast_list_len = 0;
>> +
>> + s->link_status_and_speed = VMXNET3_LINK_SPEED |
>> VMXNET3_LINK_STATUS_UP;
>> +
>> + DCFPRINTF("Permanent MAC: " MAC_FMT, MAC_ARG(s->perm_mac.a));
>> +
>> + s->nic = qemu_new_nic(&net_vmxnet3_**info, &s->conf,
>> + object_get_typename(OBJECT(s))**,
>> + s->dev.qdev.id, s);
>> +
>> + s->peer_has_vhdr = vmxnet3_peer_has_vnet_hdr(s);
>> + s->curr_txpkt_pl_frags = 0;
>> + s->curr_txpkt_skip = false;
>> + s->curr_txpkt_header_processed = false;
>> +
>> + if (s->peer_has_vhdr) {
>> + tap_set_vnet_hdr_len(s->nic->**nc.peer, sizeof(struct
>> virtio_net_hdr));
>> + tap_using_vnet_hdr(s->nic->nc.**peer, 1);
>> + }
>> +
>> + qemu_format_nic_info_str(&s->**nic->nc, s->conf.macaddr.a);
>> +}
>> +
>> +#ifdef VMXNET3_ENABLE_MSIX
>> +
>> +static void
>> +vmxnet3_unuse_msix_vectors(**VMXNET3_State *s, int num_vectors)
>> +{
>> + int i;
>> + for (i = 0; i < num_vectors; i++) {
>> + msix_vector_unuse(&s->dev, i);
>> + }
>> +}
>> +
>> +static bool
>> +vmxnet3_use_msix_vectors(**VMXNET3_State *s, int num_vectors)
>> +{
>> + int i;
>> + for (i = 0; i < num_vectors; i++) {
>> + int res = msix_vector_use(&s->dev, i);
>> + if (0 > res) {
>> + DWRPRINTF("Failed to use MSI-X vector %d, error %d", i, res);
>> + vmxnet3_unuse_msix_vectors(s, i);
>> + return false;
>> + }
>> + }
>> + return true;
>> +}
>> +
>> +static bool
>> +vmxnet3_init_msix(VMXNET3_**State *s)
>> +{
>> + int res = msix_init(&s->dev, VMXNET3_MAX_INTRS,
>> + &s->msix_bar, VMXNET3_MSIX_BAR_IDX, 0);
>> + if (0 > res) {
>> + DWRPRINTF("Failed to initialize MSI-X, error %d", res);
>> + s->msix_used = false;
>> + } else {
>> + if (!vmxnet3_use_msix_vectors(s, VMXNET3_MAX_INTRS)) {
>> + DWRPRINTF("Failed to use MSI-X vectors, error %d", res);
>> + msix_uninit(&s->dev, &s->msix_bar);
>> + s->msix_used = false;
>> + } else {
>> + s->msix_used = true;
>> + }
>> + }
>> + return s->msix_used;
>> +}
>> +
>> +static void
>> +vmxnet3_cleanup_msix(VMXNET3_**State *s)
>> +{
>> + if (s->msix_used) {
>> + msix_vector_unuse(&s->dev, VMXNET3_MAX_INTRS);
>> + msix_uninit(&s->dev, &s->msix_bar);
>> + }
>> +}
>> +#endif
>> +
>> +#ifdef VMXNET3_ENABLE_MSI
>> +
>> +static bool
>> +vmxnet3_init_msi(VMXNET3_**State *s)
>> +{
>> +#define VMXNET3_MSI_NUM_VECTORS (1)
>> +#define VMXNET3_MSI_OFFSET (0x50)
>> +#define VMXNET3_USE_64BIT (true)
>> +#define VMXNET3_PER_VECTOR_MASK (false)
>> +
>> + int res;
>> + res = msi_init(&s->dev, VMXNET3_MSI_OFFSET, VMXNET3_MSI_NUM_VECTORS,
>> + VMXNET3_USE_64BIT, VMXNET3_PER_VECTOR_MASK);
>> + if (0 > res) {
>> + DWRPRINTF("Failed to initialize MSI, error %d", res);
>> + s->msi_used = false;
>> + } else {
>> + s->msi_used = true;
>> + }
>> +
>> + return s->msi_used;
>> +}
>> +
>> +static void
>> +vmxnet3_cleanup_msi(VMXNET3_**State *s)
>> +{
>> + if (s->msi_used) {
>> + msi_uninit(&s->dev);
>> + }
>> +}
>> +#endif
>> +
>> +static int vmxnet3_pci_init(PCIDevice *dev)
>> +{
>> + static const MemoryRegionOps b0_ops = {
>> + .read = vmxnet3_io_bar0_read,
>> + .write = vmxnet3_io_bar0_write,
>> + .endianness = DEVICE_LITTLE_ENDIAN,
>> + .impl = {
>> + .min_access_size = 4,
>> + .max_access_size = 4,
>> + },
>> + };
>> +
>> + static const MemoryRegionOps b1_ops = {
>> + .read = vmxnet3_io_bar1_read,
>> + .write = vmxnet3_io_bar1_write,
>> + .endianness = DEVICE_LITTLE_ENDIAN,
>> + .impl = {
>> + .min_access_size = 4,
>> + .max_access_size = 4,
>> + },
>> + };
>> +
>> + VMXNET3_State *s = DO_UPCAST(VMXNET3_State, dev, dev);
>> + int i;
>> +
>> + DCBPRINTF("Starting init...");
>> +
>> + memory_region_init_io(&s->**bar0, &b0_ops, s,
>> + "vmxnet3-b0", VMXNET3_PT_REG_SIZE);
>> + pci_register_bar(&s->dev, VMXNET3_BAR0_IDX,
>> + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar0);
>> +
>> + memory_region_init_io(&s->**bar1, &b1_ops, s,
>> + "vmxnet3-b1", VMXNET3_VD_REG_SIZE);
>> + pci_register_bar(&s->dev, VMXNET3_BAR1_IDX,
>> + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar1);
>> +
>> + memory_region_init(&s->msix_**bar, "vmxnet3-msix-bar",
>> + VMXNET3_MSIX_BAR_SIZE);
>> + pci_register_bar(&s->dev, VMXNET3_MSIX_BAR_IDX,
>> + PCI_BASE_ADDRESS_SPACE_MEMORY, &s->msix_bar);
>> +
>> + for (i = 0; i < ARRAY_SIZE(s->interrupt_**states); i++) {
>> + s->interrupt_states[i].is_**asserted = false;
>> + s->interrupt_states[i].is_**pending = false;
>> + s->interrupt_states[i].is_**masked = true;
>> + }
>> +
>> + /* Interrupt pin A */
>> + s->dev.config[PCI_INTERRUPT_**PIN] = 0x01;
>> +
>> +#ifdef VMXNET3_ENABLE_MSIX
>> + if (!vmxnet3_init_msix(s)) {
>> + hw_error("Failed to initialize MSI-X, configuration is
>> inconsistent.");
>> + }
>> +#endif
>> +
>> +#ifdef VMXNET3_ENABLE_MSI
>> + if (!vmxnet3_init_msi(s)) {
>> + hw_error("Failed to initialize MSI, configuration is
>> inconsistent.");
>> + }
>> +#endif
>> +
>> + vmxnet3_net_init(s);
>> + add_boot_device_path(s->conf.**bootindex, &dev->qdev,
>> "/ethernet-phy@0");
>> +
>> + return 0;
>> +}
>> +
>> +
>> +static int vmxnet3_pci_uninit(PCIDevice *dev)
>> +{
>> + VMXNET3_State *s = DO_UPCAST(VMXNET3_State, dev, dev);
>> +
>> + DCBPRINTF("Starting uninit...");
>> +
>> + vmxnet3_net_uninit(s);
>> +
>> +#ifdef VMXNET3_ENABLE_MSIX
>> + vmxnet3_cleanup_msix(s);
>> +#endif
>> +
>> +#ifdef VMXNET3_ENABLE_MSI
>> + vmxnet3_cleanup_msi(s);
>> +#endif
>> +
>> + memory_region_destroy(&s->**bar0);
>> + memory_region_destroy(&s->**bar1);
>> + memory_region_destroy(&s->**msix_bar);
>> +
>> + return 0;
>> +}
>> +
>> +static void vmxnet3_qdev_reset(DeviceState *dev)
>> +{
>> + VMXNET3_State *s = DO_UPCAST(VMXNET3_State, dev.qdev, dev);
>> + DCBPRINTF("Starting QDEV reset...");
>> + vmxnet3_reset(s);
>> +}
>> +
>> +static const VMStateDescription vmstate_vmxnet3 = {
>> + .name = "vmxnet3",
>> + .version_id = 1,
>> + .minimum_version_id = 1,
>> + .minimum_version_id_old = 1,
>> + .fields = (VMStateField[]) {
>> + VMSTATE_PCI_DEVICE(dev, VMXNET3_State),
>> + VMSTATE_END_OF_LIST()
>> + }
>> +};
>> +
>> +#if defined(VMXNET3_ENABLE_MSI) || defined(VMXNET3_ENABLE_MSIX)
>> +static void
>> +vmxnet3_write_config(**PCIDevice *pci, uint32_t addr, uint32_t val, int
>> len)
>> +{
>> + pci_default_write_config(pci, addr, val, len);
>> +#if defined(VMXNET3_ENABLE_MSIX)
>> + msix_write_config(pci, addr, val, len);
>> +#endif
>> +#if defined(VMXNET3_ENABLE_MSI)
>> + msi_write_config(pci, addr, val, len);
>> +#endif
>> +}
>> +#endif
>> +
>> +static Property vmxnet3_properties[] = {
>> + DEFINE_NIC_PROPERTIES(VMXNET3_**State, conf),
>> + DEFINE_PROP_END_OF_LIST(),
>> +};
>> +
>> +static void vmxnet3_class_init(ObjectClass *class, void *data)
>> +{
>> + DeviceClass *dc = DEVICE_CLASS(class);
>> + PCIDeviceClass *c = PCI_DEVICE_CLASS(class);
>> +
>> + c->init = vmxnet3_pci_init;
>> + c->exit = vmxnet3_pci_uninit;
>> + c->romfile = "pxe-e1000.rom";
>> + c->vendor_id = PCI_VENDOR_ID_VMWARE;
>> + c->device_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
>> + c->revision = PCI_DEVICE_ID_VMWARE_VMXNET3_**REVISION;
>> + c->class_id = PCI_CLASS_NETWORK_ETHERNET;
>> + c->subsystem_vendor_id = PCI_VENDOR_ID_VMWARE;
>> + c->subsystem_id = PCI_DEVICE_ID_VMWARE_VMXNET3;
>> +#if defined(VMXNET3_ENABLE_MSI) || defined(VMXNET3_ENABLE_MSIX)
>> + c->config_write = vmxnet3_write_config,
>> +#endif
>> + dc->desc = "VMWare Paravirtualized Ethernet v3";
>> + dc->reset = vmxnet3_qdev_reset;
>> + dc->vmsd = &vmstate_vmxnet3;
>> + dc->props = vmxnet3_properties;
>> +}
>> +
>> +static TypeInfo vmxnet3_info = {
>> + .name = "vmxnet3",
>> + .parent = TYPE_PCI_DEVICE,
>> + .instance_size = sizeof(VMXNET3_State),
>> + .class_init = vmxnet3_class_init,
>> +};
>> +
>> +static void vmxnet3_register_types(void)
>> +{
>> + DCBPRINTF("vmxnet3_register_**types called...");
>> + type_register_static(&vmxnet3_**info);
>> +}
>> +
>> +type_init(vmxnet3_register_**types)
>> diff --git a/qemu/hw/vmxnet3.h b/qemu/hw/vmxnet3.h
>> new file mode 100644
>> index 0000000..6ec3fd5
>> --- /dev/null
>> +++ b/qemu/hw/vmxnet3.h
>> @@ -0,0 +1,727 @@
>> +/*
>> + * QEMU VMWARE VMXNET3 paravirtual NIC
>> + *
>> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
>> + *
>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
>> + *
>> + * Authors:
>> + * Dmitry Fleytman <dmitry@daynix.com>
>> + * Yan Vugenfirer <yan@daynix.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#ifndef _QEMU_VMXNET3_H
>> +#define _QEMU_VMXNET3_H
>> +
>> +#define VMXNET3_DEVICE_MAX_TX_QUEUES 8
>> +#define VMXNET3_DEVICE_MAX_RX_QUEUES 8 /* Keep this value as a power
>> of 2 */
>> +
>> +/* Defines needed to integrate VMWARE headers */
>> +#define u64 uint64_t
>> +#define u32 uint32_t
>> +#define u16 uint16_t
>> +#define u8 uint8_t
>> +#define __le16 uint16_t
>> +#define __le32 uint32_t
>> +#define __le64 uint64_t
>> +#define __packed QEMU_PACKED
>> +
>> +#if defined(HOST_WORDS_BIGENDIAN)
>> +#define const_cpu_to_le64(x) bswap_64(x)
>> +#define __BIG_ENDIAN_BITFIELD
>> +#else
>> +#define const_cpu_to_le64(x) (x)
>> +#endif
>> +
>> +/* Following is an interface definition for */
>> +/* VMXNET3 device as provided by VMWARE */
>> +/* Original file and copyright is available */
>> +/* in Linux kernel v3.2.8 at */
>> +/* drivers/net/vmxnet3/vmxnet3_**defs.h */
>> +
>> +struct UPT1_TxStats {
>> + u64 TSOPktsTxOK; /* TSO pkts post-segmentation */
>> + u64 TSOBytesTxOK;
>> + u64 ucastPktsTxOK;
>> + u64 ucastBytesTxOK;
>> + u64 mcastPktsTxOK;
>> + u64 mcastBytesTxOK;
>> + u64 bcastPktsTxOK;
>> + u64 bcastBytesTxOK;
>> + u64 pktsTxError;
>> + u64 pktsTxDiscard;
>> +};
>> +
>> +struct UPT1_RxStats {
>> + u64 LROPktsRxOK; /* LRO pkts */
>> + u64 LROBytesRxOK; /* bytes from LRO pkts */
>> + /* the following counters are for pkts from the wire, i.e., pre-LRO
>> */
>> + u64 ucastPktsRxOK;
>> + u64 ucastBytesRxOK;
>> + u64 mcastPktsRxOK;
>> + u64 mcastBytesRxOK;
>> + u64 bcastPktsRxOK;
>> + u64 bcastBytesRxOK;
>> + u64 pktsRxOutOfBuf;
>> + u64 pktsRxError;
>> +};
>> +
>> +/* interrupt moderation level */
>> +enum {
>> + UPT1_IML_NONE = 0, /* no interrupt moderation */
>> + UPT1_IML_HIGHEST = 7, /* least intr generated */
>> + UPT1_IML_ADAPTIVE = 8, /* adpative intr moderation */
>> +};
>> +/* values for UPT1_RSSConf.hashFunc */
>> +enum {
>> + UPT1_RSS_HASH_TYPE_NONE = 0x0,
>> + UPT1_RSS_HASH_TYPE_IPV4 = 0x01,
>> + UPT1_RSS_HASH_TYPE_TCP_IPV4 = 0x02,
>> + UPT1_RSS_HASH_TYPE_IPV6 = 0x04,
>> + UPT1_RSS_HASH_TYPE_TCP_IPV6 = 0x08,
>> +};
>> +
>> +enum {
>> + UPT1_RSS_HASH_FUNC_NONE = 0x0,
>> + UPT1_RSS_HASH_FUNC_TOEPLITZ = 0x01,
>> +};
>> +
>> +#define UPT1_RSS_MAX_KEY_SIZE 40
>> +#define UPT1_RSS_MAX_IND_TABLE_SIZE 128
>> +
>> +struct UPT1_RSSConf {
>> + u16 hashType;
>> + u16 hashFunc;
>> + u16 hashKeySize;
>> + u16 indTableSize;
>> + u8 hashKey[UPT1_RSS_MAX_KEY_SIZE]**;
>> + u8 indTable[UPT1_RSS_MAX_IND_**TABLE_SIZE];
>> +};
>> +
>> +/* features */
>> +enum {
>> + UPT1_F_RXCSUM = const_cpu_to_le64(0x0001), /* rx csum
>> verification */
>> + UPT1_F_RSS = const_cpu_to_le64(0x0002),
>> + UPT1_F_RXVLAN = const_cpu_to_le64(0x0004), /* VLAN tag stripping
>> */
>> + UPT1_F_LRO = const_cpu_to_le64(0x0008),
>> +};
>> +
>> +/* all registers are 32 bit wide */
>> +/* BAR 1 */
>> +enum {
>> + VMXNET3_REG_VRRS = 0x0, /* Vmxnet3 Revision Report Selection */
>> + VMXNET3_REG_UVRS = 0x8, /* UPT Version Report Selection */
>> + VMXNET3_REG_DSAL = 0x10, /* Driver Shared Address Low */
>> + VMXNET3_REG_DSAH = 0x18, /* Driver Shared Address High */
>> + VMXNET3_REG_CMD = 0x20, /* Command */
>> + VMXNET3_REG_MACL = 0x28, /* MAC Address Low */
>> + VMXNET3_REG_MACH = 0x30, /* MAC Address High */
>> + VMXNET3_REG_ICR = 0x38, /* Interrupt Cause Register */
>> + VMXNET3_REG_ECR = 0x40 /* Event Cause Register */
>> +};
>> +
>> +/* BAR 0 */
>> +enum {
>> + VMXNET3_REG_IMR = 0x0, /* Interrupt Mask Register */
>> + VMXNET3_REG_TXPROD = 0x600, /* Tx Producer Index */
>> + VMXNET3_REG_RXPROD = 0x800, /* Rx Producer Index for ring 1 */
>> + VMXNET3_REG_RXPROD2 = 0xA00 /* Rx Producer Index for ring 2 */
>> +};
>> +
>> +#define VMXNET3_PT_REG_SIZE 4096 /* BAR 0 */
>> +#define VMXNET3_VD_REG_SIZE 4096 /* BAR 1 */
>> +
>> +#define VMXNET3_REG_ALIGN 8 /* All registers are 8-byte
>> aligned. */
>> +#define VMXNET3_REG_ALIGN_MASK 0x7
>> +
>> +/* I/O Mapped access to registers */
>> +#define VMXNET3_IO_TYPE_PT 0
>> +#define VMXNET3_IO_TYPE_VD 1
>> +#define VMXNET3_IO_ADDR(type, reg) (((type) << 24) | ((reg) &
>> 0xFFFFFF))
>> +#define VMXNET3_IO_TYPE(addr) ((addr) >> 24)
>> +#define VMXNET3_IO_REG(addr) ((addr) & 0xFFFFFF)
>> +
>> +enum {
>> + VMXNET3_CMD_FIRST_SET = 0xCAFE0000,
>> + VMXNET3_CMD_ACTIVATE_DEV = VMXNET3_CMD_FIRST_SET, /* 0xCAFE0000 */
>> + VMXNET3_CMD_QUIESCE_DEV, /* 0xCAFE0001 */
>> + VMXNET3_CMD_RESET_DEV, /* 0xCAFE0002 */
>> + VMXNET3_CMD_UPDATE_RX_MODE, /* 0xCAFE0003 */
>> + VMXNET3_CMD_UPDATE_MAC_**FILTERS, /* 0xCAFE0004 */
>> + VMXNET3_CMD_UPDATE_VLAN_**FILTERS, /* 0xCAFE0005 */
>> + VMXNET3_CMD_UPDATE_RSSIDT, /* 0xCAFE0006 */
>> + VMXNET3_CMD_UPDATE_IML, /* 0xCAFE0007 */
>> + VMXNET3_CMD_UPDATE_PMCFG, /* 0xCAFE0008 */
>> + VMXNET3_CMD_UPDATE_FEATURE, /* 0xCAFE0009 */
>> + VMXNET3_CMD_LOAD_PLUGIN, /* 0xCAFE000A */
>> +
>> + VMXNET3_CMD_FIRST_GET = 0xF00D0000,
>> + VMXNET3_CMD_GET_QUEUE_STATUS = VMXNET3_CMD_FIRST_GET, /* 0xF00D0000
>> */
>> + VMXNET3_CMD_GET_STATS, /* 0xF00D0001
>> */
>> + VMXNET3_CMD_GET_LINK, /* 0xF00D0002
>> */
>> + VMXNET3_CMD_GET_PERM_MAC_LO, /* 0xF00D0003
>> */
>> + VMXNET3_CMD_GET_PERM_MAC_HI, /* 0xF00D0004
>> */
>> + VMXNET3_CMD_GET_DID_LO, /* 0xF00D0005
>> */
>> + VMXNET3_CMD_GET_DID_HI, /* 0xF00D0006
>> */
>> + VMXNET3_CMD_GET_DEV_EXTRA_**INFO, /*
>> 0xF00D0007 */
>> + VMXNET3_CMD_GET_CONF_INTR /* 0xF00D0008
>> */
>> +};
>> +
>> +/*
>> + * Little Endian layout of bitfields -
>> + * Byte 0 : 7.....len.....0
>> + * Byte 1 : rsvd gen 13.len.8
>> + * Byte 2 : 5.msscof.0 ext1 dtype
>> + * Byte 3 : 13...msscof...6
>> + *
>> + * Big Endian layout of bitfields -
>> + * Byte 0: 13...msscof...6
>> + * Byte 1 : 5.msscof.0 ext1 dtype
>> + * Byte 2 : rsvd gen 13.len.8
>> + * Byte 3 : 7.....len.....0
>> + *
>> + * Thus, le32_to_cpu on the dword will allow the big endian driver to
>> read
>> + * the bit fields correctly. And cpu_to_le32 will convert bitfields
>> + * bit fields written by big endian driver to format required by
>> device.
>> + */
>> +
>> +struct Vmxnet3_TxDesc {
>> + __le64 addr;
>> +
>> +#ifdef __BIG_ENDIAN_BITFIELD
>> + u32 msscof:14; /* MSS, checksum offset, flags */
>> + u32 ext1:1;
>> + u32 dtype:1; /* descriptor type */
>> + u32 rsvd:1;
>> + u32 gen:1; /* generation bit */
>> + u32 len:14;
>> +#else
>> + u32 len:14;
>> + u32 gen:1; /* generation bit */
>> + u32 rsvd:1;
>> + u32 dtype:1; /* descriptor type */
>> + u32 ext1:1;
>> + u32 msscof:14; /* MSS, checksum offset, flags */
>> +#endif /* __BIG_ENDIAN_BITFIELD */
>> +
>> +#ifdef __BIG_ENDIAN_BITFIELD
>> + u32 tci:16; /* Tag to Insert */
>> + u32 ti:1; /* VLAN Tag Insertion */
>> + u32 ext2:1;
>> + u32 cq:1; /* completion request */
>> + u32 eop:1; /* End Of Packet */
>> + u32 om:2; /* offload mode */
>> + u32 hlen:10; /* header len */
>> +#else
>> + u32 hlen:10; /* header len */
>> + u32 om:2; /* offload mode */
>> + u32 eop:1; /* End Of Packet */
>> + u32 cq:1; /* completion request */
>> + u32 ext2:1;
>> + u32 ti:1; /* VLAN Tag Insertion */
>> + u32 tci:16; /* Tag to Insert */
>> +#endif /* __BIG_ENDIAN_BITFIELD */
>> +};
>> +
>> +/* TxDesc.OM values */
>> +#define VMXNET3_OM_NONE 0
>> +#define VMXNET3_OM_CSUM 2
>> +#define VMXNET3_OM_TSO 3
>> +
>> +/* fields in TxDesc we access w/o using bit fields */
>> +#define VMXNET3_TXD_EOP_SHIFT 12
>> +#define VMXNET3_TXD_CQ_SHIFT 13
>> +#define VMXNET3_TXD_GEN_SHIFT 14
>> +#define VMXNET3_TXD_EOP_DWORD_SHIFT 3
>> +#define VMXNET3_TXD_GEN_DWORD_SHIFT 2
>> +
>> +#define VMXNET3_TXD_CQ (1 << VMXNET3_TXD_CQ_SHIFT)
>> +#define VMXNET3_TXD_EOP (1 << VMXNET3_TXD_EOP_SHIFT)
>> +#define VMXNET3_TXD_GEN (1 << VMXNET3_TXD_GEN_SHIFT)
>> +
>> +#define VMXNET3_HDR_COPY_SIZE 128
>> +
>> +
>> +struct Vmxnet3_TxDataDesc {
>> + u8 data[VMXNET3_HDR_COPY_SIZE];
>> +};
>> +
>> +#define VMXNET3_TCD_GEN_SHIFT 31
>> +#define VMXNET3_TCD_GEN_SIZE 1
>> +#define VMXNET3_TCD_TXIDX_SHIFT 0
>> +#define VMXNET3_TCD_TXIDX_SIZE 12
>> +#define VMXNET3_TCD_GEN_DWORD_SHIFT 3
>> +
>> +struct Vmxnet3_TxCompDesc {
>> + u32 txdIdx:12; /* Index of the EOP TxDesc */
>> + u32 ext1:20;
>> +
>> + __le32 ext2;
>> + __le32 ext3;
>> +
>> + u32 rsvd:24;
>> + u32 type:7; /* completion type */
>> + u32 gen:1; /* generation bit */
>> +};
>> +
>> +struct Vmxnet3_RxDesc {
>> + __le64 addr;
>> +
>> +#ifdef __BIG_ENDIAN_BITFIELD
>> + u32 gen:1; /* Generation bit */
>> + u32 rsvd:15;
>> + u32 dtype:1; /* Descriptor type */
>> + u32 btype:1; /* Buffer Type */
>> + u32 len:14;
>> +#else
>> + u32 len:14;
>> + u32 btype:1; /* Buffer Type */
>> + u32 dtype:1; /* Descriptor type */
>> + u32 rsvd:15;
>> + u32 gen:1; /* Generation bit */
>> +#endif
>> + u32 ext1;
>> +};
>> +
>> +/* values of RXD.BTYPE */
>> +#define VMXNET3_RXD_BTYPE_HEAD 0 /* head only */
>> +#define VMXNET3_RXD_BTYPE_BODY 1 /* body only */
>> +
>> +/* fields in RxDesc we access w/o using bit fields */
>> +#define VMXNET3_RXD_BTYPE_SHIFT 14
>> +#define VMXNET3_RXD_GEN_SHIFT 31
>> +
>> +struct Vmxnet3_RxCompDesc {
>> +#ifdef __BIG_ENDIAN_BITFIELD
>> + u32 ext2:1;
>> + u32 cnc:1; /* Checksum Not Calculated */
>> + u32 rssType:4; /* RSS hash type used */
>> + u32 rqID:10; /* rx queue/ring ID */
>> + u32 sop:1; /* Start of Packet */
>> + u32 eop:1; /* End of Packet */
>> + u32 ext1:2;
>> + u32 rxdIdx:12; /* Index of the RxDesc */
>> +#else
>> + u32 rxdIdx:12; /* Index of the RxDesc */
>> + u32 ext1:2;
>> + u32 eop:1; /* End of Packet */
>> + u32 sop:1; /* Start of Packet */
>> + u32 rqID:10; /* rx queue/ring ID */
>> + u32 rssType:4; /* RSS hash type used */
>> + u32 cnc:1; /* Checksum Not Calculated */
>> + u32 ext2:1;
>> +#endif /* __BIG_ENDIAN_BITFIELD */
>> +
>> + __le32 rssHash; /* RSS hash value */
>> +
>> +#ifdef __BIG_ENDIAN_BITFIELD
>> + u32 tci:16; /* Tag stripped */
>> + u32 ts:1; /* Tag is stripped */
>> + u32 err:1; /* Error */
>> + u32 len:14; /* data length */
>> +#else
>> + u32 len:14; /* data length */
>> + u32 err:1; /* Error */
>> + u32 ts:1; /* Tag is stripped */
>> + u32 tci:16; /* Tag stripped */
>> +#endif /* __BIG_ENDIAN_BITFIELD */
>> +
>> +
>> +#ifdef __BIG_ENDIAN_BITFIELD
>> + u32 gen:1; /* generation bit */
>> + u32 type:7; /* completion type */
>> + u32 fcs:1; /* Frame CRC correct */
>> + u32 frg:1; /* IP Fragment */
>> + u32 v4:1; /* IPv4 */
>> + u32 v6:1; /* IPv6 */
>> + u32 ipc:1; /* IP Checksum Correct */
>> + u32 tcp:1; /* TCP packet */
>> + u32 udp:1; /* UDP packet */
>> + u32 tuc:1; /* TCP/UDP Checksum Correct */
>> + u32 csum:16;
>> +#else
>> + u32 csum:16;
>> + u32 tuc:1; /* TCP/UDP Checksum Correct */
>> + u32 udp:1; /* UDP packet */
>> + u32 tcp:1; /* TCP packet */
>> + u32 ipc:1; /* IP Checksum Correct */
>> + u32 v6:1; /* IPv6 */
>> + u32 v4:1; /* IPv4 */
>> + u32 frg:1; /* IP Fragment */
>> + u32 fcs:1; /* Frame CRC correct */
>> + u32 type:7; /* completion type */
>> + u32 gen:1; /* generation bit */
>> +#endif /* __BIG_ENDIAN_BITFIELD */
>> +};
>> +
>> +/* fields in RxCompDesc we access via Vmxnet3_GenericDesc.dword[3] */
>> +#define VMXNET3_RCD_TUC_SHIFT 16
>> +#define VMXNET3_RCD_IPC_SHIFT 19
>> +
>> +/* fields in RxCompDesc we access via Vmxnet3_GenericDesc.qword[1] */
>> +#define VMXNET3_RCD_TYPE_SHIFT 56
>> +#define VMXNET3_RCD_GEN_SHIFT 63
>> +
>> +/* csum OK for TCP/UDP pkts over IP */
>> +#define VMXNET3_RCD_CSUM_OK (1 << VMXNET3_RCD_TUC_SHIFT | \
>> + 1 << VMXNET3_RCD_IPC_SHIFT)
>> +#define VMXNET3_TXD_GEN_SIZE 1
>> +#define VMXNET3_TXD_EOP_SIZE 1
>> +
>> +/* value of RxCompDesc.rssType */
>> +enum {
>> + VMXNET3_RCD_RSS_TYPE_NONE = 0,
>> + VMXNET3_RCD_RSS_TYPE_IPV4 = 1,
>> + VMXNET3_RCD_RSS_TYPE_TCPIPV4 = 2,
>> + VMXNET3_RCD_RSS_TYPE_IPV6 = 3,
>> + VMXNET3_RCD_RSS_TYPE_TCPIPV6 = 4,
>> +};
>> +
>> +
>> +/* a union for accessing all cmd/completion descriptors */
>> +union Vmxnet3_GenericDesc {
>> + __le64 qword[2];
>> + __le32 dword[4];
>> + __le16 word[8];
>> + struct Vmxnet3_TxDesc txd;
>> + struct Vmxnet3_RxDesc rxd;
>> + struct Vmxnet3_TxCompDesc tcd;
>> + struct Vmxnet3_RxCompDesc rcd;
>> +};
>> +
>> +#define VMXNET3_INIT_GEN 1
>> +
>> +/* Max size of a single tx buffer */
>> +#define VMXNET3_MAX_TX_BUF_SIZE (1 << 14)
>> +
>> +/* # of tx desc needed for a tx buffer size */
>> +#define VMXNET3_TXD_NEEDED(size) (((size) + VMXNET3_MAX_TX_BUF_SIZE - 1)
>> / \
>> + VMXNET3_MAX_TX_BUF_SIZE)
>> +
>> +/* max # of tx descs for a non-tso pkt */
>> +#define VMXNET3_MAX_TXD_PER_PKT 16
>> +
>> +/* Max size of a single rx buffer */
>> +#define VMXNET3_MAX_RX_BUF_SIZE ((1 << 14) - 1)
>> +/* Minimum size of a type 0 buffer */
>> +#define VMXNET3_MIN_T0_BUF_SIZE 128
>> +#define VMXNET3_MAX_CSUM_OFFSET 1024
>> +
>> +/* Ring base address alignment */
>> +#define VMXNET3_RING_BA_ALIGN 512
>> +#define VMXNET3_RING_BA_MASK (VMXNET3_RING_BA_ALIGN - 1)
>> +
>> +/* Ring size must be a multiple of 32 */
>> +#define VMXNET3_RING_SIZE_ALIGN 32
>> +#define VMXNET3_RING_SIZE_MASK (VMXNET3_RING_SIZE_ALIGN - 1)
>> +
>> +/* Max ring size */
>> +#define VMXNET3_TX_RING_MAX_SIZE 4096
>> +#define VMXNET3_TC_RING_MAX_SIZE 4096
>> +#define VMXNET3_RX_RING_MAX_SIZE 4096
>> +#define VMXNET3_RC_RING_MAX_SIZE 8192
>> +
>> +/* a list of reasons for queue stop */
>> +
>> +enum {
>> + VMXNET3_ERR_NOEOP = 0x80000000, /* cannot find the EOP desc of a
>> pkt */
>> + VMXNET3_ERR_TXD_REUSE = 0x80000001, /* reuse TxDesc before tx
>> completion */
>> + VMXNET3_ERR_BIG_PKT = 0x80000002, /* too many TxDesc for a pkt */
>> + VMXNET3_ERR_DESC_NOT_SPT = 0x80000003, /* descriptor type not supported
>> */
>> + VMXNET3_ERR_SMALL_BUF = 0x80000004, /* type 0 buffer too small */
>> + VMXNET3_ERR_STRESS = 0x80000005, /* stress option firing in
>> vmkernel */
>> + VMXNET3_ERR_SWITCH = 0x80000006, /* mode switch failure */
>> + VMXNET3_ERR_TXD_INVALID = 0x80000007, /* invalid TxDesc */
>> +};
>> +
>> +/* completion descriptor types */
>> +#define VMXNET3_CDTYPE_TXCOMP 0 /* Tx Completion Descriptor */
>> +#define VMXNET3_CDTYPE_RXCOMP 3 /* Rx Completion Descriptor */
>> +
>> +enum {
>> + VMXNET3_GOS_BITS_UNK = 0, /* unknown */
>> + VMXNET3_GOS_BITS_32 = 1,
>> + VMXNET3_GOS_BITS_64 = 2,
>> +};
>> +
>> +#define VMXNET3_GOS_TYPE_UNK 0 /* unknown */
>> +#define VMXNET3_GOS_TYPE_LINUX 1
>> +#define VMXNET3_GOS_TYPE_WIN 2
>> +#define VMXNET3_GOS_TYPE_SOLARIS 3
>> +#define VMXNET3_GOS_TYPE_FREEBSD 4
>> +#define VMXNET3_GOS_TYPE_PXE 5
>> +
>> +struct Vmxnet3_GOSInfo {
>> +#ifdef __BIG_ENDIAN_BITFIELD
>> + u32 gosMisc:10; /* other info about gos */
>> + u32 gosVer:16; /* gos version */
>> + u32 gosType:4; /* which guest */
>> + u32 gosBits:2; /* 32-bit or 64-bit? */
>> +#else
>> + u32 gosBits:2; /* 32-bit or 64-bit? */
>> + u32 gosType:4; /* which guest */
>> + u32 gosVer:16; /* gos version */
>> + u32 gosMisc:10; /* other info about gos */
>> +#endif /* __BIG_ENDIAN_BITFIELD */
>> +};
>> +
>> +struct Vmxnet3_DriverInfo {
>> + __le32 version;
>> + struct Vmxnet3_GOSInfo gos;
>> + __le32 vmxnet3RevSpt;
>> + __le32 uptVerSpt;
>> +};
>> +
>> +
>> +#define VMXNET3_REV1_MAGIC 0xbabefee1
>> +
>> +/*
>> + * QueueDescPA must be 128 bytes aligned. It points to an array of
>> + * Vmxnet3_TxQueueDesc followed by an array of Vmxnet3_RxQueueDesc.
>> + * The number of Vmxnet3_TxQueueDesc/Vmxnet3_**RxQueueDesc are
>> specified by
>> + * Vmxnet3_MiscConf.numTxQueues/**numRxQueues, respectively.
>> + */
>> +#define VMXNET3_QUEUE_DESC_ALIGN 128
>> +
>> +
>> +struct Vmxnet3_MiscConf {
>> + struct Vmxnet3_DriverInfo driverInfo;
>> + __le64 uptFeatures;
>> + __le64 ddPA; /* driver data PA */
>> + __le64 queueDescPA; /* queue descriptor table PA */
>> + __le32 ddLen; /* driver data len */
>> + __le32 queueDescLen; /* queue desc. table len in bytes */
>> + __le32 mtu;
>> + __le16 maxNumRxSG;
>> + u8 numTxQueues;
>> + u8 numRxQueues;
>> + __le32 reserved[4];
>> +};
>> +
>> +
>> +struct Vmxnet3_TxQueueConf {
>> + __le64 txRingBasePA;
>> + __le64 dataRingBasePA;
>> + __le64 compRingBasePA;
>> + __le64 ddPA; /* driver data */
>> + __le64 reserved;
>> + __le32 txRingSize; /* # of tx desc */
>> + __le32 dataRingSize; /* # of data desc */
>> + __le32 compRingSize; /* # of comp desc */
>> + __le32 ddLen; /* size of driver data */
>> + u8 intrIdx;
>> + u8 _pad[7];
>> +};
>> +
>> +
>> +struct Vmxnet3_RxQueueConf {
>> + __le64 rxRingBasePA[2];
>> + __le64 compRingBasePA;
>> + __le64 ddPA; /* driver data */
>> + __le64 reserved;
>> + __le32 rxRingSize[2]; /* # of rx desc */
>> + __le32 compRingSize; /* # of rx comp desc */
>> + __le32 ddLen; /* size of driver data */
>> + u8 intrIdx;
>> + u8 _pad[7];
>> +};
>> +
>> +
>> +enum vmxnet3_intr_mask_mode {
>> + VMXNET3_IMM_AUTO = 0,
>> + VMXNET3_IMM_ACTIVE = 1,
>> + VMXNET3_IMM_LAZY = 2
>> +};
>> +
>> +enum vmxnet3_intr_type {
>> + VMXNET3_IT_AUTO = 0,
>> + VMXNET3_IT_INTX = 1,
>> + VMXNET3_IT_MSI = 2,
>> + VMXNET3_IT_MSIX = 3
>> +};
>> +
>> +#define VMXNET3_MAX_TX_QUEUES 8
>> +#define VMXNET3_MAX_RX_QUEUES 16
>> +/* addition 1 for events */
>> +#define VMXNET3_MAX_INTRS 25
>> +
>> +/* value of intrCtrl */
>> +#define VMXNET3_IC_DISABLE_ALL 0x1 /* bit 0 */
>> +
>> +
>> +struct Vmxnet3_IntrConf {
>> + bool autoMask;
>> + u8 numIntrs; /* # of interrupts */
>> + u8 eventIntrIdx;
>> + u8 modLevels[VMXNET3_MAX_INTRS]; /* moderation level for
>> + * each intr */
>> + __le32 intrCtrl;
>> + __le32 reserved[2];
>> +};
>> +
>> +/* one bit per VLAN ID, the size is in the units of u32 */
>> +#define VMXNET3_VFT_SIZE (4096/(sizeof(uint32_t)*8))
>> +
>> +
>> +struct Vmxnet3_QueueStatus {
>> + bool stopped;
>> + u8 _pad[3];
>> + __le32 error;
>> +};
>> +
>> +
>> +struct Vmxnet3_TxQueueCtrl {
>> + __le32 txNumDeferred;
>> + __le32 txThreshold;
>> + __le64 reserved;
>> +};
>> +
>> +
>> +struct Vmxnet3_RxQueueCtrl {
>> + bool updateRxProd;
>> + u8 _pad[7];
>> + __le64 reserved;
>> +};
>> +
>> +enum {
>> + VMXNET3_RXM_UCAST = 0x01, /* unicast only */
>> + VMXNET3_RXM_MCAST = 0x02, /* multicast passing the filters */
>> + VMXNET3_RXM_BCAST = 0x04, /* broadcast only */
>> + VMXNET3_RXM_ALL_MULTI = 0x08, /* all multicast */
>> + VMXNET3_RXM_PROMISC = 0x10 /* promiscuous */
>> +};
>> +
>> +struct Vmxnet3_RxFilterConf {
>> + __le32 rxMode; /* VMXNET3_RXM_xxx */
>> + __le16 mfTableLen; /* size of the multicast filter table */
>> + __le16 _pad1;
>> + __le64 mfTablePA; /* PA of the multicast filters table */
>> + __le32 vfTable[VMXNET3_VFT_SIZE]; /* vlan filter */
>> +};
>> +
>> +
>> +#define VMXNET3_PM_MAX_FILTERS 6
>> +#define VMXNET3_PM_MAX_PATTERN_SIZE 128
>> +#define VMXNET3_PM_MAX_MASK_SIZE (VMXNET3_PM_MAX_PATTERN_SIZE / 8)
>> +
>> +#define VMXNET3_PM_WAKEUP_MAGIC cpu_to_le16(0x01) /* wake up on magic
>> pkts */
>> +#define VMXNET3_PM_WAKEUP_FILTER cpu_to_le16(0x02) /* wake up on pkts
>> matching
>> + * filters */
>> +
>> +
>> +struct Vmxnet3_PM_PktFilter {
>> + u8 maskSize;
>> + u8 patternSize;
>> + u8 mask[VMXNET3_PM_MAX_MASK_SIZE]**;
>> + u8 pattern[VMXNET3_PM_MAX_**PATTERN_SIZE];
>> + u8 pad[6];
>> +};
>> +
>> +
>> +struct Vmxnet3_PMConf {
>> + __le16 wakeUpEvents; /* VMXNET3_PM_WAKEUP_xxx */
>> + u8 numFilters;
>> + u8 pad[5];
>> + struct Vmxnet3_PM_PktFilter filters[VMXNET3_PM_MAX_**FILTERS];
>> +};
>> +
>> +
>> +struct Vmxnet3_VariableLenConfDesc {
>> + __le32 confVer;
>> + __le32 confLen;
>> + __le64 confPA;
>> +};
>> +
>> +
>> +struct Vmxnet3_TxQueueDesc {
>> + struct Vmxnet3_TxQueueCtrl ctrl;
>> + struct Vmxnet3_TxQueueConf conf;
>> +
>> + /* Driver read after a GET command */
>> + struct Vmxnet3_QueueStatus status;
>> + struct UPT1_TxStats stats;
>> + u8 _pad[88]; /* 128 aligned */
>> +};
>> +
>> +
>> +struct Vmxnet3_RxQueueDesc {
>> + struct Vmxnet3_RxQueueCtrl ctrl;
>> + struct Vmxnet3_RxQueueConf conf;
>> + /* Driver read after a GET commad */
>> + struct Vmxnet3_QueueStatus status;
>> + struct UPT1_RxStats stats;
>> + u8 __pad[88]; /* 128 aligned */
>> +};
>> +
>> +
>> +struct Vmxnet3_DSDevRead {
>> + /* read-only region for device, read by dev in response to a SET cmd
>> */
>> + struct Vmxnet3_MiscConf misc;
>> + struct Vmxnet3_IntrConf intrConf;
>> + struct Vmxnet3_RxFilterConf rxFilterConf;
>> + struct Vmxnet3_VariableLenConfDesc rssConfDesc;
>> + struct Vmxnet3_VariableLenConfDesc pmConfDesc;
>> + struct Vmxnet3_VariableLenConfDesc pluginConfDesc;
>> +};
>> +
>> +/* All structures in DriverShared are padded to multiples of 8 bytes */
>> +struct Vmxnet3_DriverShared {
>> + __le32 magic;
>> + /* make devRead start at 64bit boundaries */
>> + __le32 pad;
>> + struct Vmxnet3_DSDevRead devRead;
>> + __le32 ecr;
>> + __le32 reserved[5];
>> +};
>> +
>> +
>> +#define VMXNET3_ECR_RQERR (1 << 0)
>> +#define VMXNET3_ECR_TQERR (1 << 1)
>> +#define VMXNET3_ECR_LINK (1 << 2)
>> +#define VMXNET3_ECR_DIC (1 << 3)
>> +#define VMXNET3_ECR_DEBUG (1 << 4)
>> +
>> +/* flip the gen bit of a ring */
>> +#define VMXNET3_FLIP_RING_GEN(gen) ((gen) = (gen) ^ 0x1)
>> +
>> +/* only use this if moving the idx won't affect the gen bit */
>> +#define VMXNET3_INC_RING_IDX_ONLY(idx, ring_size) \
>> + do {\
>> + (idx)++;\
>> + if (unlikely((idx) == (ring_size))) {\
>> + (idx) = 0;\
>> + } \
>> + } while (0)
>> +
>> +#define VMXNET3_SET_VFTABLE_ENTRY(**vfTable, vid) \
>> + (vfTable[vid >> 5] |= (1 << (vid & 31)))
>> +#define VMXNET3_CLEAR_VFTABLE_ENTRY(**vfTable, vid) \
>> + (vfTable[vid >> 5] &= ~(1 << (vid & 31)))
>> +
>> +#define VMXNET3_VFTABLE_ENTRY_IS_SET(**vfTable, vid) \
>> + ((vfTable[vid >> 5] & (1 << (vid & 31))) != 0)
>> +
>> +#define VMXNET3_MAX_MTU 9000
>> +#define VMXNET3_MIN_MTU 60
>> +
>> +#define VMXNET3_LINK_UP (10000 << 16 | 1) /* 10 Gbps, up */
>> +#define VMXNET3_LINK_DOWN 0
>> +
>> +#undef u64
>> +#undef u32
>> +#undef u16
>> +#undef u8
>> +#undef __le16
>> +#undef __le32
>> +#undef __le64
>> +#undef __packed
>> +#undef const_cpu_to_le64
>> +#if defined(HOST_WORDS_BIGENDIAN)
>> +#undef __BIG_ENDIAN_BITFIELD
>> +#endif
>> +
>> +#endif
>> diff --git a/qemu/hw/vmxnet3_debug.h b/qemu/hw/vmxnet3_debug.h
>> new file mode 100644
>> index 0000000..8383c22
>> --- /dev/null
>> +++ b/qemu/hw/vmxnet3_debug.h
>> @@ -0,0 +1,104 @@
>> +/*
>> + * QEMU VMWARE VMXNET3 paravirtual NIC - debugging facilities
>> + *
>> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
>> + *
>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
>> + *
>> + * Authors:
>> + * Dmitry Fleytman <dmitry@daynix.com>
>> + * Yan Vugenfirer <yan@daynix.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#ifndef _QEMU_VMXNET3_DEBUG_H
>> +#define _QEMU_VMXNET3_DEBUG_H
>> +
>> +/* #define DEBUG_VMXNET3_CB */
>> +#define DEBUG_VMXNET3_WARNINGS
>> +#define DEBUG_VMXNET3_ERRORS
>> +/* #define DEBUG_VMXNET3_INTERRUPTS */
>> +/* #define DEBUG_VMXNET3_CONFIG */
>> +/* #define DEBUG_VMXNET3_SHMEM_ACCESS */
>> +/* #define DEBUG_VMXNET3_RINGS */
>> +/* #define DEBUG_VMXNET3_PACKETS */
>> +
>> +#ifdef DEBUG_VMXNET3_SHMEM_ACCESS
>> +#define DSHPRINTF(fmt, ...)
>> \
>> + do {
>> \
>> + printf("[vmxnet3][SH][%s]: " fmt "\n", __func__, ##
>> __VA_ARGS__); \
>> + } while (0)
>> +#else
>> +#define DSHPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>> +#ifdef DEBUG_VMXNET3_CB
>> +#define DCBPRINTF(fmt, ...)
>> \
>> + do {
>> \
>> + printf("[vmxnet3][CB][%s]: " fmt "\n", __func__, ##
>> __VA_ARGS__); \
>> + } while (0)
>> +#else
>> +#define DCBPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>> +#ifdef DEBUG_VMXNET3_PACKETS
>> +#define DPKPRINTF(fmt, ...)
>> \
>> + do {
>> \
>> + printf("[vmxnet3][PK][%s]: " fmt "\n", __func__, ##
>> __VA_ARGS__); \
>> + } while (0)
>> +#else
>> +#define DPKPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>> +#ifdef DEBUG_VMXNET3_WARNINGS
>> +#define DWRPRINTF(fmt, ...)
>> \
>> + do {
>> \
>> + printf("[vmxnet3][WR][%s]: " fmt "\n", __func__, ##
>> __VA_ARGS__); \
>> + } while (0)
>> +#else
>> +#define DWRPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>> +#ifdef DEBUG_VMXNET3_ERRORS
>> +#define DERPRINTF(fmt, ...)
>> \
>> + do {
>> \
>> + printf("[vmxnet3][ER][%s]: " fmt "\n", __func__, ##
>> __VA_ARGS__); \
>> + } while (0)
>> +#else
>> +#define DERPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>> +#ifdef DEBUG_VMXNET3_INTERRUPTS
>> +#define DIRPRINTF(fmt, ...)
>> \
>> + do {
>> \
>> + printf("[vmxnet3][IR][%s]: " fmt "\n", __func__, ##
>> __VA_ARGS__); \
>> + } while (0)
>> +#else
>> +#define DIRPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>> +#ifdef DEBUG_VMXNET3_CONFIG
>> +#define DCFPRINTF(fmt, ...)
>> \
>> + do {
>> \
>> + printf("[vmxnet3][CF][%s]: " fmt "\n", __func__, ##
>> __VA_ARGS__); \
>> + } while (0)
>> +#else
>> +#define DCFPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>> +#ifdef DEBUG_VMXNET3_RINGS
>> +#define DRIPRINTF(fmt, ...)
>> \
>> + do {
>> \
>> + printf("[vmxnet3][RI][%s]: " fmt "\n", __func__, ##
>> __VA_ARGS__); \
>> + } while (0)
>> +#else
>> +#define DRIPRINTF(fmt, ...) do {} while (0)
>> +#endif
>> +
>> +#define MAC_FMT "%02X:%02X:%02X:%02X:%02X:%**02X"
>> +#define MAC_ARG(a) (a)[0], (a)[1], (a)[2], (a)[3], (a)[4], (a)[5]
>> +
>> +#endif /* _QEMU_VMXNET3_DEBUG_H */
>> diff --git a/qemu/hw/vmxnet_utils.c b/qemu/hw/vmxnet_utils.c
>> new file mode 100644
>> index 0000000..e310828
>> --- /dev/null
>> +++ b/qemu/hw/vmxnet_utils.c
>> @@ -0,0 +1,172 @@
>> +/*
>> + * QEMU VMWARE paravirtual devices - network auxiliary code
>> + *
>> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
>> + *
>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
>> + *
>> + * Authors:
>> + * Dmitry Fleytman <dmitry@daynix.com>
>> + * Yan Vugenfirer <yan@daynix.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#include "hw.h"
>> +#include "virtio-net.h"
>> +#include "vmxnet_utils.h"
>> +#include "net/checksum.h"
>> +
>> +void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag)
>> +{
>> + switch (be16_to_cpu(ehdr->h_proto)) {
>> + case ETH_P_VLAN:
>> + case ETH_P_DVLAN: {
>> + /* Header already present, just put proper VLAN tag */
>> + struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
>> + vhdr->h_tci = cpu_to_be16(vlan_tag);
>> + }
>> + default: {
>> + /* No VLAN header, put a new one */
>> + struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
>> + vhdr->h_proto = ehdr->h_proto;
>> + ehdr->h_proto = cpu_to_be16(ETH_P_VLAN);
>> + vhdr->h_tci = cpu_to_be16(vlan_tag);
>> + }
>> + }
>> +}
>> +
>> +bool eth_setup_tx_offloads(uint8_t *l3hdr,
>> + size_t l3hdr_len,
>> + size_t l3hdr_off,
>> + uint32_t l3payload_len,
>> + struct virtio_net_hdr *vhdr,
>> + bool more_frags,
>> + uint16_t fragmentation_offset)
>> +{
>> + uint16_t csum;
>> +
>> + switch (vhdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
>> + case VIRTIO_NET_HDR_GSO_TCPV4:
>> + case VIRTIO_NET_HDR_GSO_UDP: {
>> + struct ip_header *iphdr = (struct ip_header *) l3hdr;
>> + uint16_t new_ip_off;
>> +
>> + vhdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
>> + vhdr->csum_start = l3hdr_off;
>> + vhdr->csum_offset = offsetof(struct ip_header, ip_sum);
>> +
>> + if (l3payload_len + l3hdr_len > ETH_MAX_IP_DGRAM_LEN) {
>> + /* This must never happen with fragmentation enabled */
>> + assert(0 == more_frags);
>> + assert(0 == fragmentation_offset);
>> + return false;
>> + }
>> +
>> + iphdr->ip_len = cpu_to_be16(l3payload_len + l3hdr_len);
>> + assert(0 == (~IP_OFFMASK & fragmentation_offset));
>> +
>> + new_ip_off = fragmentation_offset | (more_frags ? IP_MF : 0) |
>> + (be16_to_cpu(iphdr->ip_off) &
>> ~(IP_OFFMASK|IP_MF));
>> +
>> + iphdr->ip_off = cpu_to_be16(new_ip_off);
>> +
>> + /* Due to Linux bridge bugs/features IP header checksum */
>> + /* must be calculated in order to make it process */
>> + /* packet with segmentation requirements successfully */
>> + eth_put_csum(l3hdr, vhdr->csum_offset, 0);
>> + csum = net_raw_checksum(l3hdr, l3hdr_len);
>> + eth_put_csum(l3hdr, vhdr->csum_offset, csum);
>> + }
>> + break;
>> +
>> + case VIRTIO_NET_HDR_GSO_TCPV6:
>> + default:
>> + vhdr->flags = 0;
>> + break;
>> + }
>> +
>> + return true;
>> +}
>> +
>> +uint8_t
>> +eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr)
>> +{
>> + uint8_t ecn_state = 0;
>> +
>> + if (ETH_P_IP == l3_proto) {
>> + struct ip_header *iphdr = (struct ip_header *) l3_hdr;
>> +
>> + if (IP_HEADER_VERSION_4 == IP_HEADER_VERSION(iphdr)) {
>> + if (IPTOS_ECN_CE == IPTOS_ECN(iphdr->ip_tos)) {
>> + ecn_state = VIRTIO_NET_HDR_GSO_ECN;
>> + }
>> + if (IP_PROTO_TCP == iphdr->ip_p) {
>> + return VIRTIO_NET_HDR_GSO_TCPV4 | ecn_state;
>> + } else if (IP_PROTO_UDP == iphdr->ip_p) {
>> + return VIRTIO_NET_HDR_GSO_UDP | ecn_state;
>> + }
>> + }
>> + } else if (ETH_P_IPV6 == l3_proto) {
>> + struct ip6_header *ip6hdr = (struct ip6_header *) l3_hdr;
>> +
>> + if (IP6_ECN_CE == IP6_ECN(ip6hdr->ip6_ecn_acc)) {
>> + ecn_state = VIRTIO_NET_HDR_GSO_ECN;
>> + }
>> +
>> + if (IP_PROTO_TCP == ip6hdr->ip6_nxt) {
>> + return VIRTIO_NET_HDR_GSO_TCPV6 | ecn_state;
>> + }
>> + }
>> +
>> + /* Unsupported offload */
>> + assert(false);
>> +
>> + return VIRTIO_NET_HDR_GSO_NONE | ecn_state;
>> +}
>> +
>> +void eth_get_protocols(const uint8_t *headers,
>> + uint32_t hdr_length,
>> + bool *isip4, bool *isip6,
>> + bool *isudp, bool *istcp)
>> +{
>> + int proto;
>> + size_t l2hdr_len = eth_get_l2_hdr_length(headers)**;
>> + assert(hdr_length >= eth_get_l2_hdr_length(headers)**);
>> + *isip4 = *isip6 = *isudp = *istcp = false;
>> +
>> + proto = eth_get_l3_proto(headers, l2hdr_len);
>> + if (ETH_P_IP == proto) {
>> + *isip4 = true;
>> +
>> + struct ip_header *iphdr;
>> +
>> + assert(hdr_length >=
>> + eth_get_l2_hdr_length(headers) + sizeof(struct ip_header));
>> +
>> + iphdr = PKT_GET_IP_HDR(headers);
>> +
>> + if (IP_HEADER_VERSION_4 == IP_HEADER_VERSION(iphdr)) {
>> + if (IP_PROTO_TCP == iphdr->ip_p) {
>> + *istcp = true;
>> + } else if (IP_PROTO_UDP == iphdr->ip_p) {
>> + *isudp = true;
>> + }
>> + }
>> + } else if (ETH_P_IPV6 == proto) {
>> + *isip6 = true;
>> +
>> + struct ip6_header *ip6hdr;
>> + assert(hdr_length >=
>> + eth_get_l2_hdr_length(headers) + sizeof(struct ip6_header));
>> + ip6hdr = PKT_GET_IP6_HDR(headers);
>> +
>> + if (IP_PROTO_TCP == ip6hdr->ip6_nxt) {
>> + *istcp = true;
>> + } else if (IP_PROTO_UDP == ip6hdr->ip6_nxt) {
>> + *isudp = true;
>> + }
>> + }
>> +}
>> diff --git a/qemu/hw/vmxnet_utils.h b/qemu/hw/vmxnet_utils.h
>> new file mode 100644
>> index 0000000..f5e79dd
>> --- /dev/null
>> +++ b/qemu/hw/vmxnet_utils.h
>> @@ -0,0 +1,242 @@
>> +/*
>> + * QEMU VMWARE paravirtual devices - network auxiliary code
>> + *
>> + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
>> + *
>> + * Developed by Daynix Computing LTD (http://www.daynix.com)
>> + *
>> + * Authors:
>> + * Dmitry Fleytman <dmitry@daynix.com>
>> + * Yan Vugenfirer <yan@daynix.com>
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or
>> later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#define ETH_ALEN 6
>> +
>> +struct eth_header {
>> + uint8_t h_dest[ETH_ALEN]; /* destination eth addr */
>> + uint8_t h_source[ETH_ALEN]; /* source ether addr */
>> + uint16_t h_proto; /* packet type ID field */
>> +};
>> +
>> +struct vlan_header {
>> + uint16_t h_tci; /* priority and VLAN ID */
>> + uint16_t h_proto; /* encapsulated protocol */
>> +};
>> +
>> +struct ip_header {
>> + uint8_t ip_ver_len; /* version and header length */
>> + uint8_t ip_tos; /* type of service */
>> + uint16_t ip_len; /* total length */
>> + uint16_t ip_id; /* identification */
>> + uint16_t ip_off; /* fragment offset field */
>> + uint8_t ip_ttl; /* time to live */
>> + uint8_t ip_p; /* protocol */
>> + uint16_t ip_sum; /* checksum */
>> + uint32_t ip_src, ip_dst; /* source and dest address */
>> +};
>> +
>> +/* IPv6 address */
>> +struct in6_addr {
>> + union {
>> + uint8_t __u6_addr8[16];
>> + } __in6_u;
>> +};
>> +
>> +struct ip6_header {
>> + union {
>> + struct ip6_hdrctl {
>> + uint32_t ip6_un1_flow; /* 4 bits version, 8 bits TC,
>> + 20 bits flow-ID */
>> + uint16_t ip6_un1_plen; /* payload length */
>> + uint8_t ip6_un1_nxt; /* next header */
>> + uint8_t ip6_un1_hlim; /* hop limit */
>> + } ip6_un1;
>> + uint8_t ip6_un2_vfc; /* 4 bits version, top 4 bits tclass
>> */
>> + struct ip6_ecn_access {
>> + uint8_t ip6_un3_vfc; /* 4 bits version, top 4 bits tclass
>> */
>> + uint8_t ip6_un3_ecn; /* 2 bits ECN, top 6 bits payload
>> length */
>> + } ip6_un3;
>> + } ip6_ctlun;
>> + struct in6_addr ip6_src; /* source address */
>> + struct in6_addr ip6_dst; /* destination address */
>> +};
>> +#define ip6_nxt ip6_ctlun.ip6_un1.ip6_un1_nxt
>> +#define ip6_ecn_acc ip6_ctlun.ip6_un3.ip6_un3_ecn
>> +
>> +#define PKT_GET_ETH_HDR(p) \
>> + ((struct eth_header *)(p))
>> +#define PKT_GET_VLAN_HDR(p) \
>> + ((struct vlan_header *) (((uint8_t *)(p)) + sizeof(struct
>> eth_header)))
>> +#define PKT_GET_IP_HDR(p) \
>> + ((struct ip_header *)(((uint8_t *)(p)) + eth_get_l2_hdr_length(p)))
>> +#define IP_HDR_GET_LEN(p) \
>> + ((((struct ip_header *)p)->ip_ver_len & 0x0F) << 2)
>> +#define PKT_GET_IP_HDR_LEN(p) \
>> + (IP_HDR_GET_LEN(PKT_GET_IP_**HDR(p)))
>> +#define PKT_GET_IP6_HDR(p) \
>> + ((struct ip6_header *) (((uint8_t *)(p)) + eth_get_l2_hdr_length(p)))
>> +#define IP_HEADER_VERSION(ip) \
>> + ((ip->ip_ver_len >> 4)&0xf)
>> +
>> +#define ETH_P_IP (0x0800)
>> +#define ETH_P_IPV6 (0x86dd)
>> +#define ETH_P_VLAN (0x8100)
>> +#define ETH_P_DVLAN (0x88a8)
>> +#define VLAN_VID_MASK 0x0fff
>> +#define IP_HEADER_VERSION_4 (4)
>> +#define IP_HEADER_VERSION_6 (6)
>> +#define IP_PROTO_TCP (6)
>> +#define IP_PROTO_UDP (17)
>> +#define IPTOS_ECN_MASK 0x03
>> +#define IPTOS_ECN(x) ((x) & IPTOS_ECN_MASK)
>> +#define IPTOS_ECN_CE 0x03
>> +#define IP6_ECN_MASK 0xC0
>> +#define IP6_ECN(x) ((x) & IP6_ECN_MASK)
>> +#define IP6_ECN_CE 0xC0
>> +#define IP4_DONT_FRAGMENT_FLAG (1 << 14)
>> +
>> +#define IS_SPECIAL_VLAN_ID(x) \
>> + ((0 == (x)) || (0xFFF == (x)))
>> +
>> +#define ETH_MAX_L2_HDR_LEN \
>> + (sizeof(struct eth_header) + 2*sizeof(struct vlan_header))
>> +
>> +#define ETH_MAX_IP4_HDR_LEN (60)
>> +#define ETH_MAX_IP6_HDR_LEN \
>> + (sizeof(struct ip6_header))
>> +#define ETH_MAX_L3_HDR_LEN \
>> + (MAX(ETH_MAX_IP4_HDR_LEN, ETH_MAX_IP6_HDR_LEN))
>> +#define ETH_MAX_IP_DGRAM_LEN (0xFFFF)
>> +#define ETH_MAX_IP_PLOAD_LEN \
>> + (ETH_MAX_IP_DGRAM_LEN - ETH_MAX_IP4_HDR_LEN - ETH_MAX_L2_HDR_LEN)
>> +
>> +#define IP_FRAG_UNIT_SIZE (8)
>> +#define IP_FRAG_ALIGN_SIZE(x) ((x) & ~0x7)
>> +#define IP_RF 0x8000 /* reserved fragment flag
>> */
>> +#define IP_DF 0x4000 /* don't fragment flag */
>> +#define IP_MF 0x2000 /* more fragments flag */
>> +#define IP_OFFMASK 0x1fff /* mask for fragmenting
>> bits */
>> +
>> +
>> +static inline int is_multicast_ether_addr(const uint8_t *addr)
>> +{
>> + return 0x01 & addr[0];
>> +}
>> +
>> +static inline int is_broadcast_ether_addr(const uint8_t *addr)
>> +{
>> + return (addr[0] & addr[1] & addr[2] & addr[3] & addr[4] & addr[5])
>> == 0xff;
>> +}
>> +
>> +static inline int is_unicast_ether_addr(const uint8_t *addr)
>> +{
>> + return !is_multicast_ether_addr(addr)**;
>> +}
>> +
>> +typedef enum {
>> + VMXNET3_PKT_UCAST = 0xAABBCC00,
>> + VMXNET3_PKT_BCAST,
>> + VMXNET3_PKT_MCAST
>> +} eth_pkt_types_e;
>> +
>> +static inline eth_pkt_types_e
>> +get_eth_packet_type(const struct eth_header *ehdr)
>> +{
>> + if (is_broadcast_ether_addr(ehdr-**>h_dest)) {
>> + return VMXNET3_PKT_BCAST;
>> + } else if (is_multicast_ether_addr(ehdr-**>h_dest)) {
>> + return VMXNET3_PKT_MCAST;
>> + } else { /* unicast */
>> + return VMXNET3_PKT_UCAST;
>> + }
>> +}
>> +
>> +static inline uint32_t
>> +eth_get_l2_hdr_length(const void *p)
>> +{
>> + uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)**->h_proto);
>> + struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p);
>> + switch (proto) {
>> + case ETH_P_VLAN:
>> + return sizeof(struct eth_header) + sizeof(struct vlan_header);
>> + case ETH_P_DVLAN:
>> + if (ETH_P_VLAN == hvlan->h_proto) {
>> + return sizeof(struct eth_header) + 2*sizeof(struct
>> vlan_header);
>> + } else {
>> + return sizeof(struct eth_header) + sizeof(struct
>> vlan_header);
>> + }
>> + default:
>> + return sizeof(struct eth_header);
>> + }
>> +}
>> +
>> +static inline uint16_t
>> +eth_get_pkt_vlan_tag(const void *p)
>> +{
>> + uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)**->h_proto);
>> + struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p);
>> + switch (proto) {
>> + case ETH_P_VLAN:
>> + case ETH_P_DVLAN:
>> + return be16_to_cpu(hvlan->h_proto);
>> + default:
>> + return 0;
>> + }
>> +}
>> +
>> +static inline bool
>> +eth_strip_vlan(const void *p, struct eth_header *new_ehdr,
>> + uint16_t *payload_offset, uint16_t *vlan_tag)
>> +{
>> + uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)**->h_proto);
>> + struct vlan_header *hvlan = PKT_GET_VLAN_HDR(p);
>> +
>> + switch (proto) {
>> + case ETH_P_VLAN:
>> + case ETH_P_DVLAN:
>> + memcpy(new_ehdr->h_source, PKT_GET_ETH_HDR(p)->h_source,
>> ETH_ALEN);
>> + memcpy(new_ehdr->h_dest, PKT_GET_ETH_HDR(p)->h_dest, ETH_ALEN);
>> + new_ehdr->h_proto = hvlan->h_proto;
>> + *vlan_tag = be16_to_cpu(hvlan->h_tci);
>> + *payload_offset =
>> + sizeof(struct eth_header) + sizeof(struct vlan_header);
>> + return true;
>> + default:
>> + return false;
>> + }
>> +}
>> +
>> +static inline uint16_t
>> +eth_get_l3_proto(const void *l2hdr, size_t l2hdr_len)
>> +{
>> + uint8_t *proto_ptr = (uint8_t *) l2hdr + l2hdr_len -
>> sizeof(uint16_t);
>> + return be16_to_cpup((uint16_t *)proto_ptr);
>> +}
>> +
>> +static inline void
>> +eth_put_csum(uint8_t *buf, uint32_t cso, uint16_t csum)
>> +{
>> + cpu_to_be16wu((uint16_t *)(buf + cso), csum);
>> +}
>> +
>> +void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag);
>> +
>> +
>> +bool eth_setup_tx_offloads(uint8_t *l3hdr,
>> + size_t l3hdr_len,
>> + size_t l3hdr_off,
>> + uint32_t l3payload_len,
>> + struct virtio_net_hdr *vhdr,
>> + bool more_frags,
>> + uint16_t fragmentation_offset);
>> +
>> +uint8_t eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr);
>> +
>> +void eth_get_protocols(const uint8_t *headers,
>> + uint32_t hdr_length,
>> + bool *isip4, bool *isip6,
>> + bool *isudp, bool *istcp);
>> diff --git a/qemu/net.c b/qemu/net.c
>> index c34474f..e2f586c 100644
>> --- a/qemu/net.c
>> +++ b/qemu/net.c
>> @@ -857,7 +857,7 @@ static const struct {
>> }, {
>> .name = "model",
>> .type = QEMU_OPT_STRING,
>> - .help = "device model (e1000, rtl8139, virtio etc.)",
>> + .help = "device model (e1000, rtl8139, virtio, vmxnet3
>> etc.)",
>> }, {
>> .name = "addr",
>> .type = QEMU_OPT_STRING,
>> diff --git a/qemu/net/checksum.h b/qemu/net/checksum.h
>> index 1f05298..5f42a02 100644
>> --- a/qemu/net/checksum.h
>> +++ b/qemu/net/checksum.h
>> @@ -26,4 +26,11 @@ uint16_t net_checksum_tcpudp(uint16_t length, uint16_t
>> proto,
>> uint8_t *addrs, uint8_t *buf);
>> void net_checksum_calculate(uint8_t *data, int length);
>>
>> +static inline uint16_t
>> +net_raw_checksum(uint8_t *data, int length)
>> +{
>> + return net_checksum_finish(net_**checksum_add(length, data));
>> +}
>> +
>> +
>> #endif /* QEMU_NET_CHECKSUM_H */
>> --
>> 1.7.7.6
>>
>>
>>
>>
[-- Attachment #2: Type: text/html, Size: 166463 bytes --]
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2012-03-04 17:10 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-02-29 12:49 [Qemu-devel] [PATCH v2] VMXNET3 paravirtual NIC device implementation Dmitry Fleytman
2012-03-01 11:48 ` Michael Tokarev
2012-03-01 13:31 ` Dmitry Fleytman
2012-03-03 16:55 ` Gerhard Wiesinger
2012-03-04 17:09 ` Dmitry Fleytman
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).