From: Jason Wang <jasowang@redhat.com>
To: qemu-devel@nongnu.org, peter.maydell@linaro.org
Cc: Akihiko Odaki <akihiko.odaki@daynix.com>,
Jason Wang <jasowang@redhat.com>
Subject: [PULL V2 24/44] e1000e: Perform software segmentation for loopback
Date: Fri, 10 Mar 2023 17:35:06 +0800 [thread overview]
Message-ID: <20230310093526.30828-25-jasowang@redhat.com> (raw)
In-Reply-To: <20230310093526.30828-1-jasowang@redhat.com>
From: Akihiko Odaki <akihiko.odaki@daynix.com>
e1000e didn't perform software segmentation for loopback when the
virtio-net header is enabled, which is wrong.
To fix the problem, introduce net_tx_pkt_send_custom(), which allows the
caller to specify whether offloading should be assumed or not.
net_tx_pkt_send_custom() also allows the caller to provide a custom
sending function. Packets with virtio-net headers and ones without
virtio-net headers will be provided at the same time so the function
can choose the preferred version. In case of e1000e loopback, it prefers
to have virtio-net headers as they allow it to skip the checksum
verification if VIRTIO_NET_HDR_F_DATA_VALID is set.
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
hw/net/e1000e_core.c | 27 +++++++++++++++++--
hw/net/net_rx_pkt.c | 7 +++++
hw/net/net_rx_pkt.h | 8 ++++++
hw/net/net_tx_pkt.c | 76 +++++++++++++++++++++++++---------------------------
hw/net/net_tx_pkt.h | 21 ++++++++-------
5 files changed, 88 insertions(+), 51 deletions(-)
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index 95245c4..ff93547 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -61,6 +61,10 @@ union e1000_rx_desc_union {
union e1000_rx_desc_packet_split packet_split;
};
+static ssize_t
+e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt,
+ bool has_vnet);
+
static inline void
e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val);
@@ -655,6 +659,15 @@ e1000e_setup_tx_offloads(E1000ECore *core, struct e1000e_tx *tx)
return true;
}
+static void e1000e_tx_pkt_callback(void *core,
+ const struct iovec *iov,
+ int iovcnt,
+ const struct iovec *virt_iov,
+ int virt_iovcnt)
+{
+ e1000e_receive_internal(core, virt_iov, virt_iovcnt, true);
+}
+
static bool
e1000e_tx_pkt_send(E1000ECore *core, struct e1000e_tx *tx, int queue_index)
{
@@ -669,7 +682,8 @@ e1000e_tx_pkt_send(E1000ECore *core, struct e1000e_tx *tx, int queue_index)
if ((core->phy[0][MII_BMCR] & MII_BMCR_LOOPBACK) ||
((core->mac[RCTL] & E1000_RCTL_LBM_MAC) == E1000_RCTL_LBM_MAC)) {
- return net_tx_pkt_send_loopback(tx->tx_pkt, queue);
+ return net_tx_pkt_send_custom(tx->tx_pkt, false,
+ e1000e_tx_pkt_callback, core);
} else {
return net_tx_pkt_send(tx->tx_pkt, queue);
}
@@ -1675,6 +1689,13 @@ e1000e_rx_fix_l4_csum(E1000ECore *core, struct NetRxPkt *pkt)
ssize_t
e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt)
{
+ return e1000e_receive_internal(core, iov, iovcnt, core->has_vnet);
+}
+
+static ssize_t
+e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt,
+ bool has_vnet)
+{
static const int maximum_ethernet_hdr_len = (ETH_HLEN + 4);
uint32_t n = 0;
@@ -1696,9 +1717,11 @@ e1000e_receive_iov(E1000ECore *core, const struct iovec *iov, int iovcnt)
}
/* Pull virtio header in */
- if (core->has_vnet) {
+ if (has_vnet) {
net_rx_pkt_set_vhdr_iovec(core->rx_pkt, iov, iovcnt);
iov_ofs = sizeof(struct virtio_net_hdr);
+ } else {
+ net_rx_pkt_unset_vhdr(core->rx_pkt);
}
filter_buf = iov->iov_base + iov_ofs;
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
index b309c2f..a53e756 100644
--- a/hw/net/net_rx_pkt.c
+++ b/hw/net/net_rx_pkt.c
@@ -463,6 +463,13 @@ void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt,
iov_to_buf(iov, iovcnt, 0, &pkt->virt_hdr, sizeof pkt->virt_hdr);
}
+void net_rx_pkt_unset_vhdr(struct NetRxPkt *pkt)
+{
+ assert(pkt);
+
+ memset(&pkt->virt_hdr, 0, sizeof(pkt->virt_hdr));
+}
+
bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt)
{
assert(pkt);
diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h
index 7277907..8b69ddb 100644
--- a/hw/net/net_rx_pkt.h
+++ b/hw/net/net_rx_pkt.h
@@ -313,6 +313,14 @@ void net_rx_pkt_set_vhdr_iovec(struct NetRxPkt *pkt,
const struct iovec *iov, int iovcnt);
/**
+ * unset vhdr data from packet context
+ *
+ * @pkt: packet
+ *
+ */
+void net_rx_pkt_unset_vhdr(struct NetRxPkt *pkt);
+
+/**
* save packet type in packet context
*
* @pkt: packet
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index cf46c84..6afd3f6 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -53,8 +53,6 @@ struct NetTxPkt {
uint16_t hdr_len;
eth_pkt_types_e packet_type;
uint8_t l4proto;
-
- bool is_loopback;
};
void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev,
@@ -508,12 +506,6 @@ static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt)
iov_from_buf(iov, iov_len, csum_offset, &csum, sizeof csum);
}
-enum {
- NET_TX_PKT_FRAGMENT_L2_HDR_POS = 0,
- NET_TX_PKT_FRAGMENT_L3_HDR_POS,
- NET_TX_PKT_FRAGMENT_HEADER_NUM
-};
-
#define NET_MAX_FRAG_SG_LIST (64)
static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
@@ -522,7 +514,7 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
size_t fetched = 0;
struct iovec *src = pkt->vec;
- *dst_idx = NET_TX_PKT_FRAGMENT_HEADER_NUM;
+ *dst_idx = NET_TX_PKT_PL_START_FRAG;
while (fetched < IP_FRAG_ALIGN_SIZE(pkt->virt_hdr.gso_size)) {
@@ -555,18 +547,22 @@ static size_t net_tx_pkt_fetch_fragment(struct NetTxPkt *pkt,
return fetched;
}
-static inline void net_tx_pkt_sendv(struct NetTxPkt *pkt,
- NetClientState *nc, const struct iovec *iov, int iov_cnt)
+static void net_tx_pkt_sendv(
+ void *opaque, const struct iovec *iov, int iov_cnt,
+ const struct iovec *virt_iov, int virt_iov_cnt)
{
- if (pkt->is_loopback) {
- qemu_receive_packet_iov(nc, iov, iov_cnt);
+ NetClientState *nc = opaque;
+
+ if (qemu_get_using_vnet_hdr(nc->peer)) {
+ qemu_sendv_packet(nc, virt_iov, virt_iov_cnt);
} else {
qemu_sendv_packet(nc, iov, iov_cnt);
}
}
static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
- NetClientState *nc)
+ NetTxPktCallback callback,
+ void *context)
{
struct iovec fragment[NET_MAX_FRAG_SG_LIST];
size_t fragment_len = 0;
@@ -578,6 +574,10 @@ static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
int src_idx = NET_TX_PKT_PL_START_FRAG, dst_idx;
size_t src_offset = 0;
size_t fragment_offset = 0;
+ struct virtio_net_hdr virt_hdr = {
+ .flags = pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM ?
+ VIRTIO_NET_HDR_F_DATA_VALID : 0
+ };
l2_iov_base = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base;
l2_iov_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len;
@@ -585,10 +585,12 @@ static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
l3_iov_len = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len;
/* Copy headers */
- fragment[NET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_base = l2_iov_base;
- fragment[NET_TX_PKT_FRAGMENT_L2_HDR_POS].iov_len = l2_iov_len;
- fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_base = l3_iov_base;
- fragment[NET_TX_PKT_FRAGMENT_L3_HDR_POS].iov_len = l3_iov_len;
+ fragment[NET_TX_PKT_VHDR_FRAG].iov_base = &virt_hdr;
+ fragment[NET_TX_PKT_VHDR_FRAG].iov_len = sizeof(virt_hdr);
+ fragment[NET_TX_PKT_L2HDR_FRAG].iov_base = l2_iov_base;
+ fragment[NET_TX_PKT_L2HDR_FRAG].iov_len = l2_iov_len;
+ fragment[NET_TX_PKT_L3HDR_FRAG].iov_base = l3_iov_base;
+ fragment[NET_TX_PKT_L3HDR_FRAG].iov_len = l3_iov_len;
/* Put as much data as possible and send */
@@ -603,7 +605,9 @@ static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
eth_fix_ip4_checksum(l3_iov_base, l3_iov_len);
- net_tx_pkt_sendv(pkt, nc, fragment, dst_idx);
+ callback(context,
+ fragment + NET_TX_PKT_L2HDR_FRAG, dst_idx - NET_TX_PKT_L2HDR_FRAG,
+ fragment + NET_TX_PKT_VHDR_FRAG, dst_idx - NET_TX_PKT_VHDR_FRAG);
fragment_offset += fragment_len;
@@ -614,12 +618,16 @@ static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc)
{
- bool using_vnet_hdr = qemu_get_using_vnet_hdr(nc->peer);
+ bool offload = qemu_get_using_vnet_hdr(nc->peer);
+ return net_tx_pkt_send_custom(pkt, offload, net_tx_pkt_sendv, nc);
+}
+bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
+ NetTxPktCallback callback, void *context)
+{
assert(pkt);
- if (!using_vnet_hdr &&
- pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+ if (!offload && pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
net_tx_pkt_do_sw_csum(pkt);
}
@@ -635,28 +643,16 @@ bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc)
}
}
- if (using_vnet_hdr ||
- pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) {
- int index = using_vnet_hdr ?
- NET_TX_PKT_VHDR_FRAG : NET_TX_PKT_L2HDR_FRAG;
+ if (offload || pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) {
net_tx_pkt_fix_ip6_payload_len(pkt);
- net_tx_pkt_sendv(pkt, nc, pkt->vec + index,
- pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - index);
+ callback(context, pkt->vec + NET_TX_PKT_L2HDR_FRAG,
+ pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - NET_TX_PKT_L2HDR_FRAG,
+ pkt->vec + NET_TX_PKT_VHDR_FRAG,
+ pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - NET_TX_PKT_VHDR_FRAG);
return true;
}
- return net_tx_pkt_do_sw_fragmentation(pkt, nc);
-}
-
-bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc)
-{
- bool res;
-
- pkt->is_loopback = true;
- res = net_tx_pkt_send(pkt, nc);
- pkt->is_loopback = false;
-
- return res;
+ return net_tx_pkt_do_sw_fragmentation(pkt, callback, context);
}
void net_tx_pkt_fix_ip6_payload_len(struct NetTxPkt *pkt)
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
index 8d3faa4..f57b4e0 100644
--- a/hw/net/net_tx_pkt.h
+++ b/hw/net/net_tx_pkt.h
@@ -26,6 +26,8 @@
struct NetTxPkt;
+typedef void (* NetTxPktCallback)(void *, const struct iovec *, int, const struct iovec *, int);
+
/**
* Init function for tx packet functionality
*
@@ -161,15 +163,16 @@ void net_tx_pkt_reset(struct NetTxPkt *pkt);
bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc);
/**
-* Redirect packet directly to receive path (emulate loopback phy).
-* Handles sw offloads if vhdr is not supported.
-*
-* @pkt: packet
-* @nc: NetClientState
-* @ret: operation result
-*
-*/
-bool net_tx_pkt_send_loopback(struct NetTxPkt *pkt, NetClientState *nc);
+ * Send packet with a custom function.
+ *
+ * @pkt: packet
+ * @offload: whether the callback implements offloading
+ * @callback: a function to be called back for each transformed packet
+ * @context: a pointer to be passed to the callback.
+ * @ret: operation result
+ */
+bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
+ NetTxPktCallback callback, void *context);
/**
* parse raw packet data and analyze offload requirements.
--
2.7.4
next prev parent reply other threads:[~2023-03-10 9:43 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-03-10 9:34 [PULL V2 00/44] Net patches Jason Wang
2023-03-10 9:34 ` [PULL V2 01/44] e1000e: Fix the code style Jason Wang
2023-03-10 9:34 ` [PULL V2 02/44] hw/net: Add more MII definitions Jason Wang
2023-03-10 9:34 ` [PULL V2 03/44] fsl_etsec: Use hw/net/mii.h Jason Wang
2023-03-10 9:34 ` [PULL V2 04/44] e1000: " Jason Wang
2023-03-10 9:34 ` [PULL V2 05/44] e1000: Mask registers when writing Jason Wang
2023-03-10 9:34 ` [PULL V2 06/44] e1000e: Introduce E1000E_LOW_BITS_SET_FUNC Jason Wang
2023-03-10 9:34 ` [PULL V2 07/44] e1000e: Mask registers when writing Jason Wang
2023-03-10 9:34 ` [PULL V2 08/44] e1000: Use more constant definitions Jason Wang
2023-03-10 9:34 ` [PULL V2 09/44] e1000e: " Jason Wang
2023-03-10 9:34 ` [PULL V2 10/44] e1000: Use memcpy to intialize registers Jason Wang
2023-03-10 9:34 ` [PULL V2 11/44] e1000e: " Jason Wang
2023-03-10 9:34 ` [PULL V2 12/44] e1000e: Remove pending interrupt flags Jason Wang
2023-03-10 9:34 ` [PULL V2 13/44] e1000e: Improve software reset Jason Wang
2023-03-10 9:34 ` [PULL V2 14/44] e1000: Configure ResettableClass Jason Wang
2023-03-10 9:34 ` [PULL V2 15/44] e1000e: " Jason Wang
2023-03-10 9:34 ` [PULL V2 16/44] e1000e: Introduce e1000_rx_desc_union Jason Wang
2023-03-10 9:34 ` [PULL V2 17/44] e1000e: Set MII_ANER_NWAY Jason Wang
2023-03-10 9:35 ` [PULL V2 18/44] e1000e: Remove extra pointer indirection Jason Wang
2023-03-10 9:35 ` [PULL V2 19/44] net: Check L4 header size Jason Wang
2023-03-10 9:35 ` [PULL V2 20/44] e1000x: Alter the signature of e1000x_is_vlan_packet Jason Wang
2023-03-10 9:35 ` [PULL V2 21/44] net: Strip virtio-net header when dumping Jason Wang
2023-03-10 9:35 ` [PULL V2 22/44] hw/net/net_tx_pkt: Automatically determine if virtio-net header is used Jason Wang
2023-03-10 9:35 ` [PULL V2 23/44] hw/net/net_rx_pkt: Remove net_rx_pkt_has_virt_hdr Jason Wang
2023-03-10 9:35 ` Jason Wang [this message]
2023-03-10 9:35 ` [PULL V2 25/44] hw/net/net_tx_pkt: Implement TCP segmentation Jason Wang
2023-03-10 9:35 ` [PULL V2 26/44] hw/net/net_tx_pkt: Check the payload length Jason Wang
2023-03-10 9:35 ` [PULL V2 27/44] e1000e: Do not assert when MSI-X is disabled later Jason Wang
2023-03-10 9:35 ` [PULL V2 28/44] MAINTAINERS: Add Akihiko Odaki as a e1000e reviewer Jason Wang
2023-03-10 9:35 ` [PULL V2 29/44] MAINTAINERS: Add e1000e test files Jason Wang
2023-03-10 9:35 ` [PULL V2 30/44] e1000e: Combine rx traces Jason Wang
2023-03-10 9:35 ` [PULL V2 31/44] e1000: Count CRC in Tx statistics Jason Wang
2023-03-10 9:35 ` [PULL V2 32/44] e1000e: " Jason Wang
2023-03-10 9:35 ` [PULL V2 33/44] net/eth: Report if headers are actually present Jason Wang
2023-03-10 9:35 ` [PULL V2 34/44] e1000e: Implement system clock Jason Wang
2023-03-10 9:35 ` [PULL V2 35/44] net/eth: Introduce EthL4HdrProto Jason Wang
2023-03-10 9:35 ` [PULL V2 36/44] pcie: Introduce pcie_sriov_num_vfs Jason Wang
2023-03-10 9:35 ` [PULL V2 37/44] e1000: Split header files Jason Wang
2023-03-10 9:35 ` [PULL V2 38/44] Intrdocue igb device emulation Jason Wang
2023-03-10 9:35 ` [PULL V2 39/44] tests/qtest/e1000e-test: Fabricate ethernet header Jason Wang
2023-03-10 9:35 ` [PULL V2 40/44] tests/qtest/libqos/e1000e: Export macreg functions Jason Wang
2023-03-10 9:35 ` [PULL V2 41/44] igb: Introduce qtest for igb device Jason Wang
2023-03-10 9:35 ` [PULL V2 42/44] tests/avocado: Add igb test Jason Wang
2023-03-10 9:35 ` [PULL V2 43/44] docs/system/devices/igb: Add igb documentation Jason Wang
2023-03-10 9:35 ` [PULL V2 44/44] ebpf: fix compatibility with libbpf 1.0+ Jason Wang
2023-03-12 10:56 ` [PULL V2 00/44] Net patches Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230310093526.30828-25-jasowang@redhat.com \
--to=jasowang@redhat.com \
--cc=akihiko.odaki@daynix.com \
--cc=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).