From: Jason Wang <jasowang@redhat.com>
To: qemu-devel@nongnu.org
Cc: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>,
Akihiko Odaki <akihiko.odaki@daynix.com>,
Jason Wang <jasowang@redhat.com>
Subject: [PULL 08/17] igb: RX payload guest writting refactoring
Date: Fri, 8 Sep 2023 14:44:58 +0800 [thread overview]
Message-ID: <20230908064507.14596-9-jasowang@redhat.com> (raw)
In-Reply-To: <20230908064507.14596-1-jasowang@redhat.com>
From: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>
Refactoring is done in preparation for support of multiple advanced
descriptors RX modes, especially packet-split modes.
Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>
Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Tested-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
hw/net/e1000e_core.c | 18 ++--
hw/net/igb_core.c | 213 ++++++++++++++++++++++++++++++-----------------
tests/qtest/libqos/igb.c | 5 ++
3 files changed, 150 insertions(+), 86 deletions(-)
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index 91aae37..cc243b7 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core,
}
static void
-e1000e_write_to_rx_buffers(E1000ECore *core,
- hwaddr ba[MAX_PS_BUFFERS],
- e1000e_ba_state *bastate,
- const char *data,
- dma_addr_t data_len)
+e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core,
+ hwaddr ba[MAX_PS_BUFFERS],
+ e1000e_ba_state *bastate,
+ const char *data,
+ dma_addr_t data_len)
{
while (data_len > 0) {
uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx];
@@ -1594,8 +1594,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
while (copy_size) {
iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
- e1000e_write_to_rx_buffers(core, ba, &bastate,
- iov->iov_base + iov_ofs, iov_copy);
+ e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
+ iov->iov_base +
+ iov_ofs,
+ iov_copy);
copy_size -= iov_copy;
iov_ofs += iov_copy;
@@ -1607,7 +1609,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
if (desc_offset + desc_size >= total_size) {
/* Simulate FCS checksum presence in the last descriptor */
- e1000e_write_to_rx_buffers(core, ba, &bastate,
+ e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
(const char *) &fcs_pad, e1000x_fcs_len(core->mac));
}
}
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index e140358..6d2712e 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -941,6 +941,14 @@ igb_has_rxbufs(IGBCore *core, const E1000ERingInfo *r, size_t total_size)
bufsize;
}
+static uint32_t
+igb_rxhdrbufsize(IGBCore *core, const E1000ERingInfo *r)
+{
+ uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2];
+ return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >>
+ E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+}
+
void
igb_start_recv(IGBCore *core)
{
@@ -1231,6 +1239,21 @@ igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
*buff_addr = le64_to_cpu(desc->read.pkt_addr);
}
+typedef struct IGBPacketRxDMAState {
+ size_t size;
+ size_t total_size;
+ size_t ps_hdr_len;
+ size_t desc_size;
+ size_t desc_offset;
+ uint32_t rx_desc_packet_buf_size;
+ uint32_t rx_desc_header_buf_size;
+ struct iovec *iov;
+ size_t iov_ofs;
+ bool is_first;
+ uint16_t written;
+ hwaddr ba;
+} IGBPacketRxDMAState;
+
static inline void
igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
hwaddr *buff_addr)
@@ -1515,19 +1538,6 @@ igb_pci_dma_write_rx_desc(IGBCore *core, PCIDevice *dev, dma_addr_t addr,
}
static void
-igb_write_to_rx_buffers(IGBCore *core,
- PCIDevice *d,
- hwaddr ba,
- uint16_t *written,
- const char *data,
- dma_addr_t data_len)
-{
- trace_igb_rx_desc_buff_write(ba, *written, data, data_len);
- pci_dma_write(d, ba + *written, data, data_len);
- *written += data_len;
-}
-
-static void
igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi,
size_t pkt_size, size_t pkt_fcs_size)
{
@@ -1553,6 +1563,93 @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi)
}
static void
+igb_truncate_to_descriptor_size(IGBPacketRxDMAState *pdma_st, size_t *size)
+{
+ if (*size > pdma_st->rx_desc_packet_buf_size) {
+ *size = pdma_st->rx_desc_packet_buf_size;
+ }
+}
+
+static void
+igb_write_payload_frag_to_rx_buffers(IGBCore *core,
+ PCIDevice *d,
+ hwaddr ba,
+ uint16_t *written,
+ uint32_t cur_buf_len,
+ const char *data,
+ dma_addr_t data_len)
+{
+ trace_igb_rx_desc_buff_write(ba, *written, data, data_len);
+ pci_dma_write(d, ba + *written, data, data_len);
+ *written += data_len;
+}
+
+static void
+igb_write_payload_to_rx_buffers(IGBCore *core,
+ struct NetRxPkt *pkt,
+ PCIDevice *d,
+ IGBPacketRxDMAState *pdma_st,
+ size_t *copy_size)
+{
+ static const uint32_t fcs_pad;
+ size_t iov_copy;
+
+ /* Copy packet payload */
+ while (*copy_size) {
+ iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs);
+ igb_write_payload_frag_to_rx_buffers(core, d,
+ pdma_st->ba,
+ &pdma_st->written,
+ pdma_st->rx_desc_packet_buf_size,
+ pdma_st->iov->iov_base +
+ pdma_st->iov_ofs,
+ iov_copy);
+
+ *copy_size -= iov_copy;
+ pdma_st->iov_ofs += iov_copy;
+ if (pdma_st->iov_ofs == pdma_st->iov->iov_len) {
+ pdma_st->iov++;
+ pdma_st->iov_ofs = 0;
+ }
+ }
+
+ if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) {
+ /* Simulate FCS checksum presence in the last descriptor */
+ igb_write_payload_frag_to_rx_buffers(core, d,
+ pdma_st->ba,
+ &pdma_st->written,
+ pdma_st->rx_desc_packet_buf_size,
+ (const char *) &fcs_pad,
+ e1000x_fcs_len(core->mac));
+ }
+}
+
+static void
+igb_write_to_rx_buffers(IGBCore *core,
+ struct NetRxPkt *pkt,
+ PCIDevice *d,
+ IGBPacketRxDMAState *pdma_st)
+{
+ size_t copy_size;
+
+ if (!pdma_st->ba) {
+ /* as per intel docs; skip descriptors with null buf addr */
+ trace_e1000e_rx_null_descriptor();
+ return;
+ }
+
+ if (pdma_st->desc_offset >= pdma_st->size) {
+ return;
+ }
+
+ pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset;
+ igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size);
+ copy_size = pdma_st->size - pdma_st->desc_offset;
+ igb_truncate_to_descriptor_size(pdma_st, ©_size);
+ igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, ©_size);
+}
+
+static void
igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
const E1000E_RxRing *rxr,
const E1000E_RSSInfo *rss_info,
@@ -1561,91 +1658,51 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
PCIDevice *d;
dma_addr_t base;
union e1000_rx_desc_union desc;
- size_t desc_size;
- size_t desc_offset = 0;
- size_t iov_ofs = 0;
-
- struct iovec *iov = net_rx_pkt_get_iovec(pkt);
- size_t size = net_rx_pkt_get_total_len(pkt);
- size_t total_size = size + e1000x_fcs_len(core->mac);
- const E1000ERingInfo *rxi = rxr->i;
- size_t bufsize = igb_rxbufsize(core, rxi);
-
+ const E1000ERingInfo *rxi;
+ size_t rx_desc_len;
+
+ IGBPacketRxDMAState pdma_st = {0};
+ pdma_st.is_first = true;
+ pdma_st.size = net_rx_pkt_get_total_len(pkt);
+ pdma_st.total_size = pdma_st.size + e1000x_fcs_len(core->mac);
+
+ rxi = rxr->i;
+ rx_desc_len = core->rx_desc_len;
+ pdma_st.rx_desc_packet_buf_size = igb_rxbufsize(core, rxi);
+ pdma_st.rx_desc_header_buf_size = igb_rxhdrbufsize(core, rxi);
+ pdma_st.iov = net_rx_pkt_get_iovec(pkt);
d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8);
if (!d) {
d = core->owner;
}
do {
- hwaddr ba;
- uint16_t written = 0;
+ pdma_st.written = 0;
bool is_last = false;
- desc_size = total_size - desc_offset;
-
- if (desc_size > bufsize) {
- desc_size = bufsize;
- }
-
if (igb_ring_empty(core, rxi)) {
return;
}
base = igb_ring_head_descr(core, rxi);
+ pci_dma_read(d, base, &desc, rx_desc_len);
+ trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len);
- pci_dma_read(d, base, &desc, core->rx_desc_len);
-
- trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len);
-
- igb_read_rx_descr(core, &desc, &ba);
-
- if (ba) {
- if (desc_offset < size) {
- static const uint32_t fcs_pad;
- size_t iov_copy;
- size_t copy_size = size - desc_offset;
- if (copy_size > bufsize) {
- copy_size = bufsize;
- }
-
- /* Copy packet payload */
- while (copy_size) {
- iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
-
- igb_write_to_rx_buffers(core, d, ba, &written,
- iov->iov_base + iov_ofs, iov_copy);
+ igb_read_rx_descr(core, &desc, &pdma_st.ba);
- copy_size -= iov_copy;
- iov_ofs += iov_copy;
- if (iov_ofs == iov->iov_len) {
- iov++;
- iov_ofs = 0;
- }
- }
-
- if (desc_offset + desc_size >= total_size) {
- /* Simulate FCS checksum presence in the last descriptor */
- igb_write_to_rx_buffers(core, d, ba, &written,
- (const char *) &fcs_pad, e1000x_fcs_len(core->mac));
- }
- }
- } else { /* as per intel docs; skip descriptors with null buf addr */
- trace_e1000e_rx_null_descriptor();
- }
- desc_offset += desc_size;
- if (desc_offset >= total_size) {
+ igb_write_to_rx_buffers(core, pkt, d, &pdma_st);
+ pdma_st.desc_offset += pdma_st.desc_size;
+ if (pdma_st.desc_offset >= pdma_st.total_size) {
is_last = true;
}
igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL,
- rss_info, etqf, ts, written);
- igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len);
-
- igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN);
-
- } while (desc_offset < total_size);
+ rss_info, etqf, ts, pdma_st.written);
+ igb_pci_dma_write_rx_desc(core, d, base, &desc, rx_desc_len);
+ igb_ring_advance(core, rxi, rx_desc_len / E1000_MIN_RX_DESC_LEN);
+ } while (pdma_st.desc_offset < pdma_st.total_size);
- igb_update_rx_stats(core, rxi, size, total_size);
+ igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size);
}
static bool
diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c
index a603468..f40c4ec 100644
--- a/tests/qtest/libqos/igb.c
+++ b/tests/qtest/libqos/igb.c
@@ -109,6 +109,11 @@ static void igb_pci_start_hw(QOSGraphObject *obj)
E1000_RAH_AV | E1000_RAH_POOL_1 |
le16_to_cpu(*(uint16_t *)(address + 4)));
+ /* Set supported receive descriptor mode */
+ e1000e_macreg_write(&d->e1000e,
+ E1000_SRRCTL(0),
+ E1000_SRRCTL_DESCTYPE_ADV_ONEBUF);
+
/* Enable receive */
e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN);
e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN);
--
2.7.4
next prev parent reply other threads:[~2023-09-08 6:47 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-09-08 6:44 [PULL 00/17] Net patches Jason Wang
2023-09-08 6:44 ` [PULL 01/17] tap: Add USO support to tap device Jason Wang
2023-09-08 6:44 ` [PULL 02/17] tap: Add check for USO features Jason Wang
2023-09-08 6:44 ` [PULL 03/17] virtio-net: Add USO flags to vhost support Jason Wang
2023-09-08 6:44 ` [PULL 04/17] virtio-net: Add support for USO features Jason Wang
2024-05-16 13:43 ` Fiona Ebner
2024-05-17 0:47 ` Jason Wang
2023-09-08 6:44 ` [PULL 05/17] igb: remove TCP ACK detection Jason Wang
2023-09-08 6:44 ` [PULL 06/17] igb: rename E1000E_RingInfo_st Jason Wang
2023-09-08 6:44 ` [PULL 07/17] igb: RX descriptors guest writting refactoring Jason Wang
2023-09-08 6:44 ` Jason Wang [this message]
2023-09-08 6:44 ` [PULL 09/17] igb: add IPv6 extended headers traffic detection Jason Wang
2023-09-08 6:45 ` [PULL 10/17] igb: packet-split descriptors support Jason Wang
2023-09-08 6:45 ` [PULL 11/17] e1000e: rename e1000e_ba_state and e1000e_write_hdr_to_rx_buffers Jason Wang
2023-09-08 6:45 ` [PULL 12/17] net: add initial support for AF_XDP network backend Jason Wang
2023-09-08 11:48 ` Daniel P. Berrangé
2023-09-08 11:55 ` Ilya Maximets
2023-09-08 6:45 ` [PULL 13/17] ebpf: Added eBPF map update through mmap Jason Wang
2023-09-08 6:45 ` [PULL 14/17] ebpf: Added eBPF initialization by fds Jason Wang
2023-09-08 6:45 ` [PULL 15/17] virtio-net: Added property to load eBPF RSS with fds Jason Wang
2023-09-08 6:45 ` [PULL 16/17] qmp: Added new command to retrieve eBPF blob Jason Wang
2023-09-08 6:45 ` [PULL 17/17] ebpf: Updated eBPF program and skeleton Jason Wang
2023-09-08 11:19 ` [PULL 00/17] Net patches Stefan Hajnoczi
2023-09-08 11:34 ` Ilya Maximets
2023-09-08 11:49 ` Daniel P. Berrangé
2023-09-08 12:00 ` Ilya Maximets
2023-09-08 12:15 ` Daniel P. Berrangé
2023-09-08 14:06 ` Ilya Maximets
2023-09-08 14:15 ` Daniel P. Berrangé
2023-09-13 18:46 ` Ilya Maximets
2023-09-14 8:13 ` Daniel P. Berrangé
2023-09-18 19:36 ` Ilya Maximets
2023-09-19 8:40 ` Daniel P. Berrangé
2023-09-19 9:39 ` Ilya Maximets
2023-09-19 10:03 ` Daniel P. Berrangé
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230908064507.14596-9-jasowang@redhat.com \
--to=jasowang@redhat.com \
--cc=akihiko.odaki@daynix.com \
--cc=qemu-devel@nongnu.org \
--cc=t.dzieciol@partner.samsung.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).