* [PATCH v3 0/2] igb: packet-split descriptors support [not found] <CGME20230427104749eucas1p19480869211eed34117a518f3c3800946@eucas1p1.samsung.com> @ 2023-04-27 10:47 ` Tomasz Dzieciol [not found] ` <CGME20230427104750eucas1p1158eee5a37c71cacaea021a7abbd6ace@eucas1p1.samsung.com> [not found] ` <CGME20230427104750eucas1p1eb8fb7fac00cc13dea5e4a7e0df5c113@eucas1p1.samsung.com> 0 siblings, 2 replies; 12+ messages in thread From: Tomasz Dzieciol @ 2023-04-27 10:47 UTC (permalink / raw) To: qemu-devel, akihiko.odaki Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki Based-on: <20230423041833.5302-1-akihiko.odaki@daynix.com> ("[PATCH v3 00/47] igb: Fix for DPDK") This series of patches introduces packet-split RX descriptors support. This feature is used by Linux VF driver for MTU values from 2048. First patch makes RX descriptors handling cleanup. Second patch introduces feature itself. Tomasz Dzieciol (2): igb: RX descriptors handling cleanup igb: packet-split descriptors support hw/net/e1000e_core.c | 18 +- hw/net/e1000x_regs.h | 1 + hw/net/igb_core.c | 728 ++++++++++++++++++++++++++++----------- hw/net/igb_regs.h | 18 +- hw/net/trace-events | 6 +- tests/qtest/libqos/igb.c | 3 + 6 files changed, 567 insertions(+), 207 deletions(-) -- 2.25.1 ^ permalink raw reply [flat|nested] 12+ messages in thread
[parent not found: <CGME20230427104750eucas1p1158eee5a37c71cacaea021a7abbd6ace@eucas1p1.samsung.com>]
* [PATCH v3 1/2] igb: RX descriptors handling cleanup [not found] ` <CGME20230427104750eucas1p1158eee5a37c71cacaea021a7abbd6ace@eucas1p1.samsung.com> @ 2023-04-27 10:47 ` Tomasz Dzieciol 2023-04-28 10:31 ` Akihiko Odaki 2023-04-30 11:57 ` Sriram Yagnaraman 0 siblings, 2 replies; 12+ messages in thread From: Tomasz Dzieciol @ 2023-04-27 10:47 UTC (permalink / raw) To: qemu-devel, akihiko.odaki Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki Format of Intel 82576 was changed in comparison to Intel 82574 extended descriptors. This change updates filling of advanced descriptors fields accordingly: * remove TCP ACK detection * add IPv6 with extensions traffic detection * fragment checksum and IP ID is filled only when RXCSUM.IPPCSE is set (in addition to RXCSUM.PCSD bit cleared condition) Refactoring is done in preparation for support of multiple advanced descriptors RX modes, especially packet-split modes. Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com> --- hw/net/e1000e_core.c | 18 +- hw/net/e1000x_regs.h | 1 + hw/net/igb_core.c | 478 ++++++++++++++++++++++++--------------- hw/net/igb_regs.h | 12 +- hw/net/trace-events | 6 +- tests/qtest/libqos/igb.c | 3 + 6 files changed, 316 insertions(+), 202 deletions(-) diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index 78373d7db7..0085ad53c2 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core, } static void -e1000e_write_to_rx_buffers(E1000ECore *core, - hwaddr ba[MAX_PS_BUFFERS], - e1000e_ba_state *bastate, - const char *data, - dma_addr_t data_len) +e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core, + hwaddr ba[MAX_PS_BUFFERS], + e1000e_ba_state *bastate, + const char *data, + dma_addr_t data_len) { while (data_len > 0) { uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx]; @@ -1594,8 +1594,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, while (copy_size) { iov_copy = 
MIN(copy_size, iov->iov_len - iov_ofs); - e1000e_write_to_rx_buffers(core, ba, &bastate, - iov->iov_base + iov_ofs, iov_copy); + e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate, + iov->iov_base + + iov_ofs, + iov_copy); copy_size -= iov_copy; iov_ofs += iov_copy; @@ -1607,7 +1609,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, if (desc_offset + desc_size >= total_size) { /* Simulate FCS checksum presence in the last descriptor */ - e1000e_write_to_rx_buffers(core, ba, &bastate, + e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate, (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); } } diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h index 13760c66d3..344fd10359 100644 --- a/hw/net/e1000x_regs.h +++ b/hw/net/e1000x_regs.h @@ -827,6 +827,7 @@ union e1000_rx_desc_packet_split { /* Receive Checksum Control bits */ #define E1000_RXCSUM_IPOFLD 0x100 /* IP Checksum Offload Enable */ #define E1000_RXCSUM_TUOFLD 0x200 /* TCP/UDP Checksum Offload Enable */ +#define E1000_RXCSUM_IPPCSE 0x1000 /* IP Payload Checksum enable */ #define E1000_RXCSUM_PCSD 0x2000 /* Packet Checksum Disable */ #define E1000_RING_DESC_LEN (16) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index 96b7335b31..1cb64402aa 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -267,6 +267,21 @@ igb_rx_use_legacy_descriptor(IGBCore *core) return false; } +typedef struct E1000E_RingInfo_st { + int dbah; + int dbal; + int dlen; + int dh; + int dt; + int idx; +} E1000E_RingInfo; + +static uint32_t +igb_rx_queue_desctyp_get(IGBCore *core, const E1000E_RingInfo *r) +{ + return core->mac[E1000_SRRCTL(r->idx) >> 2] & E1000_SRRCTL_DESCTYPE_MASK; +} + static inline bool igb_rss_enabled(IGBCore *core) { @@ -694,15 +709,6 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx) return (ent & E1000_IVAR_VALID) ? 
BIT(ent & 0x1f) : 0; } -typedef struct E1000E_RingInfo_st { - int dbah; - int dbal; - int dlen; - int dh; - int dt; - int idx; -} E1000E_RingInfo; - static inline bool igb_ring_empty(IGBCore *core, const E1000E_RingInfo *r) { @@ -941,6 +947,14 @@ igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size) bufsize; } +static uint32_t +igb_get_queue_rx_header_buf_size(IGBCore *core, const E1000E_RingInfo *r) +{ + uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2]; + return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >> + E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; +} + void igb_start_recv(IGBCore *core) { @@ -1232,7 +1246,7 @@ igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, } static inline void -igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, +igb_read_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, hwaddr *buff_addr) { if (igb_rx_use_legacy_descriptor(core)) { @@ -1281,15 +1295,11 @@ igb_verify_csum_in_sw(IGBCore *core, } static void -igb_build_rx_metadata(IGBCore *core, - struct NetRxPkt *pkt, - bool is_eop, - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, - uint16_t *pkt_info, uint16_t *hdr_info, - uint32_t *rss, - uint32_t *status_flags, - uint16_t *ip_id, - uint16_t *vlan_tag) +igb_build_rx_metadata_common(IGBCore *core, + struct NetRxPkt *pkt, + bool is_eop, + uint32_t *status_flags, + uint16_t *vlan_tag) { struct virtio_net_hdr *vhdr; bool hasip4, hasip6, csum_valid; @@ -1298,7 +1308,6 @@ igb_build_rx_metadata(IGBCore *core, *status_flags = E1000_RXD_STAT_DD; /* No additional metadata needed for non-EOP descriptors */ - /* TODO: EOP apply only to status so don't skip whole function. 
*/ if (!is_eop) { goto func_exit; } @@ -1315,64 +1324,6 @@ igb_build_rx_metadata(IGBCore *core, trace_e1000e_rx_metadata_vlan(*vlan_tag); } - /* Packet parsing results */ - if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { - if (rss_info->enabled) { - *rss = cpu_to_le32(rss_info->hash); - trace_igb_rx_metadata_rss(*rss); - } - } else if (hasip4) { - *status_flags |= E1000_RXD_STAT_IPIDV; - *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); - trace_e1000e_rx_metadata_ip_id(*ip_id); - } - - if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && net_rx_pkt_is_tcp_ack(pkt)) { - *status_flags |= E1000_RXD_STAT_ACK; - trace_e1000e_rx_metadata_ack(); - } - - if (pkt_info) { - *pkt_info = rss_info->enabled ? rss_info->type : 0; - - if (etqf < 8) { - *pkt_info |= BIT(11) | (etqf << 4); - } else { - if (hasip4) { - *pkt_info |= E1000_ADVRXD_PKT_IP4; - } - - if (hasip6) { - *pkt_info |= E1000_ADVRXD_PKT_IP6; - } - - switch (l4hdr_proto) { - case ETH_L4_HDR_PROTO_TCP: - *pkt_info |= E1000_ADVRXD_PKT_TCP; - break; - - case ETH_L4_HDR_PROTO_UDP: - *pkt_info |= E1000_ADVRXD_PKT_UDP; - break; - - case ETH_L4_HDR_PROTO_SCTP: - *pkt_info |= E1000_ADVRXD_PKT_SCTP; - break; - - default: - break; - } - } - } - - if (hdr_info) { - *hdr_info = 0; - } - - if (ts) { - *status_flags |= BIT(16); - } - /* RX CSO information */ if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) { trace_e1000e_rx_metadata_ipv6_sum_disabled(); @@ -1426,58 +1377,126 @@ func_exit: } static inline void -igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, +igb_write_lgcy_rx_descr(IGBCore *core, + struct e1000_rx_desc *desc, struct NetRxPkt *pkt, - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, + const E1000E_RSSInfo *rss_info, uint16_t length) { - uint32_t status_flags, rss; - uint16_t ip_id; + uint32_t status_flags; assert(!rss_info->enabled); + + memset(desc, 0, sizeof(*desc)); desc->length = cpu_to_le16(length); - desc->csum = 0; + igb_build_rx_metadata_common(core, pkt, pkt != NULL, + 
&status_flags, + &desc->special); - igb_build_rx_metadata(core, pkt, pkt != NULL, - rss_info, etqf, ts, - NULL, NULL, &rss, - &status_flags, &ip_id, - &desc->special); desc->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); desc->status = (uint8_t) le32_to_cpu(status_flags); } +static uint16_t +igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf) +{ + uint16_t pkt_type = 0; + bool hasip4, hasip6; + EthL4HdrProto l4hdr_proto; + + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); + + if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { + eth_ip6_hdr_info *ip6hdr_info = net_rx_pkt_get_ip6_info(pkt); + pkt_type |= ip6hdr_info->has_ext_hdrs ? E1000_ADVRXD_PKT_IP6E : + E1000_ADVRXD_PKT_IP6; + } else if (hasip4) { + pkt_type = E1000_ADVRXD_PKT_IP4; + } + + if (etqf < 8) { + pkt_type |= (BIT(11) >> 4) | etqf; + return pkt_type; + } + + switch (l4hdr_proto) { + case ETH_L4_HDR_PROTO_TCP: + pkt_type |= E1000_ADVRXD_PKT_TCP; + break; + case ETH_L4_HDR_PROTO_UDP: + pkt_type |= E1000_ADVRXD_PKT_UDP; + break; + case ETH_L4_HDR_PROTO_SCTP: + pkt_type |= E1000_ADVRXD_PKT_SCTP; + break; + default: + break; + } + + return pkt_type; +} + static inline void -igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, +igb_write_adv_rx_descr(IGBCore *core, + union e1000_adv_rx_desc *d, struct NetRxPkt *pkt, - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, + const E1000E_RSSInfo *rss_info, + uint16_t etqf, + bool ts, uint16_t length) { - memset(&desc->wb, 0, sizeof(desc->wb)); + bool hasip4, hasip6; + EthL4HdrProto l4hdr_proto; + uint16_t rss_type = 0, pkt_type; + bool eop = (pkt != NULL); + memset(&d->wb, 0, sizeof(d->wb)); + + d->wb.upper.length = cpu_to_le16(length); + igb_build_rx_metadata_common(core, pkt, eop, + &d->wb.upper.status_error, + &d->wb.upper.vlan); + + if (!eop) { + return; + } + + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); + + if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { + 
if (rss_info->enabled) { + d->wb.lower.hi_dword.rss = cpu_to_le32(rss_info->hash); + rss_type = rss_info->type; + trace_igb_rx_metadata_rss(d->wb.lower.hi_dword.rss, rss_type); + } + } else if (core->mac[RXCSUM] & E1000_RXCSUM_IPPCSE && + hasip4) { + d->wb.lower.hi_dword.csum_ip.ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); + trace_e1000e_rx_metadata_ip_id(d->wb.lower.hi_dword.csum_ip.ip_id); + } - desc->wb.upper.length = cpu_to_le16(length); + if (ts) { + d->wb.upper.status_error |= BIT(16); + } - igb_build_rx_metadata(core, pkt, pkt != NULL, - rss_info, etqf, ts, - &desc->wb.lower.lo_dword.pkt_info, - &desc->wb.lower.lo_dword.hdr_info, - &desc->wb.lower.hi_dword.rss, - &desc->wb.upper.status_error, - &desc->wb.lower.hi_dword.csum_ip.ip_id, - &desc->wb.upper.vlan); + pkt_type = igb_rx_desc_get_packet_type(core, pkt, etqf); + trace_e1000e_rx_metadata_pkt_type(pkt_type); + d->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type << 4)); } static inline void -igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, - struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, - uint16_t etqf, bool ts, uint16_t length) +igb_write_rx_descr(IGBCore *core, + union e1000_rx_desc_union *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + uint16_t etqf, + bool ts, + uint16_t(*written)[MAX_PS_BUFFERS], + const E1000E_RingInfo *r) { if (igb_rx_use_legacy_descriptor(core)) { - igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, - etqf, ts, length); + igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, (*written)[1]); } else { - igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, - etqf, ts, length); + igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, etqf, ts, (*written)[1]); } } @@ -1513,19 +1532,6 @@ igb_pci_dma_write_rx_desc(IGBCore *core, PCIDevice *dev, dma_addr_t addr, } } -static void -igb_write_to_rx_buffers(IGBCore *core, - PCIDevice *d, - hwaddr ba, - uint16_t *written, - const char *data, - dma_addr_t data_len) 
-{ - trace_igb_rx_desc_buff_write(ba, *written, data, data_len); - pci_dma_write(d, ba + *written, data, data_len); - *written += data_len; -} - static void igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi, size_t pkt_size, size_t pkt_fcs_size) @@ -1551,6 +1557,137 @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi) ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16; } +typedef struct IGB_BaState_st { + uint16_t written[MAX_PS_BUFFERS]; + uint8_t cur_idx; +} IGB_BaState; + +typedef struct IGB_PacketRxDMAState_st { + size_t size; + size_t total_size; + size_t ps_hdr_len; + size_t desc_size; + size_t desc_offset; + uint32_t rx_desc_packet_buf_size; + uint32_t rx_desc_header_buf_size; + struct iovec *iov; + size_t iov_ofs; + bool is_first; + IGB_BaState bastate; + hwaddr ba[MAX_PS_BUFFERS]; +} IGB_PacketRxDMAState; + +static void +igb_truncate_to_descriptor_size(IGB_PacketRxDMAState *pdma_st, size_t *size) +{ + if (*size > pdma_st->rx_desc_packet_buf_size) { + *size = pdma_st->rx_desc_packet_buf_size; + } +} + +static void +igb_write_payload_frag_to_rx_buffers(IGBCore *core, + PCIDevice *d, + hwaddr (*ba)[MAX_PS_BUFFERS], + IGB_BaState *bastate, + uint32_t cur_buf_len, + const char *data, + dma_addr_t data_len) +{ + while (data_len > 0) { + assert(bastate->cur_idx < MAX_PS_BUFFERS); + + uint32_t cur_buf_bytes_left = cur_buf_len - + bastate->written[bastate->cur_idx]; + uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left); + + trace_igb_rx_desc_buff_write(bastate->cur_idx, + (*ba)[bastate->cur_idx], + bastate->written[bastate->cur_idx], + data, + bytes_to_write); + + pci_dma_write(d, + (*ba)[bastate->cur_idx] + + bastate->written[bastate->cur_idx], + data, bytes_to_write); + + bastate->written[bastate->cur_idx] += bytes_to_write; + data += bytes_to_write; + data_len -= bytes_to_write; + + if (bastate->written[bastate->cur_idx] == cur_buf_len) { + bastate->cur_idx++; + } + } +} + +static void 
+igb_write_payload_to_rx_buffers(IGBCore *core, + struct NetRxPkt *pkt, + PCIDevice *d, + IGB_PacketRxDMAState *pdma_st, + size_t *copy_size) +{ + static const uint32_t fcs_pad; + size_t iov_copy; + + /* Copy packet payload */ + while (*copy_size) { + iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs); + igb_write_payload_frag_to_rx_buffers(core, d, + &pdma_st->ba, + &pdma_st->bastate, + pdma_st->rx_desc_packet_buf_size, + pdma_st->iov->iov_base + + pdma_st->iov_ofs, + iov_copy); + + *copy_size -= iov_copy; + pdma_st->iov_ofs += iov_copy; + if (pdma_st->iov_ofs == pdma_st->iov->iov_len) { + pdma_st->iov++; + pdma_st->iov_ofs = 0; + } + } + + if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) { + /* Simulate FCS checksum presence in the last descriptor */ + igb_write_payload_frag_to_rx_buffers(core, d, + &pdma_st->ba, + &pdma_st->bastate, + pdma_st->rx_desc_packet_buf_size, + (const char *) &fcs_pad, + e1000x_fcs_len(core->mac)); + } +} + +static void +igb_write_to_rx_buffers(IGBCore *core, + struct NetRxPkt *pkt, + PCIDevice *d, + IGB_PacketRxDMAState *pdma_st) +{ + size_t copy_size; + + if (!(pdma_st->ba)[1]) { + /* as per intel docs; skip descriptors with null buf addr */ + trace_e1000e_rx_null_descriptor(); + return; + } + + if (pdma_st->desc_offset >= pdma_st->size) { + return; + } + + pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset; + igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size); + copy_size = pdma_st->size - pdma_st->desc_offset; + igb_truncate_to_descriptor_size(pdma_st, ©_size); + pdma_st->bastate.cur_idx = 1; + igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, ©_size); +} + static void igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, const E1000E_RxRing *rxr, @@ -1560,91 +1697,58 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, PCIDevice *d; dma_addr_t base; union e1000_rx_desc_union desc; - size_t desc_size; - size_t desc_offset = 0; - size_t iov_ofs 
= 0; - - struct iovec *iov = net_rx_pkt_get_iovec(pkt); - size_t size = net_rx_pkt_get_total_len(pkt); - size_t total_size = size + e1000x_fcs_len(core->mac); - const E1000E_RingInfo *rxi = rxr->i; - size_t bufsize = igb_rxbufsize(core, rxi); - + const E1000E_RingInfo *rxi; + size_t rx_desc_len; + + IGB_PacketRxDMAState pdma_st = {0}; + pdma_st.is_first = true; + pdma_st.size = net_rx_pkt_get_total_len(pkt); + pdma_st.total_size = pdma_st.size + e1000x_fcs_len(core->mac); + + rxi = rxr->i; + rx_desc_len = core->rx_desc_len; + pdma_st.rx_desc_packet_buf_size = + igb_rxbufsize(core, rxi); + pdma_st.rx_desc_header_buf_size = + igb_get_queue_rx_header_buf_size(core, rxi); + pdma_st.iov = net_rx_pkt_get_iovec(pkt); d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8); if (!d) { d = core->owner; } do { - hwaddr ba; - uint16_t written = 0; + memset(&pdma_st.bastate, 0, sizeof(IGB_BaState)); bool is_last = false; - desc_size = total_size - desc_offset; - - if (desc_size > bufsize) { - desc_size = bufsize; - } - if (igb_ring_empty(core, rxi)) { return; } base = igb_ring_head_descr(core, rxi); + pci_dma_read(d, base, &desc, rx_desc_len); + trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len); - pci_dma_read(d, base, &desc, core->rx_desc_len); - - trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len); - - igb_read_rx_descr(core, &desc, &ba); - - if (ba) { - if (desc_offset < size) { - static const uint32_t fcs_pad; - size_t iov_copy; - size_t copy_size = size - desc_offset; - if (copy_size > bufsize) { - copy_size = bufsize; - } - - /* Copy packet payload */ - while (copy_size) { - iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); - - igb_write_to_rx_buffers(core, d, ba, &written, - iov->iov_base + iov_ofs, iov_copy); + igb_read_rx_descr(core, &desc, &pdma_st->ba[1], rxi); - copy_size -= iov_copy; - iov_ofs += iov_copy; - if (iov_ofs == iov->iov_len) { - iov++; - iov_ofs = 0; - } - } - - if (desc_offset + desc_size >= total_size) { - /* Simulate FCS checksum presence 
in the last descriptor */ - igb_write_to_rx_buffers(core, d, ba, &written, - (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); - } - } - } else { /* as per intel docs; skip descriptors with null buf addr */ - trace_e1000e_rx_null_descriptor(); - } - desc_offset += desc_size; - if (desc_offset >= total_size) { + igb_write_to_rx_buffers(core, pkt, d, &pdma_st); + pdma_st.desc_offset += pdma_st.desc_size; + if (pdma_st.desc_offset >= pdma_st.total_size) { is_last = true; } - igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL, - rss_info, etqf, ts, written); - igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len); - - igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN); - - } while (desc_offset < total_size); - - igb_update_rx_stats(core, rxi, size, total_size); + igb_write_rx_descr(core, &desc, + is_last ? pkt : NULL, + rss_info, + etqf, ts, + &pdma_st.bastate.written, + rxi); + pci_dma_write(d, base, &desc, rx_desc_len); + igb_ring_advance(core, rxi, + rx_desc_len / E1000_MIN_RX_DESC_LEN); + } while (pdma_st.desc_offset < pdma_st.total_size); + + igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size); } static bool diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index 82ff195dfc..c4ede22181 100644 --- a/hw/net/igb_regs.h +++ b/hw/net/igb_regs.h @@ -452,6 +452,7 @@ union e1000_adv_rx_desc { #define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00 #define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ #define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 +#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000 #define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 #define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000 #define E1000_SRRCTL_DROP_EN 0x80000000 @@ -692,11 +693,12 @@ union e1000_adv_rx_desc { #define E1000_STATUS_NUM_VFS_SHIFT 14 -#define E1000_ADVRXD_PKT_IP4 BIT(4) -#define E1000_ADVRXD_PKT_IP6 BIT(6) -#define E1000_ADVRXD_PKT_TCP BIT(8) -#define E1000_ADVRXD_PKT_UDP BIT(9) -#define E1000_ADVRXD_PKT_SCTP BIT(10) 
+#define E1000_ADVRXD_PKT_IP4 BIT(0) +#define E1000_ADVRXD_PKT_IP6 BIT(2) +#define E1000_ADVRXD_PKT_IP6E BIT(3) +#define E1000_ADVRXD_PKT_TCP BIT(4) +#define E1000_ADVRXD_PKT_UDP BIT(5) +#define E1000_ADVRXD_PKT_SCTP BIT(6) static inline uint8_t igb_ivar_entry_rx(uint8_t i) { diff --git a/hw/net/trace-events b/hw/net/trace-events index e4a98b2c7d..9a02261360 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -277,9 +277,9 @@ igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED" igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d" igb_rx_desc_buff_size(uint32_t b) "buffer size: %u" -igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" +igb_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer #%u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" -igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X" +igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: 0x%X, rss_pkt_type: 0x%X" igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled" igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x" @@ -294,6 +294,8 @@ igb_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = 0x%x" igb_set_pfmailbox(uint32_t vf_num, uint32_t val) "PFMailbox[%d]: 0x%x" igb_set_vfmailbox(uint32_t vf_num, uint32_t val) "VFMailbox[%d]: 0x%x" +igb_wrn_rx_desc_modes_not_supp(int desc_type) "Not supported descriptor type: %d" + # igbvf.c igbvf_wrn_io_addr_unknown(uint64_t addr) "IO unknown register 0x%"PRIx64 diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c index a603468beb..aa65b5452c 100644 --- a/tests/qtest/libqos/igb.c +++ b/tests/qtest/libqos/igb.c @@ -109,6 +109,9 @@ static void igb_pci_start_hw(QOSGraphObject *obj) E1000_RAH_AV | E1000_RAH_POOL_1 | 
le16_to_cpu(*(uint16_t *)(address + 4))); + /* Set supported receive descriptor mode */ + e1000e_macreg_write(&d->e1000e, E1000_SRRCTL(0), E1000_SRRCTL_DESCTYPE_ADV_ONEBUF); + /* Enable receive */ e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN); e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN); -- 2.25.1 ^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH v3 1/2] igb: RX descriptors handling cleanup 2023-04-27 10:47 ` [PATCH v3 1/2] igb: RX descriptors handling cleanup Tomasz Dzieciol @ 2023-04-28 10:31 ` Akihiko Odaki 2023-04-28 12:51 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics 2023-04-28 12:53 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics 2023-04-30 11:57 ` Sriram Yagnaraman 1 sibling, 2 replies; 12+ messages in thread From: Akihiko Odaki @ 2023-04-28 10:31 UTC (permalink / raw) To: Tomasz Dzieciol, qemu-devel Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki On 2023/04/27 19:47, Tomasz Dzieciol wrote: > Format of Intel 82576 was changed in comparison to Intel 82574 extended > descriptors. This change updates filling of advanced descriptors fields > accordingly: > * remove TCP ACK detection > * add IPv6 with extensions traffic detection > * fragment checksum and IP ID is filled only when RXCSUM.IPPCSE is set (in > addition to RXCSUM.PCSD bit cleared condition) Please split up those changes into seperate patches. The relevant documentation is "Split up long patches" section of: docs/devel/submitting-a-patch.rst > > Refactoring is done in preparation for support of multiple advanced > descriptors RX modes, especially packet-split modes. 
> > Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com> > --- > hw/net/e1000e_core.c | 18 +- > hw/net/e1000x_regs.h | 1 + > hw/net/igb_core.c | 478 ++++++++++++++++++++++++--------------- > hw/net/igb_regs.h | 12 +- > hw/net/trace-events | 6 +- > tests/qtest/libqos/igb.c | 3 + > 6 files changed, 316 insertions(+), 202 deletions(-) > > diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c > index 78373d7db7..0085ad53c2 100644 > --- a/hw/net/e1000e_core.c > +++ b/hw/net/e1000e_core.c > @@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core, > } > > static void > -e1000e_write_to_rx_buffers(E1000ECore *core, > - hwaddr ba[MAX_PS_BUFFERS], > - e1000e_ba_state *bastate, > - const char *data, > - dma_addr_t data_len) > +e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core, > + hwaddr ba[MAX_PS_BUFFERS], > + e1000e_ba_state *bastate, > + const char *data, > + dma_addr_t data_len) > { > while (data_len > 0) { > uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx]; > @@ -1594,8 +1594,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, > while (copy_size) { > iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); > > - e1000e_write_to_rx_buffers(core, ba, &bastate, > - iov->iov_base + iov_ofs, iov_copy); > + e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate, > + iov->iov_base + > + iov_ofs, > + iov_copy); > > copy_size -= iov_copy; > iov_ofs += iov_copy; > @@ -1607,7 +1609,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, > > if (desc_offset + desc_size >= total_size) { > /* Simulate FCS checksum presence in the last descriptor */ > - e1000e_write_to_rx_buffers(core, ba, &bastate, > + e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate, > (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); > } > } > diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h > index 13760c66d3..344fd10359 100644 > --- a/hw/net/e1000x_regs.h > +++ b/hw/net/e1000x_regs.h > @@ -827,6 +827,7 @@ 
union e1000_rx_desc_packet_split { > /* Receive Checksum Control bits */ > #define E1000_RXCSUM_IPOFLD 0x100 /* IP Checksum Offload Enable */ > #define E1000_RXCSUM_TUOFLD 0x200 /* TCP/UDP Checksum Offload Enable */ > +#define E1000_RXCSUM_IPPCSE 0x1000 /* IP Payload Checksum enable */ > #define E1000_RXCSUM_PCSD 0x2000 /* Packet Checksum Disable */ > > #define E1000_RING_DESC_LEN (16) > diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c > index 96b7335b31..1cb64402aa 100644 > --- a/hw/net/igb_core.c > +++ b/hw/net/igb_core.c > @@ -267,6 +267,21 @@ igb_rx_use_legacy_descriptor(IGBCore *core) > return false; > } > > +typedef struct E1000E_RingInfo_st { > + int dbah; > + int dbal; > + int dlen; > + int dh; > + int dt; > + int idx; > +} E1000E_RingInfo; > + > +static uint32_t > +igb_rx_queue_desctyp_get(IGBCore *core, const E1000E_RingInfo *r) > +{ > + return core->mac[E1000_SRRCTL(r->idx) >> 2] & E1000_SRRCTL_DESCTYPE_MASK; > +} > + > static inline bool > igb_rss_enabled(IGBCore *core) > { > @@ -694,15 +709,6 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx) > return (ent & E1000_IVAR_VALID) ? 
BIT(ent & 0x1f) : 0; > } > > -typedef struct E1000E_RingInfo_st { > - int dbah; > - int dbal; > - int dlen; > - int dh; > - int dt; > - int idx; > -} E1000E_RingInfo; > - > static inline bool > igb_ring_empty(IGBCore *core, const E1000E_RingInfo *r) > { > @@ -941,6 +947,14 @@ igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size) > bufsize; > } > > +static uint32_t > +igb_get_queue_rx_header_buf_size(IGBCore *core, const E1000E_RingInfo *r) > +{ > + uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2]; > + return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >> > + E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; > +} > + > void > igb_start_recv(IGBCore *core) > { > @@ -1232,7 +1246,7 @@ igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, > } > > static inline void > -igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, > +igb_read_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, > hwaddr *buff_addr) > { > if (igb_rx_use_legacy_descriptor(core)) { > @@ -1281,15 +1295,11 @@ igb_verify_csum_in_sw(IGBCore *core, > } > > static void > -igb_build_rx_metadata(IGBCore *core, > - struct NetRxPkt *pkt, > - bool is_eop, > - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, > - uint16_t *pkt_info, uint16_t *hdr_info, > - uint32_t *rss, > - uint32_t *status_flags, > - uint16_t *ip_id, > - uint16_t *vlan_tag) > +igb_build_rx_metadata_common(IGBCore *core, > + struct NetRxPkt *pkt, > + bool is_eop, > + uint32_t *status_flags, > + uint16_t *vlan_tag) > { > struct virtio_net_hdr *vhdr; > bool hasip4, hasip6, csum_valid; > @@ -1298,7 +1308,6 @@ igb_build_rx_metadata(IGBCore *core, > *status_flags = E1000_RXD_STAT_DD; > > /* No additional metadata needed for non-EOP descriptors */ > - /* TODO: EOP apply only to status so don't skip whole function. 
*/ > if (!is_eop) { > goto func_exit; > } > @@ -1315,64 +1324,6 @@ igb_build_rx_metadata(IGBCore *core, > trace_e1000e_rx_metadata_vlan(*vlan_tag); > } > > - /* Packet parsing results */ > - if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { > - if (rss_info->enabled) { > - *rss = cpu_to_le32(rss_info->hash); > - trace_igb_rx_metadata_rss(*rss); > - } > - } else if (hasip4) { > - *status_flags |= E1000_RXD_STAT_IPIDV; > - *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); > - trace_e1000e_rx_metadata_ip_id(*ip_id); > - } > - > - if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && net_rx_pkt_is_tcp_ack(pkt)) { > - *status_flags |= E1000_RXD_STAT_ACK; > - trace_e1000e_rx_metadata_ack(); > - } > - > - if (pkt_info) { > - *pkt_info = rss_info->enabled ? rss_info->type : 0; > - > - if (etqf < 8) { > - *pkt_info |= BIT(11) | (etqf << 4); > - } else { > - if (hasip4) { > - *pkt_info |= E1000_ADVRXD_PKT_IP4; > - } > - > - if (hasip6) { > - *pkt_info |= E1000_ADVRXD_PKT_IP6; > - } > - > - switch (l4hdr_proto) { > - case ETH_L4_HDR_PROTO_TCP: > - *pkt_info |= E1000_ADVRXD_PKT_TCP; > - break; > - > - case ETH_L4_HDR_PROTO_UDP: > - *pkt_info |= E1000_ADVRXD_PKT_UDP; > - break; > - > - case ETH_L4_HDR_PROTO_SCTP: > - *pkt_info |= E1000_ADVRXD_PKT_SCTP; > - break; > - > - default: > - break; > - } > - } > - } > - > - if (hdr_info) { > - *hdr_info = 0; > - } > - > - if (ts) { > - *status_flags |= BIT(16); > - } > - > /* RX CSO information */ > if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) { > trace_e1000e_rx_metadata_ipv6_sum_disabled(); > @@ -1426,58 +1377,126 @@ func_exit: > } > > static inline void > -igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, > +igb_write_lgcy_rx_descr(IGBCore *core, > + struct e1000_rx_desc *desc, > struct NetRxPkt *pkt, > - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, > + const E1000E_RSSInfo *rss_info, > uint16_t length) > { > - uint32_t status_flags, rss; > - uint16_t ip_id; > + uint32_t status_flags; > > 
assert(!rss_info->enabled); > + > + memset(desc, 0, sizeof(*desc)); > desc->length = cpu_to_le16(length); > - desc->csum = 0; > + igb_build_rx_metadata_common(core, pkt, pkt != NULL, > + &status_flags, > + &desc->special); > > - igb_build_rx_metadata(core, pkt, pkt != NULL, > - rss_info, etqf, ts, > - NULL, NULL, &rss, > - &status_flags, &ip_id, > - &desc->special); > desc->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); > desc->status = (uint8_t) le32_to_cpu(status_flags); > } > > +static uint16_t > +igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf) > +{ > + uint16_t pkt_type = 0; > + bool hasip4, hasip6; > + EthL4HdrProto l4hdr_proto; > + > + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); > + > + if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { > + eth_ip6_hdr_info *ip6hdr_info = net_rx_pkt_get_ip6_info(pkt); > + pkt_type |= ip6hdr_info->has_ext_hdrs ? E1000_ADVRXD_PKT_IP6E : > + E1000_ADVRXD_PKT_IP6; > + } else if (hasip4) { > + pkt_type = E1000_ADVRXD_PKT_IP4; > + } > + > + if (etqf < 8) { > + pkt_type |= (BIT(11) >> 4) | etqf; > + return pkt_type; > + } > + > + switch (l4hdr_proto) { > + case ETH_L4_HDR_PROTO_TCP: > + pkt_type |= E1000_ADVRXD_PKT_TCP; > + break; > + case ETH_L4_HDR_PROTO_UDP: > + pkt_type |= E1000_ADVRXD_PKT_UDP; > + break; > + case ETH_L4_HDR_PROTO_SCTP: > + pkt_type |= E1000_ADVRXD_PKT_SCTP; > + break; > + default: > + break; > + } > + > + return pkt_type; > +} > + > static inline void > -igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, > +igb_write_adv_rx_descr(IGBCore *core, > + union e1000_adv_rx_desc *d, > struct NetRxPkt *pkt, > - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, > + const E1000E_RSSInfo *rss_info, > + uint16_t etqf, > + bool ts, > uint16_t length) > { > - memset(&desc->wb, 0, sizeof(desc->wb)); > + bool hasip4, hasip6; > + EthL4HdrProto l4hdr_proto; > + uint16_t rss_type = 0, pkt_type; > + bool eop = (pkt != NULL); > + 
memset(&d->wb, 0, sizeof(d->wb)); > + > + d->wb.upper.length = cpu_to_le16(length); > + igb_build_rx_metadata_common(core, pkt, eop, > + &d->wb.upper.status_error, > + &d->wb.upper.vlan); > + > + if (!eop) { > + return; > + } > + > + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); > + > + if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { > + if (rss_info->enabled) { > + d->wb.lower.hi_dword.rss = cpu_to_le32(rss_info->hash); > + rss_type = rss_info->type; > + trace_igb_rx_metadata_rss(d->wb.lower.hi_dword.rss, rss_type); > + } > + } else if (core->mac[RXCSUM] & E1000_RXCSUM_IPPCSE && > + hasip4) { > + d->wb.lower.hi_dword.csum_ip.ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); > + trace_e1000e_rx_metadata_ip_id(d->wb.lower.hi_dword.csum_ip.ip_id); > + } > > - desc->wb.upper.length = cpu_to_le16(length); > + if (ts) { > + d->wb.upper.status_error |= BIT(16); > + } > > - igb_build_rx_metadata(core, pkt, pkt != NULL, > - rss_info, etqf, ts, > - &desc->wb.lower.lo_dword.pkt_info, > - &desc->wb.lower.lo_dword.hdr_info, > - &desc->wb.lower.hi_dword.rss, > - &desc->wb.upper.status_error, > - &desc->wb.lower.hi_dword.csum_ip.ip_id, > - &desc->wb.upper.vlan); > + pkt_type = igb_rx_desc_get_packet_type(core, pkt, etqf); > + trace_e1000e_rx_metadata_pkt_type(pkt_type); > + d->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type << 4)); > } > > static inline void > -igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, > - struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, > - uint16_t etqf, bool ts, uint16_t length) > +igb_write_rx_descr(IGBCore *core, > + union e1000_rx_desc_union *desc, > + struct NetRxPkt *pkt, > + const E1000E_RSSInfo *rss_info, > + uint16_t etqf, > + bool ts, > + uint16_t(*written)[MAX_PS_BUFFERS], As I wrote in the last review: > Don't use an array here. Instead use a struct as e1000_adv_rx_desc > does; it will be more explicit and easier to read. And I think this > should be rather part of the next patch. 
Please don't ignore comments in reviews, and if you have a question with them or you don't agree with them, please write so in a reply. You don't have to post a new version quickly so take time to address all problems pointed out. See also: "Stay around to fix problems raised in code review" section in docs/devel/submitting-a-patch.rst > + const E1000E_RingInfo *r) > { > if (igb_rx_use_legacy_descriptor(core)) { > - igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, > - etqf, ts, length); > + igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, (*written)[1]); > } else { > - igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, > - etqf, ts, length); > + igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, etqf, ts, (*written)[1]); > } > } > > @@ -1513,19 +1532,6 @@ igb_pci_dma_write_rx_desc(IGBCore *core, PCIDevice *dev, dma_addr_t addr, > } > } > > -static void > -igb_write_to_rx_buffers(IGBCore *core, > - PCIDevice *d, > - hwaddr ba, > - uint16_t *written, > - const char *data, > - dma_addr_t data_len) > -{ > - trace_igb_rx_desc_buff_write(ba, *written, data, data_len); > - pci_dma_write(d, ba + *written, data, data_len); > - *written += data_len; > -} > - > static void > igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi, > size_t pkt_size, size_t pkt_fcs_size) > @@ -1551,6 +1557,137 @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi) > ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16; > } > > +typedef struct IGB_BaState_st { > + uint16_t written[MAX_PS_BUFFERS]; > + uint8_t cur_idx; > +} IGB_BaState; > + > +typedef struct IGB_PacketRxDMAState_st { > + size_t size; > + size_t total_size; > + size_t ps_hdr_len; > + size_t desc_size; > + size_t desc_offset; > + uint32_t rx_desc_packet_buf_size; > + uint32_t rx_desc_header_buf_size; > + struct iovec *iov; > + size_t iov_ofs; > + bool is_first; > + IGB_BaState bastate; > + hwaddr ba[MAX_PS_BUFFERS]; > +} IGB_PacketRxDMAState; Do *not*: - suffix struct 
name with _st. The convention is not common in QEMU code base, or even e1000e and igb do not always use the suffixes. - use _. See include/qemu/typedefs.h for examples. > + > +static void > +igb_truncate_to_descriptor_size(IGB_PacketRxDMAState *pdma_st, size_t *size) > +{ > + if (*size > pdma_st->rx_desc_packet_buf_size) { > + *size = pdma_st->rx_desc_packet_buf_size; > + } > +} > + > +static void > +igb_write_payload_frag_to_rx_buffers(IGBCore *core, > + PCIDevice *d, > + hwaddr (*ba)[MAX_PS_BUFFERS], > + IGB_BaState *bastate, > + uint32_t cur_buf_len, > + const char *data, > + dma_addr_t data_len) > +{ > + while (data_len > 0) { > + assert(bastate->cur_idx < MAX_PS_BUFFERS); > + > + uint32_t cur_buf_bytes_left = cur_buf_len - > + bastate->written[bastate->cur_idx]; > + uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left); > + > + trace_igb_rx_desc_buff_write(bastate->cur_idx, > + (*ba)[bastate->cur_idx], > + bastate->written[bastate->cur_idx], > + data, > + bytes_to_write); > + > + pci_dma_write(d, > + (*ba)[bastate->cur_idx] + > + bastate->written[bastate->cur_idx], > + data, bytes_to_write); > + > + bastate->written[bastate->cur_idx] += bytes_to_write; > + data += bytes_to_write; > + data_len -= bytes_to_write; > + > + if (bastate->written[bastate->cur_idx] == cur_buf_len) { > + bastate->cur_idx++; > + } > + } > +} > + > +static void > +igb_write_payload_to_rx_buffers(IGBCore *core, > + struct NetRxPkt *pkt, > + PCIDevice *d, > + IGB_PacketRxDMAState *pdma_st, > + size_t *copy_size) > +{ > + static const uint32_t fcs_pad; > + size_t iov_copy; > + > + /* Copy packet payload */ > + while (*copy_size) { > + iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs); > + igb_write_payload_frag_to_rx_buffers(core, d, > + &pdma_st->ba, > + &pdma_st->bastate, > + pdma_st->rx_desc_packet_buf_size, > + pdma_st->iov->iov_base + > + pdma_st->iov_ofs, > + iov_copy); > + > + *copy_size -= iov_copy; > + pdma_st->iov_ofs += iov_copy; > + if (pdma_st->iov_ofs 
== pdma_st->iov->iov_len) { > + pdma_st->iov++; > + pdma_st->iov_ofs = 0; > + } > + } > + > + if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) { > + /* Simulate FCS checksum presence in the last descriptor */ > + igb_write_payload_frag_to_rx_buffers(core, d, > + &pdma_st->ba, > + &pdma_st->bastate, > + pdma_st->rx_desc_packet_buf_size, > + (const char *) &fcs_pad, > + e1000x_fcs_len(core->mac)); > + } > +} > + > +static void > +igb_write_to_rx_buffers(IGBCore *core, > + struct NetRxPkt *pkt, > + PCIDevice *d, > + IGB_PacketRxDMAState *pdma_st) > +{ > + size_t copy_size; > + > + if (!(pdma_st->ba)[1]) { > + /* as per intel docs; skip descriptors with null buf addr */ > + trace_e1000e_rx_null_descriptor(); > + return; > + } > + > + if (pdma_st->desc_offset >= pdma_st->size) { > + return; > + } > + > + pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset; > + igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size); > + copy_size = pdma_st->size - pdma_st->desc_offset; > + igb_truncate_to_descriptor_size(pdma_st, ©_size); > + pdma_st->bastate.cur_idx = 1; > + igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, ©_size); > +} > + > static void > igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, > const E1000E_RxRing *rxr, > @@ -1560,91 +1697,58 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, > PCIDevice *d; > dma_addr_t base; > union e1000_rx_desc_union desc; > - size_t desc_size; > - size_t desc_offset = 0; > - size_t iov_ofs = 0; > - > - struct iovec *iov = net_rx_pkt_get_iovec(pkt); > - size_t size = net_rx_pkt_get_total_len(pkt); > - size_t total_size = size + e1000x_fcs_len(core->mac); > - const E1000E_RingInfo *rxi = rxr->i; > - size_t bufsize = igb_rxbufsize(core, rxi); > - > + const E1000E_RingInfo *rxi; > + size_t rx_desc_len; > + > + IGB_PacketRxDMAState pdma_st = {0}; > + pdma_st.is_first = true; > + pdma_st.size = net_rx_pkt_get_total_len(pkt); > + pdma_st.total_size = pdma_st.size + 
e1000x_fcs_len(core->mac); > + > + rxi = rxr->i; > + rx_desc_len = core->rx_desc_len; > + pdma_st.rx_desc_packet_buf_size = > + igb_rxbufsize(core, rxi); > + pdma_st.rx_desc_header_buf_size = > + igb_get_queue_rx_header_buf_size(core, rxi); > + pdma_st.iov = net_rx_pkt_get_iovec(pkt); > d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8); > if (!d) { > d = core->owner; > } > > do { > - hwaddr ba; > - uint16_t written = 0; > + memset(&pdma_st.bastate, 0, sizeof(IGB_BaState)); > bool is_last = false; > > - desc_size = total_size - desc_offset; > - > - if (desc_size > bufsize) { > - desc_size = bufsize; > - } > - > if (igb_ring_empty(core, rxi)) { > return; > } > > base = igb_ring_head_descr(core, rxi); > + pci_dma_read(d, base, &desc, rx_desc_len); > + trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len); > > - pci_dma_read(d, base, &desc, core->rx_desc_len); > - > - trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len); > - > - igb_read_rx_descr(core, &desc, &ba); > - > - if (ba) { > - if (desc_offset < size) { > - static const uint32_t fcs_pad; > - size_t iov_copy; > - size_t copy_size = size - desc_offset; > - if (copy_size > bufsize) { > - copy_size = bufsize; > - } > - > - /* Copy packet payload */ > - while (copy_size) { > - iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); > - > - igb_write_to_rx_buffers(core, d, ba, &written, > - iov->iov_base + iov_ofs, iov_copy); > + igb_read_rx_descr(core, &desc, &pdma_st->ba[1], rxi); > > - copy_size -= iov_copy; > - iov_ofs += iov_copy; > - if (iov_ofs == iov->iov_len) { > - iov++; > - iov_ofs = 0; > - } > - } > - > - if (desc_offset + desc_size >= total_size) { > - /* Simulate FCS checksum presence in the last descriptor */ > - igb_write_to_rx_buffers(core, d, ba, &written, > - (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); > - } > - } > - } else { /* as per intel docs; skip descriptors with null buf addr */ > - trace_e1000e_rx_null_descriptor(); > - } > - desc_offset += desc_size; > - if (desc_offset >= 
total_size) { > + igb_write_to_rx_buffers(core, pkt, d, &pdma_st); > + pdma_st.desc_offset += pdma_st.desc_size; > + if (pdma_st.desc_offset >= pdma_st.total_size) { > is_last = true; > } > > - igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL, > - rss_info, etqf, ts, written); > - igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len); > - > - igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN); > - > - } while (desc_offset < total_size); > - > - igb_update_rx_stats(core, rxi, size, total_size); > + igb_write_rx_descr(core, &desc, > + is_last ? pkt : NULL, > + rss_info, > + etqf, ts, > + &pdma_st.bastate.written, > + rxi); > + pci_dma_write(d, base, &desc, rx_desc_len); > + igb_ring_advance(core, rxi, > + rx_desc_len / E1000_MIN_RX_DESC_LEN); > + } while (pdma_st.desc_offset < pdma_st.total_size); > + > + igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size); > } > > static bool > diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h > index 82ff195dfc..c4ede22181 100644 > --- a/hw/net/igb_regs.h > +++ b/hw/net/igb_regs.h > @@ -452,6 +452,7 @@ union e1000_adv_rx_desc { > #define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00 > #define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ > #define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 > +#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000 > #define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 > #define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000 > #define E1000_SRRCTL_DROP_EN 0x80000000 > @@ -692,11 +693,12 @@ union e1000_adv_rx_desc { > > #define E1000_STATUS_NUM_VFS_SHIFT 14 > > -#define E1000_ADVRXD_PKT_IP4 BIT(4) > -#define E1000_ADVRXD_PKT_IP6 BIT(6) > -#define E1000_ADVRXD_PKT_TCP BIT(8) > -#define E1000_ADVRXD_PKT_UDP BIT(9) > -#define E1000_ADVRXD_PKT_SCTP BIT(10) > +#define E1000_ADVRXD_PKT_IP4 BIT(0) > +#define E1000_ADVRXD_PKT_IP6 BIT(2) > +#define E1000_ADVRXD_PKT_IP6E BIT(3) > +#define E1000_ADVRXD_PKT_TCP BIT(4) > +#define E1000_ADVRXD_PKT_UDP BIT(5) > 
+#define E1000_ADVRXD_PKT_SCTP BIT(6) > > static inline uint8_t igb_ivar_entry_rx(uint8_t i) > { > diff --git a/hw/net/trace-events b/hw/net/trace-events > index e4a98b2c7d..9a02261360 100644 > --- a/hw/net/trace-events > +++ b/hw/net/trace-events > @@ -277,9 +277,9 @@ igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED" > igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d" > > igb_rx_desc_buff_size(uint32_t b) "buffer size: %u" > -igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" > +igb_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer #%u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" > > -igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X" > +igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: 0x%X, rss_pkt_type: 0x%X" > > igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled" > igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x" > @@ -294,6 +294,8 @@ igb_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = 0x%x" > igb_set_pfmailbox(uint32_t vf_num, uint32_t val) "PFMailbox[%d]: 0x%x" > igb_set_vfmailbox(uint32_t vf_num, uint32_t val) "VFMailbox[%d]: 0x%x" > > +igb_wrn_rx_desc_modes_not_supp(int desc_type) "Not supported descriptor type: %d" > + > # igbvf.c > igbvf_wrn_io_addr_unknown(uint64_t addr) "IO unknown register 0x%"PRIx64 > > diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c > index a603468beb..aa65b5452c 100644 > --- a/tests/qtest/libqos/igb.c > +++ b/tests/qtest/libqos/igb.c > @@ -109,6 +109,9 @@ static void igb_pci_start_hw(QOSGraphObject *obj) > E1000_RAH_AV | E1000_RAH_POOL_1 | > le16_to_cpu(*(uint16_t *)(address + 4))); > > + /* Set supported receive descriptor mode */ > + 
e1000e_macreg_write(&d->e1000e, E1000_SRRCTL(0), E1000_SRRCTL_DESCTYPE_ADV_ONEBUF); > + > /* Enable receive */ > e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN); > e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN); ^ permalink raw reply [flat|nested] 12+ messages in thread
* RE: [PATCH v3 1/2] igb: RX descriptors handling cleanup 2023-04-28 10:31 ` Akihiko Odaki @ 2023-04-28 12:51 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics 2023-04-30 6:05 ` Akihiko Odaki 2023-04-28 12:53 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics 1 sibling, 1 reply; 12+ messages in thread From: Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics @ 2023-04-28 12:51 UTC (permalink / raw) To: 'Akihiko Odaki', qemu-devel Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki > Please don't ignore comments in reviews, and if you have a question with them or you don't agree with them, please write so in a reply. You don't have to post a new version quickly so take time to address all problems pointed out. I assumed that comments referred only to places pointed in the code and fixed only those places. Sorry about that. I will keep in mind that your comments are more general and fix all the places, where array is passed as parameter. > Please split up those changes into separate patches. I will extract TCP ACK detection removal and IPv6 extensions traffic detection to separate patches. Those will be small patches in comparison to the rest of cleanup, however those are functional changes. > Do *not*: > - suffix struct name with _st. The convention is not common in QEMU code base, or even e1000e and igb do not always use the suffixes. > - use _. ok, I was looking at E1000E_RingInfo_st, which was added recently with IGB code in commit 3a977deebe6b9a10043182e922f6883924ef21f5 ("Intrdocue igb device emulation"). 
-----Original Message----- From: Akihiko Odaki <akihiko.odaki@daynix.com> Sent: piątek, 28 kwietnia 2023 12:31 To: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>; qemu-devel@nongnu.org Cc: sriram.yagnaraman@est.tech; jasowang@redhat.com; k.kwiecien@samsung.com; m.sochacki@samsung.com Subject: Re: [PATCH v3 1/2] igb: RX descriptors handling cleanup On 2023/04/27 19:47, Tomasz Dzieciol wrote: > Format of Intel 82576 was changed in comparison to Intel 82574 > extended descriptors. This change updates filling of advanced > descriptors fields > accordingly: > * remove TCP ACK detection > * add IPv6 with extensions traffic detection > * fragment checksum and IP ID is filled only when RXCSUM.IPPCSE is set (in > addition to RXCSUM.PCSD bit cleared condition) Please split up those changes into separate patches. The relevant documentation is "Split up long patches" section of: docs/devel/submitting-a-patch.rst > > Refactoring is done in preparation for support of multiple advanced > descriptors RX modes, especially packet-split modes. 
> > Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com> > --- > hw/net/e1000e_core.c | 18 +- > hw/net/e1000x_regs.h | 1 + > hw/net/igb_core.c | 478 ++++++++++++++++++++++++--------------- > hw/net/igb_regs.h | 12 +- > hw/net/trace-events | 6 +- > tests/qtest/libqos/igb.c | 3 + > 6 files changed, 316 insertions(+), 202 deletions(-) > > diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index > 78373d7db7..0085ad53c2 100644 > --- a/hw/net/e1000e_core.c > +++ b/hw/net/e1000e_core.c > @@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core, > } > > static void > -e1000e_write_to_rx_buffers(E1000ECore *core, > - hwaddr ba[MAX_PS_BUFFERS], > - e1000e_ba_state *bastate, > - const char *data, > - dma_addr_t data_len) > +e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core, > + hwaddr ba[MAX_PS_BUFFERS], > + e1000e_ba_state *bastate, > + const char *data, > + dma_addr_t data_len) > { > while (data_len > 0) { > uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx]; > @@ -1594,8 +1594,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, > while (copy_size) { > iov_copy = MIN(copy_size, iov->iov_len - > iov_ofs); > > - e1000e_write_to_rx_buffers(core, ba, &bastate, > - iov->iov_base + iov_ofs, iov_copy); > + e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate, > + iov->iov_base + > + iov_ofs, > + > + iov_copy); > > copy_size -= iov_copy; > iov_ofs += iov_copy; @@ -1607,7 +1609,7 @@ > e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, > > if (desc_offset + desc_size >= total_size) { > /* Simulate FCS checksum presence in the last descriptor */ > - e1000e_write_to_rx_buffers(core, ba, &bastate, > + e1000e_write_payload_frag_to_rx_buffers(core, ba, > + &bastate, > (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); > } > } > diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h index > 13760c66d3..344fd10359 100644 > --- a/hw/net/e1000x_regs.h > +++ b/hw/net/e1000x_regs.h > @@ -827,6 
+827,7 @@ union e1000_rx_desc_packet_split { > /* Receive Checksum Control bits */ > #define E1000_RXCSUM_IPOFLD 0x100 /* IP Checksum Offload Enable */ > #define E1000_RXCSUM_TUOFLD 0x200 /* TCP/UDP Checksum Offload Enable */ > +#define E1000_RXCSUM_IPPCSE 0x1000 /* IP Payload Checksum enable */ > #define E1000_RXCSUM_PCSD 0x2000 /* Packet Checksum Disable */ > > #define E1000_RING_DESC_LEN (16) > diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index > 96b7335b31..1cb64402aa 100644 > --- a/hw/net/igb_core.c > +++ b/hw/net/igb_core.c > @@ -267,6 +267,21 @@ igb_rx_use_legacy_descriptor(IGBCore *core) > return false; > } > > +typedef struct E1000E_RingInfo_st { > + int dbah; > + int dbal; > + int dlen; > + int dh; > + int dt; > + int idx; > +} E1000E_RingInfo; > + > +static uint32_t > +igb_rx_queue_desctyp_get(IGBCore *core, const E1000E_RingInfo *r) { > + return core->mac[E1000_SRRCTL(r->idx) >> 2] & > +E1000_SRRCTL_DESCTYPE_MASK; } > + > static inline bool > igb_rss_enabled(IGBCore *core) > { > @@ -694,15 +709,6 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx) > return (ent & E1000_IVAR_VALID) ? 
BIT(ent & 0x1f) : 0; > } > > -typedef struct E1000E_RingInfo_st { > - int dbah; > - int dbal; > - int dlen; > - int dh; > - int dt; > - int idx; > -} E1000E_RingInfo; > - > static inline bool > igb_ring_empty(IGBCore *core, const E1000E_RingInfo *r) > { > @@ -941,6 +947,14 @@ igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size) > bufsize; > } > > +static uint32_t > +igb_get_queue_rx_header_buf_size(IGBCore *core, const E1000E_RingInfo > +*r) { > + uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2]; > + return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >> > + E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; } > + > void > igb_start_recv(IGBCore *core) > { > @@ -1232,7 +1246,7 @@ igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, > } > > static inline void > -igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, > +igb_read_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, > hwaddr *buff_addr) > { > if (igb_rx_use_legacy_descriptor(core)) { @@ -1281,15 +1295,11 > @@ igb_verify_csum_in_sw(IGBCore *core, > } > > static void > -igb_build_rx_metadata(IGBCore *core, > - struct NetRxPkt *pkt, > - bool is_eop, > - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, > - uint16_t *pkt_info, uint16_t *hdr_info, > - uint32_t *rss, > - uint32_t *status_flags, > - uint16_t *ip_id, > - uint16_t *vlan_tag) > +igb_build_rx_metadata_common(IGBCore *core, > + struct NetRxPkt *pkt, > + bool is_eop, > + uint32_t *status_flags, > + uint16_t *vlan_tag) > { > struct virtio_net_hdr *vhdr; > bool hasip4, hasip6, csum_valid; @@ -1298,7 +1308,6 @@ > igb_build_rx_metadata(IGBCore *core, > *status_flags = E1000_RXD_STAT_DD; > > /* No additional metadata needed for non-EOP descriptors */ > - /* TODO: EOP apply only to status so don't skip whole function. 
*/ > if (!is_eop) { > goto func_exit; > } > @@ -1315,64 +1324,6 @@ igb_build_rx_metadata(IGBCore *core, > trace_e1000e_rx_metadata_vlan(*vlan_tag); > } > > - /* Packet parsing results */ > - if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { > - if (rss_info->enabled) { > - *rss = cpu_to_le32(rss_info->hash); > - trace_igb_rx_metadata_rss(*rss); > - } > - } else if (hasip4) { > - *status_flags |= E1000_RXD_STAT_IPIDV; > - *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); > - trace_e1000e_rx_metadata_ip_id(*ip_id); > - } > - > - if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && net_rx_pkt_is_tcp_ack(pkt)) { > - *status_flags |= E1000_RXD_STAT_ACK; > - trace_e1000e_rx_metadata_ack(); > - } > - > - if (pkt_info) { > - *pkt_info = rss_info->enabled ? rss_info->type : 0; > - > - if (etqf < 8) { > - *pkt_info |= BIT(11) | (etqf << 4); > - } else { > - if (hasip4) { > - *pkt_info |= E1000_ADVRXD_PKT_IP4; > - } > - > - if (hasip6) { > - *pkt_info |= E1000_ADVRXD_PKT_IP6; > - } > - > - switch (l4hdr_proto) { > - case ETH_L4_HDR_PROTO_TCP: > - *pkt_info |= E1000_ADVRXD_PKT_TCP; > - break; > - > - case ETH_L4_HDR_PROTO_UDP: > - *pkt_info |= E1000_ADVRXD_PKT_UDP; > - break; > - > - case ETH_L4_HDR_PROTO_SCTP: > - *pkt_info |= E1000_ADVRXD_PKT_SCTP; > - break; > - > - default: > - break; > - } > - } > - } > - > - if (hdr_info) { > - *hdr_info = 0; > - } > - > - if (ts) { > - *status_flags |= BIT(16); > - } > - > /* RX CSO information */ > if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) { > trace_e1000e_rx_metadata_ipv6_sum_disabled(); > @@ -1426,58 +1377,126 @@ func_exit: > } > > static inline void > -igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, > +igb_write_lgcy_rx_descr(IGBCore *core, > + struct e1000_rx_desc *desc, > struct NetRxPkt *pkt, > - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, > + const E1000E_RSSInfo *rss_info, > uint16_t length) > { > - uint32_t status_flags, rss; > - uint16_t ip_id; > + uint32_t status_flags; > > 
assert(!rss_info->enabled); > + > + memset(desc, 0, sizeof(*desc)); > desc->length = cpu_to_le16(length); > - desc->csum = 0; > + igb_build_rx_metadata_common(core, pkt, pkt != NULL, > + &status_flags, > + &desc->special); > > - igb_build_rx_metadata(core, pkt, pkt != NULL, > - rss_info, etqf, ts, > - NULL, NULL, &rss, > - &status_flags, &ip_id, > - &desc->special); > desc->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); > desc->status = (uint8_t) le32_to_cpu(status_flags); > } > > +static uint16_t > +igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, > +uint16_t etqf) { > + uint16_t pkt_type = 0; > + bool hasip4, hasip6; > + EthL4HdrProto l4hdr_proto; > + > + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); > + > + if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { > + eth_ip6_hdr_info *ip6hdr_info = net_rx_pkt_get_ip6_info(pkt); > + pkt_type |= ip6hdr_info->has_ext_hdrs ? E1000_ADVRXD_PKT_IP6E : > + E1000_ADVRXD_PKT_IP6; > + } else if (hasip4) { > + pkt_type = E1000_ADVRXD_PKT_IP4; > + } > + > + if (etqf < 8) { > + pkt_type |= (BIT(11) >> 4) | etqf; > + return pkt_type; > + } > + > + switch (l4hdr_proto) { > + case ETH_L4_HDR_PROTO_TCP: > + pkt_type |= E1000_ADVRXD_PKT_TCP; > + break; > + case ETH_L4_HDR_PROTO_UDP: > + pkt_type |= E1000_ADVRXD_PKT_UDP; > + break; > + case ETH_L4_HDR_PROTO_SCTP: > + pkt_type |= E1000_ADVRXD_PKT_SCTP; > + break; > + default: > + break; > + } > + > + return pkt_type; > +} > + > static inline void > -igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, > +igb_write_adv_rx_descr(IGBCore *core, > + union e1000_adv_rx_desc *d, > struct NetRxPkt *pkt, > - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, > + const E1000E_RSSInfo *rss_info, > + uint16_t etqf, > + bool ts, > uint16_t length) > { > - memset(&desc->wb, 0, sizeof(desc->wb)); > + bool hasip4, hasip6; > + EthL4HdrProto l4hdr_proto; > + uint16_t rss_type = 0, pkt_type; > + bool eop = (pkt != NULL); > + 
memset(&d->wb, 0, sizeof(d->wb)); > + > + d->wb.upper.length = cpu_to_le16(length); > + igb_build_rx_metadata_common(core, pkt, eop, > + &d->wb.upper.status_error, > + &d->wb.upper.vlan); > + > + if (!eop) { > + return; > + } > + > + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); > + > + if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { > + if (rss_info->enabled) { > + d->wb.lower.hi_dword.rss = cpu_to_le32(rss_info->hash); > + rss_type = rss_info->type; > + trace_igb_rx_metadata_rss(d->wb.lower.hi_dword.rss, rss_type); > + } > + } else if (core->mac[RXCSUM] & E1000_RXCSUM_IPPCSE && > + hasip4) { > + d->wb.lower.hi_dword.csum_ip.ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); > + trace_e1000e_rx_metadata_ip_id(d->wb.lower.hi_dword.csum_ip.ip_id); > + } > > - desc->wb.upper.length = cpu_to_le16(length); > + if (ts) { > + d->wb.upper.status_error |= BIT(16); > + } > > - igb_build_rx_metadata(core, pkt, pkt != NULL, > - rss_info, etqf, ts, > - &desc->wb.lower.lo_dword.pkt_info, > - &desc->wb.lower.lo_dword.hdr_info, > - &desc->wb.lower.hi_dword.rss, > - &desc->wb.upper.status_error, > - &desc->wb.lower.hi_dword.csum_ip.ip_id, > - &desc->wb.upper.vlan); > + pkt_type = igb_rx_desc_get_packet_type(core, pkt, etqf); > + trace_e1000e_rx_metadata_pkt_type(pkt_type); > + d->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type > + << 4)); > } > > static inline void > -igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, > - struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, > - uint16_t etqf, bool ts, uint16_t length) > +igb_write_rx_descr(IGBCore *core, > + union e1000_rx_desc_union *desc, > + struct NetRxPkt *pkt, > + const E1000E_RSSInfo *rss_info, > + uint16_t etqf, > + bool ts, > + uint16_t(*written)[MAX_PS_BUFFERS], As I wrote in the last review: > Don't use an array here. Instead use a struct as e1000_adv_rx_desc > does; it will be more explicit and easier to read. And I think this > should be rather part of the next patch. 
Please don't ignore comments in reviews, and if you have a question with them or you don't agree with them, please write so in a reply. You don't have to post a new version quickly so take time to address all problems pointed out. See also: "Stay around to fix problems raised in code review" section in docs/devel/submitting-a-patch.rst > + const E1000E_RingInfo *r) > { > if (igb_rx_use_legacy_descriptor(core)) { > - igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, > - etqf, ts, length); > + igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, > + (*written)[1]); > } else { > - igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, > - etqf, ts, length); > + igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, etqf, > + ts, (*written)[1]); > } > } > > @@ -1513,19 +1532,6 @@ igb_pci_dma_write_rx_desc(IGBCore *core, PCIDevice *dev, dma_addr_t addr, > } > } > > -static void > -igb_write_to_rx_buffers(IGBCore *core, > - PCIDevice *d, > - hwaddr ba, > - uint16_t *written, > - const char *data, > - dma_addr_t data_len) > -{ > - trace_igb_rx_desc_buff_write(ba, *written, data, data_len); > - pci_dma_write(d, ba + *written, data, data_len); > - *written += data_len; > -} > - > static void > igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi, > size_t pkt_size, size_t pkt_fcs_size) @@ -1551,6 > +1557,137 @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi) > ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16; > } > > +typedef struct IGB_BaState_st { > + uint16_t written[MAX_PS_BUFFERS]; > + uint8_t cur_idx; > +} IGB_BaState; > + > +typedef struct IGB_PacketRxDMAState_st { > + size_t size; > + size_t total_size; > + size_t ps_hdr_len; > + size_t desc_size; > + size_t desc_offset; > + uint32_t rx_desc_packet_buf_size; > + uint32_t rx_desc_header_buf_size; > + struct iovec *iov; > + size_t iov_ofs; > + bool is_first; > + IGB_BaState bastate; > + hwaddr ba[MAX_PS_BUFFERS]; > +} IGB_PacketRxDMAState; Do *not*: - suffix 
struct name with _st. The convention is not common in QEMU code base, or even e1000e and igb do not always use the suffixes. - use _. See include/qemu/typedefs.h for examples. > + > +static void > +igb_truncate_to_descriptor_size(IGB_PacketRxDMAState *pdma_st, size_t > +*size) { > + if (*size > pdma_st->rx_desc_packet_buf_size) { > + *size = pdma_st->rx_desc_packet_buf_size; > + } > +} > + > +static void > +igb_write_payload_frag_to_rx_buffers(IGBCore *core, > + PCIDevice *d, > + hwaddr (*ba)[MAX_PS_BUFFERS], > + IGB_BaState *bastate, > + uint32_t cur_buf_len, > + const char *data, > + dma_addr_t data_len) { > + while (data_len > 0) { > + assert(bastate->cur_idx < MAX_PS_BUFFERS); > + > + uint32_t cur_buf_bytes_left = cur_buf_len - > + bastate->written[bastate->cur_idx]; > + uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left); > + > + trace_igb_rx_desc_buff_write(bastate->cur_idx, > + (*ba)[bastate->cur_idx], > + bastate->written[bastate->cur_idx], > + data, > + bytes_to_write); > + > + pci_dma_write(d, > + (*ba)[bastate->cur_idx] + > + bastate->written[bastate->cur_idx], > + data, bytes_to_write); > + > + bastate->written[bastate->cur_idx] += bytes_to_write; > + data += bytes_to_write; > + data_len -= bytes_to_write; > + > + if (bastate->written[bastate->cur_idx] == cur_buf_len) { > + bastate->cur_idx++; > + } > + } > +} > + > +static void > +igb_write_payload_to_rx_buffers(IGBCore *core, > + struct NetRxPkt *pkt, > + PCIDevice *d, > + IGB_PacketRxDMAState *pdma_st, > + size_t *copy_size) { > + static const uint32_t fcs_pad; > + size_t iov_copy; > + > + /* Copy packet payload */ > + while (*copy_size) { > + iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs); > + igb_write_payload_frag_to_rx_buffers(core, d, > + &pdma_st->ba, > + &pdma_st->bastate, > + pdma_st->rx_desc_packet_buf_size, > + pdma_st->iov->iov_base + > + pdma_st->iov_ofs, > + iov_copy); > + > + *copy_size -= iov_copy; > + pdma_st->iov_ofs += iov_copy; > + if (pdma_st->iov_ofs 
== pdma_st->iov->iov_len) { > + pdma_st->iov++; > + pdma_st->iov_ofs = 0; > + } > + } > + > + if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) { > + /* Simulate FCS checksum presence in the last descriptor */ > + igb_write_payload_frag_to_rx_buffers(core, d, > + &pdma_st->ba, > + &pdma_st->bastate, > + pdma_st->rx_desc_packet_buf_size, > + (const char *) &fcs_pad, > + e1000x_fcs_len(core->mac)); > + } > +} > + > +static void > +igb_write_to_rx_buffers(IGBCore *core, > + struct NetRxPkt *pkt, > + PCIDevice *d, > + IGB_PacketRxDMAState *pdma_st) { > + size_t copy_size; > + > + if (!(pdma_st->ba)[1]) { > + /* as per intel docs; skip descriptors with null buf addr */ > + trace_e1000e_rx_null_descriptor(); > + return; > + } > + > + if (pdma_st->desc_offset >= pdma_st->size) { > + return; > + } > + > + pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset; > + igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size); > + copy_size = pdma_st->size - pdma_st->desc_offset; > + igb_truncate_to_descriptor_size(pdma_st, ©_size); > + pdma_st->bastate.cur_idx = 1; > + igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, > +©_size); } > + > static void > igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, > const E1000E_RxRing *rxr, @@ -1560,91 > +1697,58 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, > PCIDevice *d; > dma_addr_t base; > union e1000_rx_desc_union desc; > - size_t desc_size; > - size_t desc_offset = 0; > - size_t iov_ofs = 0; > - > - struct iovec *iov = net_rx_pkt_get_iovec(pkt); > - size_t size = net_rx_pkt_get_total_len(pkt); > - size_t total_size = size + e1000x_fcs_len(core->mac); > - const E1000E_RingInfo *rxi = rxr->i; > - size_t bufsize = igb_rxbufsize(core, rxi); > - > + const E1000E_RingInfo *rxi; > + size_t rx_desc_len; > + > + IGB_PacketRxDMAState pdma_st = {0}; > + pdma_st.is_first = true; > + pdma_st.size = net_rx_pkt_get_total_len(pkt); > + pdma_st.total_size = pdma_st.size + 
e1000x_fcs_len(core->mac); > + > + rxi = rxr->i; > + rx_desc_len = core->rx_desc_len; > + pdma_st.rx_desc_packet_buf_size = > + igb_rxbufsize(core, rxi); > + pdma_st.rx_desc_header_buf_size = > + igb_get_queue_rx_header_buf_size(core, rxi); > + pdma_st.iov = net_rx_pkt_get_iovec(pkt); > d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8); > if (!d) { > d = core->owner; > } > > do { > - hwaddr ba; > - uint16_t written = 0; > + memset(&pdma_st.bastate, 0, sizeof(IGB_BaState)); > bool is_last = false; > > - desc_size = total_size - desc_offset; > - > - if (desc_size > bufsize) { > - desc_size = bufsize; > - } > - > if (igb_ring_empty(core, rxi)) { > return; > } > > base = igb_ring_head_descr(core, rxi); > + pci_dma_read(d, base, &desc, rx_desc_len); > + trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len); > > - pci_dma_read(d, base, &desc, core->rx_desc_len); > - > - trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len); > - > - igb_read_rx_descr(core, &desc, &ba); > - > - if (ba) { > - if (desc_offset < size) { > - static const uint32_t fcs_pad; > - size_t iov_copy; > - size_t copy_size = size - desc_offset; > - if (copy_size > bufsize) { > - copy_size = bufsize; > - } > - > - /* Copy packet payload */ > - while (copy_size) { > - iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); > - > - igb_write_to_rx_buffers(core, d, ba, &written, > - iov->iov_base + iov_ofs, iov_copy); > + igb_read_rx_descr(core, &desc, &pdma_st->ba[1], rxi); > > - copy_size -= iov_copy; > - iov_ofs += iov_copy; > - if (iov_ofs == iov->iov_len) { > - iov++; > - iov_ofs = 0; > - } > - } > - > - if (desc_offset + desc_size >= total_size) { > - /* Simulate FCS checksum presence in the last descriptor */ > - igb_write_to_rx_buffers(core, d, ba, &written, > - (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); > - } > - } > - } else { /* as per intel docs; skip descriptors with null buf addr */ > - trace_e1000e_rx_null_descriptor(); > - } > - desc_offset += desc_size; > - if (desc_offset >= 
total_size) { > + igb_write_to_rx_buffers(core, pkt, d, &pdma_st); > + pdma_st.desc_offset += pdma_st.desc_size; > + if (pdma_st.desc_offset >= pdma_st.total_size) { > is_last = true; > } > > - igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL, > - rss_info, etqf, ts, written); > - igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len); > - > - igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN); > - > - } while (desc_offset < total_size); > - > - igb_update_rx_stats(core, rxi, size, total_size); > + igb_write_rx_descr(core, &desc, > + is_last ? pkt : NULL, > + rss_info, > + etqf, ts, > + &pdma_st.bastate.written, > + rxi); > + pci_dma_write(d, base, &desc, rx_desc_len); > + igb_ring_advance(core, rxi, > + rx_desc_len / E1000_MIN_RX_DESC_LEN); > + } while (pdma_st.desc_offset < pdma_st.total_size); > + > + igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size); > } > > static bool > diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index > 82ff195dfc..c4ede22181 100644 > --- a/hw/net/igb_regs.h > +++ b/hw/net/igb_regs.h > @@ -452,6 +452,7 @@ union e1000_adv_rx_desc { > #define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00 > #define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ > #define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 > +#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000 > #define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 > #define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000 > #define E1000_SRRCTL_DROP_EN 0x80000000 > @@ -692,11 +693,12 @@ union e1000_adv_rx_desc { > > #define E1000_STATUS_NUM_VFS_SHIFT 14 > > -#define E1000_ADVRXD_PKT_IP4 BIT(4) > -#define E1000_ADVRXD_PKT_IP6 BIT(6) > -#define E1000_ADVRXD_PKT_TCP BIT(8) > -#define E1000_ADVRXD_PKT_UDP BIT(9) > -#define E1000_ADVRXD_PKT_SCTP BIT(10) > +#define E1000_ADVRXD_PKT_IP4 BIT(0) > +#define E1000_ADVRXD_PKT_IP6 BIT(2) > +#define E1000_ADVRXD_PKT_IP6E BIT(3) > +#define E1000_ADVRXD_PKT_TCP BIT(4) > +#define E1000_ADVRXD_PKT_UDP BIT(5) > 
+#define E1000_ADVRXD_PKT_SCTP BIT(6) > > static inline uint8_t igb_ivar_entry_rx(uint8_t i) > { > diff --git a/hw/net/trace-events b/hw/net/trace-events index > e4a98b2c7d..9a02261360 100644 > --- a/hw/net/trace-events > +++ b/hw/net/trace-events > @@ -277,9 +277,9 @@ igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED" > igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d" > > igb_rx_desc_buff_size(uint32_t b) "buffer size: %u" > -igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" > +igb_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer #%u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" > > -igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X" > +igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: 0x%X, rss_pkt_type: 0x%X" > > igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled" > igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x" > @@ -294,6 +294,8 @@ igb_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = 0x%x" > igb_set_pfmailbox(uint32_t vf_num, uint32_t val) "PFMailbox[%d]: 0x%x" > igb_set_vfmailbox(uint32_t vf_num, uint32_t val) "VFMailbox[%d]: 0x%x" > > +igb_wrn_rx_desc_modes_not_supp(int desc_type) "Not supported descriptor type: %d" > + > # igbvf.c > igbvf_wrn_io_addr_unknown(uint64_t addr) "IO unknown register > 0x%"PRIx64 > > diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c index > a603468beb..aa65b5452c 100644 > --- a/tests/qtest/libqos/igb.c > +++ b/tests/qtest/libqos/igb.c > @@ -109,6 +109,9 @@ static void igb_pci_start_hw(QOSGraphObject *obj) > E1000_RAH_AV | E1000_RAH_POOL_1 | > le16_to_cpu(*(uint16_t *)(address + 4))); > > + /* Set supported receive descriptor mode */ > + 
e1000e_macreg_write(&d->e1000e, E1000_SRRCTL(0), > + E1000_SRRCTL_DESCTYPE_ADV_ONEBUF); > + > /* Enable receive */ > e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN); > e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN); ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3 1/2] igb: RX descriptors handling cleanup 2023-04-28 12:51 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics @ 2023-04-30 6:05 ` Akihiko Odaki 0 siblings, 0 replies; 12+ messages in thread From: Akihiko Odaki @ 2023-04-30 6:05 UTC (permalink / raw) To: Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics, qemu-devel Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki On 2023/04/28 21:51, Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics wrote: >> Please don't ignore comments in reviews, and if you have a question with them or you don't agree with them, please write so in a reply. You don't have to post a new version quickly so take time to address all problems pointed out. > > I assumed that comments referred only to places pointed in the code and fixed only those places. Sorry about that. I will keep in mind that your comments are more general and fix all the places, where array is passed as parameter. > >> Please split up those changes into separate patches. > > I will extract TCP ACK detection removal and IPv6 extensions traffic detection to separate patches. Those will be small patches in comparison to the rest of cleanup, however those are functional changes. > >> Do *not*: >> - suffix struct name with _st. The convention is not common in QEMU code base, or even e1000e and igb do not always use the suffixes. >> - use _. > > ok, I was looking at E1000E_RingInfo_st, which was added recently with IGB code in commit 3a977deebe6b9a10043182e922f6883924ef21f5 ("Intrdocue igb device emulation"). It's just copied from e1000e code. Check for e1000e_core.c for history older than that commit. ^ permalink raw reply [flat|nested] 12+ messages in thread
* RE: [PATCH v3 1/2] igb: RX descriptors handling cleanup 2023-04-28 10:31 ` Akihiko Odaki 2023-04-28 12:51 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics @ 2023-04-28 12:53 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics 1 sibling, 0 replies; 12+ messages in thread From: Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics @ 2023-04-28 12:53 UTC (permalink / raw) To: 'Akihiko Odaki', qemu-devel Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki > Please don't ignore comments in reviews, and if you have a question with them or you don't agree with them, please write so in a reply. You don't have to post a new version quickly so take time to address all problems pointed out. I assumed that comments referred only to places pointed in the code and fixed only those places. Sorry about that. I will keep in mind that your comments are more general and fix all the places, where array is passed as parameter. > Please split up those changes into separate patches. I will extract TCP ACK detection removal and IPv6 extensions traffic detection to separate patches. Those will be small patches in comparison to the rest of cleanup, however those are functional changes. > Do *not*: > - suffix struct name with _st. The convention is not common in QEMU code base, or even e1000e and igb do not always use the suffixes. > - use _. ok, I was looking at E1000E_RingInfo_st, which was added with IGB code in commit 3a977deebe6b9a10043182e922f6883924ef21f5 ("Intrdocue igb device emulation"). 
-----Original Message----- From: qemu-devel-bounces+t.dzieciol=partner.samsung.com@nongnu.org <qemu-devel-bounces+t.dzieciol=partner.samsung.com@nongnu.org> On Behalf Of Akihiko Odaki Sent: piątek, 28 kwietnia 2023 12:31 To: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>; qemu-devel@nongnu.org Cc: sriram.yagnaraman@est.tech; jasowang@redhat.com; k.kwiecien@samsung.com; m.sochacki@samsung.com Subject: Re: [PATCH v3 1/2] igb: RX descriptors handling cleanup On 2023/04/27 19:47, Tomasz Dzieciol wrote: > Format of Intel 82576 was changed in comparison to Intel 82574 > extended descriptors. This change updates filling of advanced > descriptors fields > accordingly: > * remove TCP ACK detection > * add IPv6 with extensions traffic detection > * fragment checksum and IP ID is filled only when RXCSUM.IPPCSE is set (in > addition to RXCSUM.PCSD bit cleared condition) Please split up those changes into separate patches. The relevant documentation is "Split up long patches" section of: docs/devel/submitting-a-patch.rst > > Refactoring is done in preparation for support of multiple advanced > descriptors RX modes, especially packet-split modes. 
> > Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com> > --- > hw/net/e1000e_core.c | 18 +- > hw/net/e1000x_regs.h | 1 + > hw/net/igb_core.c | 478 ++++++++++++++++++++++++--------------- > hw/net/igb_regs.h | 12 +- > hw/net/trace-events | 6 +- > tests/qtest/libqos/igb.c | 3 + > 6 files changed, 316 insertions(+), 202 deletions(-) > > diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index > 78373d7db7..0085ad53c2 100644 > --- a/hw/net/e1000e_core.c > +++ b/hw/net/e1000e_core.c > @@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core, > } > > static void > -e1000e_write_to_rx_buffers(E1000ECore *core, > - hwaddr ba[MAX_PS_BUFFERS], > - e1000e_ba_state *bastate, > - const char *data, > - dma_addr_t data_len) > +e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core, > + hwaddr ba[MAX_PS_BUFFERS], > + e1000e_ba_state *bastate, > + const char *data, > + dma_addr_t data_len) > { > while (data_len > 0) { > uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx]; > @@ -1594,8 +1594,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, > while (copy_size) { > iov_copy = MIN(copy_size, iov->iov_len - > iov_ofs); > > - e1000e_write_to_rx_buffers(core, ba, &bastate, > - iov->iov_base + iov_ofs, iov_copy); > + e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate, > + iov->iov_base + > + iov_ofs, > + > + iov_copy); > > copy_size -= iov_copy; > iov_ofs += iov_copy; @@ -1607,7 +1609,7 @@ > e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, > > if (desc_offset + desc_size >= total_size) { > /* Simulate FCS checksum presence in the last descriptor */ > - e1000e_write_to_rx_buffers(core, ba, &bastate, > + e1000e_write_payload_frag_to_rx_buffers(core, ba, > + &bastate, > (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); > } > } > diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h index > 13760c66d3..344fd10359 100644 > --- a/hw/net/e1000x_regs.h > +++ b/hw/net/e1000x_regs.h > @@ -827,6 
+827,7 @@ union e1000_rx_desc_packet_split { > /* Receive Checksum Control bits */ > #define E1000_RXCSUM_IPOFLD 0x100 /* IP Checksum Offload Enable */ > #define E1000_RXCSUM_TUOFLD 0x200 /* TCP/UDP Checksum Offload Enable */ > +#define E1000_RXCSUM_IPPCSE 0x1000 /* IP Payload Checksum enable */ > #define E1000_RXCSUM_PCSD 0x2000 /* Packet Checksum Disable */ > > #define E1000_RING_DESC_LEN (16) > diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index > 96b7335b31..1cb64402aa 100644 > --- a/hw/net/igb_core.c > +++ b/hw/net/igb_core.c > @@ -267,6 +267,21 @@ igb_rx_use_legacy_descriptor(IGBCore *core) > return false; > } > > +typedef struct E1000E_RingInfo_st { > + int dbah; > + int dbal; > + int dlen; > + int dh; > + int dt; > + int idx; > +} E1000E_RingInfo; > + > +static uint32_t > +igb_rx_queue_desctyp_get(IGBCore *core, const E1000E_RingInfo *r) { > + return core->mac[E1000_SRRCTL(r->idx) >> 2] & > +E1000_SRRCTL_DESCTYPE_MASK; } > + > static inline bool > igb_rss_enabled(IGBCore *core) > { > @@ -694,15 +709,6 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx) > return (ent & E1000_IVAR_VALID) ? 
BIT(ent & 0x1f) : 0; > } > > -typedef struct E1000E_RingInfo_st { > - int dbah; > - int dbal; > - int dlen; > - int dh; > - int dt; > - int idx; > -} E1000E_RingInfo; > - > static inline bool > igb_ring_empty(IGBCore *core, const E1000E_RingInfo *r) > { > @@ -941,6 +947,14 @@ igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size) > bufsize; > } > > +static uint32_t > +igb_get_queue_rx_header_buf_size(IGBCore *core, const E1000E_RingInfo > +*r) { > + uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2]; > + return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >> > + E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; } > + > void > igb_start_recv(IGBCore *core) > { > @@ -1232,7 +1246,7 @@ igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, > } > > static inline void > -igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, > +igb_read_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, > hwaddr *buff_addr) > { > if (igb_rx_use_legacy_descriptor(core)) { @@ -1281,15 +1295,11 > @@ igb_verify_csum_in_sw(IGBCore *core, > } > > static void > -igb_build_rx_metadata(IGBCore *core, > - struct NetRxPkt *pkt, > - bool is_eop, > - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, > - uint16_t *pkt_info, uint16_t *hdr_info, > - uint32_t *rss, > - uint32_t *status_flags, > - uint16_t *ip_id, > - uint16_t *vlan_tag) > +igb_build_rx_metadata_common(IGBCore *core, > + struct NetRxPkt *pkt, > + bool is_eop, > + uint32_t *status_flags, > + uint16_t *vlan_tag) > { > struct virtio_net_hdr *vhdr; > bool hasip4, hasip6, csum_valid; @@ -1298,7 +1308,6 @@ > igb_build_rx_metadata(IGBCore *core, > *status_flags = E1000_RXD_STAT_DD; > > /* No additional metadata needed for non-EOP descriptors */ > - /* TODO: EOP apply only to status so don't skip whole function. 
*/ > if (!is_eop) { > goto func_exit; > } > @@ -1315,64 +1324,6 @@ igb_build_rx_metadata(IGBCore *core, > trace_e1000e_rx_metadata_vlan(*vlan_tag); > } > > - /* Packet parsing results */ > - if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { > - if (rss_info->enabled) { > - *rss = cpu_to_le32(rss_info->hash); > - trace_igb_rx_metadata_rss(*rss); > - } > - } else if (hasip4) { > - *status_flags |= E1000_RXD_STAT_IPIDV; > - *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); > - trace_e1000e_rx_metadata_ip_id(*ip_id); > - } > - > - if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && net_rx_pkt_is_tcp_ack(pkt)) { > - *status_flags |= E1000_RXD_STAT_ACK; > - trace_e1000e_rx_metadata_ack(); > - } > - > - if (pkt_info) { > - *pkt_info = rss_info->enabled ? rss_info->type : 0; > - > - if (etqf < 8) { > - *pkt_info |= BIT(11) | (etqf << 4); > - } else { > - if (hasip4) { > - *pkt_info |= E1000_ADVRXD_PKT_IP4; > - } > - > - if (hasip6) { > - *pkt_info |= E1000_ADVRXD_PKT_IP6; > - } > - > - switch (l4hdr_proto) { > - case ETH_L4_HDR_PROTO_TCP: > - *pkt_info |= E1000_ADVRXD_PKT_TCP; > - break; > - > - case ETH_L4_HDR_PROTO_UDP: > - *pkt_info |= E1000_ADVRXD_PKT_UDP; > - break; > - > - case ETH_L4_HDR_PROTO_SCTP: > - *pkt_info |= E1000_ADVRXD_PKT_SCTP; > - break; > - > - default: > - break; > - } > - } > - } > - > - if (hdr_info) { > - *hdr_info = 0; > - } > - > - if (ts) { > - *status_flags |= BIT(16); > - } > - > /* RX CSO information */ > if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) { > trace_e1000e_rx_metadata_ipv6_sum_disabled(); > @@ -1426,58 +1377,126 @@ func_exit: > } > > static inline void > -igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, > +igb_write_lgcy_rx_descr(IGBCore *core, > + struct e1000_rx_desc *desc, > struct NetRxPkt *pkt, > - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, > + const E1000E_RSSInfo *rss_info, > uint16_t length) > { > - uint32_t status_flags, rss; > - uint16_t ip_id; > + uint32_t status_flags; > > 
assert(!rss_info->enabled); > + > + memset(desc, 0, sizeof(*desc)); > desc->length = cpu_to_le16(length); > - desc->csum = 0; > + igb_build_rx_metadata_common(core, pkt, pkt != NULL, > + &status_flags, > + &desc->special); > > - igb_build_rx_metadata(core, pkt, pkt != NULL, > - rss_info, etqf, ts, > - NULL, NULL, &rss, > - &status_flags, &ip_id, > - &desc->special); > desc->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); > desc->status = (uint8_t) le32_to_cpu(status_flags); > } > > +static uint16_t > +igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, > +uint16_t etqf) { > + uint16_t pkt_type = 0; > + bool hasip4, hasip6; > + EthL4HdrProto l4hdr_proto; > + > + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); > + > + if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { > + eth_ip6_hdr_info *ip6hdr_info = net_rx_pkt_get_ip6_info(pkt); > + pkt_type |= ip6hdr_info->has_ext_hdrs ? E1000_ADVRXD_PKT_IP6E : > + E1000_ADVRXD_PKT_IP6; > + } else if (hasip4) { > + pkt_type = E1000_ADVRXD_PKT_IP4; > + } > + > + if (etqf < 8) { > + pkt_type |= (BIT(11) >> 4) | etqf; > + return pkt_type; > + } > + > + switch (l4hdr_proto) { > + case ETH_L4_HDR_PROTO_TCP: > + pkt_type |= E1000_ADVRXD_PKT_TCP; > + break; > + case ETH_L4_HDR_PROTO_UDP: > + pkt_type |= E1000_ADVRXD_PKT_UDP; > + break; > + case ETH_L4_HDR_PROTO_SCTP: > + pkt_type |= E1000_ADVRXD_PKT_SCTP; > + break; > + default: > + break; > + } > + > + return pkt_type; > +} > + > static inline void > -igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, > +igb_write_adv_rx_descr(IGBCore *core, > + union e1000_adv_rx_desc *d, > struct NetRxPkt *pkt, > - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, > + const E1000E_RSSInfo *rss_info, > + uint16_t etqf, > + bool ts, > uint16_t length) > { > - memset(&desc->wb, 0, sizeof(desc->wb)); > + bool hasip4, hasip6; > + EthL4HdrProto l4hdr_proto; > + uint16_t rss_type = 0, pkt_type; > + bool eop = (pkt != NULL); > + 
memset(&d->wb, 0, sizeof(d->wb)); > + > + d->wb.upper.length = cpu_to_le16(length); > + igb_build_rx_metadata_common(core, pkt, eop, > + &d->wb.upper.status_error, > + &d->wb.upper.vlan); > + > + if (!eop) { > + return; > + } > + > + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); > + > + if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { > + if (rss_info->enabled) { > + d->wb.lower.hi_dword.rss = cpu_to_le32(rss_info->hash); > + rss_type = rss_info->type; > + trace_igb_rx_metadata_rss(d->wb.lower.hi_dword.rss, rss_type); > + } > + } else if (core->mac[RXCSUM] & E1000_RXCSUM_IPPCSE && > + hasip4) { > + d->wb.lower.hi_dword.csum_ip.ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); > + trace_e1000e_rx_metadata_ip_id(d->wb.lower.hi_dword.csum_ip.ip_id); > + } > > - desc->wb.upper.length = cpu_to_le16(length); > + if (ts) { > + d->wb.upper.status_error |= BIT(16); > + } > > - igb_build_rx_metadata(core, pkt, pkt != NULL, > - rss_info, etqf, ts, > - &desc->wb.lower.lo_dword.pkt_info, > - &desc->wb.lower.lo_dword.hdr_info, > - &desc->wb.lower.hi_dword.rss, > - &desc->wb.upper.status_error, > - &desc->wb.lower.hi_dword.csum_ip.ip_id, > - &desc->wb.upper.vlan); > + pkt_type = igb_rx_desc_get_packet_type(core, pkt, etqf); > + trace_e1000e_rx_metadata_pkt_type(pkt_type); > + d->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type > + << 4)); > } > > static inline void > -igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, > - struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, > - uint16_t etqf, bool ts, uint16_t length) > +igb_write_rx_descr(IGBCore *core, > + union e1000_rx_desc_union *desc, > + struct NetRxPkt *pkt, > + const E1000E_RSSInfo *rss_info, > + uint16_t etqf, > + bool ts, > + uint16_t(*written)[MAX_PS_BUFFERS], As I wrote in the last review: > Don't use an array here. Instead use a struct as e1000_adv_rx_desc > does; it will be more explicit and easier to read. And I think this > should be rather part of the next patch. 
Please don't ignore comments in reviews, and if you have a question with them or you don't agree with them, please write so in a reply. You don't have to post a new version quickly so take time to address all problems pointed out. See also: "Stay around to fix problems raised in code review" section in docs/devel/submitting-a-patch.rst > + const E1000E_RingInfo *r) > { > if (igb_rx_use_legacy_descriptor(core)) { > - igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, > - etqf, ts, length); > + igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, > + (*written)[1]); > } else { > - igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, > - etqf, ts, length); > + igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, etqf, > + ts, (*written)[1]); > } > } > > @@ -1513,19 +1532,6 @@ igb_pci_dma_write_rx_desc(IGBCore *core, PCIDevice *dev, dma_addr_t addr, > } > } > > -static void > -igb_write_to_rx_buffers(IGBCore *core, > - PCIDevice *d, > - hwaddr ba, > - uint16_t *written, > - const char *data, > - dma_addr_t data_len) > -{ > - trace_igb_rx_desc_buff_write(ba, *written, data, data_len); > - pci_dma_write(d, ba + *written, data, data_len); > - *written += data_len; > -} > - > static void > igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi, > size_t pkt_size, size_t pkt_fcs_size) @@ -1551,6 > +1557,137 @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi) > ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16; > } > > +typedef struct IGB_BaState_st { > + uint16_t written[MAX_PS_BUFFERS]; > + uint8_t cur_idx; > +} IGB_BaState; > + > +typedef struct IGB_PacketRxDMAState_st { > + size_t size; > + size_t total_size; > + size_t ps_hdr_len; > + size_t desc_size; > + size_t desc_offset; > + uint32_t rx_desc_packet_buf_size; > + uint32_t rx_desc_header_buf_size; > + struct iovec *iov; > + size_t iov_ofs; > + bool is_first; > + IGB_BaState bastate; > + hwaddr ba[MAX_PS_BUFFERS]; > +} IGB_PacketRxDMAState; Do *not*: - suffix 
struct name with _st. The convention is not common in QEMU code base, or even e1000e and igb do not always use the suffixes. - use _. See include/qemu/typedefs.h for examples. > + > +static void > +igb_truncate_to_descriptor_size(IGB_PacketRxDMAState *pdma_st, size_t > +*size) { > + if (*size > pdma_st->rx_desc_packet_buf_size) { > + *size = pdma_st->rx_desc_packet_buf_size; > + } > +} > + > +static void > +igb_write_payload_frag_to_rx_buffers(IGBCore *core, > + PCIDevice *d, > + hwaddr (*ba)[MAX_PS_BUFFERS], > + IGB_BaState *bastate, > + uint32_t cur_buf_len, > + const char *data, > + dma_addr_t data_len) { > + while (data_len > 0) { > + assert(bastate->cur_idx < MAX_PS_BUFFERS); > + > + uint32_t cur_buf_bytes_left = cur_buf_len - > + bastate->written[bastate->cur_idx]; > + uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left); > + > + trace_igb_rx_desc_buff_write(bastate->cur_idx, > + (*ba)[bastate->cur_idx], > + bastate->written[bastate->cur_idx], > + data, > + bytes_to_write); > + > + pci_dma_write(d, > + (*ba)[bastate->cur_idx] + > + bastate->written[bastate->cur_idx], > + data, bytes_to_write); > + > + bastate->written[bastate->cur_idx] += bytes_to_write; > + data += bytes_to_write; > + data_len -= bytes_to_write; > + > + if (bastate->written[bastate->cur_idx] == cur_buf_len) { > + bastate->cur_idx++; > + } > + } > +} > + > +static void > +igb_write_payload_to_rx_buffers(IGBCore *core, > + struct NetRxPkt *pkt, > + PCIDevice *d, > + IGB_PacketRxDMAState *pdma_st, > + size_t *copy_size) { > + static const uint32_t fcs_pad; > + size_t iov_copy; > + > + /* Copy packet payload */ > + while (*copy_size) { > + iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs); > + igb_write_payload_frag_to_rx_buffers(core, d, > + &pdma_st->ba, > + &pdma_st->bastate, > + pdma_st->rx_desc_packet_buf_size, > + pdma_st->iov->iov_base + > + pdma_st->iov_ofs, > + iov_copy); > + > + *copy_size -= iov_copy; > + pdma_st->iov_ofs += iov_copy; > + if (pdma_st->iov_ofs 
== pdma_st->iov->iov_len) { > + pdma_st->iov++; > + pdma_st->iov_ofs = 0; > + } > + } > + > + if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) { > + /* Simulate FCS checksum presence in the last descriptor */ > + igb_write_payload_frag_to_rx_buffers(core, d, > + &pdma_st->ba, > + &pdma_st->bastate, > + pdma_st->rx_desc_packet_buf_size, > + (const char *) &fcs_pad, > + e1000x_fcs_len(core->mac)); > + } > +} > + > +static void > +igb_write_to_rx_buffers(IGBCore *core, > + struct NetRxPkt *pkt, > + PCIDevice *d, > + IGB_PacketRxDMAState *pdma_st) { > + size_t copy_size; > + > + if (!(pdma_st->ba)[1]) { > + /* as per intel docs; skip descriptors with null buf addr */ > + trace_e1000e_rx_null_descriptor(); > + return; > + } > + > + if (pdma_st->desc_offset >= pdma_st->size) { > + return; > + } > + > + pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset; > + igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size); > + copy_size = pdma_st->size - pdma_st->desc_offset; > + igb_truncate_to_descriptor_size(pdma_st, ©_size); > + pdma_st->bastate.cur_idx = 1; > + igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, > +©_size); } > + > static void > igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, > const E1000E_RxRing *rxr, @@ -1560,91 > +1697,58 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, > PCIDevice *d; > dma_addr_t base; > union e1000_rx_desc_union desc; > - size_t desc_size; > - size_t desc_offset = 0; > - size_t iov_ofs = 0; > - > - struct iovec *iov = net_rx_pkt_get_iovec(pkt); > - size_t size = net_rx_pkt_get_total_len(pkt); > - size_t total_size = size + e1000x_fcs_len(core->mac); > - const E1000E_RingInfo *rxi = rxr->i; > - size_t bufsize = igb_rxbufsize(core, rxi); > - > + const E1000E_RingInfo *rxi; > + size_t rx_desc_len; > + > + IGB_PacketRxDMAState pdma_st = {0}; > + pdma_st.is_first = true; > + pdma_st.size = net_rx_pkt_get_total_len(pkt); > + pdma_st.total_size = pdma_st.size + 
e1000x_fcs_len(core->mac); > + > + rxi = rxr->i; > + rx_desc_len = core->rx_desc_len; > + pdma_st.rx_desc_packet_buf_size = > + igb_rxbufsize(core, rxi); > + pdma_st.rx_desc_header_buf_size = > + igb_get_queue_rx_header_buf_size(core, rxi); > + pdma_st.iov = net_rx_pkt_get_iovec(pkt); > d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8); > if (!d) { > d = core->owner; > } > > do { > - hwaddr ba; > - uint16_t written = 0; > + memset(&pdma_st.bastate, 0, sizeof(IGB_BaState)); > bool is_last = false; > > - desc_size = total_size - desc_offset; > - > - if (desc_size > bufsize) { > - desc_size = bufsize; > - } > - > if (igb_ring_empty(core, rxi)) { > return; > } > > base = igb_ring_head_descr(core, rxi); > + pci_dma_read(d, base, &desc, rx_desc_len); > + trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len); > > - pci_dma_read(d, base, &desc, core->rx_desc_len); > - > - trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len); > - > - igb_read_rx_descr(core, &desc, &ba); > - > - if (ba) { > - if (desc_offset < size) { > - static const uint32_t fcs_pad; > - size_t iov_copy; > - size_t copy_size = size - desc_offset; > - if (copy_size > bufsize) { > - copy_size = bufsize; > - } > - > - /* Copy packet payload */ > - while (copy_size) { > - iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); > - > - igb_write_to_rx_buffers(core, d, ba, &written, > - iov->iov_base + iov_ofs, iov_copy); > + igb_read_rx_descr(core, &desc, &pdma_st->ba[1], rxi); > > - copy_size -= iov_copy; > - iov_ofs += iov_copy; > - if (iov_ofs == iov->iov_len) { > - iov++; > - iov_ofs = 0; > - } > - } > - > - if (desc_offset + desc_size >= total_size) { > - /* Simulate FCS checksum presence in the last descriptor */ > - igb_write_to_rx_buffers(core, d, ba, &written, > - (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); > - } > - } > - } else { /* as per intel docs; skip descriptors with null buf addr */ > - trace_e1000e_rx_null_descriptor(); > - } > - desc_offset += desc_size; > - if (desc_offset >= 
total_size) { > + igb_write_to_rx_buffers(core, pkt, d, &pdma_st); > + pdma_st.desc_offset += pdma_st.desc_size; > + if (pdma_st.desc_offset >= pdma_st.total_size) { > is_last = true; > } > > - igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL, > - rss_info, etqf, ts, written); > - igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len); > - > - igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN); > - > - } while (desc_offset < total_size); > - > - igb_update_rx_stats(core, rxi, size, total_size); > + igb_write_rx_descr(core, &desc, > + is_last ? pkt : NULL, > + rss_info, > + etqf, ts, > + &pdma_st.bastate.written, > + rxi); > + pci_dma_write(d, base, &desc, rx_desc_len); > + igb_ring_advance(core, rxi, > + rx_desc_len / E1000_MIN_RX_DESC_LEN); > + } while (pdma_st.desc_offset < pdma_st.total_size); > + > + igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size); > } > > static bool > diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index > 82ff195dfc..c4ede22181 100644 > --- a/hw/net/igb_regs.h > +++ b/hw/net/igb_regs.h > @@ -452,6 +452,7 @@ union e1000_adv_rx_desc { > #define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00 > #define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ > #define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 > +#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000 > #define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 > #define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000 > #define E1000_SRRCTL_DROP_EN 0x80000000 > @@ -692,11 +693,12 @@ union e1000_adv_rx_desc { > > #define E1000_STATUS_NUM_VFS_SHIFT 14 > > -#define E1000_ADVRXD_PKT_IP4 BIT(4) > -#define E1000_ADVRXD_PKT_IP6 BIT(6) > -#define E1000_ADVRXD_PKT_TCP BIT(8) > -#define E1000_ADVRXD_PKT_UDP BIT(9) > -#define E1000_ADVRXD_PKT_SCTP BIT(10) > +#define E1000_ADVRXD_PKT_IP4 BIT(0) > +#define E1000_ADVRXD_PKT_IP6 BIT(2) > +#define E1000_ADVRXD_PKT_IP6E BIT(3) > +#define E1000_ADVRXD_PKT_TCP BIT(4) > +#define E1000_ADVRXD_PKT_UDP BIT(5) > 
+#define E1000_ADVRXD_PKT_SCTP BIT(6) > > static inline uint8_t igb_ivar_entry_rx(uint8_t i) > { > diff --git a/hw/net/trace-events b/hw/net/trace-events index > e4a98b2c7d..9a02261360 100644 > --- a/hw/net/trace-events > +++ b/hw/net/trace-events > @@ -277,9 +277,9 @@ igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED" > igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d" > > igb_rx_desc_buff_size(uint32_t b) "buffer size: %u" > -igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" > +igb_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer #%u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" > > -igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X" > +igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: 0x%X, rss_pkt_type: 0x%X" > > igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled" > igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x" > @@ -294,6 +294,8 @@ igb_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = 0x%x" > igb_set_pfmailbox(uint32_t vf_num, uint32_t val) "PFMailbox[%d]: 0x%x" > igb_set_vfmailbox(uint32_t vf_num, uint32_t val) "VFMailbox[%d]: 0x%x" > > +igb_wrn_rx_desc_modes_not_supp(int desc_type) "Not supported descriptor type: %d" > + > # igbvf.c > igbvf_wrn_io_addr_unknown(uint64_t addr) "IO unknown register > 0x%"PRIx64 > > diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c index > a603468beb..aa65b5452c 100644 > --- a/tests/qtest/libqos/igb.c > +++ b/tests/qtest/libqos/igb.c > @@ -109,6 +109,9 @@ static void igb_pci_start_hw(QOSGraphObject *obj) > E1000_RAH_AV | E1000_RAH_POOL_1 | > le16_to_cpu(*(uint16_t *)(address + 4))); > > + /* Set supported receive descriptor mode */ > + 
e1000e_macreg_write(&d->e1000e, E1000_SRRCTL(0), > + E1000_SRRCTL_DESCTYPE_ADV_ONEBUF); > + > /* Enable receive */ > e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN); > e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN); ^ permalink raw reply [flat|nested] 12+ messages in thread
* RE: [PATCH v3 1/2] igb: RX descriptors handling cleanup 2023-04-27 10:47 ` [PATCH v3 1/2] igb: RX descriptors handling cleanup Tomasz Dzieciol 2023-04-28 10:31 ` Akihiko Odaki @ 2023-04-30 11:57 ` Sriram Yagnaraman 2023-05-02 14:00 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics 1 sibling, 1 reply; 12+ messages in thread From: Sriram Yagnaraman @ 2023-04-30 11:57 UTC (permalink / raw) To: Tomasz Dzieciol, qemu-devel@nongnu.org, akihiko.odaki@daynix.com Cc: jasowang@redhat.com, k.kwiecien@samsung.com, m.sochacki@samsung.com > -----Original Message----- > From: Tomasz Dzieciol <t.dzieciol@partner.samsung.com> > Sent: Thursday, 27 April 2023 12:48 > To: qemu-devel@nongnu.org; akihiko.odaki@daynix.com > Cc: Sriram Yagnaraman <sriram.yagnaraman@est.tech>; > jasowang@redhat.com; k.kwiecien@samsung.com; > m.sochacki@samsung.com > Subject: [PATCH v3 1/2] igb: RX descriptors handling cleanup > > Format of Intel 82576 was changed in comparison to Intel 82574 extended > descriptors. This change updates filling of advanced descriptors fields > accordingly: > * remove TCP ACK detection > * add IPv6 with extensions traffic detection > * fragment checksum and IP ID is filled only when RXCSUM.IPPCSE is set (in > addition to RXCSUM.PCSD bit cleared condition) Just curious if any device driver still uses IP payload checksum enable (IPPCSE)? > > Refactoring is done in preparation for support of multiple advanced descriptors > RX modes, especially packet-split modes. 
> > Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com> > --- > hw/net/e1000e_core.c | 18 +- > hw/net/e1000x_regs.h | 1 + > hw/net/igb_core.c | 478 ++++++++++++++++++++++++--------------- > hw/net/igb_regs.h | 12 +- > hw/net/trace-events | 6 +- > tests/qtest/libqos/igb.c | 3 + > 6 files changed, 316 insertions(+), 202 deletions(-) > > diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index > 78373d7db7..0085ad53c2 100644 > --- a/hw/net/e1000e_core.c > +++ b/hw/net/e1000e_core.c > @@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore > *core, } > > static void > -e1000e_write_to_rx_buffers(E1000ECore *core, > - hwaddr ba[MAX_PS_BUFFERS], > - e1000e_ba_state *bastate, > - const char *data, > - dma_addr_t data_len) > +e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core, > + hwaddr ba[MAX_PS_BUFFERS], > + e1000e_ba_state *bastate, > + const char *data, > + dma_addr_t data_len) > { > while (data_len > 0) { > uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx]; > @@ -1594,8 +1594,10 @@ e1000e_write_packet_to_guest(E1000ECore > *core, struct NetRxPkt *pkt, > while (copy_size) { > iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); > > - e1000e_write_to_rx_buffers(core, ba, &bastate, > - iov->iov_base + iov_ofs, iov_copy); > + e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate, > + iov->iov_base + > + iov_ofs, > + iov_copy); > > copy_size -= iov_copy; > iov_ofs += iov_copy; @@ -1607,7 +1609,7 @@ > e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, > > if (desc_offset + desc_size >= total_size) { > /* Simulate FCS checksum presence in the last descriptor */ > - e1000e_write_to_rx_buffers(core, ba, &bastate, > + e1000e_write_payload_frag_to_rx_buffers(core, ba, > + &bastate, > (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); > } > } > diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h index > 13760c66d3..344fd10359 100644 > --- a/hw/net/e1000x_regs.h > +++ b/hw/net/e1000x_regs.h > @@ -827,6 +827,7 
@@ union e1000_rx_desc_packet_split { > /* Receive Checksum Control bits */ > #define E1000_RXCSUM_IPOFLD 0x100 /* IP Checksum Offload Enable */ > #define E1000_RXCSUM_TUOFLD 0x200 /* TCP/UDP Checksum Offload > Enable */ > +#define E1000_RXCSUM_IPPCSE 0x1000 /* IP Payload Checksum enable */ > #define E1000_RXCSUM_PCSD 0x2000 /* Packet Checksum Disable */ > > #define E1000_RING_DESC_LEN (16) > diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index > 96b7335b31..1cb64402aa 100644 > --- a/hw/net/igb_core.c > +++ b/hw/net/igb_core.c > @@ -267,6 +267,21 @@ igb_rx_use_legacy_descriptor(IGBCore *core) > return false; > } > > +typedef struct E1000E_RingInfo_st { > + int dbah; > + int dbal; > + int dlen; > + int dh; > + int dt; > + int idx; > +} E1000E_RingInfo; > + > +static uint32_t > +igb_rx_queue_desctyp_get(IGBCore *core, const E1000E_RingInfo *r) { > + return core->mac[E1000_SRRCTL(r->idx) >> 2] & > +E1000_SRRCTL_DESCTYPE_MASK; } > + > static inline bool > igb_rss_enabled(IGBCore *core) > { > @@ -694,15 +709,6 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int > queue_idx) > return (ent & E1000_IVAR_VALID) ? 
BIT(ent & 0x1f) : 0; } > > -typedef struct E1000E_RingInfo_st { > - int dbah; > - int dbal; > - int dlen; > - int dh; > - int dt; > - int idx; > -} E1000E_RingInfo; > - > static inline bool > igb_ring_empty(IGBCore *core, const E1000E_RingInfo *r) { @@ -941,6 > +947,14 @@ igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t > total_size) > bufsize; > } > > +static uint32_t > +igb_get_queue_rx_header_buf_size(IGBCore *core, const E1000E_RingInfo > +*r) { > + uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2]; > + return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >> > + E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; } > + > void > igb_start_recv(IGBCore *core) > { > @@ -1232,7 +1246,7 @@ igb_read_adv_rx_descr(IGBCore *core, union > e1000_adv_rx_desc *desc, } > > static inline void > -igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, > +igb_read_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, > hwaddr *buff_addr) > { > if (igb_rx_use_legacy_descriptor(core)) { @@ -1281,15 +1295,11 @@ > igb_verify_csum_in_sw(IGBCore *core, } > > static void > -igb_build_rx_metadata(IGBCore *core, > - struct NetRxPkt *pkt, > - bool is_eop, > - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, > - uint16_t *pkt_info, uint16_t *hdr_info, > - uint32_t *rss, > - uint32_t *status_flags, > - uint16_t *ip_id, > - uint16_t *vlan_tag) > +igb_build_rx_metadata_common(IGBCore *core, > + struct NetRxPkt *pkt, > + bool is_eop, > + uint32_t *status_flags, > + uint16_t *vlan_tag) > { > struct virtio_net_hdr *vhdr; > bool hasip4, hasip6, csum_valid; > @@ -1298,7 +1308,6 @@ igb_build_rx_metadata(IGBCore *core, > *status_flags = E1000_RXD_STAT_DD; > > /* No additional metadata needed for non-EOP descriptors */ > - /* TODO: EOP apply only to status so don't skip whole function. 
*/ > if (!is_eop) { > goto func_exit; > } > @@ -1315,64 +1324,6 @@ igb_build_rx_metadata(IGBCore *core, > trace_e1000e_rx_metadata_vlan(*vlan_tag); > } > > - /* Packet parsing results */ > - if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { > - if (rss_info->enabled) { > - *rss = cpu_to_le32(rss_info->hash); > - trace_igb_rx_metadata_rss(*rss); > - } > - } else if (hasip4) { > - *status_flags |= E1000_RXD_STAT_IPIDV; > - *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); > - trace_e1000e_rx_metadata_ip_id(*ip_id); > - } > - > - if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && > net_rx_pkt_is_tcp_ack(pkt)) { > - *status_flags |= E1000_RXD_STAT_ACK; > - trace_e1000e_rx_metadata_ack(); > - } > - > - if (pkt_info) { > - *pkt_info = rss_info->enabled ? rss_info->type : 0; > - > - if (etqf < 8) { > - *pkt_info |= BIT(11) | (etqf << 4); > - } else { > - if (hasip4) { > - *pkt_info |= E1000_ADVRXD_PKT_IP4; > - } > - > - if (hasip6) { > - *pkt_info |= E1000_ADVRXD_PKT_IP6; > - } > - > - switch (l4hdr_proto) { > - case ETH_L4_HDR_PROTO_TCP: > - *pkt_info |= E1000_ADVRXD_PKT_TCP; > - break; > - > - case ETH_L4_HDR_PROTO_UDP: > - *pkt_info |= E1000_ADVRXD_PKT_UDP; > - break; > - > - case ETH_L4_HDR_PROTO_SCTP: > - *pkt_info |= E1000_ADVRXD_PKT_SCTP; > - break; > - > - default: > - break; > - } > - } > - } > - > - if (hdr_info) { > - *hdr_info = 0; > - } > - > - if (ts) { > - *status_flags |= BIT(16); > - } > - > /* RX CSO information */ > if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) { > trace_e1000e_rx_metadata_ipv6_sum_disabled(); > @@ -1426,58 +1377,126 @@ func_exit: > } > > static inline void > -igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, > +igb_write_lgcy_rx_descr(IGBCore *core, > + struct e1000_rx_desc *desc, > struct NetRxPkt *pkt, > - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, > + const E1000E_RSSInfo *rss_info, > uint16_t length) { > - uint32_t status_flags, rss; > - uint16_t ip_id; > + uint32_t status_flags; > > 
assert(!rss_info->enabled); > + > + memset(desc, 0, sizeof(*desc)); > desc->length = cpu_to_le16(length); > - desc->csum = 0; > + igb_build_rx_metadata_common(core, pkt, pkt != NULL, > + &status_flags, > + &desc->special); > > - igb_build_rx_metadata(core, pkt, pkt != NULL, > - rss_info, etqf, ts, > - NULL, NULL, &rss, > - &status_flags, &ip_id, > - &desc->special); > desc->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); > desc->status = (uint8_t) le32_to_cpu(status_flags); } > > +static uint16_t > +igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, > +uint16_t etqf) { > + uint16_t pkt_type = 0; > + bool hasip4, hasip6; > + EthL4HdrProto l4hdr_proto; > + > + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); > + > + if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { > + eth_ip6_hdr_info *ip6hdr_info = net_rx_pkt_get_ip6_info(pkt); > + pkt_type |= ip6hdr_info->has_ext_hdrs ? E1000_ADVRXD_PKT_IP6E : > + E1000_ADVRXD_PKT_IP6; > + } else if (hasip4) { > + pkt_type = E1000_ADVRXD_PKT_IP4; > + } > + > + if (etqf < 8) { > + pkt_type |= (BIT(11) >> 4) | etqf; > + return pkt_type; > + } > + > + switch (l4hdr_proto) { > + case ETH_L4_HDR_PROTO_TCP: > + pkt_type |= E1000_ADVRXD_PKT_TCP; > + break; > + case ETH_L4_HDR_PROTO_UDP: > + pkt_type |= E1000_ADVRXD_PKT_UDP; > + break; > + case ETH_L4_HDR_PROTO_SCTP: > + pkt_type |= E1000_ADVRXD_PKT_SCTP; > + break; > + default: > + break; > + } > + > + return pkt_type; > +} > + > static inline void > -igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, > +igb_write_adv_rx_descr(IGBCore *core, > + union e1000_adv_rx_desc *d, > struct NetRxPkt *pkt, > - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, > + const E1000E_RSSInfo *rss_info, > + uint16_t etqf, > + bool ts, > uint16_t length) { > - memset(&desc->wb, 0, sizeof(desc->wb)); > + bool hasip4, hasip6; > + EthL4HdrProto l4hdr_proto; > + uint16_t rss_type = 0, pkt_type; > + bool eop = (pkt != NULL); > + memset(&d->wb, 
0, sizeof(d->wb)); > + > + d->wb.upper.length = cpu_to_le16(length); > + igb_build_rx_metadata_common(core, pkt, eop, > + &d->wb.upper.status_error, > + &d->wb.upper.vlan); > + > + if (!eop) { > + return; > + } > + > + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); > + > + if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { > + if (rss_info->enabled) { > + d->wb.lower.hi_dword.rss = cpu_to_le32(rss_info->hash); > + rss_type = rss_info->type; > + trace_igb_rx_metadata_rss(d->wb.lower.hi_dword.rss, rss_type); > + } > + } else if (core->mac[RXCSUM] & E1000_RXCSUM_IPPCSE && > + hasip4) { > + d->wb.lower.hi_dword.csum_ip.ip_id = > cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); > + trace_e1000e_rx_metadata_ip_id(d->wb.lower.hi_dword.csum_ip.ip_id); > + } > > - desc->wb.upper.length = cpu_to_le16(length); > + if (ts) { > + d->wb.upper.status_error |= BIT(16); > + } > > - igb_build_rx_metadata(core, pkt, pkt != NULL, > - rss_info, etqf, ts, > - &desc->wb.lower.lo_dword.pkt_info, > - &desc->wb.lower.lo_dword.hdr_info, > - &desc->wb.lower.hi_dword.rss, > - &desc->wb.upper.status_error, > - &desc->wb.lower.hi_dword.csum_ip.ip_id, > - &desc->wb.upper.vlan); > + pkt_type = igb_rx_desc_get_packet_type(core, pkt, etqf); > + trace_e1000e_rx_metadata_pkt_type(pkt_type); > + d->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type << > + 4)); > } > > static inline void > -igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, > - struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, > - uint16_t etqf, bool ts, uint16_t length) > +igb_write_rx_descr(IGBCore *core, > + union e1000_rx_desc_union *desc, > + struct NetRxPkt *pkt, > + const E1000E_RSSInfo *rss_info, > + uint16_t etqf, > + bool ts, > + uint16_t(*written)[MAX_PS_BUFFERS], > + const E1000E_RingInfo *r) > { > if (igb_rx_use_legacy_descriptor(core)) { > - igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, > - etqf, ts, length); > + igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, 
rss_info, > + (*written)[1]); > } else { > - igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, > - etqf, ts, length); > + igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, etqf, > + ts, (*written)[1]); > } > } > > @@ -1513,19 +1532,6 @@ igb_pci_dma_write_rx_desc(IGBCore *core, > PCIDevice *dev, dma_addr_t addr, > } > } > > -static void > -igb_write_to_rx_buffers(IGBCore *core, > - PCIDevice *d, > - hwaddr ba, > - uint16_t *written, > - const char *data, > - dma_addr_t data_len) > -{ > - trace_igb_rx_desc_buff_write(ba, *written, data, data_len); > - pci_dma_write(d, ba + *written, data, data_len); > - *written += data_len; > -} > - > static void > igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi, > size_t pkt_size, size_t pkt_fcs_size) @@ -1551,6 +1557,137 @@ > igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi) > ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16; } > > +typedef struct IGB_BaState_st { > + uint16_t written[MAX_PS_BUFFERS]; > + uint8_t cur_idx; > +} IGB_BaState; > + > +typedef struct IGB_PacketRxDMAState_st { > + size_t size; > + size_t total_size; > + size_t ps_hdr_len; > + size_t desc_size; > + size_t desc_offset; > + uint32_t rx_desc_packet_buf_size; > + uint32_t rx_desc_header_buf_size; > + struct iovec *iov; > + size_t iov_ofs; > + bool is_first; > + IGB_BaState bastate; > + hwaddr ba[MAX_PS_BUFFERS]; > +} IGB_PacketRxDMAState; > + > +static void > +igb_truncate_to_descriptor_size(IGB_PacketRxDMAState *pdma_st, size_t > +*size) { > + if (*size > pdma_st->rx_desc_packet_buf_size) { > + *size = pdma_st->rx_desc_packet_buf_size; > + } > +} > + > +static void > +igb_write_payload_frag_to_rx_buffers(IGBCore *core, > + PCIDevice *d, > + hwaddr (*ba)[MAX_PS_BUFFERS], > + IGB_BaState *bastate, > + uint32_t cur_buf_len, > + const char *data, > + dma_addr_t data_len) { > + while (data_len > 0) { > + assert(bastate->cur_idx < MAX_PS_BUFFERS); > + > + uint32_t cur_buf_bytes_left = cur_buf_len - > + 
bastate->written[bastate->cur_idx]; > + uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left); > + > + trace_igb_rx_desc_buff_write(bastate->cur_idx, > + (*ba)[bastate->cur_idx], > + bastate->written[bastate->cur_idx], > + data, > + bytes_to_write); > + > + pci_dma_write(d, > + (*ba)[bastate->cur_idx] + > + bastate->written[bastate->cur_idx], > + data, bytes_to_write); > + > + bastate->written[bastate->cur_idx] += bytes_to_write; > + data += bytes_to_write; > + data_len -= bytes_to_write; > + > + if (bastate->written[bastate->cur_idx] == cur_buf_len) { > + bastate->cur_idx++; > + } > + } > +} > + > +static void > +igb_write_payload_to_rx_buffers(IGBCore *core, > + struct NetRxPkt *pkt, > + PCIDevice *d, > + IGB_PacketRxDMAState *pdma_st, > + size_t *copy_size) { > + static const uint32_t fcs_pad; > + size_t iov_copy; > + > + /* Copy packet payload */ > + while (*copy_size) { > + iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs); > + igb_write_payload_frag_to_rx_buffers(core, d, > + &pdma_st->ba, > + &pdma_st->bastate, > + pdma_st->rx_desc_packet_buf_size, > + pdma_st->iov->iov_base + > + pdma_st->iov_ofs, > + iov_copy); > + > + *copy_size -= iov_copy; > + pdma_st->iov_ofs += iov_copy; > + if (pdma_st->iov_ofs == pdma_st->iov->iov_len) { > + pdma_st->iov++; > + pdma_st->iov_ofs = 0; > + } > + } > + > + if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) { > + /* Simulate FCS checksum presence in the last descriptor */ > + igb_write_payload_frag_to_rx_buffers(core, d, > + &pdma_st->ba, > + &pdma_st->bastate, > + pdma_st->rx_desc_packet_buf_size, > + (const char *) &fcs_pad, > + e1000x_fcs_len(core->mac)); > + } > +} > + > +static void > +igb_write_to_rx_buffers(IGBCore *core, > + struct NetRxPkt *pkt, > + PCIDevice *d, > + IGB_PacketRxDMAState *pdma_st) { > + size_t copy_size; > + > + if (!(pdma_st->ba)[1]) { > + /* as per intel docs; skip descriptors with null buf addr */ > + trace_e1000e_rx_null_descriptor(); > + return; > 
+ } > + > + if (pdma_st->desc_offset >= pdma_st->size) { > + return; > + } > + > + pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset; > + igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size); > + copy_size = pdma_st->size - pdma_st->desc_offset; > + igb_truncate_to_descriptor_size(pdma_st, ©_size); > + pdma_st->bastate.cur_idx = 1; > + igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, ©_size); > +} > + > static void > igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, > const E1000E_RxRing *rxr, @@ -1560,91 +1697,58 @@ > igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, > PCIDevice *d; > dma_addr_t base; > union e1000_rx_desc_union desc; > - size_t desc_size; > - size_t desc_offset = 0; > - size_t iov_ofs = 0; > - > - struct iovec *iov = net_rx_pkt_get_iovec(pkt); > - size_t size = net_rx_pkt_get_total_len(pkt); > - size_t total_size = size + e1000x_fcs_len(core->mac); > - const E1000E_RingInfo *rxi = rxr->i; > - size_t bufsize = igb_rxbufsize(core, rxi); > - > + const E1000E_RingInfo *rxi; > + size_t rx_desc_len; > + > + IGB_PacketRxDMAState pdma_st = {0}; > + pdma_st.is_first = true; > + pdma_st.size = net_rx_pkt_get_total_len(pkt); > + pdma_st.total_size = pdma_st.size + e1000x_fcs_len(core->mac); > + > + rxi = rxr->i; > + rx_desc_len = core->rx_desc_len; > + pdma_st.rx_desc_packet_buf_size = > + igb_rxbufsize(core, rxi); > + pdma_st.rx_desc_header_buf_size = > + igb_get_queue_rx_header_buf_size(core, rxi); > + pdma_st.iov = net_rx_pkt_get_iovec(pkt); > d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8); > if (!d) { > d = core->owner; > } > > do { > - hwaddr ba; > - uint16_t written = 0; > + memset(&pdma_st.bastate, 0, sizeof(IGB_BaState)); > bool is_last = false; > > - desc_size = total_size - desc_offset; > - > - if (desc_size > bufsize) { > - desc_size = bufsize; > - } > - > if (igb_ring_empty(core, rxi)) { > return; > } > > base = igb_ring_head_descr(core, rxi); > + pci_dma_read(d, base, &desc, 
rx_desc_len); > + trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len); > > - pci_dma_read(d, base, &desc, core->rx_desc_len); > - > - trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len); > - > - igb_read_rx_descr(core, &desc, &ba); > - > - if (ba) { > - if (desc_offset < size) { > - static const uint32_t fcs_pad; > - size_t iov_copy; > - size_t copy_size = size - desc_offset; > - if (copy_size > bufsize) { > - copy_size = bufsize; > - } > - > - /* Copy packet payload */ > - while (copy_size) { > - iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); > - > - igb_write_to_rx_buffers(core, d, ba, &written, > - iov->iov_base + iov_ofs, iov_copy); > + igb_read_rx_descr(core, &desc, &pdma_st->ba[1], rxi); > > - copy_size -= iov_copy; > - iov_ofs += iov_copy; > - if (iov_ofs == iov->iov_len) { > - iov++; > - iov_ofs = 0; > - } > - } > - > - if (desc_offset + desc_size >= total_size) { > - /* Simulate FCS checksum presence in the last descriptor */ > - igb_write_to_rx_buffers(core, d, ba, &written, > - (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); > - } > - } > - } else { /* as per intel docs; skip descriptors with null buf addr */ > - trace_e1000e_rx_null_descriptor(); > - } > - desc_offset += desc_size; > - if (desc_offset >= total_size) { > + igb_write_to_rx_buffers(core, pkt, d, &pdma_st); > + pdma_st.desc_offset += pdma_st.desc_size; > + if (pdma_st.desc_offset >= pdma_st.total_size) { > is_last = true; > } > > - igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL, > - rss_info, etqf, ts, written); > - igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len); > - > - igb_ring_advance(core, rxi, core->rx_desc_len / > E1000_MIN_RX_DESC_LEN); > - > - } while (desc_offset < total_size); > - > - igb_update_rx_stats(core, rxi, size, total_size); > + igb_write_rx_descr(core, &desc, > + is_last ? 
pkt : NULL, > + rss_info, > + etqf, ts, > + &pdma_st.bastate.written, > + rxi); > + pci_dma_write(d, base, &desc, rx_desc_len); > + igb_ring_advance(core, rxi, > + rx_desc_len / E1000_MIN_RX_DESC_LEN); > + } while (pdma_st.desc_offset < pdma_st.total_size); > + > + igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size); > } > > static bool > diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index > 82ff195dfc..c4ede22181 100644 > --- a/hw/net/igb_regs.h > +++ b/hw/net/igb_regs.h > @@ -452,6 +452,7 @@ union e1000_adv_rx_desc { > #define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00 > #define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ > #define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 > +#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000 > #define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 > #define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000 > #define E1000_SRRCTL_DROP_EN 0x80000000 > @@ -692,11 +693,12 @@ union e1000_adv_rx_desc { > > #define E1000_STATUS_NUM_VFS_SHIFT 14 > > -#define E1000_ADVRXD_PKT_IP4 BIT(4) > -#define E1000_ADVRXD_PKT_IP6 BIT(6) > -#define E1000_ADVRXD_PKT_TCP BIT(8) > -#define E1000_ADVRXD_PKT_UDP BIT(9) > -#define E1000_ADVRXD_PKT_SCTP BIT(10) > +#define E1000_ADVRXD_PKT_IP4 BIT(0) > +#define E1000_ADVRXD_PKT_IP6 BIT(2) > +#define E1000_ADVRXD_PKT_IP6E BIT(3) > +#define E1000_ADVRXD_PKT_TCP BIT(4) > +#define E1000_ADVRXD_PKT_UDP BIT(5) > +#define E1000_ADVRXD_PKT_SCTP BIT(6) > > static inline uint8_t igb_ivar_entry_rx(uint8_t i) { diff --git a/hw/net/trace- > events b/hw/net/trace-events index e4a98b2c7d..9a02261360 100644 > --- a/hw/net/trace-events > +++ b/hw/net/trace-events > @@ -277,9 +277,9 @@ igb_core_mdic_write_unhandled(uint32_t addr) > "MDIC WRITE: PHY[%u] UNHANDLED" > igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool > pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF > reset done: %d" > > igb_rx_desc_buff_size(uint32_t b) "buffer size: %u" > 
-igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, > uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" > +igb_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, const void* > source, uint32_t len) "buffer #%u, addr: 0x%"PRIx64", offset: %u, from: %p, > length: %u" > > -igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X" > +igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: > 0x%X, rss_pkt_type: 0x%X" > > igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR > enabled" > igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x" > @@ -294,6 +294,8 @@ igb_irq_eitr_set(uint32_t eitr_num, uint32_t val) > "EITR[%u] = 0x%x" > igb_set_pfmailbox(uint32_t vf_num, uint32_t val) "PFMailbox[%d]: 0x%x" > igb_set_vfmailbox(uint32_t vf_num, uint32_t val) "VFMailbox[%d]: 0x%x" > > +igb_wrn_rx_desc_modes_not_supp(int desc_type) "Not supported descriptor > type: %d" > + > # igbvf.c > igbvf_wrn_io_addr_unknown(uint64_t addr) "IO unknown register > 0x%"PRIx64 > > diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c index > a603468beb..aa65b5452c 100644 > --- a/tests/qtest/libqos/igb.c > +++ b/tests/qtest/libqos/igb.c > @@ -109,6 +109,9 @@ static void igb_pci_start_hw(QOSGraphObject *obj) > E1000_RAH_AV | E1000_RAH_POOL_1 | > le16_to_cpu(*(uint16_t *)(address + 4))); > > + /* Set supported receive descriptor mode */ > + e1000e_macreg_write(&d->e1000e, E1000_SRRCTL(0), > + E1000_SRRCTL_DESCTYPE_ADV_ONEBUF); > + > /* Enable receive */ > e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN); > e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN); > -- > 2.25.1 ^ permalink raw reply [flat|nested] 12+ messages in thread
* RE: [PATCH v3 1/2] igb: RX descriptors handling cleanup 2023-04-30 11:57 ` Sriram Yagnaraman @ 2023-05-02 14:00 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics 2023-05-03 7:46 ` Sriram Yagnaraman 0 siblings, 1 reply; 12+ messages in thread From: Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics @ 2023-05-02 14:00 UTC (permalink / raw) To: 'Sriram Yagnaraman', qemu-devel, akihiko.odaki Cc: jasowang, k.kwiecien, m.sochacki Not Linux/DPDK/FreeBSD for IGB. Change here adds additional condition (RXCSUM.IPPCSE set) to enable putting IP ID into descriptor, besides clearing RXCSUM.PCSD (required according to Intel 82576 datasheet) that was not present in the e1000e code. -----Original Message----- From: Sriram Yagnaraman <sriram.yagnaraman@est.tech> Sent: niedziela, 30 kwietnia 2023 13:57 To: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>; qemu-devel@nongnu.org; akihiko.odaki@daynix.com Cc: jasowang@redhat.com; k.kwiecien@samsung.com; m.sochacki@samsung.com Subject: RE: [PATCH v3 1/2] igb: RX descriptors handling cleanup > -----Original Message----- > From: Tomasz Dzieciol <t.dzieciol@partner.samsung.com> > Sent: Thursday, 27 April 2023 12:48 > To: qemu-devel@nongnu.org; akihiko.odaki@daynix.com > Cc: Sriram Yagnaraman <sriram.yagnaraman@est.tech>; > jasowang@redhat.com; k.kwiecien@samsung.com; m.sochacki@samsung.com > Subject: [PATCH v3 1/2] igb: RX descriptors handling cleanup > > Format of Intel 82576 was changed in comparison to Intel 82574 > extended descriptors. This change updates filling of advanced > descriptors fields > accordingly: > * remove TCP ACK detection > * add IPv6 with extensions traffic detection > * fragment checksum and IP ID is filled only when RXCSUM.IPPCSE is set (in > addition to RXCSUM.PCSD bit cleared condition) Just curious if any device driver still uses IP payload checksum enable (IPPCSE)? 
> > Refactoring is done in preparation for support of multiple advanced > descriptors RX modes, especially packet-split modes. > > Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com> > --- > hw/net/e1000e_core.c | 18 +- > hw/net/e1000x_regs.h | 1 + > hw/net/igb_core.c | 478 ++++++++++++++++++++++++--------------- > hw/net/igb_regs.h | 12 +- > hw/net/trace-events | 6 +- > tests/qtest/libqos/igb.c | 3 + > 6 files changed, 316 insertions(+), 202 deletions(-) > > diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index > 78373d7db7..0085ad53c2 100644 > --- a/hw/net/e1000e_core.c > +++ b/hw/net/e1000e_core.c > @@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore > *core, } > > static void > -e1000e_write_to_rx_buffers(E1000ECore *core, > - hwaddr ba[MAX_PS_BUFFERS], > - e1000e_ba_state *bastate, > - const char *data, > - dma_addr_t data_len) > +e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core, > + hwaddr ba[MAX_PS_BUFFERS], > + e1000e_ba_state *bastate, > + const char *data, > + dma_addr_t data_len) > { > while (data_len > 0) { > uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx]; > @@ -1594,8 +1594,10 @@ e1000e_write_packet_to_guest(E1000ECore > *core, struct NetRxPkt *pkt, > while (copy_size) { > iov_copy = MIN(copy_size, iov->iov_len - > iov_ofs); > > - e1000e_write_to_rx_buffers(core, ba, &bastate, > - iov->iov_base + iov_ofs, iov_copy); > + e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate, > + iov->iov_base + > + iov_ofs, > + > + iov_copy); > > copy_size -= iov_copy; > iov_ofs += iov_copy; @@ -1607,7 +1609,7 @@ > e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, > > if (desc_offset + desc_size >= total_size) { > /* Simulate FCS checksum presence in the last descriptor */ > - e1000e_write_to_rx_buffers(core, ba, &bastate, > + e1000e_write_payload_frag_to_rx_buffers(core, ba, > + &bastate, > (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); > } > } > diff --git a/hw/net/e1000x_regs.h 
b/hw/net/e1000x_regs.h index > 13760c66d3..344fd10359 100644 > --- a/hw/net/e1000x_regs.h > +++ b/hw/net/e1000x_regs.h > @@ -827,6 +827,7 @@ union e1000_rx_desc_packet_split { > /* Receive Checksum Control bits */ > #define E1000_RXCSUM_IPOFLD 0x100 /* IP Checksum Offload Enable */ > #define E1000_RXCSUM_TUOFLD 0x200 /* TCP/UDP Checksum Offload > Enable */ > +#define E1000_RXCSUM_IPPCSE 0x1000 /* IP Payload Checksum enable */ > #define E1000_RXCSUM_PCSD 0x2000 /* Packet Checksum Disable */ > > #define E1000_RING_DESC_LEN (16) > diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index > 96b7335b31..1cb64402aa 100644 > --- a/hw/net/igb_core.c > +++ b/hw/net/igb_core.c > @@ -267,6 +267,21 @@ igb_rx_use_legacy_descriptor(IGBCore *core) > return false; > } > > +typedef struct E1000E_RingInfo_st { > + int dbah; > + int dbal; > + int dlen; > + int dh; > + int dt; > + int idx; > +} E1000E_RingInfo; > + > +static uint32_t > +igb_rx_queue_desctyp_get(IGBCore *core, const E1000E_RingInfo *r) { > + return core->mac[E1000_SRRCTL(r->idx) >> 2] & > +E1000_SRRCTL_DESCTYPE_MASK; } > + > static inline bool > igb_rss_enabled(IGBCore *core) > { > @@ -694,15 +709,6 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int > queue_idx) > return (ent & E1000_IVAR_VALID) ? 
BIT(ent & 0x1f) : 0; } > > -typedef struct E1000E_RingInfo_st { > - int dbah; > - int dbal; > - int dlen; > - int dh; > - int dt; > - int idx; > -} E1000E_RingInfo; > - > static inline bool > igb_ring_empty(IGBCore *core, const E1000E_RingInfo *r) { @@ -941,6 > +947,14 @@ igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, > +size_t > total_size) > bufsize; > } > > +static uint32_t > +igb_get_queue_rx_header_buf_size(IGBCore *core, const E1000E_RingInfo > +*r) { > + uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2]; > + return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >> > + E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; } > + > void > igb_start_recv(IGBCore *core) > { > @@ -1232,7 +1246,7 @@ igb_read_adv_rx_descr(IGBCore *core, union > e1000_adv_rx_desc *desc, } > > static inline void > -igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, > +igb_read_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, > hwaddr *buff_addr) > { > if (igb_rx_use_legacy_descriptor(core)) { @@ -1281,15 +1295,11 @@ > igb_verify_csum_in_sw(IGBCore *core, } > > static void > -igb_build_rx_metadata(IGBCore *core, > - struct NetRxPkt *pkt, > - bool is_eop, > - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, > - uint16_t *pkt_info, uint16_t *hdr_info, > - uint32_t *rss, > - uint32_t *status_flags, > - uint16_t *ip_id, > - uint16_t *vlan_tag) > +igb_build_rx_metadata_common(IGBCore *core, > + struct NetRxPkt *pkt, > + bool is_eop, > + uint32_t *status_flags, > + uint16_t *vlan_tag) > { > struct virtio_net_hdr *vhdr; > bool hasip4, hasip6, csum_valid; > @@ -1298,7 +1308,6 @@ igb_build_rx_metadata(IGBCore *core, > *status_flags = E1000_RXD_STAT_DD; > > /* No additional metadata needed for non-EOP descriptors */ > - /* TODO: EOP apply only to status so don't skip whole function. 
*/ > if (!is_eop) { > goto func_exit; > } > @@ -1315,64 +1324,6 @@ igb_build_rx_metadata(IGBCore *core, > trace_e1000e_rx_metadata_vlan(*vlan_tag); > } > > - /* Packet parsing results */ > - if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { > - if (rss_info->enabled) { > - *rss = cpu_to_le32(rss_info->hash); > - trace_igb_rx_metadata_rss(*rss); > - } > - } else if (hasip4) { > - *status_flags |= E1000_RXD_STAT_IPIDV; > - *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); > - trace_e1000e_rx_metadata_ip_id(*ip_id); > - } > - > - if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && > net_rx_pkt_is_tcp_ack(pkt)) { > - *status_flags |= E1000_RXD_STAT_ACK; > - trace_e1000e_rx_metadata_ack(); > - } > - > - if (pkt_info) { > - *pkt_info = rss_info->enabled ? rss_info->type : 0; > - > - if (etqf < 8) { > - *pkt_info |= BIT(11) | (etqf << 4); > - } else { > - if (hasip4) { > - *pkt_info |= E1000_ADVRXD_PKT_IP4; > - } > - > - if (hasip6) { > - *pkt_info |= E1000_ADVRXD_PKT_IP6; > - } > - > - switch (l4hdr_proto) { > - case ETH_L4_HDR_PROTO_TCP: > - *pkt_info |= E1000_ADVRXD_PKT_TCP; > - break; > - > - case ETH_L4_HDR_PROTO_UDP: > - *pkt_info |= E1000_ADVRXD_PKT_UDP; > - break; > - > - case ETH_L4_HDR_PROTO_SCTP: > - *pkt_info |= E1000_ADVRXD_PKT_SCTP; > - break; > - > - default: > - break; > - } > - } > - } > - > - if (hdr_info) { > - *hdr_info = 0; > - } > - > - if (ts) { > - *status_flags |= BIT(16); > - } > - > /* RX CSO information */ > if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) { > trace_e1000e_rx_metadata_ipv6_sum_disabled(); > @@ -1426,58 +1377,126 @@ func_exit: > } > > static inline void > -igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, > +igb_write_lgcy_rx_descr(IGBCore *core, > + struct e1000_rx_desc *desc, > struct NetRxPkt *pkt, > - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, > + const E1000E_RSSInfo *rss_info, > uint16_t length) { > - uint32_t status_flags, rss; > - uint16_t ip_id; > + uint32_t status_flags; > > 
assert(!rss_info->enabled); > + > + memset(desc, 0, sizeof(*desc)); > desc->length = cpu_to_le16(length); > - desc->csum = 0; > + igb_build_rx_metadata_common(core, pkt, pkt != NULL, > + &status_flags, > + &desc->special); > > - igb_build_rx_metadata(core, pkt, pkt != NULL, > - rss_info, etqf, ts, > - NULL, NULL, &rss, > - &status_flags, &ip_id, > - &desc->special); > desc->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); > desc->status = (uint8_t) le32_to_cpu(status_flags); } > > +static uint16_t > +igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, > +uint16_t etqf) { > + uint16_t pkt_type = 0; > + bool hasip4, hasip6; > + EthL4HdrProto l4hdr_proto; > + > + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); > + > + if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { > + eth_ip6_hdr_info *ip6hdr_info = net_rx_pkt_get_ip6_info(pkt); > + pkt_type |= ip6hdr_info->has_ext_hdrs ? E1000_ADVRXD_PKT_IP6E : > + E1000_ADVRXD_PKT_IP6; > + } else if (hasip4) { > + pkt_type = E1000_ADVRXD_PKT_IP4; > + } > + > + if (etqf < 8) { > + pkt_type |= (BIT(11) >> 4) | etqf; > + return pkt_type; > + } > + > + switch (l4hdr_proto) { > + case ETH_L4_HDR_PROTO_TCP: > + pkt_type |= E1000_ADVRXD_PKT_TCP; > + break; > + case ETH_L4_HDR_PROTO_UDP: > + pkt_type |= E1000_ADVRXD_PKT_UDP; > + break; > + case ETH_L4_HDR_PROTO_SCTP: > + pkt_type |= E1000_ADVRXD_PKT_SCTP; > + break; > + default: > + break; > + } > + > + return pkt_type; > +} > + > static inline void > -igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, > +igb_write_adv_rx_descr(IGBCore *core, > + union e1000_adv_rx_desc *d, > struct NetRxPkt *pkt, > - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, > + const E1000E_RSSInfo *rss_info, > + uint16_t etqf, > + bool ts, > uint16_t length) { > - memset(&desc->wb, 0, sizeof(desc->wb)); > + bool hasip4, hasip6; > + EthL4HdrProto l4hdr_proto; > + uint16_t rss_type = 0, pkt_type; > + bool eop = (pkt != NULL); > + memset(&d->wb, 
0, sizeof(d->wb)); > + > + d->wb.upper.length = cpu_to_le16(length); > + igb_build_rx_metadata_common(core, pkt, eop, > + &d->wb.upper.status_error, > + &d->wb.upper.vlan); > + > + if (!eop) { > + return; > + } > + > + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); > + > + if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { > + if (rss_info->enabled) { > + d->wb.lower.hi_dword.rss = cpu_to_le32(rss_info->hash); > + rss_type = rss_info->type; > + trace_igb_rx_metadata_rss(d->wb.lower.hi_dword.rss, rss_type); > + } > + } else if (core->mac[RXCSUM] & E1000_RXCSUM_IPPCSE && > + hasip4) { > + d->wb.lower.hi_dword.csum_ip.ip_id = > cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); > + trace_e1000e_rx_metadata_ip_id(d->wb.lower.hi_dword.csum_ip.ip_id); > + } > > - desc->wb.upper.length = cpu_to_le16(length); > + if (ts) { > + d->wb.upper.status_error |= BIT(16); > + } > > - igb_build_rx_metadata(core, pkt, pkt != NULL, > - rss_info, etqf, ts, > - &desc->wb.lower.lo_dword.pkt_info, > - &desc->wb.lower.lo_dword.hdr_info, > - &desc->wb.lower.hi_dword.rss, > - &desc->wb.upper.status_error, > - &desc->wb.lower.hi_dword.csum_ip.ip_id, > - &desc->wb.upper.vlan); > + pkt_type = igb_rx_desc_get_packet_type(core, pkt, etqf); > + trace_e1000e_rx_metadata_pkt_type(pkt_type); > + d->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type > + << 4)); > } > > static inline void > -igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, > - struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, > - uint16_t etqf, bool ts, uint16_t length) > +igb_write_rx_descr(IGBCore *core, > + union e1000_rx_desc_union *desc, > + struct NetRxPkt *pkt, > + const E1000E_RSSInfo *rss_info, > + uint16_t etqf, > + bool ts, > + uint16_t(*written)[MAX_PS_BUFFERS], > + const E1000E_RingInfo *r) > { > if (igb_rx_use_legacy_descriptor(core)) { > - igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, > - etqf, ts, length); > + igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, 
rss_info, > + (*written)[1]); > } else { > - igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, > - etqf, ts, length); > + igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, etqf, > + ts, (*written)[1]); > } > } > > @@ -1513,19 +1532,6 @@ igb_pci_dma_write_rx_desc(IGBCore *core, > PCIDevice *dev, dma_addr_t addr, > } > } > > -static void > -igb_write_to_rx_buffers(IGBCore *core, > - PCIDevice *d, > - hwaddr ba, > - uint16_t *written, > - const char *data, > - dma_addr_t data_len) > -{ > - trace_igb_rx_desc_buff_write(ba, *written, data, data_len); > - pci_dma_write(d, ba + *written, data, data_len); > - *written += data_len; > -} > - > static void > igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi, > size_t pkt_size, size_t pkt_fcs_size) @@ -1551,6 > +1557,137 @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi) > ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * > 16; } > > +typedef struct IGB_BaState_st { > + uint16_t written[MAX_PS_BUFFERS]; > + uint8_t cur_idx; > +} IGB_BaState; > + > +typedef struct IGB_PacketRxDMAState_st { > + size_t size; > + size_t total_size; > + size_t ps_hdr_len; > + size_t desc_size; > + size_t desc_offset; > + uint32_t rx_desc_packet_buf_size; > + uint32_t rx_desc_header_buf_size; > + struct iovec *iov; > + size_t iov_ofs; > + bool is_first; > + IGB_BaState bastate; > + hwaddr ba[MAX_PS_BUFFERS]; > +} IGB_PacketRxDMAState; > + > +static void > +igb_truncate_to_descriptor_size(IGB_PacketRxDMAState *pdma_st, size_t > +*size) { > + if (*size > pdma_st->rx_desc_packet_buf_size) { > + *size = pdma_st->rx_desc_packet_buf_size; > + } > +} > + > +static void > +igb_write_payload_frag_to_rx_buffers(IGBCore *core, > + PCIDevice *d, > + hwaddr (*ba)[MAX_PS_BUFFERS], > + IGB_BaState *bastate, > + uint32_t cur_buf_len, > + const char *data, > + dma_addr_t data_len) { > + while (data_len > 0) { > + assert(bastate->cur_idx < MAX_PS_BUFFERS); > + > + uint32_t cur_buf_bytes_left = cur_buf_len - > + 
bastate->written[bastate->cur_idx]; > + uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left); > + > + trace_igb_rx_desc_buff_write(bastate->cur_idx, > + (*ba)[bastate->cur_idx], > + bastate->written[bastate->cur_idx], > + data, > + bytes_to_write); > + > + pci_dma_write(d, > + (*ba)[bastate->cur_idx] + > + bastate->written[bastate->cur_idx], > + data, bytes_to_write); > + > + bastate->written[bastate->cur_idx] += bytes_to_write; > + data += bytes_to_write; > + data_len -= bytes_to_write; > + > + if (bastate->written[bastate->cur_idx] == cur_buf_len) { > + bastate->cur_idx++; > + } > + } > +} > + > +static void > +igb_write_payload_to_rx_buffers(IGBCore *core, > + struct NetRxPkt *pkt, > + PCIDevice *d, > + IGB_PacketRxDMAState *pdma_st, > + size_t *copy_size) { > + static const uint32_t fcs_pad; > + size_t iov_copy; > + > + /* Copy packet payload */ > + while (*copy_size) { > + iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs); > + igb_write_payload_frag_to_rx_buffers(core, d, > + &pdma_st->ba, > + &pdma_st->bastate, > + pdma_st->rx_desc_packet_buf_size, > + pdma_st->iov->iov_base + > + pdma_st->iov_ofs, > + iov_copy); > + > + *copy_size -= iov_copy; > + pdma_st->iov_ofs += iov_copy; > + if (pdma_st->iov_ofs == pdma_st->iov->iov_len) { > + pdma_st->iov++; > + pdma_st->iov_ofs = 0; > + } > + } > + > + if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) { > + /* Simulate FCS checksum presence in the last descriptor */ > + igb_write_payload_frag_to_rx_buffers(core, d, > + &pdma_st->ba, > + &pdma_st->bastate, > + pdma_st->rx_desc_packet_buf_size, > + (const char *) &fcs_pad, > + e1000x_fcs_len(core->mac)); > + } > +} > + > +static void > +igb_write_to_rx_buffers(IGBCore *core, > + struct NetRxPkt *pkt, > + PCIDevice *d, > + IGB_PacketRxDMAState *pdma_st) { > + size_t copy_size; > + > + if (!(pdma_st->ba)[1]) { > + /* as per intel docs; skip descriptors with null buf addr */ > + trace_e1000e_rx_null_descriptor(); > + return; > 
+ } > + > + if (pdma_st->desc_offset >= pdma_st->size) { > + return; > + } > + > + pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset; > + igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size); > + copy_size = pdma_st->size - pdma_st->desc_offset; > + igb_truncate_to_descriptor_size(pdma_st, ©_size); > + pdma_st->bastate.cur_idx = 1; > + igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, > +©_size); } > + > static void > igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, > const E1000E_RxRing *rxr, @@ -1560,91 > +1697,58 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, > PCIDevice *d; > dma_addr_t base; > union e1000_rx_desc_union desc; > - size_t desc_size; > - size_t desc_offset = 0; > - size_t iov_ofs = 0; > - > - struct iovec *iov = net_rx_pkt_get_iovec(pkt); > - size_t size = net_rx_pkt_get_total_len(pkt); > - size_t total_size = size + e1000x_fcs_len(core->mac); > - const E1000E_RingInfo *rxi = rxr->i; > - size_t bufsize = igb_rxbufsize(core, rxi); > - > + const E1000E_RingInfo *rxi; > + size_t rx_desc_len; > + > + IGB_PacketRxDMAState pdma_st = {0}; > + pdma_st.is_first = true; > + pdma_st.size = net_rx_pkt_get_total_len(pkt); > + pdma_st.total_size = pdma_st.size + e1000x_fcs_len(core->mac); > + > + rxi = rxr->i; > + rx_desc_len = core->rx_desc_len; > + pdma_st.rx_desc_packet_buf_size = > + igb_rxbufsize(core, rxi); > + pdma_st.rx_desc_header_buf_size = > + igb_get_queue_rx_header_buf_size(core, rxi); > + pdma_st.iov = net_rx_pkt_get_iovec(pkt); > d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8); > if (!d) { > d = core->owner; > } > > do { > - hwaddr ba; > - uint16_t written = 0; > + memset(&pdma_st.bastate, 0, sizeof(IGB_BaState)); > bool is_last = false; > > - desc_size = total_size - desc_offset; > - > - if (desc_size > bufsize) { > - desc_size = bufsize; > - } > - > if (igb_ring_empty(core, rxi)) { > return; > } > > base = igb_ring_head_descr(core, rxi); > + pci_dma_read(d, base, &desc, 
rx_desc_len); > + trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len); > > - pci_dma_read(d, base, &desc, core->rx_desc_len); > - > - trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len); > - > - igb_read_rx_descr(core, &desc, &ba); > - > - if (ba) { > - if (desc_offset < size) { > - static const uint32_t fcs_pad; > - size_t iov_copy; > - size_t copy_size = size - desc_offset; > - if (copy_size > bufsize) { > - copy_size = bufsize; > - } > - > - /* Copy packet payload */ > - while (copy_size) { > - iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); > - > - igb_write_to_rx_buffers(core, d, ba, &written, > - iov->iov_base + iov_ofs, iov_copy); > + igb_read_rx_descr(core, &desc, &pdma_st->ba[1], rxi); > > - copy_size -= iov_copy; > - iov_ofs += iov_copy; > - if (iov_ofs == iov->iov_len) { > - iov++; > - iov_ofs = 0; > - } > - } > - > - if (desc_offset + desc_size >= total_size) { > - /* Simulate FCS checksum presence in the last descriptor */ > - igb_write_to_rx_buffers(core, d, ba, &written, > - (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); > - } > - } > - } else { /* as per intel docs; skip descriptors with null buf addr */ > - trace_e1000e_rx_null_descriptor(); > - } > - desc_offset += desc_size; > - if (desc_offset >= total_size) { > + igb_write_to_rx_buffers(core, pkt, d, &pdma_st); > + pdma_st.desc_offset += pdma_st.desc_size; > + if (pdma_st.desc_offset >= pdma_st.total_size) { > is_last = true; > } > > - igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL, > - rss_info, etqf, ts, written); > - igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len); > - > - igb_ring_advance(core, rxi, core->rx_desc_len / > E1000_MIN_RX_DESC_LEN); > - > - } while (desc_offset < total_size); > - > - igb_update_rx_stats(core, rxi, size, total_size); > + igb_write_rx_descr(core, &desc, > + is_last ? 
pkt : NULL, > + rss_info, > + etqf, ts, > + &pdma_st.bastate.written, > + rxi); > + pci_dma_write(d, base, &desc, rx_desc_len); > + igb_ring_advance(core, rxi, > + rx_desc_len / E1000_MIN_RX_DESC_LEN); > + } while (pdma_st.desc_offset < pdma_st.total_size); > + > + igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size); > } > > static bool > diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index > 82ff195dfc..c4ede22181 100644 > --- a/hw/net/igb_regs.h > +++ b/hw/net/igb_regs.h > @@ -452,6 +452,7 @@ union e1000_adv_rx_desc { > #define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00 > #define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ > #define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 > +#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000 > #define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 > #define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000 > #define E1000_SRRCTL_DROP_EN 0x80000000 > @@ -692,11 +693,12 @@ union e1000_adv_rx_desc { > > #define E1000_STATUS_NUM_VFS_SHIFT 14 > > -#define E1000_ADVRXD_PKT_IP4 BIT(4) > -#define E1000_ADVRXD_PKT_IP6 BIT(6) > -#define E1000_ADVRXD_PKT_TCP BIT(8) > -#define E1000_ADVRXD_PKT_UDP BIT(9) > -#define E1000_ADVRXD_PKT_SCTP BIT(10) > +#define E1000_ADVRXD_PKT_IP4 BIT(0) > +#define E1000_ADVRXD_PKT_IP6 BIT(2) > +#define E1000_ADVRXD_PKT_IP6E BIT(3) > +#define E1000_ADVRXD_PKT_TCP BIT(4) > +#define E1000_ADVRXD_PKT_UDP BIT(5) > +#define E1000_ADVRXD_PKT_SCTP BIT(6) > > static inline uint8_t igb_ivar_entry_rx(uint8_t i) { diff --git > a/hw/net/trace- events b/hw/net/trace-events index > e4a98b2c7d..9a02261360 100644 > --- a/hw/net/trace-events > +++ b/hw/net/trace-events > @@ -277,9 +277,9 @@ igb_core_mdic_write_unhandled(uint32_t addr) "MDIC > WRITE: PHY[%u] UNHANDLED" > igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, > bool > pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: > %d, PF reset done: %d" > > igb_rx_desc_buff_size(uint32_t b) "buffer size: %u" > 
-igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* > source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" > +igb_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, > +const void* > source, uint32_t len) "buffer #%u, addr: 0x%"PRIx64", offset: %u, > from: %p, > length: %u" > > -igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X" > +igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: > 0x%X, rss_pkt_type: 0x%X" > > igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to > GPIE.NSICR enabled" > igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x" > @@ -294,6 +294,8 @@ igb_irq_eitr_set(uint32_t eitr_num, uint32_t val) > "EITR[%u] = 0x%x" > igb_set_pfmailbox(uint32_t vf_num, uint32_t val) "PFMailbox[%d]: 0x%x" > igb_set_vfmailbox(uint32_t vf_num, uint32_t val) "VFMailbox[%d]: 0x%x" > > +igb_wrn_rx_desc_modes_not_supp(int desc_type) "Not supported > +descriptor > type: %d" > + > # igbvf.c > igbvf_wrn_io_addr_unknown(uint64_t addr) "IO unknown register > 0x%"PRIx64 > > diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c index > a603468beb..aa65b5452c 100644 > --- a/tests/qtest/libqos/igb.c > +++ b/tests/qtest/libqos/igb.c > @@ -109,6 +109,9 @@ static void igb_pci_start_hw(QOSGraphObject *obj) > E1000_RAH_AV | E1000_RAH_POOL_1 | > le16_to_cpu(*(uint16_t *)(address + 4))); > > + /* Set supported receive descriptor mode */ > + e1000e_macreg_write(&d->e1000e, E1000_SRRCTL(0), > + E1000_SRRCTL_DESCTYPE_ADV_ONEBUF); > + > /* Enable receive */ > e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN); > e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN); > -- > 2.25.1 ^ permalink raw reply [flat|nested] 12+ messages in thread
* RE: [PATCH v3 1/2] igb: RX descriptors handling cleanup 2023-05-02 14:00 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics @ 2023-05-03 7:46 ` Sriram Yagnaraman 2023-05-03 15:11 ` Akihiko Odaki 0 siblings, 1 reply; 12+ messages in thread From: Sriram Yagnaraman @ 2023-05-03 7:46 UTC (permalink / raw) To: Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics, qemu-devel@nongnu.org, akihiko.odaki@daynix.com Cc: jasowang@redhat.com, k.kwiecien@samsung.com, m.sochacki@samsung.com > -----Original Message----- > From: Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung > Electronics <t.dzieciol@partner.samsung.com> > Sent: Tuesday, 2 May 2023 16:01 > To: Sriram Yagnaraman <sriram.yagnaraman@est.tech>; qemu- > devel@nongnu.org; akihiko.odaki@daynix.com > Cc: jasowang@redhat.com; k.kwiecien@samsung.com; > m.sochacki@samsung.com > Subject: RE: [PATCH v3 1/2] igb: RX descriptors handling cleanup > > Not Linux/DPDK/FreeBSD for IGB. > > Change here adds additional condition (RXCSUM.IPPCSE set) to enable putting > IP ID into descriptor, besides clearing RXCSUM.PCSD (required according to > Intel 82576 datasheet) that was not present in the e1000e code. > Yes, we can't even use ethtool to set this field. My suggestion is to not add/maintain code that we cannot test. I leave it up to Akihiko to decide if we really need to implement IPPCSE. The default value of RXCSUM.IPPCSE is unset, so we could as well ignore this field until there is a user who sets this. Anyhow, I will wait with further comments, until you respin this after splitting the changes as requested. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH v3 1/2] igb: RX descriptors handling cleanup 2023-05-03 7:46 ` Sriram Yagnaraman @ 2023-05-03 15:11 ` Akihiko Odaki 2023-05-04 7:37 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics 0 siblings, 1 reply; 12+ messages in thread From: Akihiko Odaki @ 2023-05-03 15:11 UTC (permalink / raw) To: Sriram Yagnaraman, Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics, qemu-devel@nongnu.org Cc: jasowang@redhat.com, k.kwiecien@samsung.com, m.sochacki@samsung.com On 2023/05/03 16:46, Sriram Yagnaraman wrote: > > >> -----Original Message----- >> From: Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung >> Electronics <t.dzieciol@partner.samsung.com> >> Sent: Tuesday, 2 May 2023 16:01 >> To: Sriram Yagnaraman <sriram.yagnaraman@est.tech>; qemu- >> devel@nongnu.org; akihiko.odaki@daynix.com >> Cc: jasowang@redhat.com; k.kwiecien@samsung.com; >> m.sochacki@samsung.com >> Subject: RE: [PATCH v3 1/2] igb: RX descriptors handling cleanup >> >> Not Linux/DPDK/FreeBSD for IGB. >> >> Change here adds additional condition (RXCSUM.IPPCSE set) to enable putting >> IP ID into descriptor, besides clearing RXCSUM.PCSD (required according to >> Intel 82576 datasheet) that was not present in the e1000e code. >> > > Yes, we can't even use ethtool to set this field. > My suggestion is to not add/maintain code that we cannot test. I leave it up to Akhikho to decide if we really need to implement IPPCSE. > The default value of RXCSUM.IPPCSE is unset, so we could as well ignore this field until there is a user who sets this. In general I won't reject a patch to implement a feature not used by a known guest, but I don't recommend that. It just doesn't make sense to spend time to write code that can turn out so buggy that it is unusable in practice, which is often the case with untested code. ^ permalink raw reply [flat|nested] 12+ messages in thread
* RE: [PATCH v3 1/2] igb: RX descriptors handling cleanup 2023-05-03 15:11 ` Akihiko Odaki @ 2023-05-04 7:37 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics 0 siblings, 0 replies; 12+ messages in thread From: Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics @ 2023-05-04 7:37 UTC (permalink / raw) To: 'Akihiko Odaki', 'Sriram Yagnaraman', qemu-devel Cc: jasowang, k.kwiecien, m.sochacki I will remove checking RXCSUM.IPPCSE flag then. -----Original Message----- From: Akihiko Odaki <akihiko.odaki@daynix.com> Sent: środa, 3 maja 2023 17:11 To: Sriram Yagnaraman <sriram.yagnaraman@est.tech>; Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics <t.dzieciol@partner.samsung.com>; qemu-devel@nongnu.org Cc: jasowang@redhat.com; k.kwiecien@samsung.com; m.sochacki@samsung.com Subject: Re: [PATCH v3 1/2] igb: RX descriptors handling cleanup On 2023/05/03 16:46, Sriram Yagnaraman wrote: > > >> -----Original Message----- >> From: Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung >> Electronics <t.dzieciol@partner.samsung.com> >> Sent: Tuesday, 2 May 2023 16:01 >> To: Sriram Yagnaraman <sriram.yagnaraman@est.tech>; qemu- >> devel@nongnu.org; akihiko.odaki@daynix.com >> Cc: jasowang@redhat.com; k.kwiecien@samsung.com; >> m.sochacki@samsung.com >> Subject: RE: [PATCH v3 1/2] igb: RX descriptors handling cleanup >> >> Not Linux/DPDK/FreeBSD for IGB. >> >> Change here adds additional condition (RXCSUM.IPPCSE set) to enable >> putting IP ID into descriptor, besides clearing RXCSUM.PCSD (required >> according to Intel 82576 datasheet) that was not present in the e1000e code. >> > > Yes, we can't even use ethtool to set this field. > My suggestion is to not add/maintain code that we cannot test. I leave it up to Akhikho to decide if we really need to implement IPPCSE. > The default value of RXCSUM.IPPCSE is unset, so we could as well ignore this field until there is a user who sets this. 
In general I won't reject a patch to implement a feature not used by a known guest, but I don't recommend that. It just doesn't make sense to spend time to write code that can turn out so buggy that it is unusable in practice, which is often the case with untested code. ^ permalink raw reply [flat|nested] 12+ messages in thread
[parent not found: <CGME20230427104750eucas1p1eb8fb7fac00cc13dea5e4a7e0df5c113@eucas1p1.samsung.com>]
* [PATCH v3 2/2] igb: packet-split descriptors support [not found] ` <CGME20230427104750eucas1p1eb8fb7fac00cc13dea5e4a7e0df5c113@eucas1p1.samsung.com> @ 2023-04-27 10:47 ` Tomasz Dzieciol 0 siblings, 0 replies; 12+ messages in thread From: Tomasz Dzieciol @ 2023-04-27 10:47 UTC (permalink / raw) To: qemu-devel, akihiko.odaki Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki Packet-split descriptors are used by Linux VF driver for MTU values from 2048 --- hw/net/igb_core.c | 300 +++++++++++++++++++++++++++++++++++++++++----- hw/net/igb_regs.h | 6 + 2 files changed, 276 insertions(+), 30 deletions(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index 1cb64402aa..6abb152d51 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -282,6 +282,14 @@ igb_rx_queue_desctyp_get(IGBCore *core, const E1000E_RingInfo *r) return core->mac[E1000_SRRCTL(r->idx) >> 2] & E1000_SRRCTL_DESCTYPE_MASK; } +static bool +igb_rx_use_ps_descriptor(IGBCore *core, const E1000E_RingInfo *r) +{ + uint32_t desctyp = igb_rx_queue_desctyp_get(core, r); + return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT || + desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; +} + static inline bool igb_rss_enabled(IGBCore *core) { @@ -1239,21 +1247,70 @@ igb_read_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, } static inline void -igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, - hwaddr *buff_addr) +igb_read_adv_rx_single_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc, + hwaddr *buff_addr) { *buff_addr = le64_to_cpu(desc->read.pkt_addr); } static inline void -igb_read_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, - hwaddr *buff_addr) +igb_read_adv_rx_split_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc, + hwaddr *buff_addr) +{ + buff_addr[0] = le64_to_cpu(desc->read.hdr_addr); + buff_addr[1] = le64_to_cpu(desc->read.pkt_addr); +} + +typedef struct IGB_BaState_st { + uint16_t written[MAX_PS_BUFFERS]; + uint8_t cur_idx; +} IGB_BaState; + +typedef 
struct IGB_PacketRxDMAState_st { + size_t size; + size_t total_size; + size_t ps_hdr_len; + size_t desc_size; + size_t desc_offset; + uint32_t rx_desc_packet_buf_size; + uint32_t rx_desc_header_buf_size; + struct iovec *iov; + size_t iov_ofs; + bool do_ps; + bool is_first; + IGB_BaState bastate; + hwaddr ba[MAX_PS_BUFFERS]; +} IGB_PacketRxDMAState; + +static inline void +igb_read_rx_descr(IGBCore *core, + union e1000_rx_desc_union *desc, + IGB_PacketRxDMAState *pdma_st, + const E1000E_RingInfo *r) { + uint32_t desc_type; + if (igb_rx_use_legacy_descriptor(core)) { - igb_read_lgcy_rx_descr(core, &desc->legacy, buff_addr); - } else { - igb_read_adv_rx_descr(core, &desc->adv, buff_addr); + igb_read_lgcy_rx_descr(core, &desc->legacy, &pdma_st->ba[1]); + pdma_st->ba[0] = 0; + return; + } + + /* advanced header split descriptor */ + if (igb_rx_use_ps_descriptor(core, r)) { + igb_read_adv_rx_split_buf_descr(core, &desc->adv, &pdma_st->ba[0]); + return; } + + /* descriptor replication modes not supported */ + desc_type = igb_rx_queue_desctyp_get(core, r); + if (desc_type != E1000_SRRCTL_DESCTYPE_ADV_ONEBUF) { + trace_igb_wrn_rx_desc_modes_not_supp(desc_type); + } + + /* advanced single buffer descriptor */ + igb_read_adv_rx_single_buf_descr(core, &desc->adv, &pdma_st->ba[1]); + pdma_st->ba[0] = 0; } static void @@ -1397,6 +1454,13 @@ igb_write_lgcy_rx_descr(IGBCore *core, desc->status = (uint8_t) le32_to_cpu(status_flags); } +static bool +igb_rx_ps_descriptor_split_always(IGBCore *core, const E1000E_RingInfo *r) +{ + uint32_t desctyp = igb_rx_queue_desctyp_get(core, r); + return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; +} + static uint16_t igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf) { @@ -1483,6 +1547,49 @@ igb_write_adv_rx_descr(IGBCore *core, d->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type << 4)); } +typedef struct IGB_SplitDescriptorData_st { + bool sph; + bool hbo; + size_t hdr_len; +} 
IGB_SplitDescriptorData; + +static inline void +igb_write_adv_ps_split_rx_descr(IGBCore *core, + union e1000_adv_rx_desc *d, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + const E1000E_RingInfo *r, + uint16_t etqf, + bool ts, + IGB_SplitDescriptorData *ps_desc_data, + uint16_t(*written)[MAX_PS_BUFFERS]) +{ + size_t pkt_len; + size_t hdr_len = ps_desc_data->hdr_len; + + bool split_always = igb_rx_ps_descriptor_split_always(core, r); + if (!split_always) { + if ((!ps_desc_data->sph && !ps_desc_data->hbo) || + ( ps_desc_data->sph && ps_desc_data->hbo)) { + pkt_len = (*written)[0] + (*written)[1]; + } else { + assert(!ps_desc_data->hbo); + pkt_len = (*written)[1]; + } + } else { + pkt_len = (*written)[1]; + } + + igb_write_adv_rx_descr(core, d, pkt, rss_info, etqf, ts, pkt_len); + + d->wb.lower.lo_dword.hdr_info = (hdr_len << E1000_ADVRXD_HDR_LEN_OFFSET) & + E1000_ADVRXD_ADV_HDR_LEN_MASK; + d->wb.lower.lo_dword.hdr_info |= ps_desc_data->sph ? E1000_ADVRXD_HDR_SPH + : 0; + d->wb.upper.status_error |= ps_desc_data->hbo ? 
+ E1000_ADVRXD_ST_ERR_HBO_OFFSET : 0; +} + static inline void igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, @@ -1490,13 +1597,18 @@ igb_write_rx_descr(IGBCore *core, const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, + IGB_SplitDescriptorData *ps_desc_data, uint16_t(*written)[MAX_PS_BUFFERS], const E1000E_RingInfo *r) { if (igb_rx_use_legacy_descriptor(core)) { igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, (*written)[1]); + } else if (igb_rx_use_ps_descriptor(core, r)) { + igb_write_adv_ps_split_rx_descr(core, &desc->adv, pkt, rss_info, r, + etqf, ts, ps_desc_data, written); } else { - igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, etqf, ts, (*written)[1]); + igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, + etqf, ts, (*written)[1]); } } @@ -1557,34 +1669,149 @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi) ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16; } -typedef struct IGB_BaState_st { - uint16_t written[MAX_PS_BUFFERS]; - uint8_t cur_idx; -} IGB_BaState; +static bool +igb_do_ps(IGBCore *core, + const E1000E_RingInfo *r, + struct NetRxPkt *pkt, + size_t *hdr_len, + IGB_SplitDescriptorData *ps_desc_data) +{ + bool hasip4, hasip6; + EthL4HdrProto l4hdr_proto; + bool fragment; + bool split_always; + size_t bheader_size; + size_t total_pkt_len; -typedef struct IGB_PacketRxDMAState_st { - size_t size; - size_t total_size; - size_t ps_hdr_len; - size_t desc_size; - size_t desc_offset; - uint32_t rx_desc_packet_buf_size; - uint32_t rx_desc_header_buf_size; - struct iovec *iov; - size_t iov_ofs; - bool is_first; - IGB_BaState bastate; - hwaddr ba[MAX_PS_BUFFERS]; -} IGB_PacketRxDMAState; + if (!igb_rx_use_ps_descriptor(core, r)) { + return false; + } + + memset(ps_desc_data, 0, sizeof(IGB_SplitDescriptorData)); + + total_pkt_len = net_rx_pkt_get_total_len(pkt); + bheader_size = igb_get_queue_rx_header_buf_size(core, r); + split_always = igb_rx_ps_descriptor_split_always(core, 
r); + if (split_always && total_pkt_len <= bheader_size) { + *hdr_len = total_pkt_len; + ps_desc_data->hdr_len = total_pkt_len; + return true; + } + + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); + + if (hasip4) { + fragment = net_rx_pkt_get_ip4_info(pkt)->fragment; + } else if (hasip6) { + fragment = net_rx_pkt_get_ip6_info(pkt)->fragment; + } else { + ps_desc_data->hdr_len = bheader_size; + goto header_not_handled; + } + + if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) { + ps_desc_data->hdr_len = bheader_size; + goto header_not_handled; + } + + /* no header splitting for SCTP */ + if (!fragment && (l4hdr_proto == ETH_L4_HDR_PROTO_UDP || + l4hdr_proto == ETH_L4_HDR_PROTO_TCP)) { + *hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt); + } else { + *hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt); + } + + ps_desc_data->sph = true; + ps_desc_data->hdr_len = *hdr_len; + + if (*hdr_len > bheader_size) { + ps_desc_data->hbo = true; + goto header_not_handled; + } + + return true; + +header_not_handled: + if (split_always) { + *hdr_len = bheader_size; + return true; + } + + return false; +} static void igb_truncate_to_descriptor_size(IGB_PacketRxDMAState *pdma_st, size_t *size) { - if (*size > pdma_st->rx_desc_packet_buf_size) { - *size = pdma_st->rx_desc_packet_buf_size; + if (pdma_st->do_ps && pdma_st->is_first) { + if (*size > pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len) { + *size = pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len; + } + } else { + if (*size > pdma_st->rx_desc_packet_buf_size) { + *size = pdma_st->rx_desc_packet_buf_size; + } } } +static inline void +igb_write_hdr_to_rx_buffers(IGBCore *core, + PCIDevice *d, + hwaddr (*ba)[MAX_PS_BUFFERS], + IGB_BaState *bastate, + uint32_t rx_desc_header_buf_size, + const char *data, + dma_addr_t data_len) +{ + assert(data_len <= rx_desc_header_buf_size - bastate->written[0]); + pci_dma_write(d, (*ba)[0] + bastate->written[0], data, data_len); + bastate->written[0] += data_len; + 
bastate->cur_idx = 1; +} + +static void +igb_write_packet_hdr_to_descr_addr(IGBCore *core, + struct NetRxPkt *pkt, + PCIDevice *d, + IGB_PacketRxDMAState *pdma_st, + size_t *copy_size) +{ + size_t iov_copy; + size_t ps_hdr_copied = 0; + + if (!pdma_st->is_first) { + /* Leave buffer 0 of each descriptor except first */ + /* empty */ + igb_write_hdr_to_rx_buffers(core, d, &pdma_st->ba, &pdma_st->bastate, + pdma_st->rx_desc_header_buf_size, + NULL, 0); + return; + } + + do { + iov_copy = MIN(pdma_st->ps_hdr_len - ps_hdr_copied, + pdma_st->iov->iov_len - pdma_st->iov_ofs); + + igb_write_hdr_to_rx_buffers(core, d, &pdma_st->ba, + &pdma_st->bastate, + pdma_st->rx_desc_header_buf_size, + pdma_st->iov->iov_base, + iov_copy); + + *copy_size -= iov_copy; + ps_hdr_copied += iov_copy; + + pdma_st->iov_ofs += iov_copy; + if (pdma_st->iov_ofs == pdma_st->iov->iov_len) { + pdma_st->iov++; + pdma_st->iov_ofs = 0; + } + } while (ps_hdr_copied < pdma_st->ps_hdr_len); + + pdma_st->is_first = false; +} + static void igb_write_payload_frag_to_rx_buffers(IGBCore *core, PCIDevice *d, @@ -1684,7 +1911,14 @@ igb_write_to_rx_buffers(IGBCore *core, igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size); copy_size = pdma_st->size - pdma_st->desc_offset; igb_truncate_to_descriptor_size(pdma_st, ©_size); - pdma_st->bastate.cur_idx = 1; + + /* For PS mode copy the packet header first */ + if (pdma_st->do_ps) { + igb_write_packet_hdr_to_descr_addr(core, pkt, d, pdma_st, ©_size); + } else { + pdma_st->bastate.cur_idx = 1; + } + igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, ©_size); } @@ -1699,6 +1933,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, union e1000_rx_desc_union desc; const E1000E_RingInfo *rxi; size_t rx_desc_len; + IGB_SplitDescriptorData ps_desc_data; IGB_PacketRxDMAState pdma_st = {0}; pdma_st.is_first = true; @@ -1717,6 +1952,10 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, d = core->owner; } + pdma_st.do_ps = igb_do_ps(core, 
rxi, pkt, + &pdma_st.ps_hdr_len, + &ps_desc_data); + do { memset(&pdma_st.bastate, 0, sizeof(IGB_BaState)); bool is_last = false; @@ -1729,7 +1968,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, pci_dma_read(d, base, &desc, rx_desc_len); trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len); - igb_read_rx_descr(core, &desc, &pdma_st->ba[1], rxi); + igb_read_rx_descr(core, &desc, &pdma_st, rxi); igb_write_to_rx_buffers(core, pkt, d, &pdma_st); pdma_st.desc_offset += pdma_st.desc_size; @@ -1741,6 +1980,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, is_last ? pkt : NULL, rss_info, etqf, ts, + &ps_desc_data, &pdma_st.bastate.written, rxi); pci_dma_write(d, base, &desc, rx_desc_len); diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index c4ede22181..080f03fc43 100644 --- a/hw/net/igb_regs.h +++ b/hw/net/igb_regs.h @@ -700,6 +700,12 @@ union e1000_adv_rx_desc { #define E1000_ADVRXD_PKT_UDP BIT(5) #define E1000_ADVRXD_PKT_SCTP BIT(6) +#define E1000_ADVRXD_HDR_LEN_OFFSET (21 - 16) +#define E1000_ADVRXD_ADV_HDR_LEN_MASK ((BIT(10) - 1) << \ + E1000_ADVRXD_HDR_LEN_OFFSET) +#define E1000_ADVRXD_HDR_SPH BIT(15) +#define E1000_ADVRXD_ST_ERR_HBO_OFFSET BIT(3 + 20) + static inline uint8_t igb_ivar_entry_rx(uint8_t i) { return i < 8 ? i * 4 : (i - 8) * 4 + 2; -- 2.25.1 ^ permalink raw reply related [flat|nested] 12+ messages in thread
end of thread, other threads:[~2023-05-04 7:37 UTC | newest] Thread overview: 12+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- [not found] <CGME20230427104749eucas1p19480869211eed34117a518f3c3800946@eucas1p1.samsung.com> 2023-04-27 10:47 ` [PATCH v3 0/2] igb: packet-split descriptors support Tomasz Dzieciol [not found] ` <CGME20230427104750eucas1p1158eee5a37c71cacaea021a7abbd6ace@eucas1p1.samsung.com> 2023-04-27 10:47 ` [PATCH v3 1/2] igb: RX descriptors handling cleanup Tomasz Dzieciol 2023-04-28 10:31 ` Akihiko Odaki 2023-04-28 12:51 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics 2023-04-30 6:05 ` Akihiko Odaki 2023-04-28 12:53 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics 2023-04-30 11:57 ` Sriram Yagnaraman 2023-05-02 14:00 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics 2023-05-03 7:46 ` Sriram Yagnaraman 2023-05-03 15:11 ` Akihiko Odaki 2023-05-04 7:37 ` Tomasz Dzieciol/VIM Integration (NC) /SRPOL/Engineer/Samsung Electronics [not found] ` <CGME20230427104750eucas1p1eb8fb7fac00cc13dea5e4a7e0df5c113@eucas1p1.samsung.com> 2023-04-27 10:47 ` [PATCH v3 2/2] igb: packet-split descriptors support Tomasz Dzieciol
This is a public inbox; see mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).