qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 0/5] igb: packet-split descriptors support
       [not found] <CGME20230504131105eucas1p1cc725f17aa42262be597f5ee81aca037@eucas1p1.samsung.com>
@ 2023-05-04 13:10 ` Tomasz Dzieciol
       [not found]   ` <CGME20230504131105eucas1p2610f7bcc7c4fabba29cd198c52ac30f2@eucas1p2.samsung.com>
                     ` (4 more replies)
  0 siblings, 5 replies; 9+ messages in thread
From: Tomasz Dzieciol @ 2023-05-04 13:10 UTC (permalink / raw)
  To: qemu-devel, akihiko.odaki
  Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki

Based-on: <20230423041833.5302-1-akihiko.odaki@daynix.com>
("[PATCH v3 00/47] igb: Fix for DPDK")

Purposes of this series of patches:
* introduce packet-split RX descriptors support. This feature is used by Linux
  VF driver for MTU values from 2048.
* refactor RX descriptor handling for introduction of packet-split RX
  descriptors support
* fix descriptors flags handling

Tomasz Dzieciol (5):
  igb: remove TCP ACK detection
  igb: rename E1000E_RingInfo_st
  igb: RX descriptors handling cleanup
  igb: add IPv6 extended headers traffic detection
  igb: packet-split descriptors support

 hw/net/e1000e_core.c     |  18 +-
 hw/net/igb_core.c        | 753 ++++++++++++++++++++++++++++-----------
 hw/net/igb_regs.h        |  20 +-
 hw/net/trace-events      |   6 +-
 tests/qtest/libqos/igb.c |   5 +
 5 files changed, 580 insertions(+), 222 deletions(-)

-- 
2.25.1



^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v4 1/5] igb: remove TCP ACK detection
       [not found]   ` <CGME20230504131105eucas1p2610f7bcc7c4fabba29cd198c52ac30f2@eucas1p2.samsung.com>
@ 2023-05-04 13:10     ` Tomasz Dzieciol
  0 siblings, 0 replies; 9+ messages in thread
From: Tomasz Dzieciol @ 2023-05-04 13:10 UTC (permalink / raw)
  To: qemu-devel, akihiko.odaki
  Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki

TCP ACK detection is no longer present in igb.

Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>
---
 hw/net/igb_core.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 96b7335b31..012eb1e1b9 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1327,11 +1327,6 @@ igb_build_rx_metadata(IGBCore *core,
             trace_e1000e_rx_metadata_ip_id(*ip_id);
     }
 
-    if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && net_rx_pkt_is_tcp_ack(pkt)) {
-        *status_flags |= E1000_RXD_STAT_ACK;
-        trace_e1000e_rx_metadata_ack();
-    }
-
     if (pkt_info) {
         *pkt_info = rss_info->enabled ? rss_info->type : 0;
 
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v4 2/5] igb: rename E1000E_RingInfo_st
       [not found]   ` <CGME20230504131106eucas1p1714e019395c941f51155c0e9ae52a66b@eucas1p1.samsung.com>
@ 2023-05-04 13:10     ` Tomasz Dzieciol
  2023-05-04 14:51       ` Akihiko Odaki
  0 siblings, 1 reply; 9+ messages in thread
From: Tomasz Dzieciol @ 2023-05-04 13:10 UTC (permalink / raw)
  To: qemu-devel, akihiko.odaki
  Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki

Rename E1000E_RingInfo_st and E1000E_RingInfo according to qemu typdefs guide.

Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>
---
 hw/net/igb_core.c | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 012eb1e1b9..b6031dea24 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -694,24 +694,24 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx)
     return (ent & E1000_IVAR_VALID) ? BIT(ent & 0x1f) : 0;
 }
 
-typedef struct E1000E_RingInfo_st {
+typedef struct E1000ERingInfo {
     int dbah;
     int dbal;
     int dlen;
     int dh;
     int dt;
     int idx;
-} E1000E_RingInfo;
+} E1000ERingInfo;
 
 static inline bool
-igb_ring_empty(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_empty(IGBCore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dh] == core->mac[r->dt] ||
                 core->mac[r->dt] >= core->mac[r->dlen] / E1000_RING_DESC_LEN;
 }
 
 static inline uint64_t
-igb_ring_base(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_base(IGBCore *core, const E1000ERingInfo *r)
 {
     uint64_t bah = core->mac[r->dbah];
     uint64_t bal = core->mac[r->dbal];
@@ -720,13 +720,13 @@ igb_ring_base(IGBCore *core, const E1000E_RingInfo *r)
 }
 
 static inline uint64_t
-igb_ring_head_descr(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_head_descr(IGBCore *core, const E1000ERingInfo *r)
 {
     return igb_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh];
 }
 
 static inline void
-igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count)
+igb_ring_advance(IGBCore *core, const E1000ERingInfo *r, uint32_t count)
 {
     core->mac[r->dh] += count;
 
@@ -736,7 +736,7 @@ igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count)
 }
 
 static inline uint32_t
-igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_free_descr_num(IGBCore *core, const E1000ERingInfo *r)
 {
     trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen],
                                  core->mac[r->dh],  core->mac[r->dt]);
@@ -755,13 +755,13 @@ igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r)
 }
 
 static inline bool
-igb_ring_enabled(IGBCore *core, const E1000E_RingInfo *r)
+igb_ring_enabled(IGBCore *core, const E1000ERingInfo *r)
 {
     return core->mac[r->dlen] > 0;
 }
 
 typedef struct IGB_TxRing_st {
-    const E1000E_RingInfo *i;
+    const E1000ERingInfo *i;
     struct igb_tx *tx;
 } IGB_TxRing;
 
@@ -774,7 +774,7 @@ igb_mq_queue_idx(int base_reg_idx, int reg_idx)
 static inline void
 igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx)
 {
-    static const E1000E_RingInfo i[IGB_NUM_QUEUES] = {
+    static const E1000ERingInfo i[IGB_NUM_QUEUES] = {
         { TDBAH0, TDBAL0, TDLEN0, TDH0, TDT0, 0 },
         { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 },
         { TDBAH2, TDBAL2, TDLEN2, TDH2, TDT2, 2 },
@@ -800,13 +800,13 @@ igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx)
 }
 
 typedef struct E1000E_RxRing_st {
-    const E1000E_RingInfo *i;
+    const E1000ERingInfo *i;
 } E1000E_RxRing;
 
 static inline void
 igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx)
 {
-    static const E1000E_RingInfo i[IGB_NUM_QUEUES] = {
+    static const E1000ERingInfo i[IGB_NUM_QUEUES] = {
         { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 },
         { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 },
         { RDBAH2, RDBAL2, RDLEN2, RDH2, RDT2, 2 },
@@ -833,7 +833,7 @@ igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx)
 static uint32_t
 igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
                      union e1000_adv_tx_desc *tx_desc,
-                     const E1000E_RingInfo *txi)
+                     const E1000ERingInfo *txi)
 {
     PCIDevice *d;
     uint32_t cmd_type_len = le32_to_cpu(tx_desc->read.cmd_type_len);
@@ -866,7 +866,7 @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
 }
 
 static inline bool
-igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
+igb_tx_enabled(IGBCore *core, const E1000ERingInfo *txi)
 {
     bool vmdq = core->mac[MRQC] & 1;
     uint16_t qn = txi->idx;
@@ -883,7 +883,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
     PCIDevice *d;
     dma_addr_t base;
     union e1000_adv_tx_desc desc;
-    const E1000E_RingInfo *txi = txr->i;
+    const E1000ERingInfo *txi = txr->i;
     uint32_t eic = 0;
 
     if (!igb_tx_enabled(core, txi)) {
@@ -918,7 +918,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 }
 
 static uint32_t
-igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r)
+igb_rxbufsize(IGBCore *core, const E1000ERingInfo *r)
 {
     uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2];
     uint32_t bsizepkt = srrctl & E1000_SRRCTL_BSIZEPKT_MASK;
@@ -930,7 +930,7 @@ igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r)
 }
 
 static bool
-igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size)
+igb_has_rxbufs(IGBCore *core, const E1000ERingInfo *r, size_t total_size)
 {
     uint32_t bufs = igb_ring_free_descr_num(core, r);
     uint32_t bufsize = igb_rxbufsize(core, r);
@@ -1522,7 +1522,7 @@ igb_write_to_rx_buffers(IGBCore *core,
 }
 
 static void
-igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
+igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi,
                     size_t pkt_size, size_t pkt_fcs_size)
 {
     eth_pkt_types_e pkt_type = net_rx_pkt_get_packet_type(core->rx_pkt);
@@ -1540,7 +1540,7 @@ igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
 }
 
 static inline bool
-igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi)
+igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi)
 {
     return igb_ring_free_descr_num(core, rxi) ==
            ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16;
@@ -1562,7 +1562,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
     struct iovec *iov = net_rx_pkt_get_iovec(pkt);
     size_t size = net_rx_pkt_get_total_len(pkt);
     size_t total_size = size + e1000x_fcs_len(core->mac);
-    const E1000E_RingInfo *rxi = rxr->i;
+    const E1000ERingInfo *rxi = rxr->i;
     size_t bufsize = igb_rxbufsize(core, rxi);
 
     d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8);
@@ -1643,7 +1643,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
 }
 
 static bool
-igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
+igb_rx_strip_vlan(IGBCore *core, const E1000ERingInfo *rxi)
 {
     if (core->mac[MRQC] & 1) {
         uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v4 3/5] igb: RX descriptors handling cleanup
       [not found]   ` <CGME20230504131106eucas1p13530e4d1f10ca1086b39b37feddc3e28@eucas1p1.samsung.com>
@ 2023-05-04 13:10     ` Tomasz Dzieciol
  2023-05-04 15:12       ` Akihiko Odaki
  0 siblings, 1 reply; 9+ messages in thread
From: Tomasz Dzieciol @ 2023-05-04 13:10 UTC (permalink / raw)
  To: qemu-devel, akihiko.odaki
  Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki

Refactoring is done in preparation for support of multiple advanced
descriptors RX modes, especially packet-split modes.

Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>
---
 hw/net/e1000e_core.c     |  18 +-
 hw/net/igb_core.c        | 410 ++++++++++++++++++++++-----------------
 hw/net/igb_regs.h        |  12 +-
 hw/net/trace-events      |   6 +-
 tests/qtest/libqos/igb.c |   5 +
 5 files changed, 263 insertions(+), 188 deletions(-)

diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index 78373d7db7..0085ad53c2 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -1418,11 +1418,11 @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core,
 }
 
 static void
-e1000e_write_to_rx_buffers(E1000ECore *core,
-                           hwaddr ba[MAX_PS_BUFFERS],
-                           e1000e_ba_state *bastate,
-                           const char *data,
-                           dma_addr_t data_len)
+e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core,
+                                        hwaddr ba[MAX_PS_BUFFERS],
+                                        e1000e_ba_state *bastate,
+                                        const char *data,
+                                        dma_addr_t data_len)
 {
     while (data_len > 0) {
         uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx];
@@ -1594,8 +1594,10 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
                 while (copy_size) {
                     iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
 
-                    e1000e_write_to_rx_buffers(core, ba, &bastate,
-                                            iov->iov_base + iov_ofs, iov_copy);
+                    e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
+                                                            iov->iov_base +
+                                                            iov_ofs,
+                                                            iov_copy);
 
                     copy_size -= iov_copy;
                     iov_ofs += iov_copy;
@@ -1607,7 +1609,7 @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt,
 
                 if (desc_offset + desc_size >= total_size) {
                     /* Simulate FCS checksum presence in the last descriptor */
-                    e1000e_write_to_rx_buffers(core, ba, &bastate,
+                    e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate,
                           (const char *) &fcs_pad, e1000x_fcs_len(core->mac));
                 }
             }
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index b6031dea24..dbd09b9b46 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -267,6 +267,15 @@ igb_rx_use_legacy_descriptor(IGBCore *core)
     return false;
 }
 
+typedef struct E1000ERingInfo {
+    int dbah;
+    int dbal;
+    int dlen;
+    int dh;
+    int dt;
+    int idx;
+} E1000ERingInfo;
+
 static inline bool
 igb_rss_enabled(IGBCore *core)
 {
@@ -694,15 +703,6 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx)
     return (ent & E1000_IVAR_VALID) ? BIT(ent & 0x1f) : 0;
 }
 
-typedef struct E1000ERingInfo {
-    int dbah;
-    int dbal;
-    int dlen;
-    int dh;
-    int dt;
-    int idx;
-} E1000ERingInfo;
-
 static inline bool
 igb_ring_empty(IGBCore *core, const E1000ERingInfo *r)
 {
@@ -941,6 +941,14 @@ igb_has_rxbufs(IGBCore *core, const E1000ERingInfo *r, size_t total_size)
                          bufsize;
 }
 
+static uint32_t
+igb_get_queue_rx_header_buf_size(IGBCore *core, const E1000ERingInfo *r)
+{
+    uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2];
+    return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >>
+           E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+}
+
 void
 igb_start_recv(IGBCore *core)
 {
@@ -1281,15 +1289,11 @@ igb_verify_csum_in_sw(IGBCore *core,
 }
 
 static void
-igb_build_rx_metadata(IGBCore *core,
-                      struct NetRxPkt *pkt,
-                      bool is_eop,
-                      const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
-                      uint16_t *pkt_info, uint16_t *hdr_info,
-                      uint32_t *rss,
-                      uint32_t *status_flags,
-                      uint16_t *ip_id,
-                      uint16_t *vlan_tag)
+igb_build_rx_metadata_common(IGBCore *core,
+                             struct NetRxPkt *pkt,
+                             bool is_eop,
+                             uint32_t *status_flags,
+                             uint16_t *vlan_tag)
 {
     struct virtio_net_hdr *vhdr;
     bool hasip4, hasip6, csum_valid;
@@ -1298,7 +1302,6 @@ igb_build_rx_metadata(IGBCore *core,
     *status_flags = E1000_RXD_STAT_DD;
 
     /* No additional metadata needed for non-EOP descriptors */
-    /* TODO: EOP apply only to status so don't skip whole function. */
     if (!is_eop) {
         goto func_exit;
     }
@@ -1315,59 +1318,6 @@ igb_build_rx_metadata(IGBCore *core,
         trace_e1000e_rx_metadata_vlan(*vlan_tag);
     }
 
-    /* Packet parsing results */
-    if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) {
-        if (rss_info->enabled) {
-            *rss = cpu_to_le32(rss_info->hash);
-            trace_igb_rx_metadata_rss(*rss);
-        }
-    } else if (hasip4) {
-            *status_flags |= E1000_RXD_STAT_IPIDV;
-            *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt));
-            trace_e1000e_rx_metadata_ip_id(*ip_id);
-    }
-
-    if (pkt_info) {
-        *pkt_info = rss_info->enabled ? rss_info->type : 0;
-
-        if (etqf < 8) {
-            *pkt_info |= BIT(11) | (etqf << 4);
-        } else {
-            if (hasip4) {
-                *pkt_info |= E1000_ADVRXD_PKT_IP4;
-            }
-
-            if (hasip6) {
-                *pkt_info |= E1000_ADVRXD_PKT_IP6;
-            }
-
-            switch (l4hdr_proto) {
-            case ETH_L4_HDR_PROTO_TCP:
-                *pkt_info |= E1000_ADVRXD_PKT_TCP;
-                break;
-
-            case ETH_L4_HDR_PROTO_UDP:
-                *pkt_info |= E1000_ADVRXD_PKT_UDP;
-                break;
-
-            case ETH_L4_HDR_PROTO_SCTP:
-                *pkt_info |= E1000_ADVRXD_PKT_SCTP;
-                break;
-
-            default:
-                break;
-            }
-        }
-    }
-
-    if (hdr_info) {
-        *hdr_info = 0;
-    }
-
-    if (ts) {
-        *status_flags |= BIT(16);
-    }
-
     /* RX CSO information */
     if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) {
         trace_e1000e_rx_metadata_ipv6_sum_disabled();
@@ -1423,43 +1373,106 @@ func_exit:
 static inline void
 igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
                         struct NetRxPkt *pkt,
-                        const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
+                        const E1000E_RSSInfo *rss_info,
                         uint16_t length)
 {
-    uint32_t status_flags, rss;
-    uint16_t ip_id;
+    uint32_t status_flags;
 
     assert(!rss_info->enabled);
+
+    memset(desc, 0, sizeof(*desc));
     desc->length = cpu_to_le16(length);
-    desc->csum = 0;
+    igb_build_rx_metadata_common(core, pkt, pkt != NULL,
+                                 &status_flags,
+                                 &desc->special);
 
-    igb_build_rx_metadata(core, pkt, pkt != NULL,
-                          rss_info, etqf, ts,
-                          NULL, NULL, &rss,
-                          &status_flags, &ip_id,
-                          &desc->special);
     desc->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24);
     desc->status = (uint8_t) le32_to_cpu(status_flags);
 }
 
+static uint16_t
+igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf)
+{
+    uint16_t pkt_type = 0;
+    bool hasip4, hasip6;
+    EthL4HdrProto l4hdr_proto;
+
+    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+    if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) {
+        pkt_type |= E1000_ADVRXD_PKT_IP6;
+    } else if (hasip4) {
+        pkt_type = E1000_ADVRXD_PKT_IP4;
+    }
+
+    if (etqf < 8) {
+        pkt_type |= (BIT(11) >> 4) | etqf;
+        return pkt_type;
+    }
+
+    switch (l4hdr_proto) {
+    case ETH_L4_HDR_PROTO_TCP:
+        pkt_type |= E1000_ADVRXD_PKT_TCP;
+        break;
+    case ETH_L4_HDR_PROTO_UDP:
+        pkt_type |= E1000_ADVRXD_PKT_UDP;
+        break;
+    case ETH_L4_HDR_PROTO_SCTP:
+        pkt_type |= E1000_ADVRXD_PKT_SCTP;
+        break;
+    default:
+        break;
+    }
+
+    return pkt_type;
+}
+
 static inline void
-igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+igb_write_adv_rx_descr(IGBCore *core,
+                       union e1000_adv_rx_desc *d,
                        struct NetRxPkt *pkt,
-                       const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
+                       const E1000E_RSSInfo *rss_info,
+                       uint16_t etqf,
+                       bool ts,
                        uint16_t length)
 {
-    memset(&desc->wb, 0, sizeof(desc->wb));
+    bool hasip4, hasip6;
+    EthL4HdrProto l4hdr_proto;
+    uint16_t rss_type = 0, pkt_type;
+    bool eop = (pkt != NULL);
+    memset(&d->wb, 0, sizeof(d->wb));
+
+    d->wb.upper.length = cpu_to_le16(length);
+    igb_build_rx_metadata_common(core, pkt, eop,
+                                 &d->wb.upper.status_error,
+                                 &d->wb.upper.vlan);
+
+    if (!eop) {
+        return;
+    }
 
-    desc->wb.upper.length = cpu_to_le16(length);
+    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+    if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) {
+        if (rss_info->enabled) {
+            d->wb.lower.hi_dword.rss = cpu_to_le32(rss_info->hash);
+            rss_type = rss_info->type;
+            trace_igb_rx_metadata_rss(d->wb.lower.hi_dword.rss, rss_type);
+        }
+    } else if (hasip4) {
+            d->wb.upper.status_error |= E1000_RXD_STAT_IPIDV;
+            d->wb.lower.hi_dword.csum_ip.ip_id =
+                cpu_to_le16(net_rx_pkt_get_ip_id(pkt));
+            trace_e1000e_rx_metadata_ip_id(d->wb.lower.hi_dword.csum_ip.ip_id);
+    }
+
+    if (ts) {
+        d->wb.upper.status_error |= BIT(16);
+    }
 
-    igb_build_rx_metadata(core, pkt, pkt != NULL,
-                          rss_info, etqf, ts,
-                          &desc->wb.lower.lo_dword.pkt_info,
-                          &desc->wb.lower.lo_dword.hdr_info,
-                          &desc->wb.lower.hi_dword.rss,
-                          &desc->wb.upper.status_error,
-                          &desc->wb.lower.hi_dword.csum_ip.ip_id,
-                          &desc->wb.upper.vlan);
+    pkt_type = igb_rx_desc_get_packet_type(core, pkt, etqf);
+    trace_e1000e_rx_metadata_pkt_type(pkt_type);
+    d->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type << 4));
 }
 
 static inline void
@@ -1468,8 +1481,7 @@ igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
                    uint16_t etqf, bool ts, uint16_t length)
 {
     if (igb_rx_use_legacy_descriptor(core)) {
-        igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info,
-                                etqf, ts, length);
+        igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, length);
     } else {
         igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info,
                                etqf, ts, length);
@@ -1508,19 +1520,6 @@ igb_pci_dma_write_rx_desc(IGBCore *core, PCIDevice *dev, dma_addr_t addr,
     }
 }
 
-static void
-igb_write_to_rx_buffers(IGBCore *core,
-                        PCIDevice *d,
-                        hwaddr ba,
-                        uint16_t *written,
-                        const char *data,
-                        dma_addr_t data_len)
-{
-    trace_igb_rx_desc_buff_write(ba, *written, data, data_len);
-    pci_dma_write(d, ba + *written, data, data_len);
-    *written += data_len;
-}
-
 static void
 igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi,
                     size_t pkt_size, size_t pkt_fcs_size)
@@ -1546,6 +1545,108 @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi)
            ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16;
 }
 
+typedef struct IGBPacketRxDMAState {
+    size_t size;
+    size_t total_size;
+    size_t ps_hdr_len;
+    size_t desc_size;
+    size_t desc_offset;
+    uint32_t rx_desc_packet_buf_size;
+    uint32_t rx_desc_header_buf_size;
+    struct iovec *iov;
+    size_t iov_ofs;
+    bool is_first;
+    uint16_t written;
+    hwaddr ba;
+} IGBPacketRxDMAState;
+
+static void
+igb_truncate_to_descriptor_size(IGBPacketRxDMAState *pdma_st, size_t *size)
+{
+    if (*size > pdma_st->rx_desc_packet_buf_size) {
+        *size = pdma_st->rx_desc_packet_buf_size;
+    }
+}
+
+static void
+igb_write_payload_frag_to_rx_buffers(IGBCore *core,
+                                     PCIDevice *d,
+                                     hwaddr ba,
+                                     uint16_t *written,
+                                     uint32_t cur_buf_len,
+                                     const char *data,
+                                     dma_addr_t data_len)
+{
+    trace_igb_rx_desc_buff_write(ba, *written, data, data_len);
+    pci_dma_write(d, ba + *written, data, data_len);
+    *written += data_len;
+}
+
+static void
+igb_write_payload_to_rx_buffers(IGBCore *core,
+                                struct NetRxPkt *pkt,
+                                PCIDevice *d,
+                                IGBPacketRxDMAState *pdma_st,
+                                size_t *copy_size)
+{
+    static const uint32_t fcs_pad;
+    size_t iov_copy;
+
+    /* Copy packet payload */
+    while (*copy_size) {
+        iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs);
+        igb_write_payload_frag_to_rx_buffers(core, d,
+                                             pdma_st->ba,
+                                             &pdma_st->written,
+                                             pdma_st->rx_desc_packet_buf_size,
+                                             pdma_st->iov->iov_base +
+                                             pdma_st->iov_ofs,
+                                             iov_copy);
+
+        *copy_size -= iov_copy;
+        pdma_st->iov_ofs += iov_copy;
+        if (pdma_st->iov_ofs == pdma_st->iov->iov_len) {
+            pdma_st->iov++;
+            pdma_st->iov_ofs = 0;
+        }
+    }
+
+    if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) {
+        /* Simulate FCS checksum presence in the last descriptor */
+        igb_write_payload_frag_to_rx_buffers(core, d,
+                                             pdma_st->ba,
+                                             &pdma_st->written,
+                                             pdma_st->rx_desc_packet_buf_size,
+                                             (const char *) &fcs_pad,
+                                             e1000x_fcs_len(core->mac));
+    }
+}
+
+static void
+igb_write_to_rx_buffers(IGBCore *core,
+                        struct NetRxPkt *pkt,
+                        PCIDevice *d,
+                        IGBPacketRxDMAState *pdma_st)
+{
+    size_t copy_size;
+
+    if (!pdma_st->ba) {
+        /* as per intel docs; skip descriptors with null buf addr */
+        trace_e1000e_rx_null_descriptor();
+        return;
+    }
+
+    if (pdma_st->desc_offset >= pdma_st->size) {
+        return;
+    }
+
+    pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset;
+    igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size);
+    copy_size = pdma_st->size - pdma_st->desc_offset;
+    igb_truncate_to_descriptor_size(pdma_st, &copy_size);
+    igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, &copy_size);
+}
+
 static void
 igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
                           const E1000E_RxRing *rxr,
@@ -1555,91 +1656,54 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
     PCIDevice *d;
     dma_addr_t base;
     union e1000_rx_desc_union desc;
-    size_t desc_size;
-    size_t desc_offset = 0;
-    size_t iov_ofs = 0;
-
-    struct iovec *iov = net_rx_pkt_get_iovec(pkt);
-    size_t size = net_rx_pkt_get_total_len(pkt);
-    size_t total_size = size + e1000x_fcs_len(core->mac);
-    const E1000ERingInfo *rxi = rxr->i;
-    size_t bufsize = igb_rxbufsize(core, rxi);
-
+    const E1000ERingInfo *rxi;
+    size_t rx_desc_len;
+
+    IGBPacketRxDMAState pdma_st = {0};
+    pdma_st.is_first = true;
+    pdma_st.size = net_rx_pkt_get_total_len(pkt);
+    pdma_st.total_size = pdma_st.size + e1000x_fcs_len(core->mac);
+
+    rxi = rxr->i;
+    rx_desc_len = core->rx_desc_len;
+    pdma_st.rx_desc_packet_buf_size =
+        igb_rxbufsize(core, rxi);
+    pdma_st.rx_desc_header_buf_size =
+        igb_get_queue_rx_header_buf_size(core, rxi);
+    pdma_st.iov = net_rx_pkt_get_iovec(pkt);
     d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8);
     if (!d) {
         d = core->owner;
     }
 
     do {
-        hwaddr ba;
-        uint16_t written = 0;
+        pdma_st.written = 0;
         bool is_last = false;
 
-        desc_size = total_size - desc_offset;
-
-        if (desc_size > bufsize) {
-            desc_size = bufsize;
-        }
-
         if (igb_ring_empty(core, rxi)) {
             return;
         }
 
         base = igb_ring_head_descr(core, rxi);
+        pci_dma_read(d, base, &desc, rx_desc_len);
+        trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len);
 
-        pci_dma_read(d, base, &desc, core->rx_desc_len);
-
-        trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len);
-
-        igb_read_rx_descr(core, &desc, &ba);
-
-        if (ba) {
-            if (desc_offset < size) {
-                static const uint32_t fcs_pad;
-                size_t iov_copy;
-                size_t copy_size = size - desc_offset;
-                if (copy_size > bufsize) {
-                    copy_size = bufsize;
-                }
-
-                /* Copy packet payload */
-                while (copy_size) {
-                    iov_copy = MIN(copy_size, iov->iov_len - iov_ofs);
+        igb_read_rx_descr(core, &desc, &pdma_st.ba);
 
-                    igb_write_to_rx_buffers(core, d, ba, &written,
-                                            iov->iov_base + iov_ofs, iov_copy);
-
-                    copy_size -= iov_copy;
-                    iov_ofs += iov_copy;
-                    if (iov_ofs == iov->iov_len) {
-                        iov++;
-                        iov_ofs = 0;
-                    }
-                }
-
-                if (desc_offset + desc_size >= total_size) {
-                    /* Simulate FCS checksum presence in the last descriptor */
-                    igb_write_to_rx_buffers(core, d, ba, &written,
-                          (const char *) &fcs_pad, e1000x_fcs_len(core->mac));
-                }
-            }
-        } else { /* as per intel docs; skip descriptors with null buf addr */
-            trace_e1000e_rx_null_descriptor();
-        }
-        desc_offset += desc_size;
-        if (desc_offset >= total_size) {
+        igb_write_to_rx_buffers(core, pkt, d, &pdma_st);
+        pdma_st.desc_offset += pdma_st.desc_size;
+        if (pdma_st.desc_offset >= pdma_st.total_size) {
             is_last = true;
         }
 
         igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL,
-                           rss_info, etqf, ts, written);
-        igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len);
-
-        igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN);
-
-    } while (desc_offset < total_size);
+                           rss_info, etqf, ts, pdma_st.written);
+        pci_dma_write(d, base, &desc, rx_desc_len);
+        igb_ring_advance(core, rxi,
+                         rx_desc_len / E1000_MIN_RX_DESC_LEN);
+    } while (pdma_st.desc_offset < pdma_st.total_size);
 
-    igb_update_rx_stats(core, rxi, size, total_size);
+    igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size);
 }
 
 static bool
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
index 82ff195dfc..c4ede22181 100644
--- a/hw/net/igb_regs.h
+++ b/hw/net/igb_regs.h
@@ -452,6 +452,7 @@ union e1000_adv_rx_desc {
 #define E1000_SRRCTL_BSIZEHDRSIZE_MASK         0x00000F00
 #define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT        2  /* Shift _left_ */
 #define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF       0x02000000
+#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT        0x04000000
 #define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000
 #define E1000_SRRCTL_DESCTYPE_MASK             0x0E000000
 #define E1000_SRRCTL_DROP_EN                   0x80000000
@@ -692,11 +693,12 @@ union e1000_adv_rx_desc {
 
 #define E1000_STATUS_NUM_VFS_SHIFT 14
 
-#define E1000_ADVRXD_PKT_IP4 BIT(4)
-#define E1000_ADVRXD_PKT_IP6 BIT(6)
-#define E1000_ADVRXD_PKT_TCP BIT(8)
-#define E1000_ADVRXD_PKT_UDP BIT(9)
-#define E1000_ADVRXD_PKT_SCTP BIT(10)
+#define E1000_ADVRXD_PKT_IP4  BIT(0)
+#define E1000_ADVRXD_PKT_IP6  BIT(2)
+#define E1000_ADVRXD_PKT_IP6E BIT(3)
+#define E1000_ADVRXD_PKT_TCP  BIT(4)
+#define E1000_ADVRXD_PKT_UDP  BIT(5)
+#define E1000_ADVRXD_PKT_SCTP BIT(6)
 
 static inline uint8_t igb_ivar_entry_rx(uint8_t i)
 {
diff --git a/hw/net/trace-events b/hw/net/trace-events
index e4a98b2c7d..def651c186 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -277,9 +277,9 @@ igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"
 igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d"
 
 igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
-igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
+igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer %u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
 
-igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X"
+igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: 0x%X, rss_pkt_type: 0x%X"
 
 igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled"
 igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x"
@@ -294,6 +294,8 @@ igb_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = 0x%x"
 igb_set_pfmailbox(uint32_t vf_num, uint32_t val) "PFMailbox[%d]: 0x%x"
 igb_set_vfmailbox(uint32_t vf_num, uint32_t val) "VFMailbox[%d]: 0x%x"
 
+igb_wrn_rx_desc_modes_not_supp(int desc_type) "Not supported descriptor type: %d"
+
 # igbvf.c
 igbvf_wrn_io_addr_unknown(uint64_t addr) "IO unknown register 0x%"PRIx64
 
diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c
index a603468beb..f40c4ec4cd 100644
--- a/tests/qtest/libqos/igb.c
+++ b/tests/qtest/libqos/igb.c
@@ -109,6 +109,11 @@ static void igb_pci_start_hw(QOSGraphObject *obj)
                         E1000_RAH_AV | E1000_RAH_POOL_1 |
                         le16_to_cpu(*(uint16_t *)(address + 4)));
 
+    /* Set supported receive descriptor mode */
+    e1000e_macreg_write(&d->e1000e,
+                        E1000_SRRCTL(0),
+                        E1000_SRRCTL_DESCTYPE_ADV_ONEBUF);
+
     /* Enable receive */
     e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN);
     e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN);
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v4 4/5] igb: add IPv6 extended headers traffic detection
       [not found]   ` <CGME20230504131107eucas1p1bb75dd2ba7baf4f269ec2476feba69ac@eucas1p1.samsung.com>
@ 2023-05-04 13:10     ` Tomasz Dzieciol
  0 siblings, 0 replies; 9+ messages in thread
From: Tomasz Dzieciol @ 2023-05-04 13:10 UTC (permalink / raw)
  To: qemu-devel, akihiko.odaki
  Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki

Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>
---
 hw/net/igb_core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index dbd09b9b46..8c0291665f 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -1400,7 +1400,9 @@ igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf)
     net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
 
     if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) {
-        pkt_type |= E1000_ADVRXD_PKT_IP6;
+        eth_ip6_hdr_info *ip6hdr_info  = net_rx_pkt_get_ip6_info(pkt);
+        pkt_type |= ip6hdr_info->has_ext_hdrs ? E1000_ADVRXD_PKT_IP6E :
+                                                E1000_ADVRXD_PKT_IP6;
     } else if (hasip4) {
         pkt_type = E1000_ADVRXD_PKT_IP4;
     }
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v4 5/5] igb: packet-split descriptors support
       [not found]   ` <CGME20230504131107eucas1p2aae525d7b23725b4efd8713e14943fb6@eucas1p2.samsung.com>
@ 2023-05-04 13:10     ` Tomasz Dzieciol
  2023-05-04 15:06       ` Akihiko Odaki
  0 siblings, 1 reply; 9+ messages in thread
From: Tomasz Dzieciol @ 2023-05-04 13:10 UTC (permalink / raw)
  To: qemu-devel, akihiko.odaki
  Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki

Packet-split descriptors are used by Linux VF driver for MTU values from 2048

Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>
---
 hw/net/igb_core.c   | 368 ++++++++++++++++++++++++++++++++++++++------
 hw/net/igb_regs.h   |   8 +
 hw/net/trace-events |   2 +-
 3 files changed, 332 insertions(+), 46 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index 8c0291665f..9c1a2fa136 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -276,6 +276,20 @@ typedef struct E1000ERingInfo {
     int idx;
 } E1000ERingInfo;
 
+static uint32_t
+igb_rx_queue_desctyp_get(IGBCore *core, const E1000ERingInfo *r)
+{
+    return core->mac[E1000_SRRCTL(r->idx) >> 2] & E1000_SRRCTL_DESCTYPE_MASK;
+}
+
+static bool
+igb_rx_use_ps_descriptor(IGBCore *core, const E1000ERingInfo *r)
+{
+    uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+    return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT ||
+           desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
 static inline bool
 igb_rss_enabled(IGBCore *core)
 {
@@ -1233,21 +1247,70 @@ igb_read_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
 }
 
 static inline void
-igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
-                      hwaddr *buff_addr)
+igb_read_adv_rx_single_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+                                 hwaddr *buff_addr)
 {
     *buff_addr = le64_to_cpu(desc->read.pkt_addr);
 }
 
 static inline void
-igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
-                  hwaddr *buff_addr)
+igb_read_adv_rx_split_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
+                                hwaddr *buff_addr)
 {
+    buff_addr[0] = le64_to_cpu(desc->read.hdr_addr);
+    buff_addr[1] = le64_to_cpu(desc->read.pkt_addr);
+}
+
+typedef struct IGBBaState {
+    uint16_t written[IGB_MAX_PS_BUFFERS];
+    uint8_t cur_idx;
+} IGBBaState;
+
+typedef struct IGBPacketRxDMAState {
+    size_t size;
+    size_t total_size;
+    size_t ps_hdr_len;
+    size_t desc_size;
+    size_t desc_offset;
+    uint32_t rx_desc_packet_buf_size;
+    uint32_t rx_desc_header_buf_size;
+    struct iovec *iov;
+    size_t iov_ofs;
+    bool do_ps;
+    bool is_first;
+    IGBBaState bastate;
+    hwaddr ba[IGB_MAX_PS_BUFFERS];
+} IGBPacketRxDMAState;
+
+static inline void
+igb_read_rx_descr(IGBCore *core,
+                  union e1000_rx_desc_union *desc,
+                  IGBPacketRxDMAState *pdma_st,
+                  const E1000ERingInfo *r)
+{
+    uint32_t desc_type;
+
     if (igb_rx_use_legacy_descriptor(core)) {
-        igb_read_lgcy_rx_descr(core, &desc->legacy, buff_addr);
-    } else {
-        igb_read_adv_rx_descr(core, &desc->adv, buff_addr);
+        igb_read_lgcy_rx_descr(core, &desc->legacy, &pdma_st->ba[1]);
+        pdma_st->ba[0] = 0;
+        return;
     }
+
+    /* advanced header split descriptor */
+    if (igb_rx_use_ps_descriptor(core, r)) {
+        igb_read_adv_rx_split_buf_descr(core, &desc->adv, &pdma_st->ba[0]);
+        return;
+    }
+
+    /* descriptor replication modes not supported */
+    desc_type = igb_rx_queue_desctyp_get(core, r);
+    if (desc_type != E1000_SRRCTL_DESCTYPE_ADV_ONEBUF) {
+        trace_igb_wrn_rx_desc_modes_not_supp(desc_type);
+    }
+
+    /* advanced single buffer descriptor */
+    igb_read_adv_rx_single_buf_descr(core, &desc->adv, &pdma_st->ba[1]);
+    pdma_st->ba[0] = 0;
 }
 
 static void
@@ -1390,6 +1453,13 @@ igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
     desc->status = (uint8_t) le32_to_cpu(status_flags);
 }
 
+static bool
+igb_rx_ps_descriptor_split_always(IGBCore *core, const E1000ERingInfo *r)
+{
+    uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
+    return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
+}
+
 static uint16_t
 igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf)
 {
@@ -1477,16 +1547,69 @@ igb_write_adv_rx_descr(IGBCore *core,
     d->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type << 4));
 }
 
+typedef struct IGBSplitDescriptorData {
+    bool sph;
+    bool hbo;
+    size_t hdr_len;
+} IGBSplitDescriptorData;
+
+static inline void
+igb_write_adv_ps_split_rx_descr(IGBCore *core,
+                                union e1000_adv_rx_desc *d,
+                                struct NetRxPkt *pkt,
+                                const E1000E_RSSInfo *rss_info,
+                                const E1000ERingInfo *r,
+                                uint16_t etqf,
+                                bool ts,
+                                IGBSplitDescriptorData *ps_desc_data,
+                                IGBPacketRxDMAState *pdma_st)
+{
+    size_t pkt_len;
+    size_t hdr_len = ps_desc_data->hdr_len;
+
+    bool split_always = igb_rx_ps_descriptor_split_always(core, r);
+    if (!split_always) {
+        if ((!ps_desc_data->sph && !ps_desc_data->hbo) ||
+            (ps_desc_data->sph &&  ps_desc_data->hbo)) {
+            pkt_len = pdma_st->bastate.written[0] + pdma_st->bastate.written[1];
+        } else {
+            assert(!ps_desc_data->hbo);
+            pkt_len = pdma_st->bastate.written[1];
+        }
+    } else {
+        pkt_len = pdma_st->bastate.written[1];
+    }
+
+    igb_write_adv_rx_descr(core, d, pkt, rss_info, etqf, ts, pkt_len);
+
+    d->wb.lower.lo_dword.hdr_info = (hdr_len << E1000_ADVRXD_HDR_LEN_OFFSET) &
+                                    E1000_ADVRXD_ADV_HDR_LEN_MASK;
+    d->wb.lower.lo_dword.hdr_info |= ps_desc_data->sph ? E1000_ADVRXD_HDR_SPH
+                                                       : 0;
+    d->wb.upper.status_error |= ps_desc_data->hbo ?
+                                    E1000_ADVRXD_ST_ERR_HBO_OFFSET : 0;
+}
+
 static inline void
-igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
-                   struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info,
-                   uint16_t etqf, bool ts, uint16_t length)
+igb_write_rx_descr(IGBCore *core,
+                   union e1000_rx_desc_union *desc,
+                   struct NetRxPkt *pkt,
+                   const E1000E_RSSInfo *rss_info,
+                   uint16_t etqf,
+                   bool ts,
+                   IGBSplitDescriptorData *ps_desc_data,
+                   IGBPacketRxDMAState *pdma_st,
+                   const E1000ERingInfo *r)
 {
     if (igb_rx_use_legacy_descriptor(core)) {
-        igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, length);
+        igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info,
+                                pdma_st->bastate.written[1]);
+    } else if (igb_rx_use_ps_descriptor(core, r)) {
+        igb_write_adv_ps_split_rx_descr(core, &desc->adv, pkt, rss_info, r,
+                                        etqf, ts, ps_desc_data, pdma_st);
     } else {
         igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info,
-                               etqf, ts, length);
+                               etqf, ts, pdma_st->bastate.written[1]);
     }
 }
 
@@ -1547,41 +1670,182 @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi)
            ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16;
 }
 
-typedef struct IGBPacketRxDMAState {
-    size_t size;
-    size_t total_size;
-    size_t ps_hdr_len;
-    size_t desc_size;
-    size_t desc_offset;
-    uint32_t rx_desc_packet_buf_size;
-    uint32_t rx_desc_header_buf_size;
-    struct iovec *iov;
-    size_t iov_ofs;
-    bool is_first;
-    uint16_t written;
-    hwaddr ba;
-} IGBPacketRxDMAState;
+static bool
+igb_do_ps(IGBCore *core,
+          const E1000ERingInfo *r,
+          struct NetRxPkt *pkt,
+          size_t *hdr_len,
+          IGBSplitDescriptorData *ps_desc_data)
+{
+    bool hasip4, hasip6;
+    EthL4HdrProto l4hdr_proto;
+    bool fragment;
+    bool split_always;
+    size_t bheader_size;
+    size_t total_pkt_len;
+
+    if (!igb_rx_use_ps_descriptor(core, r)) {
+        return false;
+    }
+
+    memset(ps_desc_data, 0, sizeof(IGBSplitDescriptorData));
+
+    total_pkt_len = net_rx_pkt_get_total_len(pkt);
+    bheader_size = igb_get_queue_rx_header_buf_size(core, r);
+    split_always = igb_rx_ps_descriptor_split_always(core, r);
+    if (split_always && total_pkt_len <= bheader_size) {
+        *hdr_len = total_pkt_len;
+        ps_desc_data->hdr_len = total_pkt_len;
+        return true;
+    }
+
+    net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
+
+    if (hasip4) {
+        fragment = net_rx_pkt_get_ip4_info(pkt)->fragment;
+    } else if (hasip6) {
+        fragment = net_rx_pkt_get_ip6_info(pkt)->fragment;
+    } else {
+        ps_desc_data->hdr_len = bheader_size;
+        goto header_not_handled;
+    }
+
+    if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) {
+        ps_desc_data->hdr_len = bheader_size;
+        goto header_not_handled;
+    }
+
+    /* no header splitting for SCTP */
+    if (!fragment && (l4hdr_proto == ETH_L4_HDR_PROTO_UDP ||
+                      l4hdr_proto == ETH_L4_HDR_PROTO_TCP)) {
+        *hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt);
+    } else {
+        *hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt);
+    }
+
+    ps_desc_data->sph = true;
+    ps_desc_data->hdr_len = *hdr_len;
+
+    if (*hdr_len > bheader_size) {
+        ps_desc_data->hbo = true;
+        goto header_not_handled;
+    }
+
+    return true;
+
+header_not_handled:
+    if (split_always) {
+        *hdr_len = bheader_size;
+        return true;
+    }
+
+    return false;
+}
 
 static void
 igb_truncate_to_descriptor_size(IGBPacketRxDMAState *pdma_st, size_t *size)
 {
-    if (*size > pdma_st->rx_desc_packet_buf_size) {
-        *size = pdma_st->rx_desc_packet_buf_size;
+    if (pdma_st->do_ps && pdma_st->is_first) {
+        if (*size > pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len) {
+            *size = pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len;
+        }
+    } else {
+        if (*size > pdma_st->rx_desc_packet_buf_size) {
+            *size = pdma_st->rx_desc_packet_buf_size;
+        }
+    }
+}
+
+static inline void
+igb_write_hdr_to_rx_buffers(IGBCore *core,
+                            PCIDevice *d,
+                            IGBPacketRxDMAState *pdma_st,
+                            const char *data,
+                            dma_addr_t data_len)
+{
+    assert(data_len <= pdma_st->rx_desc_header_buf_size -
+                       pdma_st->bastate.written[0]);
+    pci_dma_write(d,
+                  pdma_st->ba[0] + pdma_st->bastate.written[0],
+                  data, data_len);
+    pdma_st->bastate.written[0] += data_len;
+    pdma_st->bastate.cur_idx = 1;
+}
+
+static void
+igb_write_packet_hdr_to_descr_addr(IGBCore *core,
+                                   struct NetRxPkt *pkt,
+                                   PCIDevice *d,
+                                   IGBPacketRxDMAState *pdma_st,
+                                   size_t *copy_size)
+{
+    size_t iov_copy;
+    size_t ps_hdr_copied = 0;
+
+    if (!pdma_st->is_first) {
+        /* Leave buffer 0 of each descriptor except first */
+        /* empty                                          */
+        pdma_st->bastate.cur_idx = 1;
+        return;
     }
+
+    do {
+        iov_copy = MIN(pdma_st->ps_hdr_len - ps_hdr_copied,
+                       pdma_st->iov->iov_len - pdma_st->iov_ofs);
+
+        igb_write_hdr_to_rx_buffers(core, d, pdma_st,
+                                    pdma_st->iov->iov_base,
+                                    iov_copy);
+
+        *copy_size -= iov_copy;
+        ps_hdr_copied += iov_copy;
+
+        pdma_st->iov_ofs += iov_copy;
+        if (pdma_st->iov_ofs == pdma_st->iov->iov_len) {
+            pdma_st->iov++;
+            pdma_st->iov_ofs = 0;
+        }
+    } while (ps_hdr_copied < pdma_st->ps_hdr_len);
+
+    pdma_st->is_first = false;
 }
 
 static void
 igb_write_payload_frag_to_rx_buffers(IGBCore *core,
                                      PCIDevice *d,
-                                     hwaddr ba,
-                                     uint16_t *written,
-                                     uint32_t cur_buf_len,
+                                     IGBPacketRxDMAState *pdma_st,
                                      const char *data,
                                      dma_addr_t data_len)
 {
-    trace_igb_rx_desc_buff_write(ba, *written, data, data_len);
-    pci_dma_write(d, ba + *written, data, data_len);
-    *written += data_len;
+    while (data_len > 0) {
+        assert(pdma_st->bastate.cur_idx < IGB_MAX_PS_BUFFERS);
+
+        uint32_t cur_buf_bytes_left =
+            pdma_st->rx_desc_packet_buf_size -
+            pdma_st->bastate.written[pdma_st->bastate.cur_idx];
+        uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left);
+
+        trace_igb_rx_desc_buff_write(
+            pdma_st->bastate.cur_idx,
+            pdma_st->ba[pdma_st->bastate.cur_idx],
+            pdma_st->bastate.written[pdma_st->bastate.cur_idx],
+            data,
+            bytes_to_write);
+
+        pci_dma_write(d,
+                      pdma_st->ba[pdma_st->bastate.cur_idx] +
+                      pdma_st->bastate.written[pdma_st->bastate.cur_idx],
+                      data, bytes_to_write);
+
+        pdma_st->bastate.written[pdma_st->bastate.cur_idx] += bytes_to_write;
+        data += bytes_to_write;
+        data_len -= bytes_to_write;
+
+        if (pdma_st->bastate.written[pdma_st->bastate.cur_idx] ==
+            pdma_st->rx_desc_packet_buf_size) {
+            pdma_st->bastate.cur_idx++;
+        }
+    }
 }
 
 static void
@@ -1598,9 +1862,7 @@ igb_write_payload_to_rx_buffers(IGBCore *core,
     while (*copy_size) {
         iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs);
         igb_write_payload_frag_to_rx_buffers(core, d,
-                                             pdma_st->ba,
-                                             &pdma_st->written,
-                                             pdma_st->rx_desc_packet_buf_size,
+                                             pdma_st,
                                              pdma_st->iov->iov_base +
                                              pdma_st->iov_ofs,
                                              iov_copy);
@@ -1616,9 +1878,7 @@ igb_write_payload_to_rx_buffers(IGBCore *core,
     if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) {
         /* Simulate FCS checksum presence in the last descriptor */
         igb_write_payload_frag_to_rx_buffers(core, d,
-                                             pdma_st->ba,
-                                             &pdma_st->written,
-                                             pdma_st->rx_desc_packet_buf_size,
+                                             pdma_st,
                                              (const char *) &fcs_pad,
                                              e1000x_fcs_len(core->mac));
     }
@@ -1632,7 +1892,7 @@ igb_write_to_rx_buffers(IGBCore *core,
 {
     size_t copy_size;
 
-    if (!pdma_st->ba) {
+    if (!(pdma_st->ba)[1]) {
         /* as per intel docs; skip descriptors with null buf addr */
         trace_e1000e_rx_null_descriptor();
         return;
@@ -1646,6 +1906,14 @@ igb_write_to_rx_buffers(IGBCore *core,
     igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size);
     copy_size = pdma_st->size - pdma_st->desc_offset;
     igb_truncate_to_descriptor_size(pdma_st, &copy_size);
+
+    /* For PS mode copy the packet header first */
+    if (pdma_st->do_ps) {
+        igb_write_packet_hdr_to_descr_addr(core, pkt, d, pdma_st, &copy_size);
+    } else {
+        pdma_st->bastate.cur_idx = 1;
+    }
+
     igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, &copy_size);
 }
 
@@ -1660,6 +1928,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
     union e1000_rx_desc_union desc;
     const E1000ERingInfo *rxi;
     size_t rx_desc_len;
+    IGBSplitDescriptorData ps_desc_data;
 
     IGBPacketRxDMAState pdma_st = {0};
     pdma_st.is_first = true;
@@ -1678,8 +1947,12 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
         d = core->owner;
     }
 
+    pdma_st.do_ps = igb_do_ps(core, rxi, pkt,
+                              &pdma_st.ps_hdr_len,
+                              &ps_desc_data);
+
     do {
-        pdma_st.written = 0;
+        memset(&pdma_st.bastate, 0, sizeof(IGBBaState));
         bool is_last = false;
 
         if (igb_ring_empty(core, rxi)) {
@@ -1690,7 +1963,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
         pci_dma_read(d, base, &desc, rx_desc_len);
         trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len);
 
-        igb_read_rx_descr(core, &desc, &pdma_st.ba);
+        igb_read_rx_descr(core, &desc, &pdma_st, rxi);
 
         igb_write_to_rx_buffers(core, pkt, d, &pdma_st);
         pdma_st.desc_offset += pdma_st.desc_size;
@@ -1698,8 +1971,13 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
             is_last = true;
         }
 
-        igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL,
-                           rss_info, etqf, ts, pdma_st.written);
+        igb_write_rx_descr(core, &desc,
+                           is_last ? pkt : NULL,
+                           rss_info,
+                           etqf, ts,
+                           &ps_desc_data,
+                           &pdma_st,
+                           rxi);
         pci_dma_write(d, base, &desc, rx_desc_len);
         igb_ring_advance(core, rxi,
                          rx_desc_len / E1000_MIN_RX_DESC_LEN);
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
index c4ede22181..ed7427b8fe 100644
--- a/hw/net/igb_regs.h
+++ b/hw/net/igb_regs.h
@@ -700,6 +700,14 @@ union e1000_adv_rx_desc {
 #define E1000_ADVRXD_PKT_UDP  BIT(5)
 #define E1000_ADVRXD_PKT_SCTP BIT(6)
 
+#define IGB_MAX_PS_BUFFERS 2
+
+#define E1000_ADVRXD_HDR_LEN_OFFSET    (21 - 16)
+#define E1000_ADVRXD_ADV_HDR_LEN_MASK  ((BIT(10) - 1) << \
+                                        E1000_ADVRXD_HDR_LEN_OFFSET)
+#define E1000_ADVRXD_HDR_SPH           BIT(15)
+#define E1000_ADVRXD_ST_ERR_HBO_OFFSET BIT(3 + 20)
+
 static inline uint8_t igb_ivar_entry_rx(uint8_t i)
 {
     return i < 8 ? i * 4 : (i - 8) * 4 + 2;
diff --git a/hw/net/trace-events b/hw/net/trace-events
index def651c186..e97e9dc17b 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -277,7 +277,7 @@ igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"
 igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d"
 
 igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
-igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer %u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
+igb_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer %u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
 
 igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: 0x%X, rss_pkt_type: 0x%X"
 
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v4 2/5] igb: rename E1000E_RingInfo_st
  2023-05-04 13:10     ` [PATCH v4 2/5] igb: rename E1000E_RingInfo_st Tomasz Dzieciol
@ 2023-05-04 14:51       ` Akihiko Odaki
  0 siblings, 0 replies; 9+ messages in thread
From: Akihiko Odaki @ 2023-05-04 14:51 UTC (permalink / raw)
  To: Tomasz Dzieciol, qemu-devel
  Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki

On 2023/05/04 22:10, Tomasz Dzieciol wrote:
> Rename E1000E_RingInfo_st and E1000E_RingInfo according to qemu typdefs guide.

Please make the same change for e1000e to make it easy to compare it 
with igb.

> 
> Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>
> ---
>   hw/net/igb_core.c | 42 +++++++++++++++++++++---------------------
>   1 file changed, 21 insertions(+), 21 deletions(-)
> 
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
> index 012eb1e1b9..b6031dea24 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -694,24 +694,24 @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx)
>       return (ent & E1000_IVAR_VALID) ? BIT(ent & 0x1f) : 0;
>   }
>   
> -typedef struct E1000E_RingInfo_st {
> +typedef struct E1000ERingInfo {
>       int dbah;
>       int dbal;
>       int dlen;
>       int dh;
>       int dt;
>       int idx;
> -} E1000E_RingInfo;
> +} E1000ERingInfo;
>   
>   static inline bool
> -igb_ring_empty(IGBCore *core, const E1000E_RingInfo *r)
> +igb_ring_empty(IGBCore *core, const E1000ERingInfo *r)
>   {
>       return core->mac[r->dh] == core->mac[r->dt] ||
>                   core->mac[r->dt] >= core->mac[r->dlen] / E1000_RING_DESC_LEN;
>   }
>   
>   static inline uint64_t
> -igb_ring_base(IGBCore *core, const E1000E_RingInfo *r)
> +igb_ring_base(IGBCore *core, const E1000ERingInfo *r)
>   {
>       uint64_t bah = core->mac[r->dbah];
>       uint64_t bal = core->mac[r->dbal];
> @@ -720,13 +720,13 @@ igb_ring_base(IGBCore *core, const E1000E_RingInfo *r)
>   }
>   
>   static inline uint64_t
> -igb_ring_head_descr(IGBCore *core, const E1000E_RingInfo *r)
> +igb_ring_head_descr(IGBCore *core, const E1000ERingInfo *r)
>   {
>       return igb_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh];
>   }
>   
>   static inline void
> -igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count)
> +igb_ring_advance(IGBCore *core, const E1000ERingInfo *r, uint32_t count)
>   {
>       core->mac[r->dh] += count;
>   
> @@ -736,7 +736,7 @@ igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count)
>   }
>   
>   static inline uint32_t
> -igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r)
> +igb_ring_free_descr_num(IGBCore *core, const E1000ERingInfo *r)
>   {
>       trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen],
>                                    core->mac[r->dh],  core->mac[r->dt]);
> @@ -755,13 +755,13 @@ igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r)
>   }
>   
>   static inline bool
> -igb_ring_enabled(IGBCore *core, const E1000E_RingInfo *r)
> +igb_ring_enabled(IGBCore *core, const E1000ERingInfo *r)
>   {
>       return core->mac[r->dlen] > 0;
>   }
>   
>   typedef struct IGB_TxRing_st {
> -    const E1000E_RingInfo *i;
> +    const E1000ERingInfo *i;
>       struct igb_tx *tx;
>   } IGB_TxRing;
>   
> @@ -774,7 +774,7 @@ igb_mq_queue_idx(int base_reg_idx, int reg_idx)
>   static inline void
>   igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx)
>   {
> -    static const E1000E_RingInfo i[IGB_NUM_QUEUES] = {
> +    static const E1000ERingInfo i[IGB_NUM_QUEUES] = {
>           { TDBAH0, TDBAL0, TDLEN0, TDH0, TDT0, 0 },
>           { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 },
>           { TDBAH2, TDBAL2, TDLEN2, TDH2, TDT2, 2 },
> @@ -800,13 +800,13 @@ igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx)
>   }
>   
>   typedef struct E1000E_RxRing_st {
> -    const E1000E_RingInfo *i;
> +    const E1000ERingInfo *i;
>   } E1000E_RxRing;
>   
>   static inline void
>   igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx)
>   {
> -    static const E1000E_RingInfo i[IGB_NUM_QUEUES] = {
> +    static const E1000ERingInfo i[IGB_NUM_QUEUES] = {
>           { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 },
>           { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 },
>           { RDBAH2, RDBAL2, RDLEN2, RDH2, RDT2, 2 },
> @@ -833,7 +833,7 @@ igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx)
>   static uint32_t
>   igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
>                        union e1000_adv_tx_desc *tx_desc,
> -                     const E1000E_RingInfo *txi)
> +                     const E1000ERingInfo *txi)
>   {
>       PCIDevice *d;
>       uint32_t cmd_type_len = le32_to_cpu(tx_desc->read.cmd_type_len);
> @@ -866,7 +866,7 @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
>   }
>   
>   static inline bool
> -igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
> +igb_tx_enabled(IGBCore *core, const E1000ERingInfo *txi)
>   {
>       bool vmdq = core->mac[MRQC] & 1;
>       uint16_t qn = txi->idx;
> @@ -883,7 +883,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
>       PCIDevice *d;
>       dma_addr_t base;
>       union e1000_adv_tx_desc desc;
> -    const E1000E_RingInfo *txi = txr->i;
> +    const E1000ERingInfo *txi = txr->i;
>       uint32_t eic = 0;
>   
>       if (!igb_tx_enabled(core, txi)) {
> @@ -918,7 +918,7 @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
>   }
>   
>   static uint32_t
> -igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r)
> +igb_rxbufsize(IGBCore *core, const E1000ERingInfo *r)
>   {
>       uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2];
>       uint32_t bsizepkt = srrctl & E1000_SRRCTL_BSIZEPKT_MASK;
> @@ -930,7 +930,7 @@ igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r)
>   }
>   
>   static bool
> -igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size)
> +igb_has_rxbufs(IGBCore *core, const E1000ERingInfo *r, size_t total_size)
>   {
>       uint32_t bufs = igb_ring_free_descr_num(core, r);
>       uint32_t bufsize = igb_rxbufsize(core, r);
> @@ -1522,7 +1522,7 @@ igb_write_to_rx_buffers(IGBCore *core,
>   }
>   
>   static void
> -igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
> +igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi,
>                       size_t pkt_size, size_t pkt_fcs_size)
>   {
>       eth_pkt_types_e pkt_type = net_rx_pkt_get_packet_type(core->rx_pkt);
> @@ -1540,7 +1540,7 @@ igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
>   }
>   
>   static inline bool
> -igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi)
> +igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi)
>   {
>       return igb_ring_free_descr_num(core, rxi) ==
>              ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16;
> @@ -1562,7 +1562,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
>       struct iovec *iov = net_rx_pkt_get_iovec(pkt);
>       size_t size = net_rx_pkt_get_total_len(pkt);
>       size_t total_size = size + e1000x_fcs_len(core->mac);
> -    const E1000E_RingInfo *rxi = rxr->i;
> +    const E1000ERingInfo *rxi = rxr->i;
>       size_t bufsize = igb_rxbufsize(core, rxi);
>   
>       d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8);
> @@ -1643,7 +1643,7 @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
>   }
>   
>   static bool
> -igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
> +igb_rx_strip_vlan(IGBCore *core, const E1000ERingInfo *rxi)
>   {
>       if (core->mac[MRQC] & 1) {
>           uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v4 5/5] igb: packet-split descriptors support
  2023-05-04 13:10     ` [PATCH v4 5/5] igb: packet-split descriptors support Tomasz Dzieciol
@ 2023-05-04 15:06       ` Akihiko Odaki
  0 siblings, 0 replies; 9+ messages in thread
From: Akihiko Odaki @ 2023-05-04 15:06 UTC (permalink / raw)
  To: Tomasz Dzieciol, qemu-devel
  Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki

On 2023/05/04 22:10, Tomasz Dzieciol wrote:
> Packet-split descriptors are used by Linux VF driver for MTU values from 2048
> 
> Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>
> ---
>   hw/net/igb_core.c   | 368 ++++++++++++++++++++++++++++++++++++++------
>   hw/net/igb_regs.h   |   8 +
>   hw/net/trace-events |   2 +-
>   3 files changed, 332 insertions(+), 46 deletions(-)
> 
> diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
> index 8c0291665f..9c1a2fa136 100644
> --- a/hw/net/igb_core.c
> +++ b/hw/net/igb_core.c
> @@ -276,6 +276,20 @@ typedef struct E1000ERingInfo {
>       int idx;
>   } E1000ERingInfo;
>   
> +static uint32_t
> +igb_rx_queue_desctyp_get(IGBCore *core, const E1000ERingInfo *r)
> +{
> +    return core->mac[E1000_SRRCTL(r->idx) >> 2] & E1000_SRRCTL_DESCTYPE_MASK;
> +}
> +
> +static bool
> +igb_rx_use_ps_descriptor(IGBCore *core, const E1000ERingInfo *r)
> +{
> +    uint32_t desctyp = igb_rx_queue_desctyp_get(core, r);
> +    return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT ||
> +           desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
> +}
> +
>   static inline bool
>   igb_rss_enabled(IGBCore *core)
>   {
> @@ -1233,21 +1247,70 @@ igb_read_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
>   }
>   
>   static inline void
> -igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
> -                      hwaddr *buff_addr)
> +igb_read_adv_rx_single_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
> +                                 hwaddr *buff_addr)
>   {
>       *buff_addr = le64_to_cpu(desc->read.pkt_addr);
>   }
>   
>   static inline void
> -igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
> -                  hwaddr *buff_addr)
> +igb_read_adv_rx_split_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
> +                                hwaddr *buff_addr)
>   {
> +    buff_addr[0] = le64_to_cpu(desc->read.hdr_addr);
> +    buff_addr[1] = le64_to_cpu(desc->read.pkt_addr);
> +}
> +
> +typedef struct IGBBaState {
> +    uint16_t written[IGB_MAX_PS_BUFFERS];
> +    uint8_t cur_idx;
> +} IGBBaState;

This struct derives from e1000e so you should rename the corresponding 
struct of e1000e.

> +
> +typedef struct IGBPacketRxDMAState {
> +    size_t size;
> +    size_t total_size;
> +    size_t ps_hdr_len;
> +    size_t desc_size;
> +    size_t desc_offset;
> +    uint32_t rx_desc_packet_buf_size;
> +    uint32_t rx_desc_header_buf_size;
> +    struct iovec *iov;
> +    size_t iov_ofs;
> +    bool do_ps;
> +    bool is_first;
> +    IGBBaState bastate;
> +    hwaddr ba[IGB_MAX_PS_BUFFERS];

I meant this should not be an array but instead should be defined as a 
struct as it is in the "read" member of union e1000_adv_rx_desc. It's 
defined in a way different from e1000e's extended packet split 
descriptor (union e1000_rx_desc_packet_split) and that is based on the 
differences of notations in e1000e's and igb's datasheets so I want you 
to respect that.

Also, this definition is moved from the place where it is defined at 
first in an earlier patch, but I suggest to place it here in the earlier 
patch to keep this patch concise unless there is something to prevent that.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v4 3/5] igb: RX descriptors handling cleanup
  2023-05-04 13:10     ` [PATCH v4 3/5] igb: RX descriptors handling cleanup Tomasz Dzieciol
@ 2023-05-04 15:12       ` Akihiko Odaki
  0 siblings, 0 replies; 9+ messages in thread
From: Akihiko Odaki @ 2023-05-04 15:12 UTC (permalink / raw)
  To: Tomasz Dzieciol, qemu-devel
  Cc: sriram.yagnaraman, jasowang, k.kwiecien, m.sochacki

On 2023/05/04 22:10, Tomasz Dzieciol wrote:
> Refactoring is done in preparation for support of multiple advanced
> descriptors RX modes, especially packet-split modes. >
> Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>

I guess the refactoring of igb_write_rx_descr() and 
igb_write_packet_to_guest() can be split. Please do so in that casea


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2023-05-04 17:47 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
     [not found] <CGME20230504131105eucas1p1cc725f17aa42262be597f5ee81aca037@eucas1p1.samsung.com>
2023-05-04 13:10 ` [PATCH v4 0/5] igb: packet-split descriptors support Tomasz Dzieciol
     [not found]   ` <CGME20230504131105eucas1p2610f7bcc7c4fabba29cd198c52ac30f2@eucas1p2.samsung.com>
2023-05-04 13:10     ` [PATCH v4 1/5] igb: remove TCP ACK detection Tomasz Dzieciol
     [not found]   ` <CGME20230504131106eucas1p1714e019395c941f51155c0e9ae52a66b@eucas1p1.samsung.com>
2023-05-04 13:10     ` [PATCH v4 2/5] igb: rename E1000E_RingInfo_st Tomasz Dzieciol
2023-05-04 14:51       ` Akihiko Odaki
     [not found]   ` <CGME20230504131106eucas1p13530e4d1f10ca1086b39b37feddc3e28@eucas1p1.samsung.com>
2023-05-04 13:10     ` [PATCH v4 3/5] igb: RX descriptors handling cleanup Tomasz Dzieciol
2023-05-04 15:12       ` Akihiko Odaki
     [not found]   ` <CGME20230504131107eucas1p1bb75dd2ba7baf4f269ec2476feba69ac@eucas1p1.samsung.com>
2023-05-04 13:10     ` [PATCH v4 4/5] igb: add IPv6 extended headers traffic detection Tomasz Dzieciol
     [not found]   ` <CGME20230504131107eucas1p2aae525d7b23725b4efd8713e14943fb6@eucas1p2.samsung.com>
2023-05-04 13:10     ` [PATCH v4 5/5] igb: packet-split descriptors support Tomasz Dzieciol
2023-05-04 15:06       ` Akihiko Odaki

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).