From mboxrd@z Thu Jan 1 00:00:00 1970 From: Alexander Lobakin Date: Fri, 28 Jan 2022 18:38:50 +0100 Subject: [Intel-wired-lan] [PATCH net-next 13/19] iecm: implement splitq napi_poll In-Reply-To: <20220128001009.721392-14-alan.brady@intel.com> References: <20220128001009.721392-1-alan.brady@intel.com> Message-ID: <20220128173850.27818-1-alexandr.lobakin@intel.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit To: intel-wired-lan@osuosl.org List-ID: From: Alan Brady Date: Thu, 27 Jan 2022 16:10:03 -0800 > This adds everything we need to actually receive packets and process spent > buffers using the splitq model. This contrasts to more traditional queueing > models by essentially splitting a normal queue of descriptors and mapped > buffers into separate queues. This allows us to deal with both more > efficiently and also allows us to implement asymmetric queuing setups where > you have multiple completion queues associated with a single buffer queue. > > Signed-off-by: Phani Burra > Signed-off-by: Joshua Hay > Signed-off-by: Madhu Chittim > Signed-off-by: Pavan Kumar Linga > Signed-off-by: Alan Brady > --- > drivers/net/ethernet/intel/iecm/iecm_txrx.c | 1468 ++++++++++++++++- > drivers/net/ethernet/intel/include/iecm.h | 4 + > .../net/ethernet/intel/include/iecm_txrx.h | 20 + > 3 files changed, 1490 insertions(+), 2 deletions(-) > > diff --git a/drivers/net/ethernet/intel/iecm/iecm_txrx.c b/drivers/net/ethernet/intel/iecm/iecm_txrx.c > index 4b9288e1c254..85a82b58525a 100644 > --- a/drivers/net/ethernet/intel/iecm/iecm_txrx.c > +++ b/drivers/net/ethernet/intel/iecm/iecm_txrx.c > @@ -218,6 +218,36 @@ const struct iecm_rx_ptype_decoded iecm_ptype_lookup[IECM_RX_MAX_PTYPE] = { > }; > EXPORT_SYMBOL(iecm_ptype_lookup); > > +/** > + * iecm_buf_lifo_push - push a buffer pointer onto stack > + * @stack: pointer to stack struct > + * @buf: pointer to buf to push > + * > + * Returns 0 on success, negative on failure > + **/ > +static int iecm_buf_lifo_push(struct iecm_buf_lifo *stack, > + struct iecm_tx_buf *buf) > +{ > + if (stack->top == stack->size) How frequent is this? A candidate for unlikely() maybe? > + return -ENOSPC; > + > + stack->bufs[stack->top++] = buf; > + > + return 0; > +} > + > +/** > + * iecm_buf_lifo_pop - pop a buffer pointer from stack > + * @stack: pointer to stack struct > + **/ > +static struct iecm_tx_buf *iecm_buf_lifo_pop(struct iecm_buf_lifo *stack) > +{ > + if (!stack->top) > + return NULL; > + > + return stack->bufs[--stack->top]; > +} > + > /** > * iecm_get_stats64 - get statistics for network device structure > * @netdev: network interface device structure > @@ -812,6 +842,30 @@ iecm_rx_buf_hw_alloc_all(struct iecm_queue *rxbufq, u16 alloc_count) > return !!alloc_count; > } > > +/** > + * iecm_rx_post_buf_refill - Post buffer id to refill queue > + * @refillq: refill queue to post to > + * @buf_id: buffer id to post > + */ > +void iecm_rx_post_buf_refill(struct iecm_sw_queue *refillq, u16 buf_id) > +{ > + u16 nta = refillq->next_to_alloc; > + u16 *bi; > + > + bi = IECM_SPLITQ_RX_BI_DESC(refillq, nta); > + /* store the buffer ID and the SW maintained GEN bit to the refillq */ > + *bi = ((buf_id << IECM_RX_BI_BUFID_S) & IECM_RX_BI_BUFID_M) | > + (!!(test_bit(__IECM_Q_GEN_CHK, refillq->flags)) << > + IECM_RX_BI_GEN_S); Please use FIELD_GET() and FIELD_PREP() for masks. This won't pass the maintainers. > + > + nta++; > + if (unlikely(nta == refillq->desc_count)) { Could be compressed into one line. 
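Something like this for both remarks, maybe (untested, and assuming the
masks are compile-time constants, so that linux/bitfield.h is usable):

	*bi = FIELD_PREP(IECM_RX_BI_BUFID_M, buf_id) |
	      FIELD_PREP(IECM_RX_BI_GEN_M,
			 test_bit(__IECM_Q_GEN_CHK, refillq->flags));

and

	if (unlikely(++nta == refillq->desc_count)) {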
> +		nta = 0;
> +		change_bit(__IECM_Q_GEN_CHK, refillq->flags);
> +	}
> +	refillq->next_to_alloc = nta;
> +}
> +
>  /**
>   * iecm_rx_post_buf_desc - Post buffer to bufq descriptor ring
>   * @bufq: buffer queue to post to
> @@ -1670,6 +1724,398 @@ int iecm_vport_queues_alloc(struct iecm_vport *vport)
>  	return err;
>  }
>
> +/**
> + * iecm_tx_find_q - Find the tx q based on q id
> + * @vport: the vport we care about
> + * @q_id: Id of the queue
> + *
> + * Returns queue ptr if found else returns NULL
> + */
> +static struct iecm_queue *
> +iecm_tx_find_q(struct iecm_vport *vport, int q_id)
> +{
> +	int i;
> +
> +	for (i = 0; i < vport->num_txq; i++) {
> +		struct iecm_queue *tx_q = vport->txqs[i];
> +
> +		if (tx_q->q_id == q_id)
> +			return tx_q;
> +	}

	for (i = ...)
		if (vport->txqs[i]->q_id == q_id)
			return vport->txqs[i];

No need to create a variable.

> +
> +	return NULL;
> +}
> +
> +/**
> + * iecm_tx_handle_sw_marker - Handle queue marker packet
> + * @tx_q: tx queue to handle software marker
> + */
> +static void iecm_tx_handle_sw_marker(struct iecm_queue *tx_q)
> +{
> +	struct iecm_vport *vport = tx_q->vport;
> +	bool drain_complete = true;
> +	int i;
> +
> +	clear_bit(__IECM_Q_SW_MARKER, tx_q->flags);
> +	/* Hardware must write marker packets to all queues associated with
> +	 * completion queues. So check if all queues received marker packets
> +	 */
> +	for (i = 0; i < vport->num_txq; i++) {
> +		if (test_bit(__IECM_Q_SW_MARKER, vport->txqs[i]->flags))
> +			drain_complete = false;
> +	}
> +	if (drain_complete) {
> +		set_bit(__IECM_SW_MARKER, vport->adapter->flags);
> +		wake_up(&vport->adapter->sw_marker_wq);
> +	}
> +}
> +
> +/**
> + * iecm_tx_splitq_clean_buf - Clean TX buffer resources
> + * @tx_q: tx queue to clean buffer from
> + * @tx_buf: buffer to be cleaned
> + * @napi_budget: Used to determine if we are in netpoll
> + */
> +static void
> +iecm_tx_splitq_clean_buf(struct iecm_queue *tx_q, struct iecm_tx_buf *tx_buf,
> +			 int napi_budget)
> +{
> +	/* unmap skb header data */
> +	dma_unmap_single(tx_q->dev,
> +			 dma_unmap_addr(tx_buf, dma),

These two lines can fit into 79 without wrapping.

> +			 dma_unmap_len(tx_buf, len),
> +			 DMA_TO_DEVICE);

These two as well.
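I.e.:

	dma_unmap_single(tx_q->dev, dma_unmap_addr(tx_buf, dma),
			 dma_unmap_len(tx_buf, len), DMA_TO_DEVICE);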
> + > + napi_consume_skb(tx_buf->skb, napi_budget); > + > + /* clear tx_buf data */ > + tx_buf->skb = NULL; > + dma_unmap_len_set(tx_buf, len, 0); > +} > + > +/** > + * iecm_stash_flow_sch_buffers - store buffere parameter info to be freed at a > + * later time (only relevant for flow scheduling mode) > + * @txq: Tx queue to clean > + * @tx_buf: buffer to store > + */ > +static int > +iecm_stash_flow_sch_buffers(struct iecm_queue *txq, struct iecm_tx_buf *tx_buf) > +{ > + struct iecm_adapter *adapter = txq->vport->adapter; > + struct iecm_tx_buf *shadow_buf; > + > + shadow_buf = iecm_buf_lifo_pop(&txq->buf_stack); > + if (!shadow_buf) { > + dev_err(&adapter->pdev->dev, > + "No out-of-order TX buffers left!\n"); > + return -ENOMEM; > + } > + > + /* Store buffer params in shadow buffer */ > + shadow_buf->skb = tx_buf->skb; > + shadow_buf->bytecount = tx_buf->bytecount; > + shadow_buf->gso_segs = tx_buf->gso_segs; > + dma_unmap_addr_set(shadow_buf, dma, dma_unmap_addr(tx_buf, dma)); > + dma_unmap_len_set(shadow_buf, len, dma_unmap_len(tx_buf, len)); > + shadow_buf->compl_tag = tx_buf->compl_tag; > + > + /* Add buffer to buf_hash table to be freed > + * later > + */ > + hash_add(txq->sched_buf_hash, &shadow_buf->hlist, > + shadow_buf->compl_tag); > + > + memset(tx_buf, 0, sizeof(struct iecm_tx_buf)); > + > + return 0; > +} > + > +/** > + * iecm_tx_splitq_clean - Reclaim resources from buffer queue > + * @tx_q: Tx queue to clean > + * @end: queue index until which it should be cleaned > + * @napi_budget: Used to determine if we are in netpoll > + * @descs_only: true if queue is using flow-based scheduling and should > + * not clean buffers at this time > + * > + * Cleans the queue descriptor ring. If the queue is using queue-based > + * scheduling, the buffers will be cleaned as well and this function will > + * return the number of bytes/packets cleaned. If the queue is using flow-based > + * scheduling, only the descriptors are cleaned at this time. Separate packet > + * completion events will be reported on the completion queue, and the buffers > + * will be cleaned separately. The stats returned from this function when using > + * flow-based scheduling are irrelevant. 
> + */ > +static struct iecm_tx_queue_stats > +iecm_tx_splitq_clean(struct iecm_queue *tx_q, u16 end, int napi_budget, > + bool descs_only) > +{ > + union iecm_tx_flex_desc *next_pending_desc = NULL; > + struct iecm_tx_queue_stats cleaned_stats = {0}; > + union iecm_tx_flex_desc *tx_desc; > + s16 ntc = tx_q->next_to_clean; > + struct iecm_tx_buf *tx_buf; > + unsigned short gso_segs = 0; > + unsigned int bytecount = 0; > + struct netdev_queue *nq; > + > + tx_desc = IECM_FLEX_TX_DESC(tx_q, ntc); > + next_pending_desc = IECM_FLEX_TX_DESC(tx_q, end); > + tx_buf = &tx_q->tx_buf[ntc]; > + ntc -= tx_q->desc_count; > + > + while (tx_desc != next_pending_desc) { > + union iecm_tx_flex_desc *eop_desc = > + (union iecm_tx_flex_desc *)tx_buf->next_to_watch; > + > + /* clear next_to_watch to prevent false hangs */ > + tx_buf->next_to_watch = NULL; > + > + bytecount += tx_buf->bytecount; > + gso_segs += tx_buf->gso_segs; > + > + if (descs_only) { > + if (iecm_stash_flow_sch_buffers(tx_q, tx_buf)) > + goto tx_splitq_clean_out; > + > + while (tx_desc != eop_desc) { > + tx_buf++; > + tx_desc++; > + ntc++; > + if (unlikely(!ntc)) { > + ntc -= tx_q->desc_count; > + tx_buf = tx_q->tx_buf; > + tx_desc = IECM_FLEX_TX_DESC(tx_q, 0); > + } > + > + if (dma_unmap_len(tx_buf, len)) { > + if (iecm_stash_flow_sch_buffers(tx_q, > + tx_buf)) > + goto tx_splitq_clean_out; > + } Redundant braces, redundant nested ifs. Also, it's actually an error, since dma_unmap_len() always equals 0 for architectures which don't select NEED_DMA_MAP_STATE. So this should either check for any other signs of mapped buffer or field `len` should be present and defined with correct values unconditionally. > + } > + } else { > + /* update the statistics for this packet */ > + cleaned_stats.bytes += tx_buf->bytecount; > + cleaned_stats.packets += tx_buf->gso_segs; > + > + iecm_tx_splitq_clean_buf(tx_q, tx_buf, napi_budget); > + > + /* unmap remaining buffers */ > + while (tx_desc != eop_desc) { > + tx_buf++; > + tx_desc++; > + ntc++; > + if (unlikely(!ntc)) { > + ntc -= tx_q->desc_count; > + tx_buf = tx_q->tx_buf; > + tx_desc = IECM_FLEX_TX_DESC(tx_q, 0); > + } > + > + /* unmap any remaining paged data */ > + if (dma_unmap_len(tx_buf, len)) { Same here. 
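For reference, without CONFIG_NEED_DMA_MAP_STATE the core defines these
helpers as no-ops, roughly:

	#define dma_unmap_addr(PTR, ADDR_NAME)	(0)
	#define dma_unmap_len(PTR, LEN_NAME)	(0)

so none of these checks can ever fire on such platforms.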
> + dma_unmap_page(tx_q->dev, > + dma_unmap_addr(tx_buf, dma), > + dma_unmap_len(tx_buf, len), > + DMA_TO_DEVICE); > + dma_unmap_len_set(tx_buf, len, 0); > + } > + } > + } > + > + tx_buf++; > + tx_desc++; > + ntc++; > + if (unlikely(!ntc)) { > + ntc -= tx_q->desc_count; > + tx_buf = tx_q->tx_buf; > + tx_desc = IECM_FLEX_TX_DESC(tx_q, 0); > + } > + } > + > +tx_splitq_clean_out: > + ntc += tx_q->desc_count; > + tx_q->next_to_clean = ntc; > + > + nq = netdev_get_tx_queue(tx_q->vport->netdev, tx_q->idx); > + netdev_tx_completed_queue(nq, gso_segs, bytecount); > + > + return cleaned_stats; > +} > + > +/** > + * iecm_tx_clean_flow_sch_bufs - clean bufs that were stored for > + * out of order completions > + * @txq: queue to clean > + * @compl_tag: completion tag of packet to clean (from completion descriptor) > + * @budget: Used to determine if we are in netpoll > + */ > +static struct iecm_tx_queue_stats > +iecm_tx_clean_flow_sch_bufs(struct iecm_queue *txq, u16 compl_tag, > + int budget) > +{ > + struct iecm_tx_queue_stats cleaned_stats = {0}; > + struct hlist_node *tmp_buf = NULL; > + struct iecm_tx_buf *tx_buf = NULL; > + > + /* Buffer completion */ > + hash_for_each_possible_safe(txq->sched_buf_hash, tx_buf, tmp_buf, > + hlist, compl_tag) { > + if (tx_buf->compl_tag != compl_tag) > + continue; > + > + if (likely(tx_buf->skb)) { > + /* update the statistics for this packet */ > + cleaned_stats.bytes += tx_buf->bytecount; > + cleaned_stats.packets += tx_buf->gso_segs; > + > + iecm_tx_splitq_clean_buf(txq, tx_buf, budget); > + } else if (dma_unmap_len(tx_buf, len)) { Here as well. > + dma_unmap_page(txq->dev, > + dma_unmap_addr(tx_buf, dma), > + dma_unmap_len(tx_buf, len), > + DMA_TO_DEVICE); > + dma_unmap_len_set(tx_buf, len, 0); > + } > + /* Push shadow buf back onto stack */ > + iecm_buf_lifo_push(&txq->buf_stack, tx_buf); > + > + hash_del(&tx_buf->hlist); > + } > + > + return cleaned_stats; > +} > + > +/** > + * iecm_tx_clean_complq - Reclaim resources on completion queue > + * @complq: Tx ring to clean > + * @budget: Used to determine if we are in netpoll > + * > + * Returns true if there's any budget left (e.g. the clean is finished) > + */ > +static bool > +iecm_tx_clean_complq(struct iecm_queue *complq, int budget) > +{ > + struct iecm_splitq_tx_compl_desc *tx_desc; > + struct iecm_vport *vport = complq->vport; > + s16 ntc = complq->next_to_clean; > + bool clean_completed = false; > + unsigned int complq_budget; > + > + complq_budget = vport->compln_clean_budget; > + tx_desc = IECM_SPLITQ_TX_COMPLQ_DESC(complq, ntc); > + ntc -= complq->desc_count; > + > + do { > + struct iecm_tx_queue_stats cleaned_stats = {0}; > + struct iecm_queue *tx_q; > + u16 compl_tag, hw_head; > + int tx_qid; > + u8 ctype; /* completion type */ > + u16 gen; > + > + /* if the descriptor isn't done, no work yet to do */ > + gen = (le16_to_cpu(tx_desc->qid_comptype_gen) & > + IECM_TXD_COMPLQ_GEN_M) >> IECM_TXD_COMPLQ_GEN_S; Same stuff about FIELD_{GET,PREP}(). > + if (test_bit(__IECM_Q_GEN_CHK, complq->flags) != gen) > + break; > + > + /* Find necessary info of TX queue to clean buffers */ > + tx_qid = (le16_to_cpu(tx_desc->qid_comptype_gen) & > + IECM_TXD_COMPLQ_QID_M) >> IECM_TXD_COMPLQ_QID_S; Here as well, and in other places I missed. 
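For these two, something like (untested, assuming the _M masks are
compile-time constants):

	gen = FIELD_GET(IECM_TXD_COMPLQ_GEN_M,
			le16_to_cpu(tx_desc->qid_comptype_gen));
	...
	tx_qid = FIELD_GET(IECM_TXD_COMPLQ_QID_M,
			   le16_to_cpu(tx_desc->qid_comptype_gen));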
> + tx_q = iecm_tx_find_q(vport, tx_qid); > + if (!tx_q) { > + dev_err(&complq->vport->adapter->pdev->dev, > + "TxQ #%d not found\n", tx_qid); > + goto fetch_next_desc; > + } > + > + /* Determine completion type */ > + ctype = (le16_to_cpu(tx_desc->qid_comptype_gen) & > + IECM_TXD_COMPLQ_COMPL_TYPE_M) >> > + IECM_TXD_COMPLQ_COMPL_TYPE_S; > + switch (ctype) { > + case IECM_TXD_COMPLT_RE: > + hw_head = le16_to_cpu(tx_desc->q_head_compl_tag.q_head); > + > + cleaned_stats = iecm_tx_splitq_clean(tx_q, hw_head, > + budget, true); > + break; > + case IECM_TXD_COMPLT_RS: > + if (test_bit(__IECM_Q_FLOW_SCH_EN, tx_q->flags)) { > + compl_tag = > + le16_to_cpu(tx_desc->q_head_compl_tag.compl_tag); > + > + cleaned_stats = > + iecm_tx_clean_flow_sch_bufs(tx_q, > + compl_tag, > + budget); > + } else { > + hw_head = > + le16_to_cpu(tx_desc->q_head_compl_tag.q_head); > + > + cleaned_stats = iecm_tx_splitq_clean(tx_q, > + hw_head, > + budget, > + false); > + } > + > + break; > + case IECM_TXD_COMPLT_SW_MARKER: > + iecm_tx_handle_sw_marker(tx_q); > + break; > + default: > + dev_err(&tx_q->vport->adapter->pdev->dev, > + "Unknown TX completion type: %d\n", > + ctype); > + goto fetch_next_desc; > + } > + > + u64_stats_update_begin(&tx_q->stats_sync); > + tx_q->q_stats.tx.packets += cleaned_stats.packets; > + tx_q->q_stats.tx.bytes += cleaned_stats.bytes; > + u64_stats_update_end(&tx_q->stats_sync); > + > + if (unlikely(cleaned_stats.packets && > + netif_carrier_ok(tx_q->vport->netdev) && > + (IECM_DESC_UNUSED(tx_q) >= IECM_TX_WAKE_THRESH) && > + (IECM_TX_BUF_UNUSED(tx_q) >= tx_q->desc_count >> 2))) { > + /* Make sure any other threads stopping queue after > + * this see new next_to_clean. > + */ > + smp_mb(); > + if (tx_q->vport->adapter->state == __IECM_UP && > + __netif_subqueue_stopped(tx_q->vport->netdev, > + tx_q->idx)) { > + netif_wake_subqueue(tx_q->vport->netdev, > + tx_q->idx); > + } > + } > + > +fetch_next_desc: > + tx_desc++; > + ntc++; > + if (unlikely(!ntc)) { > + ntc -= complq->desc_count; > + tx_desc = IECM_SPLITQ_TX_COMPLQ_DESC(complq, 0); > + change_bit(__IECM_Q_GEN_CHK, complq->flags); > + } > + > + prefetch(tx_desc); > + > + /* update budget accounting */ > + complq_budget--; > + } while (likely(complq_budget)); > + > + ntc += complq->desc_count; > + complq->next_to_clean = ntc; > + > + clean_completed = !!complq_budget; > + > + return clean_completed; > +} > + > /** > * iecm_tx_splitq_build_ctb - populate command tag and size for queue > * based scheduling descriptors > @@ -2337,6 +2783,940 @@ netdev_tx_t iecm_tx_splitq_start(struct sk_buff *skb, > return iecm_tx_splitq_frame(skb, tx_q); > } > > +/** > + * iecm_ptype_to_htype - get a hash type > + * @decoded: Decoded Rx packet type related fields > + * > + * Returns appropriate hash type (such as PKT_HASH_TYPE_L2/L3/L4) to be used by > + * skb_set_hash based on PTYPE as parsed by HW Rx pipeline and is part of > + * Rx desc. 
> + */ > +enum > +pkt_hash_types iecm_ptype_to_htype(struct iecm_rx_ptype_decoded *decoded) > +{ > + if (!decoded->known) > + return PKT_HASH_TYPE_NONE; > + if (decoded->payload_layer == IECM_RX_PTYPE_PAYLOAD_LAYER_PAY2 && > + decoded->inner_prot) > + return PKT_HASH_TYPE_L4; > + if (decoded->payload_layer == IECM_RX_PTYPE_PAYLOAD_LAYER_PAY2 && > + decoded->outer_ip) > + return PKT_HASH_TYPE_L3; > + if (decoded->outer_ip == IECM_RX_PTYPE_OUTER_L2) > + return PKT_HASH_TYPE_L2; > + > + return PKT_HASH_TYPE_NONE; > +} > + > +/** > + * iecm_rx_hash - set the hash value in the skb > + * @rxq: Rx descriptor ring packet is being transacted on > + * @skb: pointer to current skb being populated > + * @rx_desc: Receive descriptor > + * @decoded: Decoded Rx packet type related fields > + */ > +static void > +iecm_rx_hash(struct iecm_queue *rxq, struct sk_buff *skb, > + struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, > + struct iecm_rx_ptype_decoded *decoded) > +{ > + u32 hash; > + > + if (!iecm_is_feature_ena(rxq->vport, NETIF_F_RXHASH)) > + return; > + > + hash = le16_to_cpu(rx_desc->hash1) | > + (rx_desc->ff2_mirrid_hash2.hash2 << 16) | > + (rx_desc->hash3 << 24); > + > + skb_set_hash(skb, hash, iecm_ptype_to_htype(decoded)); > +} > + > +/** > + * iecm_rx_csum - Indicate in skb if checksum is good > + * @rxq: Rx descriptor ring packet is being transacted on > + * @skb: pointer to current skb being populated > + * @csum_bits: checksum fields extracted from the descriptor > + * @decoded: Decoded Rx packet type related fields > + * > + * skb->protocol must be set before this function is called > + */ > +static void iecm_rx_csum(struct iecm_queue *rxq, struct sk_buff *skb, > + struct iecm_rx_csum_decoded *csum_bits, > + struct iecm_rx_ptype_decoded *decoded) > +{ > + bool ipv4, ipv6; > + > + /* Start with CHECKSUM_NONE and by default csum_level = 0 */ > + skb->ip_summed = CHECKSUM_NONE; > + > + /* check if Rx checksum is enabled */ > + if (!iecm_is_feature_ena(rxq->vport, NETIF_F_RXCSUM)) > + return; > + > + /* check if HW has decoded the packet and checksum */ > + if (!(csum_bits->l3l4p)) > + return; > + > + ipv4 = (decoded->outer_ip == IECM_RX_PTYPE_OUTER_IP) && > + (decoded->outer_ip_ver == IECM_RX_PTYPE_OUTER_IPV4); > + ipv6 = (decoded->outer_ip == IECM_RX_PTYPE_OUTER_IP) && > + (decoded->outer_ip_ver == IECM_RX_PTYPE_OUTER_IPV6); > + > + if (ipv4 && (csum_bits->ipe || csum_bits->eipe)) > + goto checksum_fail; > + > + if (ipv6 && csum_bits->ipv6exadd) > + return; > + > + /* HW checksum will be invalid if vlan stripping is not enabled and > + * packet has an outer vlan tag. raw_csum_inv will also not be set > + * even though it's invalid. 
> + */ > + if (skb_vlan_tag_present(skb)) > + return; > + > + /* check for L4 errors and handle packets that were not able to be > + * checksummed > + */ > + if (csum_bits->l4e) > + goto checksum_fail; > + > + /* Only report checksum unnecessary for ICMP, TCP, UDP, or SCTP */ > + switch (decoded->inner_prot) { > + case IECM_RX_PTYPE_INNER_PROT_ICMP: > + case IECM_RX_PTYPE_INNER_PROT_TCP: > + case IECM_RX_PTYPE_INNER_PROT_UDP: > + case IECM_RX_PTYPE_INNER_PROT_SCTP: > + skb->ip_summed = CHECKSUM_UNNECESSARY; > + default: > + break; > + } > + return; > + > +checksum_fail: > + rxq->vport->port_stats.rx_hw_csum_err++; > +} > + > +/** > + * iecm_rx_splitq_extract_csum_bits - Extract checksum bits from descriptor > + * @rx_desc: receive descriptor > + * @csum: structure to extract checksum fields > + * > + **/ > +static void > +iecm_rx_splitq_extract_csum_bits(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, > + struct iecm_rx_csum_decoded *csum) > +{ > + u8 qword0, qword1; > + > + qword0 = rx_desc->status_err0_qw0; > + qword1 = rx_desc->status_err0_qw1; > + > + csum->ipe = !!(qword1 & > + BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_IPE_S)); > + csum->eipe = !!(qword1 & > + BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_EIPE_S)); > + csum->l4e = !!(qword1 & > + BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_XSUM_L4E_S)); > + csum->l3l4p = !!(qword1 & > + BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L3L4P_S)); > + csum->ipv6exadd = > + !!(qword0 & > + BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_IPV6EXADD_S)); > + csum->rsc = !!(le16_to_cpu(rx_desc->hdrlen_flags) & > + VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M); > + csum->raw_csum_inv = !!(le16_to_cpu(rx_desc->ptype_err_fflags0) & > + BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_FF0_S)); > + csum->raw_csum = le16_to_cpu(rx_desc->misc.raw_cs); > + csum->pprs = 0; > +} > + > +/** > + * iecm_rx_rsc - Set the RSC fields in the skb > + * @rxq : Rx descriptor ring packet is being transacted on > + * @skb : pointer to current skb being populated > + * @rx_desc: Receive descriptor > + * @decoded: Decoded Rx packet type related fields > + * > + * Return 0 on success and error code on failure > + * > + * Populate the skb fields with the total number of RSC segments, RSC payload > + * length and packet type. 
> + */ > +static int iecm_rx_rsc(struct iecm_queue *rxq, struct sk_buff *skb, > + struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc, > + struct iecm_rx_ptype_decoded *decoded) > +{ > + u16 rsc_segments, rsc_payload_len; > + struct tcphdr *tcph; > + bool ipv4, ipv6; > + > + if (!decoded->outer_ip) > + return -EINVAL; > + > + rsc_payload_len = le16_to_cpu(rx_desc->misc.rscseglen); > + if (!rsc_payload_len) > + return -EINVAL; > + > + ipv4 = (decoded->outer_ip == IECM_RX_PTYPE_OUTER_IP) && > + (decoded->outer_ip_ver == IECM_RX_PTYPE_OUTER_IPV4); > + ipv6 = (decoded->outer_ip == IECM_RX_PTYPE_OUTER_IP) && > + (decoded->outer_ip_ver == IECM_RX_PTYPE_OUTER_IPV6); > + > + if (!(ipv4 ^ ipv6)) > + return -EINVAL; > + > + rsc_segments = DIV_ROUND_UP(skb->data_len, rsc_payload_len); > + > + NAPI_GRO_CB(skb)->count = rsc_segments; > + skb_shinfo(skb)->gso_size = rsc_payload_len; > + > + skb_reset_network_header(skb); > + > + if (ipv4) { > + struct iphdr *ipv4h = ip_hdr(skb); > + > + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; > + > + /* Reset and set transport header offset in skb */ > + skb_set_transport_header(skb, sizeof(struct iphdr)); > + tcph = tcp_hdr(skb); > + > + /* Compute the TCP pseudo header checksum*/ > + tcph->check = > + ~tcp_v4_check(skb->len - skb_transport_offset(skb), > + ipv4h->saddr, ipv4h->daddr, 0); > + } else { > + struct ipv6hdr *ipv6h = ipv6_hdr(skb); > + > + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; > + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); > + tcph = tcp_hdr(skb); > + tcph->check = > + ~tcp_v6_check(skb->len - skb_transport_offset(skb), > + &ipv6h->saddr, &ipv6h->daddr, 0); > + } > + > + tcp_gro_complete(skb); > + > + u64_stats_update_begin(&rxq->stats_sync); > + rxq->q_stats.rx.rsc_pkts++; > + u64_stats_update_end(&rxq->stats_sync); > + > + return 0; > +} > + > +/** > + * iecm_rx_process_skb_fields - Populate skb header fields from Rx descriptor > + * @rxq: Rx descriptor ring packet is being transacted on > + * @skb: pointer to current skb being populated > + * @rx_desc: Receive descriptor > + * > + * This function checks the ring, descriptor, and packet information in > + * order to populate the hash, checksum, VLAN, protocol, and > + * other fields within the skb. > + */ > +int > +iecm_rx_process_skb_fields(struct iecm_queue *rxq, struct sk_buff *skb, > + struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) > +{ > + struct iecm_rx_ptype_decoded decoded; > + struct iecm_rx_csum_decoded csum_bits; Please follow RCT. > + u16 rx_ptype; > + int err = 0; > + > + rx_ptype = le16_to_cpu(rx_desc->ptype_err_fflags0) & > + VIRTCHNL2_RX_FLEX_DESC_ADV_PTYPE_M; > + > + decoded = rxq->vport->rx_ptype_lkup[rx_ptype]; > + if (!decoded.known) > + return -EINVAL; > + > + /* modifies the skb - consumes the enet header */ > + skb->protocol = eth_type_trans(skb, rxq->vport->netdev); eth_type_trans() should generally be called *right* before napi_gro_receive() to still have caches warm. > + iecm_rx_splitq_extract_csum_bits(rx_desc, &csum_bits); > + iecm_rx_csum(rxq, skb, &csum_bits, &decoded); > + /* process RSS/hash */ > + iecm_rx_hash(rxq, skb, rx_desc, &decoded); > + > + if (csum_bits.rsc) > + err = iecm_rx_rsc(rxq, skb, rx_desc, &decoded); > + > + return err; > +} > + > +/** > + * iecm_rx_skb - Send a completed packet up the stack > + * @rxq: Rx ring in play > + * @skb: packet to send up > + * @vlan_tag: packet vlan tag > + * > + * This function sends the completed packet (via. 
skb) up the stack using
> + * gro receive functions
> + */
> +void iecm_rx_skb(struct iecm_queue *rxq, struct sk_buff *skb, u16 vlan_tag)
> +{
> +	/* Adding HW VLAN tag to skb must occur after processing csum */
> +	if (vlan_tag & VLAN_VID_MASK)
> +		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);

Why can't this be placed into process_skb_fields()? This function becomes
redundant then, and you could call napi_gro_receive() directly instead.

> +
> +	napi_gro_receive(&rxq->q_vector->napi, skb);
> +}
> +
> +/**
> + * iecm_rx_page_is_reserved - check if reuse is possible
> + * @page: page struct to check
> + */
> +static bool iecm_rx_page_is_reserved(struct page *page)
> +{
> +	return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
> +}

Please check the generic dev_page_is_reusable(), it's almost the same (a
bit more optimized).

> +
> +/**
> + * iecm_rx_buf_adjust_pg - Prepare rx buffer for reuse
> + * @rx_buf: Rx buffer to adjust
> + * @size: Size of adjustment
> + *
> + * Update the offset within page so that rx buf will be ready to be reused.
> + * For systems with PAGE_SIZE < 8192 this function will flip the page offset
> + * so the second half of page assigned to rx buffer will be used, otherwise
> + * the offset is moved by the @size bytes
> + */
> +void
> +iecm_rx_buf_adjust_pg(struct iecm_rx_buf *rx_buf, unsigned int size)
> +{
> +	struct iecm_page_info *page_info = &rx_buf->page_info[rx_buf->page_indx];
> +
> +#if (PAGE_SIZE < 8192)
> +	if (rx_buf->buf_size > IECM_RX_BUF_2048)
> +		/* flip to second page */
> +		rx_buf->page_indx = !rx_buf->page_indx;
> +	else
> +		/* flip page offset to other buffer */
> +		page_info->page_offset ^= size;
> +#else
> +	/* move offset up to the next cache line */
> +	page_info->page_offset += size;
> +#endif

Again, no need for ifdeffery, the PAGE_SIZE check can be placed into an if
statement.

> +}
> +
> +/**
> + * iecm_rx_can_reuse_page - Determine if page can be reused for another rx
> + * @rx_buf: buffer containing the page
> + *
> + * If page is reusable, we have a green light for calling iecm_reuse_rx_page,
> + * which will assign the current buffer to the buffer that next_to_alloc is
> + * pointing to; otherwise, the dma mapping needs to be destroyed and
> + * page freed
> + */
> +bool iecm_rx_can_reuse_page(struct iecm_rx_buf *rx_buf)
> +{
> +	struct iecm_page_info *page_info = &rx_buf->page_info[rx_buf->page_indx];
> +
> +#if (PAGE_SIZE >= 8192)
> +	unsigned int last_offset = PAGE_SIZE - rx_buf->buf_size;
> +#endif /* PAGE_SIZE < 8192) */
> +	unsigned int pagecnt_bias = page_info->pagecnt_bias;
> +	struct page *page = page_info->page;
> +
> +	/* avoid re-using remote pages */
> +	if (unlikely(iecm_rx_page_is_reserved(page)))
> +		return false;
> +
> +#if (PAGE_SIZE < 8192)
> +	/* if we are only owner of page we can reuse it */
> +	if (unlikely((page_count(page) - pagecnt_bias) > 1))
> +		return false;
> +#else
> +	if (rx_buf->page_offset > last_offset)
> +		return false;
> +#endif /* PAGE_SIZE < 8192) */

Same here, twice.

> +
> +	/* If we have drained the page fragment pool we need to update
> +	 * the pagecnt_bias and page count so that we fully restock the
> +	 * number of references the driver holds.
> +	 */
> +	if (unlikely(pagecnt_bias == 1)) {

With 1532-byte frames, this condition will be hit 50% of the time, so it's
definitely not a good place for unlikely().
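And to illustrate the ifdeffery points above, iecm_rx_buf_adjust_pg() could
be e.g. (untested; the dead branches get compiled out since PAGE_SIZE is a
compile-time constant):

	if (PAGE_SIZE < 8192 && rx_buf->buf_size > IECM_RX_BUF_2048)
		/* flip to second page */
		rx_buf->page_indx = !rx_buf->page_indx;
	else if (PAGE_SIZE < 8192)
		/* flip page offset to other buffer */
		page_info->page_offset ^= size;
	else
		/* move offset up to the next cache line */
		page_info->page_offset += size;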
> + page_ref_add(page, USHRT_MAX - 1); > + page_info->pagecnt_bias = USHRT_MAX; > + } > + > + return true; > +} > + > +/** > + * iecm_rx_add_frag - Add contents of Rx buffer to sk_buff as a frag > + * @rx_buf: buffer containing page to add > + * @skb: sk_buff to place the data into > + * @size: packet length from rx_desc > + * > + * This function will add the data contained in rx_buf->page to the skb. > + * It will just attach the page as a frag to the skb. > + * The function will then update the page offset. > + */ > +void iecm_rx_add_frag(struct iecm_rx_buf *rx_buf, struct sk_buff *skb, > + unsigned int size) > +{ > + struct iecm_page_info *page_info = &rx_buf->page_info[rx_buf->page_indx]; > + > +#if (PAGE_SIZE >= 8192) > + unsigned int truesize = SKB_DATA_ALIGN(size); > +#else > + unsigned int truesize = rx_buf->buf_size; > +#endif Same. > + > + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page_info->page, > + page_info->page_offset, size, truesize); > + > + iecm_rx_buf_adjust_pg(rx_buf, truesize); > +} > + > +/** > + * iecm_rx_get_buf_page - Fetch Rx buffer page and synchronize data for use > + * @dev: device struct > + * @rx_buf: Rx buf to fetch page for > + * @size: size of buffer to add to skb > + * > + * This function will pull an Rx buffer page from the ring and synchronize it > + * for use by the CPU. > + */ > +static void > +iecm_rx_get_buf_page(struct device *dev, struct iecm_rx_buf *rx_buf, > + const unsigned int size) > +{ > + struct iecm_page_info *page_info = &rx_buf->page_info[rx_buf->page_indx]; > + > + prefetch(page_info->page); > + > + /* we are reusing so sync this buffer for CPU use */ > + dma_sync_single_range_for_cpu(dev, page_info->dma, > + page_info->page_offset, size, > + DMA_FROM_DEVICE); > + > + /* We have pulled a buffer for use, so decrement pagecnt_bias */ > + page_info->pagecnt_bias--; > +} > + > +/** > + * iecm_rx_construct_skb - Allocate skb and populate it > + * @rxq: Rx descriptor queue > + * @rx_buf: Rx buffer to pull data from > + * @size: the length of the packet > + * > + * This function allocates an skb. It then populates it with the page > + * data from the current receive descriptor, taking care to set up the > + * skb correctly. > + */ > +struct sk_buff * > +iecm_rx_construct_skb(struct iecm_queue *rxq, struct iecm_rx_buf *rx_buf, > + unsigned int size) > +{ > + struct iecm_page_info *page_info = &rx_buf->page_info[rx_buf->page_indx]; > + > + void *va = page_address(page_info->page) + page_info->page_offset; > + unsigned int headlen; > + struct sk_buff *skb; > + > + /* prefetch first cache line of first page */ > + prefetch(va); > +#if L1_CACHE_BYTES < 128 > + prefetch((u8 *)va + L1_CACHE_BYTES); > +#endif /* L1_CACHE_BYTES */ Here's open-coded net_prefetch(). 
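I.e. the three lines above are just

	/* prefetch first cache line of first page */
	net_prefetch(va);

which expands to exactly this pair of prefetches when L1_CACHE_BYTES is
less than 128.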
> + /* allocate a skb to store the frags */ > + skb = __napi_alloc_skb(&rxq->q_vector->napi, IECM_RX_HDR_SIZE, > + GFP_ATOMIC | __GFP_NOWARN); > + if (unlikely(!skb)) > + return NULL; > + > + skb_record_rx_queue(skb, rxq->idx); > + > + /* Determine available headroom for copy */ > + headlen = size; > + if (headlen > IECM_RX_HDR_SIZE) > + headlen = eth_get_headlen(skb->dev, va, IECM_RX_HDR_SIZE); > + > + /* align pull length to size of long to optimize memcpy performance */ > + memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); > + > + /* if we exhaust the linear part then add what is left as a frag */ > + size -= headlen; > + if (size) { > +#if (PAGE_SIZE >= 8192) > + unsigned int truesize = SKB_DATA_ALIGN(size); > +#else > + unsigned int truesize = rx_buf->buf_size; > +#endif Again. > + skb_add_rx_frag(skb, 0, page_info->page, > + page_info->page_offset + headlen, size, > + truesize); > + /* buffer is used by skb, update page_offset */ > + iecm_rx_buf_adjust_pg(rx_buf, truesize); > + > + } else { > + /* buffer is unused, reset bias back to rx_buf; data was copied > + * onto skb's linear part so there's no need for adjusting > + * page offset and we can reuse this buffer as-is > + */ > + page_info->pagecnt_bias++; > + } > + > + return skb; > +} > + > +/** > + * iecm_rx_hdr_construct_skb - Allocate skb and populate it from header buffer > + * @rxq: Rx descriptor queue > + * @hdr_buf: Rx buffer to pull data from > + * @size: the length of the packet > + * > + * This function allocates an skb. It then populates it with the page data from > + * the current receive descriptor, taking care to set up the skb correctly. > + * This specifcally uses a header buffer to start building the skb. > + */ > +static struct sk_buff * > +iecm_rx_hdr_construct_skb(struct iecm_queue *rxq, struct iecm_dma_mem *hdr_buf, > + unsigned int size) > +{ > + struct sk_buff *skb; > + > + /* allocate a skb to store the frags */ > + skb = __napi_alloc_skb(&rxq->q_vector->napi, IECM_RX_HDR_SIZE, > + GFP_ATOMIC | __GFP_NOWARN); > + if (unlikely(!skb)) > + return NULL; > + > + skb_record_rx_queue(skb, rxq->idx); > + > + memcpy(__skb_put(skb, size), hdr_buf->va, size); > + > + return skb; > +} > + > +/** > + * iecm_rx_splitq_test_staterr - tests bits in Rx descriptor > + * status and error fields > + * @stat_err_field: field from descriptor to test bits in > + * @stat_err_bits: value to mask > + * > + */ > +bool > +iecm_rx_splitq_test_staterr(u8 stat_err_field, const u8 stat_err_bits) > +{ > + return !!(stat_err_field & stat_err_bits); > +} > + > +/** > + * iecm_rx_splitq_extract_vlan_tag - Extract vlan tag from the descriptor > + * @desc: Rx flex descriptor > + * @rxq: rxq to check the vlan flags > + * @vlan_tag: vlan tag to fill in > + * > + * Return true if error bit is set in the descriptor, else return false and > + * store the vlan_tag in the variable passed in the function parameters > + */ > +bool iecm_rx_splitq_extract_vlan_tag(struct virtchnl2_rx_flex_desc_adv_nic_3 *desc, > + struct iecm_queue *rxq, u16 *vlan_tag) > +{ > + u8 stat_err0_qw0, stat_err_bits, stat_err1; > + > + stat_err_bits = BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_RXE_S); > + stat_err0_qw0 = desc->status_err0_qw0; > + if (unlikely(iecm_rx_splitq_test_staterr(stat_err0_qw0, stat_err_bits))) > + return true; > + > + stat_err1 = desc->status_err1; > + > + if (stat_err0_qw0 & BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_L2TAG1P_S) && > + test_bit(__IECM_Q_VLAN_TAG_LOC_L2TAG1, rxq->flags)) > + *vlan_tag = le16_to_cpu(desc->l2tag1); > + if 
(stat_err1 & BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS1_L2TAG2P_S) &&
> +	    test_bit(__IECM_Q_VLAN_TAG_LOC_L2TAG2, rxq->flags))
> +		*vlan_tag = le16_to_cpu(desc->l2tag2);
> +
> +	return false;
> +}
> +
> +/**
> + * iecm_rx_splitq_is_non_eop - process handling of non-EOP buffers
> + * @rx_desc: Rx descriptor for current buffer
> + *
> + * If the buffer is an EOP buffer, this function exits returning false,
> + * otherwise return true indicating that this is in fact a non-EOP buffer.
> + */
> +static bool
> +iecm_rx_splitq_is_non_eop(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc)
> +{
> +	/* if we are the last buffer then there is nothing else to do */
> +#define IECM_RXD_EOF BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_S)

This is the wrong place to define anything; it can easily be overlooked.

> +	if (likely(iecm_rx_splitq_test_staterr(rx_desc->status_err0_qw1,
> +					       IECM_RXD_EOF)))
> +		return false;
> +
> +	return true;

This can be converted to a one-liner:

	return likely(...);

> +}
> +
> +/**
> + * iecm_rx_splitq_recycle_buf - Attempt to recycle or realloc buffer
> + * @rxbufq: receive queue
> + * @rx_buf: Rx buffer to pull data from
> + *
> + * This function will clean up the contents of the rx_buf. It will either
> + * recycle the buffer or unmap it and free the associated resources. The buffer
> + * will then be placed on a refillq where it will later be reclaimed by the
> + * corresponding bufq.
> + *
> + * This works based on page flipping. If we assume e.g., a 4k page, it will be
> + * divided into two 2k buffers. We post the first half to hardware and, after
> + * using it, flip to second half of the page with iecm_adjust_pg_offset and
> + * post that to hardware. The third time through we'll flip back to first half
> + * of page and check if stack is still using it, if not we can reuse the buffer
> + * as is, otherwise we'll drain it and get a new page.
> + */
> +static void iecm_rx_splitq_recycle_buf(struct iecm_queue *rxbufq,
> +				       struct iecm_rx_buf *rx_buf)
> +{
> +	struct iecm_page_info *page_info = &rx_buf->page_info[rx_buf->page_indx];
> +
> +	if (!iecm_rx_can_reuse_page(rx_buf)) {
> +		/* we are not reusing the buffer so unmap it */
> +		dma_unmap_page_attrs(rxbufq->dev, page_info->dma, PAGE_SIZE,
> +				     DMA_FROM_DEVICE, IECM_RX_DMA_ATTR);
> +		__page_frag_cache_drain(page_info->page,
> +					page_info->pagecnt_bias);
> +
> +		/* clear contents of buffer_info */
> +		page_info->page = NULL;
> +		rx_buf->skb = NULL;
> +
> +		/* It's possible the alloc can fail here but there's not much
> +		 * we can do, bufq will have to try and realloc to fill the
> +		 * hole.
> +		 */
> +		iecm_alloc_page(rxbufq, page_info);
> +	}
> +
> +	/* We sync the memory back to hardware now to do as much work in this
> +	 * context as feasible. Hardware won't actually know about the buffer
> +	 * until it's reclaimed off the refillq and put back into the bufq.
> +	 */
> +	if (likely(page_info->page)) {
> +		dma_sync_single_range_for_device(rxbufq->dev, page_info->dma,
> +						 page_info->page_offset,
> +						 rxbufq->rx_buf_size,
> +						 DMA_FROM_DEVICE);
> +	}

A one-liner body, so the braces are redundant.

> +}

Here you set page_info->page to NULL in the first condition branch and
check for it in the second, so it's equivalent to

	if (iecm_rx_can_reuse_page(rx_buf)) {
		dma_sync_single_range();
		return;
	}

	/* we are not reusing the buffer ... */
	...

but saves one indent level and is generally more readable.
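With the realloc-failure path folded back in, the whole body could then
look like (untested):

	if (iecm_rx_can_reuse_page(rx_buf))
		goto sync;

	/* we are not reusing the buffer so unmap it */
	dma_unmap_page_attrs(rxbufq->dev, page_info->dma, PAGE_SIZE,
			     DMA_FROM_DEVICE, IECM_RX_DMA_ATTR);
	__page_frag_cache_drain(page_info->page, page_info->pagecnt_bias);

	/* clear contents of buffer_info */
	page_info->page = NULL;
	rx_buf->skb = NULL;

	/* bufq will have to try and realloc to fill the hole on failure */
	if (iecm_alloc_page(rxbufq, page_info))
		return;

sync:
	/* sync the memory back to hardware now, it won't actually know
	 * about the buffer until it's reclaimed off the refillq
	 */
	dma_sync_single_range_for_device(rxbufq->dev, page_info->dma,
					 page_info->page_offset,
					 rxbufq->rx_buf_size,
					 DMA_FROM_DEVICE);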
> + > +/** > + * iecm_rx_bump_ntc - Bump and wrap q->next_to_clean value > + * @q: queue to bump > + */ > +void iecm_rx_bump_ntc(struct iecm_queue *q) > +{ > + u16 ntc = q->next_to_clean + 1; There should be an empty line (between variable declarations and function body). checkpatch should've mentioned this. > + /* fetch, update, and store next to clean */ > + if (ntc < q->desc_count) { > + q->next_to_clean = ntc; > + } else { > + q->next_to_clean = 0; > + change_bit(__IECM_Q_GEN_CHK, q->flags); > + } > +} > + > +/** > + * iecm_rx_splitq_clean - Clean completed descriptors from Rx queue > + * @rxq: Rx descriptor queue to retrieve receive buffer queue > + * @budget: Total limit on number of packets to process > + * > + * This function provides a "bounce buffer" approach to Rx interrupt > + * processing. The advantage to this is that on systems that have > + * expensive overhead for IOMMU access this provides a means of avoiding > + * it by maintaining the mapping of the page to the system. > + * > + * Returns amount of work completed > + */ > +static int iecm_rx_splitq_clean(struct iecm_queue *rxq, int budget) > +{ > + int total_rx_bytes = 0, total_rx_pkts = 0; > + struct iecm_queue *rx_bufq = NULL; > + struct sk_buff *skb = rxq->skb; > + bool failure = false; > + > + /* Process Rx packets bounded by budget */ > + while (likely(total_rx_pkts < budget)) { > + struct virtchnl2_rx_flex_desc_adv_nic_3 *splitq_flex_rx_desc; > + struct iecm_sw_queue *refillq = NULL; > + struct iecm_dma_mem *hdr_buf = NULL; > + struct iecm_rxq_set *rxq_set = NULL; > + struct iecm_rx_buf *rx_buf = NULL; > + u16 gen_id, buf_id, vlan_tag = 0; > + union virtchnl2_rx_desc *rx_desc; > + unsigned int pkt_len = 0; > + unsigned int hdr_len = 0; > + /* Header buffer overflow only valid for header split */ > + bool hbo = false; > + int bufq_id; > + > + /* get the Rx desc from Rx queue based on 'next_to_clean' */ > + rx_desc = IECM_RX_DESC(rxq, rxq->next_to_clean); > + splitq_flex_rx_desc = (struct virtchnl2_rx_flex_desc_adv_nic_3 *)rx_desc; > + > + /* This memory barrier is needed to keep us from reading > + * any other fields out of the rx_desc > + */ > + dma_rmb(); > + > + /* if the descriptor isn't done, no work yet to do */ > + gen_id = le16_to_cpu(splitq_flex_rx_desc->pktlen_gen_bufq_id); > + gen_id = (gen_id & VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M) >> > + VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_S; > + if (test_bit(__IECM_Q_GEN_CHK, rxq->flags) != gen_id) > + break; > + > + if ((splitq_flex_rx_desc->rxdid_ucast & > + VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M) != VIRTCHNL2_RXDID_1_FLEX_SPLITQ) { > + iecm_rx_bump_ntc(rxq); > + rxq->vport->port_stats.rx_bad_descs++; > + continue; > + } > + > + pkt_len = le16_to_cpu(splitq_flex_rx_desc->pktlen_gen_bufq_id) & > + VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_PBUF_M; > + > + hbo = splitq_flex_rx_desc->status_err0_qw1 & > + BIT(VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_HBO_S); > + > + if (unlikely(hbo)) { > + rxq->vport->port_stats.rx_hsplit_hbo++; > + goto bypass_hsplit; > + } > + > + hdr_len = > + le16_to_cpu(splitq_flex_rx_desc->hdrlen_flags) & > + VIRTCHNL2_RX_FLEX_DESC_ADV_LEN_HDR_M; > + > +bypass_hsplit: > + bufq_id = le16_to_cpu(splitq_flex_rx_desc->pktlen_gen_bufq_id); > + bufq_id = (bufq_id & VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_M) >> > + VIRTCHNL2_RX_FLEX_DESC_ADV_BUFQ_ID_S; > + > + rxq_set = container_of(rxq, struct iecm_rxq_set, rxq); > + if (!bufq_id) > + refillq = rxq_set->refillq0; > + else > + refillq = rxq_set->refillq1; > + > + /* retrieve buffer from the rxq */ > + rx_bufq = 
&rxq->rxq_grp->splitq.bufq_sets[bufq_id].bufq; > + > + buf_id = le16_to_cpu(splitq_flex_rx_desc->buf_id); > + > + if (pkt_len) { > + rx_buf = &rx_bufq->rx_buf.buf[buf_id]; > + iecm_rx_get_buf_page(rx_bufq->dev, rx_buf, pkt_len); > + } > + > + if (hdr_len) { > + hdr_buf = rx_bufq->rx_buf.hdr_buf[buf_id]; > + > + dma_sync_single_for_cpu(rxq->dev, hdr_buf->pa, hdr_buf->size, > + DMA_FROM_DEVICE); > + > + skb = iecm_rx_hdr_construct_skb(rxq, hdr_buf, hdr_len); > + rxq->vport->port_stats.rx_hsplit++; > + } > + > + if (pkt_len) { > + if (skb) > + iecm_rx_add_frag(rx_buf, skb, pkt_len); > + else > + skb = iecm_rx_construct_skb(rxq, rx_buf, > + pkt_len); > + } > + > + /* exit if we failed to retrieve a buffer */ > + if (!skb) { > + /* If we fetched a buffer, but didn't use it > + * undo pagecnt_bias decrement > + */ > + if (rx_buf) > + rx_buf->page_info[rx_buf->page_indx].pagecnt_bias++; Line of 85 cols here. > + break; > + } > + > + if (rx_buf) > + iecm_rx_splitq_recycle_buf(rx_bufq, rx_buf); > + iecm_rx_post_buf_refill(refillq, buf_id); > + > + iecm_rx_bump_ntc(rxq); > + /* skip if it is non EOP desc */ > + if (iecm_rx_splitq_is_non_eop(splitq_flex_rx_desc)) > + continue; > + > + /* extract vlan tag from the descriptor */ > + if (unlikely(iecm_rx_splitq_extract_vlan_tag(splitq_flex_rx_desc, > + rxq, &vlan_tag))) { 81 and 81 respectively. > + dev_kfree_skb_any(skb); > + skb = NULL; > + continue; > + } > + > + /* pad skb if needed (to make valid ethernet frame) */ > + if (eth_skb_pad(skb)) { > + skb = NULL; > + continue; > + } > + > + /* probably a little skewed due to removing CRC */ > + total_rx_bytes += skb->len; > + > + /* protocol */ > + if (unlikely(iecm_rx_process_skb_fields(rxq, skb, > + splitq_flex_rx_desc))) { You can define variables with shorter names to avoid this. > + dev_kfree_skb_any(skb); > + skb = NULL; > + continue; > + } > + > + /* send completed skb up the stack */ > + iecm_rx_skb(rxq, skb, vlan_tag); > + skb = NULL; > + > + /* update budget accounting */ > + total_rx_pkts++; > + } > + rxq->skb = skb; > + u64_stats_update_begin(&rxq->stats_sync); > + rxq->q_stats.rx.packets += total_rx_pkts; > + rxq->q_stats.rx.bytes += total_rx_bytes; > + u64_stats_update_end(&rxq->stats_sync); > + > + /* guarantee a trip back through this routine if there was a failure */ > + return unlikely(failure) ? budget : total_rx_pkts; > +} > + > +/** > + * iecm_rx_update_bufq_desc - Update buffer queue descriptor > + * @bufq: Pointer to the buffer queue > + * @desc: Refill queue descriptor > + * @buf_desc: Buffer queue descriptor > + * > + * Return 0 on success and negative on failure. > + */ > +static int iecm_rx_update_bufq_desc(struct iecm_queue *bufq, u16 *desc, > + struct virtchnl2_splitq_rx_buf_desc *buf_desc) > +{ > + struct iecm_page_info *page_info; > + struct iecm_rx_buf *buf; > + u16 buf_id; > + > + buf_id = ((*desc) & IECM_RX_BI_BUFID_M) >> IECM_RX_BI_BUFID_S; FIELD_GET(). > + > + buf = &bufq->rx_buf.buf[buf_id]; > + page_info = &buf->page_info[buf->page_indx]; > + > + /* It's possible page alloc failed during rxq clean, try to > + * recover here. > + */ > + if (unlikely(!page_info->page)) { > + if (iecm_alloc_page(bufq, page_info)) > + return -ENOMEM; > + } if (1 && 2). 
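I.e.:

	if (unlikely(!page_info->page) &&
	    iecm_alloc_page(bufq, page_info))
		return -ENOMEM;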
> + buf_desc->pkt_addr = cpu_to_le64(page_info->dma + page_info->page_offset); > + buf_desc->qword0.buf_id = cpu_to_le16(buf_id); > + > + if (bufq->rx_hsplit_en) { > + struct iecm_dma_mem *hdr_buf = bufq->rx_buf.hdr_buf[buf_id]; > + > + buf_desc->hdr_addr = cpu_to_le64(hdr_buf->pa); > + dma_sync_single_for_device(bufq->dev, hdr_buf->pa, > + hdr_buf->size, DMA_FROM_DEVICE); > + } if (!hsplit_en) return 0; hdr_buf = ... > + > + return 0; > +} > + > +/** > + * iecm_rx_clean_refillq - Clean refill queue buffers > + * @bufq: buffer queue to post buffers back to > + * @refillq: refill queue to clean > + * > + * This function takes care of the buffer refill management > + */ > +static void iecm_rx_clean_refillq(struct iecm_queue *bufq, > + struct iecm_sw_queue *refillq) > +{ > + struct virtchnl2_splitq_rx_buf_desc *buf_desc; > + u16 refillq_ntc = refillq->next_to_clean; > + u16 bufq_nta = bufq->next_to_alloc; > + u16 *refill_desc; > + int cleaned = 0; > + u16 gen; > + > + refill_desc = IECM_SPLITQ_RX_BI_DESC(refillq, refillq_ntc); > + buf_desc = IECM_SPLITQ_RX_BUF_DESC(bufq, bufq_nta); > + > + /* make sure we stop at ring wrap in the unlikely case ring is full */ > + while (likely(cleaned < refillq->desc_count)) { > + bool failure; > + > + gen = ((*refill_desc) & IECM_RX_BI_GEN_M) >> IECM_RX_BI_GEN_S; FIELD_GET(). > + if (test_bit(__IECM_RFLQ_GEN_CHK, refillq->flags) != gen) > + break; > + > + failure = iecm_rx_update_bufq_desc(bufq, refill_desc, > + buf_desc); > + if (failure) > + break; > + > + refillq_ntc++; > + refill_desc++; > + bufq_nta++; > + buf_desc++; > + cleaned++; > + > + if (unlikely(refillq_ntc == refillq->desc_count)) { > + change_bit(__IECM_RFLQ_GEN_CHK, refillq->flags); > + refill_desc = IECM_SPLITQ_RX_BI_DESC(refillq, 0); > + refillq_ntc = 0; > + } > + if (unlikely(bufq_nta == bufq->desc_count)) { > + buf_desc = IECM_SPLITQ_RX_BUF_DESC(bufq, 0); > + bufq_nta = 0; > + } > + } > + > + if (cleaned) { > + /* only update hardware tail in strides */ > + if (((bufq->next_to_use <= bufq_nta ? 0 : bufq->desc_count) + > + bufq_nta - bufq->next_to_use) >= bufq->rx_buf_stride) > + iecm_rx_buf_hw_update(bufq, bufq_nta & ~(bufq->rx_buf_stride - 1)); 92 chars. > + > + /* update next to alloc since we have filled the ring */ > + refillq->next_to_clean = refillq_ntc; > + bufq->next_to_alloc = bufq_nta; > + } > +} > + > +/** > + * iecm_rx_clean_refillq_all - Clean all refill queues > + * @bufq: bufq with refillqs to clean > + * > + * Iterates through all refill queues assigned to the buffer queue assigned to > + * this vector. Returns true if clean is complete within budget, false > + * otherwise. 
> + */ > +static void iecm_rx_clean_refillq_all(struct iecm_queue *bufq) > +{ > + struct iecm_bufq_set *bufq_set; > + int i = 0; > + > + bufq_set = container_of(bufq, struct iecm_bufq_set, bufq); > + for (i = 0; i < bufq_set->num_refillqs; i++) > + iecm_rx_clean_refillq(bufq, &bufq_set->refillqs[i]); > +} > + > /** > * iecm_vport_intr_clean_queues - MSIX mode Interrupt Handler > * @irq: interrupt number > @@ -2742,6 +4122,64 @@ iecm_vport_intr_napi_ena_all(struct iecm_vport *vport) > } > } > > +/** > + * iecm_tx_splitq_clean_all- Clean completetion queues > + * @q_vec: queue vector > + * @budget: Used to determine if we are in netpoll > + * > + * Returns false if clean is not complete else returns true > + */ > +static bool > +iecm_tx_splitq_clean_all(struct iecm_q_vector *q_vec, int budget) > +{ > + bool clean_complete = true; > + int i, budget_per_q; > + > + budget_per_q = max(budget / q_vec->num_txq, 1); > + for (i = 0; i < q_vec->num_txq; i++) { > + if (!iecm_tx_clean_complq(q_vec->tx[i], budget_per_q)) > + clean_complete = false; > + } > + return clean_complete; > +} > + > +/** > + * iecm_rx_splitq_clean_all- Clean completetion queues > + * @q_vec: queue vector > + * @budget: Used to determine if we are in netpoll > + * @cleaned: returns number of packets cleaned > + * > + * Returns false if clean is not complete else returns true > + */ > +static bool > +iecm_rx_splitq_clean_all(struct iecm_q_vector *q_vec, int budget, > + int *cleaned) > +{ > + bool clean_complete = true; > + int pkts_cleaned = 0; > + int i, budget_per_q; > + > + budget_per_q = max(budget / q_vec->num_rxq, 1); > + for (i = 0; i < q_vec->num_rxq; i++) { > + struct iecm_queue *rxq = q_vec->rx[i]; > + int pkts_cleaned_per_q; > + > + pkts_cleaned_per_q = iecm_rx_splitq_clean(rxq, budget_per_q); > + /* if we clean as many as budgeted, we must not > + * be done > + */ > + if (pkts_cleaned_per_q >= budget_per_q) > + clean_complete = false; > + pkts_cleaned += pkts_cleaned_per_q; > + } > + *cleaned = pkts_cleaned; > + > + for (i = 0; i < q_vec->num_bufq; i++) > + iecm_rx_clean_refillq_all(q_vec->bufq[i]); > + > + return clean_complete; > +} > + > /** > * iecm_vport_splitq_napi_poll - NAPI handler > * @napi: struct from which you get q_vector > @@ -2749,8 +4187,34 @@ iecm_vport_intr_napi_ena_all(struct iecm_vport *vport) > */ > static int iecm_vport_splitq_napi_poll(struct napi_struct *napi, int budget) > { > - /* stub */ > - return 0; > + struct iecm_q_vector *q_vector = > + container_of(napi, struct iecm_q_vector, napi); You can assign it later (before clean_complete assignment) to avoid this. > + bool clean_complete; > + int work_done = 0; > + > + clean_complete = iecm_tx_splitq_clean_all(q_vector, budget); > + > + /* Handle case where we are called by netpoll with a budget of 0 */ > + if (budget <= 0) > + return budget; > + > + /* We attempt to distribute budget to each Rx queue fairly, but don't > + * allow the budget to go below 1 because that would exit polling early. 
> + */ > + clean_complete |= iecm_rx_splitq_clean_all(q_vector, budget, > + &work_done); > + > + /* If work not completed, return budget and polling will return */ > + if (!clean_complete) > + return budget; > + > + /* Exit the polling mode, but don't re-enable interrupts if stack might > + * poll us due to busy-polling > + */ > + if (likely(napi_complete_done(napi, work_done))) > + iecm_vport_intr_update_itr_ena_irq(q_vector); > + > + return min_t(int, work_done, budget - 1); > } > > /** > diff --git a/drivers/net/ethernet/intel/include/iecm.h b/drivers/net/ethernet/intel/include/iecm.h > index a655e797f457..3cf2a2f0cb0f 100644 > --- a/drivers/net/ethernet/intel/include/iecm.h > +++ b/drivers/net/ethernet/intel/include/iecm.h > @@ -12,6 +12,10 @@ > #include > #include > #include > +#include > +#include > +#include > +#include > #include > #include > > diff --git a/drivers/net/ethernet/intel/include/iecm_txrx.h b/drivers/net/ethernet/intel/include/iecm_txrx.h > index 7ec742fd4c6b..086b0efc989a 100644 > --- a/drivers/net/ethernet/intel/include/iecm_txrx.h > +++ b/drivers/net/ethernet/intel/include/iecm_txrx.h > @@ -638,6 +638,7 @@ void iecm_vport_calc_total_qs(struct iecm_adapter *adapter, > struct virtchnl2_create_vport *vport_msg); > void iecm_vport_calc_num_q_groups(struct iecm_vport *vport); > int iecm_vport_queues_alloc(struct iecm_vport *vport); > +void iecm_rx_post_buf_refill(struct iecm_sw_queue *refillq, u16 buf_id); > void iecm_vport_queues_rel(struct iecm_vport *vport); > void iecm_vport_calc_num_q_vec(struct iecm_vport *vport); > void iecm_vport_intr_rel(struct iecm_vport *vport); > @@ -650,14 +651,33 @@ int iecm_vport_intr_init(struct iecm_vport *vport); > irqreturn_t > iecm_vport_intr_clean_queues(int __always_unused irq, void *data); > void iecm_vport_intr_ena_irq_all(struct iecm_vport *vport); > +enum > +pkt_hash_types iecm_ptype_to_htype(struct iecm_rx_ptype_decoded *decoded); > int iecm_config_rss(struct iecm_vport *vport); > void iecm_fill_dflt_rss_lut(struct iecm_vport *vport); > int iecm_init_rss(struct iecm_vport *vport); > void iecm_deinit_rss(struct iecm_vport *vport); > +bool iecm_rx_can_reuse_page(struct iecm_rx_buf *rx_buf); > +void iecm_rx_buf_adjust_pg(struct iecm_rx_buf *rx_buf, unsigned int size); > +void iecm_rx_add_frag(struct iecm_rx_buf *rx_buf, struct sk_buff *skb, > + unsigned int size); > +struct sk_buff *iecm_rx_construct_skb(struct iecm_queue *rxq, > + struct iecm_rx_buf *rx_buf, > + unsigned int size); > +void iecm_rx_skb(struct iecm_queue *rxq, struct sk_buff *skb, u16 vlan_tag); > bool iecm_init_rx_buf_hw_alloc(struct iecm_queue *rxq, struct iecm_rx_buf *buf); > void iecm_rx_buf_hw_update(struct iecm_queue *rxq, u32 val); > void iecm_tx_buf_hw_update(struct iecm_queue *tx_q, u32 val, > bool xmit_more); > +void iecm_rx_splitq_put_bufs(struct iecm_queue *rx_bufq, > + struct iecm_rx_buf *hdr_buf, > + struct iecm_rx_buf *rx_buf); > +bool iecm_rx_splitq_test_staterr(u8 stat_err_field, const u8 stat_err_bits); > +int iecm_rx_process_skb_fields(struct iecm_queue *rxq, struct sk_buff *skb, > + struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc); > +bool iecm_rx_splitq_extract_vlan_tag(struct virtchnl2_rx_flex_desc_adv_nic_3 *desc, > + struct iecm_queue *rxq, u16 *vlan_tag); > +void iecm_rx_bump_ntc(struct iecm_queue *q); > void iecm_tx_buf_rel(struct iecm_queue *tx_q, struct iecm_tx_buf *tx_buf); > unsigned int iecm_size_to_txd_count(unsigned int size); > unsigned int iecm_tx_desc_count_required(struct sk_buff *skb); > -- > 2.33.0 Thanks, Al