Re: [PATCH v1] ixgbe: remove vector pmd burst size restriction

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Zoltan Kiss <zoltan.kiss@linaro.org>
To: "Ananyev, Konstantin" <konstantin.ananyev@intel.com>,
	"Liang, Cunming" <cunming.liang@intel.com>,
	"dev@dpdk.org" <dev@dpdk.org>
Subject: Re: [PATCH v1] ixgbe: remove vector pmd burst size restriction
Date: Fri, 31 Jul 2015 12:57:23 +0100	[thread overview]
Message-ID: <55BB62A3.2040906@linaro.org> (raw)
In-Reply-To: <2601191342CEEE43887BDE71AB97725836A6B79C@irsmsx105.ger.corp.intel.com>



On 31/07/15 11:21, Ananyev, Konstantin wrote:
>
>
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Zoltan Kiss
>> Sent: Friday, July 31, 2015 11:04 AM
>> To: Liang, Cunming; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v1] ixgbe: remove vector pmd burst size restriction
>>
>>
>>
>> On 31/07/15 09:17, Cunming Liang wrote:
>>> The patch removes the restriction of burst size on a constant 32.
>>>
>>> On receive side, the burst size floor now aligns to RTE_IXGBE_DESCS_PER_LOOP power of 2.
>>> According to this rule, the burst size less than 4 still won't receive anything.
>>> (Before this change, the burst size less than 32 can't receive anything.)
>>>
>>> On transmit side, the max burst size no longer bind with a constant, however it still
>>> require to check the cross tx_rs_thresh violation.
>>>
>>> There's no obvious performance drop found on both recv_pkts_vec
>>> and recv_scattered_pkts_vec on burst size 32.
>>>
>>> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
>>> ---
>>>    drivers/net/ixgbe/ixgbe_rxtx.c     |  3 ++-
>>>    drivers/net/ixgbe/ixgbe_rxtx.h     |  4 +---
>>>    drivers/net/ixgbe/ixgbe_rxtx_vec.c | 35 ++++++++++++++++-------------------
>>>    3 files changed, 19 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
>>> index 3f808b3..dbdb761 100644
>>> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
>>> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
>>> @@ -4008,7 +4008,8 @@ ixgbe_set_rx_function(struct rte_eth_dev *dev)
>>>    	 */
>>>    	} else if (adapter->rx_vec_allowed) {
>>>    		PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
>>> -				   "burst size no less than 32.");
>>> +				    "burst size no less than 4 (port=%d).",
>>> +			     dev->data->port_id);
>>
>> I think it would be better to use RTE_IXGBE_DESCS_PER_LOOP instead of a
>> constant 4.
>>>
>>>    		dev->rx_pkt_burst = ixgbe_recv_pkts_vec;
>>>    	} else if (adapter->rx_bulk_alloc_allowed) {
>>> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
>>> index 113682a..eb931fe 100644
>>> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
>>> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
>>> @@ -47,9 +47,7 @@
>>>    	(uint64_t) ((mb)->buf_physaddr + RTE_PKTMBUF_HEADROOM)
>>>
>>>    #ifdef RTE_IXGBE_INC_VECTOR
>>> -#define RTE_IXGBE_VPMD_RX_BURST         32
>>> -#define RTE_IXGBE_VPMD_TX_BURST         32
>>> -#define RTE_IXGBE_RXQ_REARM_THRESH      RTE_IXGBE_VPMD_RX_BURST
>>> +#define RTE_IXGBE_RXQ_REARM_THRESH      32
>>>    #define RTE_IXGBE_TX_MAX_FREE_BUF_SZ    64
>>>    #endif
>>>
>>> diff --git a/drivers/net/ixgbe/ixgbe_rxtx_vec.c b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
>>> index 1c16dec..b72f817 100644
>>> --- a/drivers/net/ixgbe/ixgbe_rxtx_vec.c
>>> +++ b/drivers/net/ixgbe/ixgbe_rxtx_vec.c
>>> @@ -199,13 +199,11 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)
>>>    #endif
>>>
>>>    /*
>>> - * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
>>> - * in one loop
>>> + * vPMD raw receive routine
>> I would keep some warning there, like "(if nb_pkts <
>> RTE_IXGBE_DESCS_PER_LOOP, won't receive anything)"
>>
>>>     *
>>>     * Notice:
>>> - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
>>> - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
>>> - *   numbers of DD bit
>>> + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
>>> + * - 'nb_pkts < 4' causes 0 packet receiving
>> Again, RTE_IXGBE_DESCS_PER_LOOP would be better than 4
>>
>>>     * - don't support ol_flags for rss and csum err
>>>     */
>>>    static inline uint16_t
>>> @@ -240,8 +238,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>>>    	__m128i dd_check, eop_check;
>>>    #endif /* RTE_NEXT_ABI */
>>>
>>> -	if (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))
>>> -		return 0;
>>> +	nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_IXGBE_DESCS_PER_LOOP);
>>>
>>>    	/* Just the act of getting into the function from the application is
>>>    	 * going to cost about 7 cycles */
>>> @@ -310,7 +307,7 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>>>    	 * D. fill info. from desc to mbuf
>>>    	 */
>>>    #endif /* RTE_NEXT_ABI */
>>> -	for (pos = 0, nb_pkts_recd = 0; pos < RTE_IXGBE_VPMD_RX_BURST;
>>> +	for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;
>>>    			pos += RTE_IXGBE_DESCS_PER_LOOP,
>>>    			rxdp += RTE_IXGBE_DESCS_PER_LOOP) {
>>>    		__m128i descs[RTE_IXGBE_DESCS_PER_LOOP];
>>> @@ -450,13 +447,11 @@ _recv_raw_pkts_vec(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>>>    }
>>>
>>>    /*
>>> - * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
>>> - * in one loop
>>> + * vPMD receive routine
>> Same note here as above
>>
>>>     *
>>>     * Notice:
>>> - * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet
>>> - * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST
>>> - *   numbers of DD bit
>>> + * - floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
>>> + * - 'nb_pkts < 4' causes 0 packet receiving
>>>     * - don't support ol_flags for rss and csum err
>>>     */
>>>    uint16_t
>>> @@ -470,12 +465,11 @@ static inline uint16_t
>>>    reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
>>>    		uint16_t nb_bufs, uint8_t *split_flags)
>>>    {
>>> -	struct rte_mbuf *pkts[RTE_IXGBE_VPMD_RX_BURST]; /*finished pkts*/
>>> +	struct rte_mbuf *pkts[nb_bufs]; /*finished pkts*/
>>>    	struct rte_mbuf *start = rxq->pkt_first_seg;
>>>    	struct rte_mbuf *end =  rxq->pkt_last_seg;
>>>    	unsigned pkt_idx, buf_idx;
>>>
>>> -
>>>    	for (buf_idx = 0, pkt_idx = 0; buf_idx < nb_bufs; buf_idx++) {
>>>    		if (end != NULL) {
>>>    			/* processing a split packet */
>>> @@ -535,14 +529,17 @@ reassemble_packets(struct ixgbe_rx_queue *rxq, struct rte_mbuf **rx_bufs,
>>>     *
>>>     * Notice:
>>>     * - don't support ol_flags for rss and csum err
>>> - * - now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)
>>> + * - will floor align nb_pkts to a RTE_IXGBE_DESC_PER_LOOP power-of-two
>>> + * - 'nb_pkts < 4' causes 0 packet receiving
>>>     */
>>>    uint16_t
>>>    ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>>>    		uint16_t nb_pkts)
>>>    {
>>>    	struct ixgbe_rx_queue *rxq = rx_queue;
>>> -	uint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};
>>> +	uint8_t split_flags[nb_pkts];
>>> +
>>> +	memset(split_flags, 0, nb_pkts);
>>>
>>>    	/* get some new buffers */
>>>    	uint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,
>>
>> After this _recv_raw_pkts_vec it checks 32 bytes in split_flags (4x8
>> bytes), that can overrun or miss some flags.
>> Btw. Bruce just fixed that part in "ixgbe: fix check for split packets"
>
> Ah yes, missed that when reviewing, that code would be broken if nb_bufs > 32:
>
>          const uint64_t *split_fl64 = (uint64_t *)split_flags;
>          if (rxq->pkt_first_seg == NULL &&
>                          split_fl64[0] == 0 && split_fl64[1] == 0 &&
>                          split_fl64[2] == 0 && split_fl64[3] == 0)
>                  return nb_bufs;
>
> right?

Yes. And if nb_bufs < (32 -RTE_IXGBE_DESCS_PER_LOOP), it would address 
into memory outside the array.
>
> Another thing, that I just thought about:
> Right now we invoke ixgbe_rxq_rearm() only at the start of _recv_raw_pkts_vec().
> Before it was ok, as _recv_raw_pkts_vec() would never try to read more then 32 RXDs.
> But what would happen if nb_pkts > rxq->nb_desc and rxq->rxrearm_nb == 0?
Yes, that call would deplete the RX ring, the card wouldn't be able to 
receive more, so the receive function wouldn't be called again to rearm 
the ring.

> I suppose,  _recv_raw_pkts_vec() can wrpa around RXD ring and 'receive' same packet twice?
> So we probably better still limit nb_pkts <= 32 at _recv_raw_pkts_vec().
I agree we should limit nb_pkts. To avoid the above problem it would be 
enough to limit it to (rxq->nb_desc - 1), but I don't know if there is 
another factor here we should consider.
>
> Konstantin
>
>>
>>
>>> @@ -667,8 +664,8 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
>>>    	uint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;
>>>    	int i;
>>>
>>> -	if (unlikely(nb_pkts > RTE_IXGBE_VPMD_TX_BURST))
>>> -		nb_pkts = RTE_IXGBE_VPMD_TX_BURST;
>>> +	/* cross rx_thresh boundary is not allowed */
>>> +	nb_pkts = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
>>>
>>>    	if (txq->nb_tx_free < txq->tx_free_thresh)
>>>    		ixgbe_tx_free_bufs(txq);
>>>

next prev parent reply	other threads:[~2015-07-31 11:57 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-07-31  8:17 [PATCH v1] ixgbe: remove vector pmd burst size restriction Cunming Liang
2015-07-31  9:21 ` Ananyev, Konstantin
2015-07-31 10:03 ` Zoltan Kiss
2015-07-31 10:21   ` Ananyev, Konstantin
2015-07-31 11:57     ` Zoltan Kiss [this message]
2015-07-31 14:49       ` Zoltan Kiss
2015-08-03  7:41     ` Liang, Cunming
2015-08-03  9:59       ` Liang, Cunming
2015-08-03  2:40   ` Liang, Cunming
2015-08-04  7:32 ` [PATCH v2] " Cunming Liang
2015-08-04  9:02   ` Zoltan Kiss
2015-08-04 11:15     ` Liang, Cunming
2015-08-04 11:47   ` [PATCH v3] " Cunming Liang
2015-08-04 16:26     ` Zoltan Kiss
2015-08-05  6:28       ` Liang, Cunming
2015-08-05 15:59         ` Zoltan Kiss
2015-08-05  9:31     ` Ananyev, Konstantin
2015-09-09 13:16       ` Thomas Monjalon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=55BB62A3.2040906@linaro.org \
    --to=zoltan.kiss@linaro.org \
    --cc=cunming.liang@intel.com \
    --cc=dev@dpdk.org \
    --cc=konstantin.ananyev@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.