From: Ilya Matveychikov
Subject: Re: A question about (poor) rte_ethdev internal rx/tx callbacks design
Date: Mon, 13 Nov 2017 23:33:44 +0400
Message-ID: <2FF46D73-66D4-4E6B-8509-DD0CEEFF12D3@gmail.com>
References: <2259E047-80C0-40AC-AAF4-F21617605508@gmail.com> <20171113103927.GP24849@6wind.com> <5F2502C3-99CF-4BE1-9DEC-364C5E636061@gmail.com> <20171113171512.GV24849@6wind.com>
In-Reply-To: <20171113171512.GV24849@6wind.com>
To: Adrien Mazarguil
Cc: dev@dpdk.org
List-Id: DPDK patches and discussions

> On Nov 13, 2017, at 9:15 PM, Adrien Mazarguil wrote:
>
> On Mon, Nov 13, 2017 at 02:56:23PM +0400, Ilya Matveychikov wrote:
>>
>>> On Nov 13, 2017, at 2:39 PM, Adrien Mazarguil wrote:
>>>
>>> On Sat, Nov 11, 2017 at 09:18:45PM +0400, Ilya Matveychikov wrote:
>>>> Folks,
>>>>
>>>> Are you serious with it:
>>>>
>>>> typedef uint16_t (*eth_rx_burst_t)(void *rxq,
>>>>                                    struct rte_mbuf **rx_pkts,
>>>>                                    uint16_t nb_pkts);
>>>> typedef uint16_t (*eth_tx_burst_t)(void *txq,
>>>>                                    struct rte_mbuf **tx_pkts,
>>>>                                    uint16_t nb_pkts);
>>>>
>>>> I'm not surprised that every PMD stores port_id in each and every queue,
>>>> as having just the queue as an argument doesn't allow one to get the device.
>>>> So the question is: why not use something like:
>>>>
>>>> typedef uint16_t (*eth_rx_burst_t)(void *dev, uint16_t queue_id,
>>>>                                    struct rte_mbuf **rx_pkts,
>>>>                                    uint16_t nb_pkts);
>>>> typedef uint16_t (*eth_tx_burst_t)(void *dev, uint16_t queue_id,
>>>>                                    struct rte_mbuf **tx_pkts,
>>>>                                    uint16_t nb_pkts);
>>>
>>> I assume it's because the rte_eth_[rt]x_burst() wrappers already pay the
>>> price for that indirection; doing it twice would be redundant.
>>
>> No need to do it twice, agreed. We can pass the dev pointer as well as the
>> queue, not just the queue's index.
>>
>>>
>>> Basically the cost of storing a back-pointer to dev or a queue index in
>>> each Rx/Tx queue structure is minor compared to saving a couple of CPU
>>> cycles wherever we can.
>>
>> Not sure about that. More data to store means more cache space occupied.
>> Note that every queue has at least 4 bytes more than it actually needs,
>> and RTE_MAX_QUEUES_PER_PORT defaults to 1024, so we may have 4k extra for
>> each port...
>
> Note that queues are only allocated if requested by the application;
> there's really not much overhead involved.

Yeah, mostly you are right here.

> Also, to echo Konstantin's reply and clarify mine, PMDs normally do not
> access this structure from their data plane. This pointer, if needed, is
> normally stored away from hot regions accessed during TX/RX, usually at
> the end of TX/RX structures and only for the convenience of management
> operations. It therefore has no measurable impact on the CPU cache.

I did some research on how the drivers implement their rx/tx queues, and now
I want to share the information and some thoughts about it (see the details
at the end):

1) All drivers define their tx/rx queues as structures.
2) The current design implies that passing an opaque rx/tx queue pointer to
   the driver is enough, and frankly speaking it is. But...
3) Most drivers want not only the queue pointer but at least queue_id and
   port_id as well, and most of them also want a pointer to the device's
   internal data.

The way each driver solves issue (3) is data duplication. In other words,
every queue carries this information (queue_id, port_id and a dev_priv
pointer) inside. My question was, and still is, about this design. I'm not
sure it's the best way to do it, keeping in mind that queue_id could be
computed by pointer difference and port_id could be stored just once per
device. But that would require changing the internal interface, sure.

And as I promised, here is the result of the research on rx/tx queues:

drivers/net/af_packet:
	struct pkt_rx_queue { in_port }
drivers/net/ark:
	struct ark_rx_queue { queue_index }
	struct ark_tx_queue { queue_index }
drivers/net/avp:
	struct avp_queue { avp*, dev_data*, queue_id }
drivers/net/bnx2x:
	struct bnx2x_rx_queue { queue_id, port_id, sc* }
	struct bnx2x_tx_queue { queue_id, port_id, sc* }
drivers/net/bnxt:
	struct bnxt_rx_queue { queue_id, port_id, *bp } (rx_tpa is unused)
	struct bnxt_tx_queue { queue_id, port_id, *bp }
drivers/net/bonding:
	struct bond_rx_queue { queue_id, dev_private* }
	struct bond_tx_queue { queue_id, dev_private* }
drivers/net/cxgbe:
	struct sge_eth_rxq { sge_rspq { adapter*, eth_dev*, port_id, idx } }
	struct sge_eth_txq { eth_dev* }
drivers/net/dpaa:
	struct qman_fq { dpaa_intf* }
drivers/net/dpaa2:
	struct dpaa2_queue { dev* }
drivers/net/e1000:
	struct em_rx_queue { queue_id, port_id }
	struct em_tx_queue { queue_id, port_id }
drivers/net/ena:
	struct ena_ring { port_id, id, adapter* }
drivers/net/enic:
	struct vnic_wq { index, *vdev }
drivers/net/failsafe:
	struct rxq { priv*, qid }
	struct txq { priv*, qid }
drivers/net/fm10k:
	struct fm10k_rx_queue { queue_id, port_id }
	struct fm10k_tx_queue { queue_id, port_id }
drivers/net/i40e:
	struct i40e_rx_queue { queue_id, port_id, vsi* }
	struct i40e_tx_queue { queue_id, port_id, vsi* }
drivers/net/ixgbe:
	struct ixgbe_rx_queue { queue_id, port_id }
	struct ixgbe_tx_queue { queue_id, port_id }
drivers/net/kni:
	struct pmd_queue { internals* }
drivers/net/liquidio:
	struct lio_droq { q_no, lio_dev* }
	struct lio_instr_queue { lio_dev*, q_index }
drivers/net/mlx4:
	struct rxq { priv*, port_id }
	struct txq { priv* }
drivers/net/mlx5:
	struct mlx5_rxq_data { port_id }
drivers/net/mrvl:
	struct mrvl_rxq { queue_id, port_id, priv* }
	struct mrvl_txq { queue_id, port_id, priv* }
drivers/net/nfp:
	struct nfp_net_rxq { hw*, port_id, qidx }
	struct nfp_net_txq { hw*, port_id, qidx }
drivers/net/null:
	struct null_queue { internals* }
drivers/net/octeontx:
	struct octeontx_rxq { queue_id, port_id, eth_dev* }
	struct octeontx_txq { queue_id, eth_dev* }
drivers/net/pcap:
	struct pcap_rx_queue { in_port }
drivers/net/qede:
	struct qede_rx_queue { queue_id, port_id, *qdev }
	struct qede_tx_queue { queue_id, port_id, *qdev }
drivers/net/sfc:
	struct sfc_ef10_rxq { dp { queue_id, port_id } }
	struct sfc_ef10_txq { dp { queue_id, port_id } }
drivers/net/softnic:
	struct pmd_rx_queue { port_id, queue_id }
drivers/net/szedata2:
	struct szedata2_rx_queue { rx_channel, port_id }
	struct szedata2_tx_queue { tx_channel }
drivers/net/tap:
	struct rx_queue { port_id }
drivers/net/thunderx:
	struct nicvf_rxq { nic*, queue_id, port_id }
	struct nicvf_txq { nic*, queue_id }
drivers/net/vhost:
	struct vhost_queue { vid, internal* }
drivers/net/virtio:
	struct virtqueue { hw* }
	struct virtnet_rx { queue_id, port_id }
	struct virtnet_tx { queue_id, port_id }
drivers/net/vmxnet3:
	struct vmxnet3_rx_queue { hw*, queue_id, port_id }
	struct vmxnet3_tx_queue { hw*, queue_id, port_id }