Re: [PATCH v27 15/20] net/mlx5e: NVMEoTCP, use KLM UMRs for buffer registration

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Simon Horman <horms@kernel.org>
To: Aurelien Aptel <aaptel@nvidia.com>
Cc: linux-nvme@lists.infradead.org, netdev@vger.kernel.org,
	sagi@grimberg.me, hch@lst.de, kbusch@kernel.org, axboe@fb.com,
	chaitanyak@nvidia.com, davem@davemloft.net, kuba@kernel.org,
	aurelien.aptel@gmail.com, smalin@nvidia.com, malin1024@gmail.com,
	ogerlitz@nvidia.com, yorayz@nvidia.com, borisp@nvidia.com,
	galshalom@nvidia.com, mgurtovoy@nvidia.com, tariqt@nvidia.com
Subject: Re: [PATCH v27 15/20] net/mlx5e: NVMEoTCP, use KLM UMRs for buffer registration
Date: Tue, 4 Mar 2025 17:45:10 +0000	[thread overview]
Message-ID: <20250304174510.GI3666230@kernel.org> (raw)
In-Reply-To: <20250303095304.1534-16-aaptel@nvidia.com>

On Mon, Mar 03, 2025 at 09:52:59AM +0000, Aurelien Aptel wrote:
> From: Ben Ben-Ishay <benishay@nvidia.com>
> 
> NVMEoTCP offload uses buffer registration for DDP operation.
> Every request comprises an SG list that might consist of elements
> with multiple combinations of sizes, thus the appropriate way to perform
> buffer registration is with KLM UMRs.
> 
> UMR stands for user-mode memory registration; it is a mechanism to alter
> the address translation properties of an MKEY by posting a WorkQueueElement
> (aka WQE) on the send queue.
> 
> MKEY stands for memory key; MKEYs are used to describe a region in memory
> that can later be used by HW.
> 
> KLM stands for {Key, Length, MemVa}; a KLM_MKEY is an indirect MKEY that
> enables mapping multiple memory spaces with different sizes into a unified
> MKEY. A KLM UMR is a UMR that is used to update a KLM_MKEY.
> 
> Nothing needs to be done on memory registration completion, and this
> notification is expensive, so we add a wrapper to be able to ring the
> doorbell without generating one.
> 
> Signed-off-by: Ben Ben-Ishay <benishay@nvidia.com>
> Signed-off-by: Boris Pismenny <borisp@nvidia.com>
> Signed-off-by: Or Gerlitz <ogerlitz@nvidia.com>
> Signed-off-by: Yoray Zack <yorayz@nvidia.com>
> Signed-off-by: Aurelien Aptel <aaptel@nvidia.com>
> Reviewed-by: Tariq Toukan <tariqt@nvidia.com>

...

> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/nvmeotcp.c

...

> @@ -19,6 +20,120 @@ static const struct rhashtable_params rhash_queues = {
>  	.max_size = MAX_NUM_NVMEOTCP_QUEUES,
>  };
>  
> +static void
> +fill_nvmeotcp_klm_wqe(struct mlx5e_nvmeotcp_queue *queue, struct mlx5e_umr_wqe *wqe, u16 ccid,
> +		      u32 klm_entries, u16 klm_offset)
> +{
> +	struct scatterlist *sgl_mkey;
> +	u32 lkey, i;
> +
> +	lkey = queue->priv->mdev->mlx5e_res.hw_objs.mkey;
> +	for (i = 0; i < klm_entries; i++) {
> +		sgl_mkey = &queue->ccid_table[ccid].sgl[i + klm_offset];
> +		wqe->inline_klms[i].bcount = cpu_to_be32(sg_dma_len(sgl_mkey));
> +		wqe->inline_klms[i].key = cpu_to_be32(lkey);
> +		wqe->inline_klms[i].va = cpu_to_be64(sgl_mkey->dma_address);
> +	}
> +
> +	for (; i < ALIGN(klm_entries, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT); i++) {
> +		wqe->inline_klms[i].bcount = 0;
> +		wqe->inline_klms[i].key = 0;
> +		wqe->inline_klms[i].va = 0;
> +	}
> +}
> +
> +static void
> +build_nvmeotcp_klm_umr(struct mlx5e_nvmeotcp_queue *queue, struct mlx5e_umr_wqe *wqe,
> +		       u16 ccid, int klm_entries, u32 klm_offset, u32 len,
> +		       enum wqe_type klm_type)
> +{
> +	u32 id = (klm_type == KLM_UMR) ? queue->ccid_table[ccid].klm_mkey :
> +		 (mlx5e_tir_get_tirn(&queue->tir) << MLX5_WQE_CTRL_TIR_TIS_INDEX_SHIFT);
> +	u8 opc_mod = (klm_type == KLM_UMR) ? MLX5_CTRL_SEGMENT_OPC_MOD_UMR_UMR :
> +		MLX5_OPC_MOD_TRANSPORT_TIR_STATIC_PARAMS;
> +	u32 ds_cnt = MLX5E_KLM_UMR_DS_CNT(ALIGN(klm_entries, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT));
> +	struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl;
> +	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
> +	struct mlx5_mkey_seg *mkc = &wqe->mkc;

Hi Aurelien, all,

I think that the lines above...

> +	u32 sqn = queue->sq.sqn;
> +	u16 pc = queue->sq.pc;
> +
> +	cseg->opmod_idx_opcode = cpu_to_be32((pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
> +					     MLX5_OPCODE_UMR | (opc_mod) << 24);
> +	cseg->qpn_ds = cpu_to_be32((sqn << MLX5_WQE_CTRL_QPN_SHIFT) | ds_cnt);
> +	cseg->general_id = cpu_to_be32(id);
> +
> +	if (klm_type == KLM_UMR && !klm_offset) {
> +		ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_XLT_OCT_SIZE |
> +					       MLX5_MKEY_MASK_LEN | MLX5_MKEY_MASK_FREE);
> +		mkc->xlt_oct_size = cpu_to_be32(ALIGN(len, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT));
> +		mkc->len = cpu_to_be64(queue->ccid_table[ccid].size);
> +	}
> +
> +	ucseg->flags = MLX5_UMR_INLINE | MLX5_UMR_TRANSLATION_OFFSET_EN;
> +	ucseg->xlt_octowords = cpu_to_be16(ALIGN(klm_entries, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT));
> +	ucseg->xlt_offset = cpu_to_be16(klm_offset);
> +	fill_nvmeotcp_klm_wqe(queue, wqe, ccid, klm_entries, klm_offset);
> +}
> +
> +static void
> +mlx5e_nvmeotcp_fill_wi(struct mlx5e_icosq *sq, u32 wqebbs, u16 pi)
> +{
> +	struct mlx5e_icosq_wqe_info *wi = &sq->db.wqe_info[pi];
> +
> +	memset(wi, 0, sizeof(*wi));
> +
> +	wi->num_wqebbs = wqebbs;
> +	wi->wqe_type = MLX5E_ICOSQ_WQE_UMR_NVMEOTCP;
> +}
> +
> +static u32
> +post_klm_wqe(struct mlx5e_nvmeotcp_queue *queue,
> +	     enum wqe_type wqe_type,
> +	     u16 ccid,
> +	     u32 klm_length,
> +	     u32 klm_offset)
> +{
> +	struct mlx5e_icosq *sq = &queue->sq;
> +	u32 wqebbs, cur_klm_entries;
> +	struct mlx5e_umr_wqe *wqe;
> +	u16 pi, wqe_sz;
> +
> +	cur_klm_entries = min_t(int, queue->max_klms_per_wqe, klm_length - klm_offset);
> +	wqe_sz = MLX5E_KLM_UMR_WQE_SZ(ALIGN(cur_klm_entries, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT));
> +	wqebbs = DIV_ROUND_UP(wqe_sz, MLX5_SEND_WQE_BB);
> +	pi = mlx5e_icosq_get_next_pi(sq, wqebbs);
> +	wqe = MLX5E_NVMEOTCP_FETCH_KLM_WQE(sq, pi);
> +	mlx5e_nvmeotcp_fill_wi(sq, wqebbs, pi);
> +	build_nvmeotcp_klm_umr(queue, wqe, ccid, cur_klm_entries, klm_offset,
> +			       klm_length, wqe_type);
> +	sq->pc += wqebbs;
> +	sq->doorbell_cseg = &wqe->ctrl;

... and this one need (at least) to be updated for the following commit
which is now present in net-next:

bf08fd32cc55 ("net/mlx5e: Avoid a hundred -Wflex-array-member-not-at-end warnings")

> +	return cur_klm_entries;
> +}
> +

...

next prev parent reply	other threads:[~2025-03-04 20:29 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-03  9:52 [PATCH v27 00/20] nvme-tcp receive offloads Aurelien Aptel
2025-03-03  9:52 ` [PATCH v27 01/20] net: Introduce direct data placement tcp offload Aurelien Aptel
2025-03-03  9:52 ` [PATCH v27 02/20] netlink: add new family to manage ULP_DDP enablement and stats Aurelien Aptel
2025-03-03  9:52 ` [PATCH v27 03/20] iov_iter: skip copy if src == dst for direct data placement Aurelien Aptel
2025-03-03  9:52 ` [PATCH v27 04/20] net/tls,core: export get_netdev_for_sock Aurelien Aptel
2025-03-03  9:52 ` [PATCH v27 05/20] nvme-tcp: Add DDP offload control path Aurelien Aptel
2025-03-03  9:52 ` [PATCH v27 06/20] nvme-tcp: Add DDP data-path Aurelien Aptel
2025-03-03  9:52 ` [PATCH v27 07/20] nvme-tcp: RX DDGST offload Aurelien Aptel
2025-03-03  9:52 ` [PATCH v27 08/20] nvme-tcp: Deal with netdevice DOWN events Aurelien Aptel
2025-03-03  9:52 ` [PATCH v27 09/20] Documentation: add ULP DDP offload documentation Aurelien Aptel
2025-03-03  9:52 ` [PATCH v27 10/20] net/mlx5e: Rename from tls to transport static params Aurelien Aptel
2025-03-03  9:52 ` [PATCH v27 11/20] net/mlx5e: Refactor ico sq polling to get budget Aurelien Aptel
2025-03-03  9:52 ` [PATCH v27 12/20] net/mlx5: Add NVMEoTCP caps, HW bits, 128B CQE and enumerations Aurelien Aptel
2025-03-03  9:52 ` [PATCH v27 13/20] net/mlx5e: NVMEoTCP, offload initialization Aurelien Aptel
2025-03-03  9:52 ` [PATCH v27 14/20] net/mlx5e: TCP flow steering for nvme-tcp acceleration Aurelien Aptel
2025-03-03  9:52 ` [PATCH v27 15/20] net/mlx5e: NVMEoTCP, use KLM UMRs for buffer registration Aurelien Aptel
2025-03-04 17:45   ` Simon Horman [this message]
2025-03-26 11:28     ` Aurelien Aptel
2025-04-03  4:43       ` Christoph Hellwig
2025-04-04 10:12         ` Aurelien Aptel
2025-04-11 13:24         ` Aurelien Aptel
2025-04-13 21:34           ` Sagi Grimberg
2025-03-03  9:53 ` [PATCH v27 16/20] net/mlx5e: NVMEoTCP, queue init/teardown Aurelien Aptel
2025-03-03  9:53 ` [PATCH v27 17/20] net/mlx5e: NVMEoTCP, ddp setup and resync Aurelien Aptel
2025-03-03  9:53 ` [PATCH v27 18/20] net/mlx5e: NVMEoTCP, async ddp invalidation Aurelien Aptel
2025-03-03  9:53 ` [PATCH v27 19/20] net/mlx5e: NVMEoTCP, data-path for DDP+DDGST offload Aurelien Aptel
2025-03-03  9:53 ` [PATCH v27 20/20] net/mlx5e: NVMEoTCP, statistics Aurelien Aptel

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250304174510.GI3666230@kernel.org \
    --to=horms@kernel.org \
    --cc=aaptel@nvidia.com \
    --cc=aurelien.aptel@gmail.com \
    --cc=axboe@fb.com \
    --cc=borisp@nvidia.com \
    --cc=chaitanyak@nvidia.com \
    --cc=davem@davemloft.net \
    --cc=galshalom@nvidia.com \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=kuba@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=malin1024@gmail.com \
    --cc=mgurtovoy@nvidia.com \
    --cc=netdev@vger.kernel.org \
    --cc=ogerlitz@nvidia.com \
    --cc=sagi@grimberg.me \
    --cc=smalin@nvidia.com \
    --cc=tariqt@nvidia.com \
    --cc=yorayz@nvidia.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.