Linux-NVME Archive on lore.kernel.org
 help / color / mirror / Atom feed
From: Chao Leng <lengchao@huawei.com>
To: Max Gurtovoy <mgurtovoy@nvidia.com>, <linux-nvme@lists.infradead.org>
Cc: <hch@lst.de>, <sagi@grimberg.me>, <kbusch@kernel.org>
Subject: Re: [PATCH] nvme: add DIX support for nvme-rdma
Date: Mon, 29 Aug 2022 21:16:57 +0800	[thread overview]
Message-ID: <59dc78d8-c91d-6a7a-339e-b191cb77fd31@huawei.com> (raw)
In-Reply-To: <673f2871-3c0d-3910-c9b7-7cb4913c7cd5@nvidia.com>



On 2022/8/29 18:43, Max Gurtovoy wrote:
> hi,
> 
> On 8/29/2022 11:12 AM, Chao Leng wrote:
>> Some RDMA HBAs now support DIX-DIF: the path from the host buffer to the
>> host HBA uses DIX, and the path from the host HBA to the target uses DIF.
>> This patch adds DIX support for nvme-rdma.
>> Test results:
>> Performance is improved for all tested I/O sizes.
>> CPU usage for 256k I/Os is reduced by more than 20%.
> 
> You're adding a new PI guard type for 16bit mode only, aren't you ?
No. Currently DIX is only defined for 16-bit guard mode.
> 
> I don't think the subject and commit message is correct.
> 
> Can you attach to the commit message the table of performance numbers before and after the patch?
OK, I will provide the detailed performance numbers later.
> 
> You can mention that also in iser, if supported, the default is to use IP_CHECKSUM for DIX and not CRC.
According to the DIX definition, DIX uses IP_CHECKSUM.
To reduce CPU utilization, the end-to-end data integrity scheme for SCSI protocols is DIX-DIF when supported by hardware.
> 
> 
>>
>> Signed-off-by: Chao Leng <lengchao@huawei.com>
>> ---
>>   drivers/nvme/host/core.c | 19 ++++++++++++++-----
>>   drivers/nvme/host/nvme.h |  1 +
>>   drivers/nvme/host/rdma.c | 24 ++++++++++++++++--------
>>   3 files changed, 31 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>> index 2429b11eb9a8..7055d3b7b582 100644
>> --- a/drivers/nvme/host/core.c
>> +++ b/drivers/nvme/host/core.c
>> @@ -1623,7 +1623,7 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
>>   #ifdef CONFIG_BLK_DEV_INTEGRITY
>>   static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
>> -                u32 max_integrity_segments)
>> +                u32 max_integrity_segments, bool dix_support)
>>   {
>>       struct blk_integrity integrity = { };
>> @@ -1631,7 +1631,11 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
>>       case NVME_NS_DPS_PI_TYPE3:
>>           switch (ns->guard_type) {
>>           case NVME_NVM_NS_16B_GUARD:
>> -            integrity.profile = &t10_pi_type3_crc;
>> +            if (dix_support)
>> +                integrity.profile = &t10_pi_type3_ip;
>> +            else
>> +                integrity.profile = &t10_pi_type3_crc;
>> +
>>               integrity.tag_size = sizeof(u16) + sizeof(u32);
>>               integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
>>               break;
>> @@ -1649,7 +1653,11 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
>>       case NVME_NS_DPS_PI_TYPE2:
>>           switch (ns->guard_type) {
>>           case NVME_NVM_NS_16B_GUARD:
>> -            integrity.profile = &t10_pi_type1_crc;
>> +            if (dix_support)
>> +                integrity.profile = &t10_pi_type1_ip;
>> +            else
>> +                integrity.profile = &t10_pi_type1_crc;
>> +
>>               integrity.tag_size = sizeof(u16);
>>               integrity.flags |= BLK_INTEGRITY_DEVICE_CAPABLE;
>>               break;
>> @@ -1674,7 +1682,7 @@ static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
>>   }
>>   #else
>>   static void nvme_init_integrity(struct gendisk *disk, struct nvme_ns *ns,
>> -                u32 max_integrity_segments)
>> +                u32 max_integrity_segments, bool dix_support)
>>   {
>>   }
>>   #endif /* CONFIG_BLK_DEV_INTEGRITY */
>> @@ -1900,7 +1908,8 @@ static void nvme_update_disk_info(struct gendisk *disk,
>>           if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
>>               (ns->features & NVME_NS_METADATA_SUPPORTED))
>>               nvme_init_integrity(disk, ns,
>> -                        ns->ctrl->max_integrity_segments);
>> +                        ns->ctrl->max_integrity_segments,
>> +                        ns->ctrl->dix_support);
>>           else if (!nvme_ns_has_pi(ns))
>>               capacity = 0;
>>       }
>> diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
>> index bdc0ff7ed9ab..92cf93cf120b 100644
>> --- a/drivers/nvme/host/nvme.h
>> +++ b/drivers/nvme/host/nvme.h
>> @@ -276,6 +276,7 @@ struct nvme_ctrl {
>>       u32 max_hw_sectors;
>>       u32 max_segments;
>>       u32 max_integrity_segments;
>> +    bool dix_support;
>>       u32 max_discard_sectors;
>>       u32 max_discard_segments;
>>       u32 max_zeroes_sectors;
>> diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
>> index 3100643be299..0f63b626b3d4 100644
>> --- a/drivers/nvme/host/rdma.c
>> +++ b/drivers/nvme/host/rdma.c
>> @@ -872,6 +872,9 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
>>           IBK_INTEGRITY_HANDOVER)
>>           pi_capable = true;
>> +    if (ctrl->device->dev->attrs.sig_guard_cap & IB_GUARD_T10DIF_CSUM)
>> +        ctrl->ctrl.dix_support = true;
>> +
>>       ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev,
>>                               pi_capable);
>> @@ -1423,10 +1426,10 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
>>   static void nvme_rdma_set_sig_domain(struct blk_integrity *bi,
>>           struct nvme_command *cmd, struct ib_sig_domain *domain,
>> -        u16 control, u8 pi_type)
>> +        u16 control, u8 pi_type, enum ib_t10_dif_bg_type dif_type)
>>   {
>>       domain->sig_type = IB_SIG_TYPE_T10_DIF;
>> -    domain->sig.dif.bg_type = IB_T10DIF_CRC;
>> +    domain->sig.dif.bg_type = dif_type;
>>       domain->sig.dif.pi_interval = 1 << bi->interval_exp;
>>       domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
>>       if (control & NVME_RW_PRINFO_PRCHK_REF)
>> @@ -1441,7 +1444,7 @@ static void nvme_rdma_set_sig_domain(struct blk_integrity *bi,
>>   static void nvme_rdma_set_sig_attrs(struct blk_integrity *bi,
>>           struct nvme_command *cmd, struct ib_sig_attrs *sig_attrs,
>> -        u8 pi_type)
>> +        u8 pi_type, bool dix_support)
>>   {
>>       u16 control = le16_to_cpu(cmd->rw.control);
>> @@ -1450,16 +1453,20 @@ static void nvme_rdma_set_sig_attrs(struct blk_integrity *bi,
>>           /* for WRITE_INSERT/READ_STRIP no memory domain */
>>           sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
>>           nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
>> -                     pi_type);
>> +                     pi_type, IB_T10DIF_CRC);
>>           /* Clear the PRACT bit since HCA will generate/verify the PI */
>>           control &= ~NVME_RW_PRINFO_PRACT;
>>           cmd->rw.control = cpu_to_le16(control);
>>       } else {
>>           /* for WRITE_PASS/READ_PASS both wire/memory domains exist */
>>           nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
>> -                     pi_type);
>> -        nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
>> -                     pi_type);
>> +                     pi_type, IB_T10DIF_CRC);
>> +        if (dix_support)
>> +            nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem,
>> +                    control, pi_type, IB_T10DIF_CSUM);
>> +        else
>> +            nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem,
>> +                    control, pi_type, IB_T10DIF_CRC);
>>       }
>>   }
>> @@ -1501,7 +1508,8 @@ static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
>>           goto mr_put;
>>       nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_bdev->bd_disk), c,
>> -                req->mr->sig_attrs, ns->pi_type);
>> +                req->mr->sig_attrs, ns->pi_type,
>> +                ns->ctrl->dix_support);
>>       nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
>>       ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
> .


  reply	other threads:[~2022-08-29 13:57 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-29  8:12 [PATCH] nvme: add DIX support for nvme-rdma Chao Leng
2022-08-29  9:02 ` Sagi Grimberg
2022-08-29 10:43 ` Max Gurtovoy
2022-08-29 13:16   ` Chao Leng [this message]
2022-08-29 14:56     ` Max Gurtovoy
2022-08-29 15:10       ` Keith Busch
2022-08-29 23:47         ` Max Gurtovoy
2022-08-30 12:18         ` Chao Leng
2022-08-30 14:49           ` Keith Busch
2022-08-30  2:38       ` Martin K. Petersen
2022-08-30 12:21         ` Chao Leng
2022-09-05  6:37           ` Christoph Hellwig
2022-09-06  2:13             ` Chao Leng
2022-09-06  6:59               ` Christoph Hellwig
2022-09-06  9:34                 ` Max Gurtovoy
2022-09-06  9:35                   ` Christoph Hellwig
2022-09-06 10:05                     ` Max Gurtovoy
2022-09-06 10:13                 ` Chao Leng
2022-08-30 12:15       ` Chao Leng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=59dc78d8-c91d-6a7a-339e-b191cb77fd31@huawei.com \
    --to=lengchao@huawei.com \
    --cc=hch@lst.de \
    --cc=kbusch@kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=mgurtovoy@nvidia.com \
    --cc=sagi@grimberg.me \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox