From: alan.adamson@oracle.com
To: Klaus Jensen <its@irrelevant.dk>
Cc: qemu-devel@nongnu.org, kbusch@kernel.org, qemu-block@nongnu.org
Subject: Re: [PATCH v2 1/1] hw/nvme: add atomic write support
Date: Thu, 26 Sep 2024 10:21:22 -0700 [thread overview]
Message-ID: <e14f59a1-33c9-49c5-a3ff-b0f6eb614c06@oracle.com> (raw)
In-Reply-To: <ZvKtZkmZNuw9_Qzf@AALNPWKJENSEN.aal.scsc.local>
On 9/24/24 5:15 AM, Klaus Jensen wrote:
> On Sep 19 17:07, Alan Adamson wrote:
>> Adds support for the controller atomic parameters: AWUN and AWUPF. Atomic
>> Compare and Write Unit (ACWU) is not currently supported.
>>
>> Writes that adhere to the AWUN and AWUPF parameters are guaranteed to be atomic.
>>
>> New NVMe QEMU Parameters (See NVMe Specification for details):
>> atomic.dn (default off) - Set the value of Disable Normal.
>> atomic.awun=UINT16 (default: 0)
>> atomic.awupf=UINT16 (default: 0)
>>
>> By default (Disable Normal set to zero), the maximum atomic write size is
>> set to the AWUN value. If Disable Normal is set, the maximum atomic write
>> size is set to AWUPF.
>>
>> Signed-off-by: Alan Adamson <alan.adamson@oracle.com>
>> Reviewed-by: Klaus Jensen <k.jensen@samsung.com>
>> ---
>> hw/nvme/ctrl.c | 164 ++++++++++++++++++++++++++++++++++++++++++++++++-
>> hw/nvme/nvme.h | 12 ++++
>> 2 files changed, 175 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
>> index 9e94a2405407..0af46c57ee86 100644
>> --- a/hw/nvme/ctrl.c
>> +++ b/hw/nvme/ctrl.c
>> @@ -40,6 +40,9 @@
>> * sriov_vi_flexible=<N[optional]> \
>> * sriov_max_vi_per_vf=<N[optional]> \
>> * sriov_max_vq_per_vf=<N[optional]> \
>> + * atomic.dn=<on|off[optional]>, \
>> + * atomic.awun<N[optional]>, \
>> + * atomic.awupf<N[optional]>, \
>> * subsys=<subsys_id>
>> * -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\
>> * zoned=<true|false[optional]>, \
>> @@ -254,6 +257,7 @@ static const uint32_t nvme_feature_cap[NVME_FID_MAX] = {
>> [NVME_ERROR_RECOVERY] = NVME_FEAT_CAP_CHANGE | NVME_FEAT_CAP_NS,
>> [NVME_VOLATILE_WRITE_CACHE] = NVME_FEAT_CAP_CHANGE,
>> [NVME_NUMBER_OF_QUEUES] = NVME_FEAT_CAP_CHANGE,
>> + [NVME_WRITE_ATOMICITY] = NVME_FEAT_CAP_CHANGE,
>> [NVME_ASYNCHRONOUS_EVENT_CONF] = NVME_FEAT_CAP_CHANGE,
>> [NVME_TIMESTAMP] = NVME_FEAT_CAP_CHANGE,
>> [NVME_HOST_BEHAVIOR_SUPPORT] = NVME_FEAT_CAP_CHANGE,
>> @@ -6293,8 +6297,10 @@ defaults:
>> if (ret) {
>> return ret;
>> }
>> - goto out;
>> + break;
>>
>> + case NVME_WRITE_ATOMICITY:
>> + result = n->dn;
>> break;
>> default:
>> result = nvme_feature_default[fid];
>> @@ -6378,6 +6384,8 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
>> uint8_t save = NVME_SETFEAT_SAVE(dw10);
>> uint16_t status;
>> int i;
>> + NvmeIdCtrl *id = &n->id_ctrl;
>> + NvmeAtomic *atomic = &n->atomic;
>>
>> trace_pci_nvme_setfeat(nvme_cid(req), nsid, fid, save, dw11);
>>
>> @@ -6530,6 +6538,22 @@ static uint16_t nvme_set_feature(NvmeCtrl *n, NvmeRequest *req)
>> return NVME_CMD_SEQ_ERROR | NVME_DNR;
>> case NVME_FDP_EVENTS:
>> return nvme_set_feature_fdp_events(n, ns, req);
>> + case NVME_WRITE_ATOMICITY:
>> +
>> + n->dn = 0x1 & dw11;
>> +
>> + if (n->dn) {
>> + atomic->atomic_max_write_size = id->awupf + 1;
>> + } else {
>> + atomic->atomic_max_write_size = id->awun + 1;
>> + }
> le16_to_cpu()'s needed here.
>
>> +
>> + if (atomic->atomic_max_write_size == 1) {
>> + atomic->atomic_writes = 0;
>> + } else {
>> + atomic->atomic_writes = 1;
>> + }
>> + break;
>> default:
>> return NVME_FEAT_NOT_CHANGEABLE | NVME_DNR;
>> }
>> @@ -7227,6 +7251,80 @@ static void nvme_update_sq_tail(NvmeSQueue *sq)
>> trace_pci_nvme_update_sq_tail(sq->sqid, sq->tail);
>> }
>>
>> +#define NVME_ATOMIC_NO_START 0
>> +#define NVME_ATOMIC_START_ATOMIC 1
>> +#define NVME_ATOMIC_START_NONATOMIC 2
>> +
>> +static int nvme_atomic_write_check(NvmeCtrl *n, NvmeCmd *cmd,
>> + NvmeAtomic *atomic)
>> +{
>> + NvmeRwCmd *rw = (NvmeRwCmd *)cmd;
>> + uint64_t slba = le64_to_cpu(rw->slba);
>> + uint32_t nlb = (uint32_t)le16_to_cpu(rw->nlb);
>> + uint64_t elba = slba + nlb;
>> + bool cmd_atomic_wr = true;
>> + int i;
>> +
>> + if ((cmd->opcode == NVME_CMD_READ) || ((cmd->opcode == NVME_CMD_WRITE) &&
>> + ((rw->nlb + 1) > atomic->atomic_max_write_size))) {
>> + cmd_atomic_wr = false;
>> + }
>> +
>> + /*
>> + * Walk the queues to see if there are any atomic conflicts.
>> + */
>> + for (i = 1; i < n->params.max_ioqpairs + 1; i++) {
>> + NvmeSQueue *sq;
>> + NvmeRequest *req;
>> + NvmeRwCmd *req_rw;
>> + uint64_t req_slba;
>> + uint32_t req_nlb;
>> + uint64_t req_elba;
>> +
>> + sq = n->sq[i];
>> + if (!sq) {
>> + break;
> This needs to be a `continue`.
>
>> + }
>> +
>> + /*
>> + * Walk all the requests on a given queue.
>> + */
>> + QTAILQ_FOREACH(req, &sq->out_req_list, entry) {
>> + req_rw = (NvmeRwCmd *)&req->cmd;
>> +
>> + if (((req_rw->opcode == NVME_CMD_WRITE) || (req_rw->opcode == NVME_CMD_READ)) &&
>> + (cmd->nsid == req->ns->params.nsid)) {
>> + req_slba = le64_to_cpu(req_rw->slba);
>> + req_nlb = (uint32_t)le16_to_cpu(req_rw->nlb);
>> + req_elba = req_slba + req_nlb;
>> +
>> + if (cmd_atomic_wr) {
>> + if ((elba >= req_slba) && (slba <= req_elba)) {
>> + return NVME_ATOMIC_NO_START;
>> + }
>> + } else {
>> + if (req->atomic_write && ((elba >= req_slba) &&
>> + (slba <= req_elba))) {
>> + return NVME_ATOMIC_NO_START;
>> + }
>> + }
>> + }
>> + }
>> + }
>> + if (cmd_atomic_wr) {
>> + return NVME_ATOMIC_START_ATOMIC;
>> + }
>> + return NVME_ATOMIC_START_NONATOMIC;
>> +}
>> +
>> +static NvmeAtomic *nvme_get_atomic(NvmeCtrl *n, NvmeCmd *cmd)
>> +{
>> + if (n->atomic.atomic_writes) {
>> + return &n->atomic;
>> + }
>> + return NULL;
>> +}
>> +
>> static void nvme_process_sq(void *opaque)
>> {
>> NvmeSQueue *sq = opaque;
>> @@ -7243,6 +7341,9 @@ static void nvme_process_sq(void *opaque)
>> }
>>
>> while (!(nvme_sq_empty(sq) || QTAILQ_EMPTY(&sq->req_list))) {
>> + NvmeAtomic *atomic;
>> + bool cmd_is_atomic;
>> +
>> addr = sq->dma_addr + (sq->head << NVME_SQES);
>> if (nvme_addr_read(n, addr, (void *)&cmd, sizeof(cmd))) {
>> trace_pci_nvme_err_addr_read(addr);
>> @@ -7250,6 +7351,28 @@ static void nvme_process_sq(void *opaque)
>> stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
>> break;
>> }
>> +
>> + atomic = nvme_get_atomic(n, &cmd);
>> +
>> + cmd_is_atomic = false;
>> + if (sq->sqid && atomic) {
>> + int ret;
>> +
>> + qemu_mutex_lock(&atomic->atomic_lock);
> I don't think this needs to be protected by a lock. The nvme emulation
> is running in the main loop, so a Set Feature cannot be processed at the
> same time as this. I think that is what we are expecting to guard
> against?
>
> If I/O queues were processed from an iothread, this would be needed, but
> then we also need to take the lock when processing the feature and a
> bunch of other stuff might become more complicated.
>
> For now, I think it can just be dropped since if we enable the user to
> attach an iothread, my intention is to reduce such complexity by
> disabling all the "faked" features of the device.
I verified removing the locks doesn't have any issues. I'll include
this and the requests in v3.
What's the plan for iothread support, just a single thread per
controller (for all queues) or an iothread per queue?
Thanks,
Alan
prev parent reply other threads:[~2024-09-26 17:22 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-20 0:07 [PATCH v2 0/1] hw/nvme: add atomic write support Alan Adamson
2024-09-20 0:07 ` [PATCH v2 1/1] " Alan Adamson
2024-09-24 12:15 ` Klaus Jensen
2024-09-26 17:21 ` alan.adamson [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=e14f59a1-33c9-49c5-a3ff-b0f6eb614c06@oracle.com \
--to=alan.adamson@oracle.com \
--cc=its@irrelevant.dk \
--cc=kbusch@kernel.org \
--cc=qemu-block@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).