All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stefan Hajnoczi <stefanha@redhat.com>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: qemu-block@nongnu.org, qemu-devel@nongnu.org, kwolf@redhat.com,
	hreitz@redhat.com, fam@euphon.net, ronniesahlberg@gmail.com,
	Changqi Lu <luchangqi.123@bytedance.com>,
	pl@dlhnet.de, kbusch@kernel.org, its@irrelevant.dk,
	foss@defmacro.it, philmd@linaro.org,
	zhenwei pi <pizhenwei@bytedance.com>
Subject: Re: [PATCH 5/9] hw/scsi: add persistent reservation in/out api for scsi device
Date: Thu, 9 May 2024 14:45:12 -0400	[thread overview]
Message-ID: <20240509184512.GG515246@fedora.redhat.com> (raw)
In-Reply-To: <20240508093629.441057-6-luchangqi.123@bytedance.com>

[-- Attachment #1: Type: text/plain, Size: 13453 bytes --]

On Wed, May 08, 2024 at 05:36:25PM +0800, Changqi Lu wrote:
> Add persistent reservation in/out operations in the
> SCSI device layer. By introducing the persistent
> reservation in/out api, this enables the SCSI device
> to perform reservation-related tasks, including querying
> keys, querying reservation status, registering reservation
> keys, initiating and releasing reservations, as well as
> clearing and preempting reservations held by other keys.
> 
> These operations are crucial for management and control of
> shared storage resources in a persistent manner.
> 
> Signed-off-by: Changqi Lu <luchangqi.123@bytedance.com>
> Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
> ---
>  hw/scsi/scsi-disk.c | 302 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 302 insertions(+)

Paolo: Please review for buffer overflows. I find the buffer size
assumption in the SCSI layer mysterious (e.g. scsi_req_get_buf() returns
a void* and it's not obvious how large the buffer is), so I didn't check
for buffer overflows.

> 
> diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
> index 4bd7af9d0c..bdd66c4026 100644
> --- a/hw/scsi/scsi-disk.c
> +++ b/hw/scsi/scsi-disk.c
> @@ -32,6 +32,7 @@
>  #include "migration/vmstate.h"
>  #include "hw/scsi/emulation.h"
>  #include "scsi/constants.h"
> +#include "scsi/utils.h"
>  #include "sysemu/block-backend.h"
>  #include "sysemu/blockdev.h"
>  #include "hw/block/block.h"
> @@ -1474,6 +1475,296 @@ static void scsi_disk_emulate_read_data(SCSIRequest *req)
>      scsi_req_complete(&r->req, GOOD);
>  }
>  
> +typedef struct SCSIPrReadKeys {
> +    uint32_t generation;
> +    uint32_t num_keys;
> +    uint64_t *keys;
> +    void     *req;
> +} SCSIPrReadKeys;
> +
> +typedef struct SCSIPrReadReservation {
> +    uint32_t generation;
> +    uint64_t key;
> +    BlockPrType type;
> +    void *req;
> +} SCSIPrReadReservation;
> +
> +static void scsi_pr_read_keys_complete(void *opaque, int ret)
> +{
> +    int num_keys;
> +    uint8_t *buf;
> +    SCSIPrReadKeys *blk_keys = (SCSIPrReadKeys *)opaque;
> +    SCSIDiskReq *r = (SCSIDiskReq *)blk_keys->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +            qemu_get_current_aio_context());
> +
> +    assert(r->req.aiocb != NULL);
> +    r->req.aiocb = NULL;
> +
> +    if (scsi_disk_req_check_error(r, ret, true)) {
> +        goto done;
> +    }
> +
> +    buf = scsi_req_get_buf(&r->req);
> +    num_keys = MIN(blk_keys->num_keys, ret);
> +    blk_keys->generation = cpu_to_be32(blk_keys->generation);
> +    memcpy(&buf[0], &blk_keys->generation, 4);
> +    for (int i = 0; i < num_keys; i++) {
> +        blk_keys->keys[i] = cpu_to_be64(blk_keys->keys[i]);
> +        memcpy(&buf[8 + i * 8], &blk_keys->keys[i], 8);
> +    }
> +    num_keys = cpu_to_be32(num_keys * 8);
> +    memcpy(&buf[4], &num_keys, 4);
> +
> +    scsi_req_data(&r->req, r->buflen);
> +done:
> +    scsi_req_unref(&r->req);
> +    g_free(blk_keys->keys);
> +    g_free(blk_keys);
> +}
> +
> +static int scsi_disk_emulate_pr_read_keys(SCSIRequest *req)
> +{
> +    SCSIPrReadKeys *blk_keys;
> +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +    int buflen = MIN(r->req.cmd.xfer, r->buflen);
> +    int num_keys = (buflen - sizeof(uint32_t) * 2) / sizeof(uint64_t);
> +
> +    blk_keys = g_new0(SCSIPrReadKeys, 1);
> +    blk_keys->generation = 0;
> +    /* num_keys is the maximum number of keys that can be transmitted */
> +    blk_keys->num_keys = num_keys;
> +    blk_keys->keys = g_malloc(sizeof(uint64_t) * num_keys);
> +    blk_keys->req = r;
> +
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_read_keys(s->qdev.conf.blk, &blk_keys->generation,
> +                                        blk_keys->num_keys, blk_keys->keys,
> +                                        scsi_pr_read_keys_complete, blk_keys);
> +    return 0;
> +}
> +
> +static void scsi_pr_read_reservation_complete(void *opaque, int ret)
> +{
> +    uint8_t *buf;
> +    uint32_t num_keys = 0;
> +    SCSIPrReadReservation *blk_rsv = (SCSIPrReadReservation *)opaque;
> +    SCSIDiskReq *r = (SCSIDiskReq *)blk_rsv->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +            qemu_get_current_aio_context());
> +
> +    assert(r->req.aiocb != NULL);
> +    r->req.aiocb = NULL;
> +
> +    if (scsi_disk_req_check_error(r, ret, true)) {
> +        goto done;
> +    }
> +
> +    buf = scsi_req_get_buf(&r->req);
> +    blk_rsv->generation = cpu_to_be32(blk_rsv->generation);
> +    memcpy(&buf[0], &blk_rsv->generation, 4);
> +    if (ret) {
> +        num_keys = cpu_to_be32(16);

The SPC-6 calls this field Additional Length, which is clearer than
num_keys (there is only 1 key here!). Could you rename it to
additional_len to avoid confusion?

> +        blk_rsv->key = cpu_to_be64(blk_rsv->key);
> +        memcpy(&buf[8], &blk_rsv->key, 8);
> +        buf[21] = block_pr_type_to_scsi(blk_rsv->type) & 0xf;
> +    } else {
> +        num_keys = cpu_to_be32(0);
> +    }
> +
> +    memcpy(&buf[4], &num_keys, 4);
> +    scsi_req_data(&r->req, r->buflen);
> +
> +done:
> +    scsi_req_unref(&r->req);
> +    g_free(blk_rsv);
> +}
> +
> +static int scsi_disk_emulate_pr_read_reservation(SCSIRequest *req)
> +{
> +    SCSIPrReadReservation *blk_rsv;
> +    SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    blk_rsv = g_malloc(sizeof(*blk_rsv));
> +    blk_rsv->generation = 0;
> +    blk_rsv->key = 0;
> +    blk_rsv->type = 0;
> +    blk_rsv->req = r;
> +
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_read_reservation(s->qdev.conf.blk,
> +                   &blk_rsv->generation, &blk_rsv->key, &blk_rsv->type,
> +                   scsi_pr_read_reservation_complete, blk_rsv);
> +    return 0;
> +}
> +
> +static void scsi_aio_pr_complete(void *opaque, int ret)
> +{
> +    SCSIDiskReq *r = (SCSIDiskReq *)opaque;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
> +
> +    /* The request must only run in the BlockBackend's AioContext */
> +    assert(blk_get_aio_context(s->qdev.conf.blk) ==
> +           qemu_get_current_aio_context());
> +
> +    assert(r->req.aiocb != NULL);
> +    r->req.aiocb = NULL;
> +
> +    if (scsi_disk_req_check_error(r, ret, true)) {
> +        goto done;
> +    }
> +
> +    scsi_req_complete(&r->req, GOOD);
> +
> +done:
> +    scsi_req_unref(&r->req);
> +}
> +
> +static int scsi_disk_emulate_pr_register(SCSIDiskReq *r, uint64_t old_key,
> +                                         uint64_t new_key, SCSIPrType type,
> +                                         bool ignore_key)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_register(s->qdev.conf.blk, old_key, new_key,
> +                                       scsi_pr_type_to_block(type),
> +                                       ignore_key, scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_reserve(SCSIDiskReq *r, uint64_t key,
> +                                        SCSIPrType type)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_reserve(s->qdev.conf.blk, key,
> +                                      scsi_pr_type_to_block(type),
> +                                      scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_release(SCSIDiskReq *r, uint64_t key,
> +                                        SCSIPrType type)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_release(s->qdev.conf.blk, key,
> +                                      scsi_pr_type_to_block(type),
> +                                      scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_clear(SCSIDiskReq *r, uint64_t key)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_clear(s->qdev.conf.blk, key,
> +                                    scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_preempt(SCSIDiskReq *r, uint64_t new_key,
> +                                        uint64_t old_key, SCSIPrType type,
> +                                        bool abort)
> +{
> +    SCSIRequest *req = &r->req;
> +    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
> +
> +    scsi_req_ref(&r->req);
> +    r->req.aiocb = blk_aio_pr_preempt(s->qdev.conf.blk, new_key, old_key,
> +                                      scsi_pr_type_to_block(type), abort,
> +                                      scsi_aio_pr_complete, r);
> +
> +    return 0;
> +}
> +
> +static int scsi_disk_emulate_pr_in(SCSIRequest *req)
> +{
> +    int rc;
> +    SCSIPrInAction action = req->cmd.buf[1] & 0x1f;
> +
> +    switch (action) {
> +    case SCSI_PR_IN_READ_KEYS:
> +        rc = scsi_disk_emulate_pr_read_keys(req);
> +        break;
> +    case SCSI_PR_IN_READ_RESERVATION:
> +        rc = scsi_disk_emulate_pr_read_reservation(req);
> +        break;
> +    default:
> +        return -ENOTSUP;
> +    }
> +
> +    return rc;
> +}
> +
> +static int scsi_disk_emulate_pr_out(SCSIDiskReq *r, uint8_t *inbuf)
> +{
> +    int rc;
> +    uint64_t old_key, new_key;
> +    SCSIPrOutAction action;
> +    SCSIPrScope scope;
> +    SCSIPrType type;
> +    SCSIRequest *req = &r->req;
> +
> +    memcpy(&old_key, &inbuf[0], 8);
> +    old_key = be64_to_cpu(old_key);
> +    memcpy(&new_key, &inbuf[8], 8);
> +    new_key = be64_to_cpu(new_key);
> +    action = req->cmd.buf[1] & 0x1f;
> +    scope = (req->cmd.buf[2] >> 4) & 0x0f;
> +    type = req->cmd.buf[2] & 0x0f;
> +
> +    if (scope != SCSI_PR_LU_SCOPE) {
> +        return -ENOTSUP;
> +    }
> +
> +    switch (action) {
> +    case SCSI_PR_OUT_REGISTER:
> +        rc = scsi_disk_emulate_pr_register(r, old_key, new_key, type, false);
> +        break;
> +    case SCSI_PR_OUT_REG_AND_IGNORE_KEY:
> +        rc = scsi_disk_emulate_pr_register(r, old_key, new_key, type, true);
> +        break;
> +    case SCSI_PR_OUT_RESERVE:
> +        rc = scsi_disk_emulate_pr_reserve(r, old_key, type);
> +        break;
> +    case SCSI_PR_OUT_RELEASE:
> +        rc = scsi_disk_emulate_pr_release(r, old_key, type);
> +        break;
> +    case SCSI_PR_OUT_CLEAR:
> +        rc = scsi_disk_emulate_pr_clear(r, old_key);
> +        break;
> +    case SCSI_PR_OUT_PREEMPT:
> +        rc = scsi_disk_emulate_pr_preempt(r, old_key, new_key, type, false);
> +        break;
> +    case SCSI_PR_OUT_PREEMPT_AND_ABORT:
> +        rc = scsi_disk_emulate_pr_preempt(r, old_key, new_key, type, true);
> +        break;
> +    default:
> +        return -ENOTSUP;
> +    }
> +
> +    return rc;
> +}
> +
>  static int scsi_disk_check_mode_select(SCSIDiskState *s, int page,
>                                         uint8_t *inbuf, int inlen)
>  {
> @@ -1957,6 +2248,9 @@ static void scsi_disk_emulate_write_data(SCSIRequest *req)
>          scsi_req_complete(&r->req, GOOD);
>          break;
>  
> +    case PERSISTENT_RESERVE_OUT:
> +        scsi_disk_emulate_pr_out(r, r->iov.iov_base);
> +        break;
>      default:
>          abort();
>      }
> @@ -2213,6 +2507,12 @@ static int32_t scsi_disk_emulate_command(SCSIRequest *req, uint8_t *buf)
>      case FORMAT_UNIT:
>          trace_scsi_disk_emulate_command_FORMAT_UNIT(r->req.cmd.xfer);
>          break;
> +    case PERSISTENT_RESERVE_OUT:
> +        break;
> +    case PERSISTENT_RESERVE_IN:
> +        scsi_req_ref(&r->req);

Please add a comment indicating which scsi_req_unref() this ref is
paired with. That makes it easier to understand request reference
counting.

> +        scsi_disk_emulate_pr_in(req);
> +        return 0;
>      default:
>          trace_scsi_disk_emulate_command_UNKNOWN(buf[0],
>                                                  scsi_command_name(buf[0]));
> @@ -2632,6 +2932,8 @@ static const SCSIReqOps *const scsi_disk_reqops_dispatch[256] = {
>      [VERIFY_12]                       = &scsi_disk_emulate_reqops,
>      [VERIFY_16]                       = &scsi_disk_emulate_reqops,
>      [FORMAT_UNIT]                     = &scsi_disk_emulate_reqops,
> +    [PERSISTENT_RESERVE_IN]           = &scsi_disk_emulate_reqops,
> +    [PERSISTENT_RESERVE_OUT]          = &scsi_disk_emulate_reqops,
>  
>      [READ_6]                          = &scsi_disk_dma_reqops,
>      [READ_10]                         = &scsi_disk_dma_reqops,
> -- 
> 2.20.1
> 

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 488 bytes --]

  reply	other threads:[~2024-05-09 18:46 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-05-08  9:36 [PATCH 0/9] Support persistent reservation operations Changqi Lu
2024-05-08  9:36 ` [PATCH 1/9] block: add persistent reservation in/out api Changqi Lu
2024-05-09 18:22   ` Stefan Hajnoczi
2024-05-10  2:37     ` zhenwei pi
2024-05-08  9:36 ` [PATCH 2/9] block/raw: add persistent reservation in/out driver Changqi Lu
2024-05-09 18:23   ` Stefan Hajnoczi
2024-05-08  9:36 ` [PATCH 3/9] scsi/constant: add persistent reservation in/out protocol constants Changqi Lu
2024-05-09 18:28   ` Stefan Hajnoczi
2024-05-08  9:36 ` [PATCH 4/9] scsi/util: add helper functions for persistent reservation types conversion Changqi Lu
2024-05-09 18:28   ` Stefan Hajnoczi
2024-05-08  9:36 ` [PATCH 5/9] hw/scsi: add persistent reservation in/out api for scsi device Changqi Lu
2024-05-09 18:45   ` Stefan Hajnoczi [this message]
2024-05-09 19:09   ` Stefan Hajnoczi
2024-05-08  9:36 ` [PATCH 6/9] block/nvme: add reservation command protocol constants Changqi Lu
2024-05-09 18:48   ` Stefan Hajnoczi
2024-05-08  9:36 ` [PATCH 7/9] hw/nvme: add helper functions for converting reservation types Changqi Lu
2024-05-09 18:48   ` Stefan Hajnoczi
2024-05-08  9:36 ` [PATCH 8/9] hw/nvme: add reservation protocal command Changqi Lu
2024-05-08 10:41   ` Klaus Jensen
2024-05-08  9:36 ` [PATCH 9/9] block/iscsi: add persistent reservation in/out driver Changqi Lu
2024-05-09 19:08 ` [PATCH 0/9] Support persistent reservation operations Stefan Hajnoczi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240509184512.GG515246@fedora.redhat.com \
    --to=stefanha@redhat.com \
    --cc=fam@euphon.net \
    --cc=foss@defmacro.it \
    --cc=hreitz@redhat.com \
    --cc=its@irrelevant.dk \
    --cc=kbusch@kernel.org \
    --cc=kwolf@redhat.com \
    --cc=luchangqi.123@bytedance.com \
    --cc=pbonzini@redhat.com \
    --cc=philmd@linaro.org \
    --cc=pizhenwei@bytedance.com \
    --cc=pl@dlhnet.de \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=ronniesahlberg@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.