qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Klaus Jensen <its@irrelevant.dk>
To: qemu-devel@nongnu.org
Cc: Kevin Wolf <kwolf@redhat.com>, Fam Zheng <fam@euphon.net>,
	qemu-block@nongnu.org, Klaus Jensen <k.jensen@samsung.com>,
	Gollu Appalanaidu <anaidu.gollu@samsung.com>,
	Max Reitz <mreitz@redhat.com>, Klaus Jensen <its@irrelevant.dk>,
	Stefan Hajnoczi <stefanha@redhat.com>,
	Keith Busch <kbusch@kernel.org>
Subject: [PATCH RFC 1/3] nvme: add support for extended LBAs
Date: Thu, 17 Dec 2020 22:02:20 +0100	[thread overview]
Message-ID: <20201217210222.779619-2-its@irrelevant.dk> (raw)
In-Reply-To: <20201217210222.779619-1-its@irrelevant.dk>

From: Gollu Appalanaidu <anaidu.gollu@samsung.com>

This allows logical blocks to be extended with a number of metadata
bytes specified by the new namespace parameter 'ms'. The additional
bytes are stored immediately after each logical block.

The Deallocated or Unwritten Logical Block Error recovery feature is not
supported for namespaces with extended LBAs since the extended logical
blocks are not aligned with the blocks of the underlying device and the
allocation status of blocks can thus not be determined by the
BDRV_BLOCK_ZERO bdrv_block_status flag. Similarly, the DLFEAT field will
not report any read behavior for deallocated logical blocks.

Signed-off-by: Gollu Appalanaidu <anaidu.gollu@samsung.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
 hw/block/nvme-ns.h | 19 ++++++++++++++++---
 hw/block/nvme-ns.c | 21 +++++++++++++++++----
 hw/block/nvme.c    |  6 ++++--
 3 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/hw/block/nvme-ns.h b/hw/block/nvme-ns.h
index 44bf6271b744..1e621fb130a3 100644
--- a/hw/block/nvme-ns.h
+++ b/hw/block/nvme-ns.h
@@ -21,6 +21,7 @@
 
 typedef struct NvmeNamespaceParams {
     uint32_t nsid;
+    uint16_t ms;
 } NvmeNamespaceParams;
 
 typedef struct NvmeNamespace {
@@ -57,18 +58,30 @@ static inline uint8_t nvme_ns_lbads(NvmeNamespace *ns)
     return nvme_ns_lbaf(ns)->ds;
 }
 
-/* calculate the number of LBAs that the namespace can accomodate */
-static inline uint64_t nvme_ns_nlbas(NvmeNamespace *ns)
+static inline uint16_t nvme_ns_ms(NvmeNamespace *ns)
 {
-    return ns->size >> nvme_ns_lbads(ns);
+    return nvme_ns_lbaf(ns)->ms;
 }
 
 /* convert an LBA to the equivalent in bytes */
 static inline size_t nvme_l2b(NvmeNamespace *ns, uint64_t lba)
 {
+    if (NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas)) {
+        return (lba << nvme_ns_lbads(ns)) + (lba * nvme_ns_ms(ns));
+    }
+
     return lba << nvme_ns_lbads(ns);
 }
 
+/* calculate the number of LBAs that the namespace can accomodate */
+static inline uint64_t nvme_ns_nlbas(NvmeNamespace *ns)
+{
+    if (NVME_ID_NS_FLBAS_EXTENDED(ns->id_ns.flbas)) {
+        return ns->size / nvme_l2b(ns, 1);
+    }
+    return ns->size >> nvme_ns_lbads(ns);
+}
+
 typedef struct NvmeCtrl NvmeCtrl;
 
 int nvme_ns_setup(NvmeCtrl *n, NvmeNamespace *ns, Error **errp);
diff --git a/hw/block/nvme-ns.c b/hw/block/nvme-ns.c
index 2d69b5177b51..a9785a12eb13 100644
--- a/hw/block/nvme-ns.c
+++ b/hw/block/nvme-ns.c
@@ -37,9 +37,24 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
     int lba_index = NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas);
     int npdg;
 
-    ns->id_ns.dlfeat = 0x9;
+    id_ns->dlfeat = 0x10;
 
     id_ns->lbaf[lba_index].ds = 31 - clz32(ns->blkconf.logical_block_size);
+    id_ns->lbaf[lba_index].ms = ns->params.ms;
+
+    /* support DULBE and I/O optimization fields */
+    id_ns->nsfeat |= 0x10;
+
+    if (!ns->params.ms) {
+        /* zeroes are guaranteed to be read from deallocated blocks */
+        id_ns->dlfeat |= 0x1 | 0x8;
+
+        /* support DULBE */
+        id_ns->nsfeat |= 0x4;
+    } else {
+        id_ns->mc = 0x1;
+        id_ns->flbas |= 0x10;
+    }
 
     id_ns->nsze = cpu_to_le64(nvme_ns_nlbas(ns));
 
@@ -47,9 +62,6 @@ static int nvme_ns_init(NvmeNamespace *ns, Error **errp)
     id_ns->ncap = id_ns->nsze;
     id_ns->nuse = id_ns->ncap;
 
-    /* support DULBE and I/O optimization fields */
-    id_ns->nsfeat |= (0x4 | 0x10);
-
     npdg = ns->blkconf.discard_granularity / ns->blkconf.logical_block_size;
 
     if (bdrv_get_info(blk_bs(ns->blkconf.blk), &bdi) >= 0 &&
@@ -150,6 +162,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
 static Property nvme_ns_props[] = {
     DEFINE_BLOCK_PROPERTIES(NvmeNamespace, blkconf),
     DEFINE_PROP_UINT32("nsid", NvmeNamespace, params.nsid, 0),
+    DEFINE_PROP_UINT16("ms", NvmeNamespace, params.ms, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 28416b18a5c0..e4922c37c94d 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1214,6 +1214,7 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
         BLOCK_ACCT_WRITE : BLOCK_ACCT_READ;
     BlockBackend *blk = ns->blkconf.blk;
     uint16_t status;
+    uint32_t sector_size;
 
     trace_pci_nvme_rw(nvme_cid(req), nvme_io_opc_str(rw->opcode),
                       nvme_nsid(ns), nlb, data_size, slba);
@@ -1246,12 +1247,13 @@ static uint16_t nvme_rw(NvmeCtrl *n, NvmeRequest *req)
 
     block_acct_start(blk_get_stats(blk), &req->acct, data_size, acct);
     if (req->qsg.sg) {
+        sector_size = nvme_l2b(ns, 1);
         if (acct == BLOCK_ACCT_WRITE) {
             req->aiocb = dma_blk_write(blk, &req->qsg, data_offset,
-                                       BDRV_SECTOR_SIZE, nvme_rw_cb, req);
+                                       sector_size, nvme_rw_cb, req);
         } else {
             req->aiocb = dma_blk_read(blk, &req->qsg, data_offset,
-                                      BDRV_SECTOR_SIZE, nvme_rw_cb, req);
+                                      sector_size, nvme_rw_cb, req);
         }
     } else {
         if (acct == BLOCK_ACCT_WRITE) {
-- 
2.29.2



  reply	other threads:[~2020-12-17 21:07 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-17 21:02 [PATCH RFC 0/3] hw/block/nvme: dif-based end-to-end data protection support Klaus Jensen
2020-12-17 21:02 ` Klaus Jensen [this message]
2020-12-17 21:02 ` [PATCH RFC 2/3] hw/block/nvme: refactor nvme_dma Klaus Jensen
2020-12-17 21:02 ` [PATCH RFC 3/3] hw/block/nvme: end-to-end data protection Klaus Jensen
2020-12-18 18:08   ` Keith Busch
2020-12-18 18:24     ` Klaus Jensen
2020-12-17 21:14 ` [PATCH RFC 0/3] hw/block/nvme: dif-based end-to-end data protection support Keith Busch
2020-12-18  9:39   ` Klaus Jensen
2020-12-18 17:50     ` Keith Busch

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20201217210222.779619-2-its@irrelevant.dk \
    --to=its@irrelevant.dk \
    --cc=anaidu.gollu@samsung.com \
    --cc=fam@euphon.net \
    --cc=k.jensen@samsung.com \
    --cc=kbusch@kernel.org \
    --cc=kwolf@redhat.com \
    --cc=mreitz@redhat.com \
    --cc=qemu-block@nongnu.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).