From: Lukasz Maniak <lukasz.maniak@linux.intel.com>
To: qemu-devel@nongnu.org
Cc: "Fam Zheng" <fam@euphon.net>, "Kevin Wolf" <kwolf@redhat.com>,
	qemu-block@nongnu.org,
	"Łukasz Gieryk" <lukasz.gieryk@linux.intel.com>,
	"Lukasz Maniak" <lukasz.maniak@linux.intel.com>,
	"Klaus Jensen" <its@irrelevant.dk>,
	"Hanna Reitz" <hreitz@redhat.com>,
	"Stefan Hajnoczi" <stefanha@redhat.com>,
	"Keith Busch" <kbusch@kernel.org>,
	"Philippe Mathieu-Daudé" <philmd@redhat.com>
Subject: [PATCH 12/15] hw/nvme: Initialize capability structures for primary/secondary controllers
Date: Thu,  7 Oct 2021 18:24:03 +0200
Message-ID: <20211007162406.1920374-13-lukasz.maniak@linux.intel.com>
In-Reply-To: <20211007162406.1920374-1-lukasz.maniak@linux.intel.com>

From: Łukasz Gieryk <lukasz.gieryk@linux.intel.com>

With the two new properties (sriov_max_vi_per_vf, sriov_max_vq_per_vf)
one can configure the maximum number of virtual interrupts and queues
assignable to a single virtual function (VF). The primary and secondary
controller capability structures are initialized accordingly.

Since the number of available queues (and interrupts) now differs
between the PF and its VFs, the BAR size calculation is adjusted as
well.

Signed-off-by: Łukasz Gieryk <lukasz.gieryk@linux.intel.com>
---
 hw/nvme/ctrl.c       | 110 +++++++++++++++++++++++++++++++++++++++----
 hw/nvme/nvme.h       |   2 +
 include/block/nvme.h |   5 ++
 3 files changed, 108 insertions(+), 9 deletions(-)
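
For reference, a minimal sketch of the interrupt-vector budget the new knobs
have to satisfy, mirroring the check added to nvme_check_constraints() below.
The helper name, the MSIX_TABLE_MAX macro and the sample values are
illustrative only:

  #include <stdio.h>

  /*
   * The PF keeps msix_qsize vectors for itself, and each of the sriov_max_vfs
   * virtual functions may be handed up to sriov_max_vi_per_vf flexible
   * interrupt resources.  The sum must fit within the MSI-X table size limit,
   * PCI_MSIX_FLAGS_QSIZE + 1 == 2048 vectors.
   */
  #define MSIX_TABLE_MAX 2048

  static int vi_budget_ok(unsigned msix_qsize, unsigned sriov_max_vfs,
                          unsigned sriov_max_vi_per_vf)
  {
      unsigned msix_total = msix_qsize + sriov_max_vfs * sriov_max_vi_per_vf;

      return msix_total <= MSIX_TABLE_MAX;
  }

  int main(void)
  {
      /* Sample values: 65 PF vectors, 127 VFs with 2 flexible interrupts each
       * gives 65 + 127 * 2 = 319, well within the 2048-vector ceiling. */
      printf("%s\n", vi_budget_ok(65, 127, 2) ? "fits" : "too many vectors");
      return 0;
  }

With NVME_VF_RES_GRANULARITY == 1, the additional (X - 1) % granularity
constraints are trivially satisfied; they only become relevant if the
granularity is ever raised.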

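Also for reference, a worked example of how the primary controller capability
structure comes out on the PF under a hypothetical configuration with
max_ioqpairs=64, msix_qsize=65, sriov_max_vfs=2, sriov_max_vq_per_vf=4 and
sriov_max_vi_per_vf=2. The struct below is a simplified, host-endian mirror
of the fields nvme_init_state() fills in; the real NvmePriCtrlCap is packed
and little-endian:

  #include <stdint.h>
  #include <stdio.h>

  struct pri_cap_example {
      uint32_t vqfrt, vqrfap;          /* flexible VQs: total / held by the PF */
      uint16_t vqprt, vqfrsm, vqgran;  /* private VQs, per-VF max, granularity */
      uint32_t vifrt, virfap;          /* flexible VIs: total / held by the PF */
      uint16_t viprt, vifrsm, vigran;  /* private VIs, per-VF max, granularity */
  };

  int main(void)
  {
      const unsigned max_ioqpairs = 64, msix_qsize = 65;
      const unsigned sriov_max_vfs = 2;
      const unsigned sriov_max_vq_per_vf = 4, sriov_max_vi_per_vf = 2;

      struct pri_cap_example cap = {
          .vqfrt  = sriov_max_vq_per_vf * sriov_max_vfs, /* 8 */
          .vqrfap = sriov_max_vq_per_vf * sriov_max_vfs, /* 8, all start at the PF */
          .vqprt  = max_ioqpairs + 1,                    /* 65, incl. admin queue */
          .vqfrsm = sriov_max_vq_per_vf,                 /* 4 */
          .vqgran = 1,                                   /* NVME_VF_RES_GRANULARITY */
          .vifrt  = sriov_max_vi_per_vf * sriov_max_vfs, /* 4 */
          .virfap = sriov_max_vi_per_vf * sriov_max_vfs, /* 4 */
          .viprt  = msix_qsize,                          /* 65 */
          .vifrsm = sriov_max_vi_per_vf,                 /* 2 */
          .vigran = 1,
      };

      printf("VQFRT=%u VQRFAP=%u VQPRT=%u VQFRSM=%u\n",
             cap.vqfrt, cap.vqrfap, (unsigned)cap.vqprt, (unsigned)cap.vqfrsm);
      printf("VIFRT=%u VIRFAP=%u VIPRT=%u VIFRSM=%u\n",
             cap.vifrt, cap.virfap, (unsigned)cap.viprt, (unsigned)cap.vifrsm);
      return 0;
  }

On a VF the same structure only reports the privately assigned resources
(VQPRT = conf_ioqpairs + 1, VIPRT = conf_msix_qsize); the flexible totals are
zero there, and the per-VF maxima/granularity fields are left to the PF.
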
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 425fbf2c73..67c7210d7e 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -36,6 +36,8 @@
  *              zoned.zasl=<N[optional]>, \
  *              zoned.auto_transition=<on|off[optional]>, \
  *              sriov_max_vfs=<N[optional]> \
+ *              sriov_max_vi_per_vf=<N[optional]>, \
+ *              sriov_max_vq_per_vf=<N[optional]>, \
  *              subsys=<subsys_id>
  *      -device nvme-ns,drive=<drive_id>,bus=<bus_name>,nsid=<nsid>,\
  *              zoned=<true|false[optional]>, \
@@ -113,6 +115,18 @@
  *   enables reporting of both SR-IOV and ARI capabilities by the NVMe device.
  *   Virtual function controllers will not report SR-IOV capability.
  *
+ * - `sriov_max_vi_per_vf`
+ *   Indicates the maximum number of virtual interrupt resources assignable
+ *   to a secondary controller. Must be explicitly set if sriov_max_vfs != 0.
+ *   The parameter affects VFs much like msix_qsize affects the PF, i.e., it
+ *   determines the number of interrupts available to all queues (admin, io).
+ *
+ * - `sriov_max_vq_per_vf`
+ *   Indicates the maximum number of virtual queue resources assignable to
+ *   a secondary controller. Must be explicitly set if sriov_max_vfs != 0.
+ *   The parameter affects VFs much like max_ioqpairs affects the PF,
+ *   except that the number of flexible queues also includes the admin queue.
+ *
  * nvme namespace device parameters
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  * - `shared`
@@ -184,6 +198,7 @@
 #define NVME_NUM_FW_SLOTS 1
 #define NVME_DEFAULT_MAX_ZA_SIZE (128 * KiB)
 #define NVME_MAX_VFS 127
+#define NVME_VF_RES_GRANULARITY 1
 #define NVME_VF_OFFSET 0x1
 #define NVME_VF_STRIDE 1
 
@@ -6254,6 +6269,7 @@ static const MemoryRegionOps nvme_cmb_ops = {
 static void nvme_check_constraints(NvmeCtrl *n, Error **errp)
 {
     NvmeParams *params = &n->params;
+    int msix_total;
 
     if (params->num_queues) {
         warn_report("num_queues is deprecated; please use max_ioqpairs "
@@ -6324,6 +6340,30 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp)
                        NVME_MAX_VFS);
             return;
         }
+
+        if (params->sriov_max_vi_per_vf < 1 ||
+            (params->sriov_max_vi_per_vf - 1) % NVME_VF_RES_GRANULARITY) {
+            error_setg(errp, "sriov_max_vi_per_vf must meet:"
+                       " (X - 1) %% %d == 0 and X >= 1",
+                       NVME_VF_RES_GRANULARITY);
+            return;
+        }
+
+        if (params->sriov_max_vq_per_vf < 2 ||
+            (params->sriov_max_vq_per_vf - 1) % NVME_VF_RES_GRANULARITY) {
+            error_setg(errp, "sriov_max_vq_per_vf must meet:"
+                       " (X - 1) %% %d == 0 and X >= 2",
+                       NVME_VF_RES_GRANULARITY);
+            return;
+        }
+
+        msix_total = params->msix_qsize +
+                     params->sriov_max_vfs * params->sriov_max_vi_per_vf;
+        if (msix_total > PCI_MSIX_FLAGS_QSIZE + 1) {
+            error_setg(errp, "sriov_max_vi_per_vf is too big for"
+                       " sriov_max_vfs=%d", params->sriov_max_vfs);
+            return;
+        }
     }
 }
 
@@ -6332,13 +6372,35 @@ static void nvme_init_state(NvmeCtrl *n)
     NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
     NvmeSecCtrlList *list = &n->sec_ctrl_list;
     NvmeSecCtrlEntry *sctrl;
+    uint8_t max_vfs;
+    uint32_t total_vq, total_vi;
     int i;
 
-    n->max_ioqpairs = n->params.max_ioqpairs;
-    n->conf_ioqpairs = n->max_ioqpairs;
+    if (pci_is_vf(&n->parent_obj)) {
+        sctrl = nvme_sctrl(n);
+
+        max_vfs = 0;
+
+        n->max_ioqpairs = n->params.sriov_max_vq_per_vf - 1;
+        n->conf_ioqpairs = sctrl->nvq ? le16_to_cpu(sctrl->nvq) - 1 : 0;
+
+        n->max_msix_qsize = n->params.sriov_max_vi_per_vf;
+        n->conf_msix_qsize = sctrl->nvi ? le16_to_cpu(sctrl->nvi) : 1;
+    } else {
+        max_vfs = n->params.sriov_max_vfs;
+
+        n->max_ioqpairs = n->params.max_ioqpairs +
+                          max_vfs * n->params.sriov_max_vq_per_vf;
+        n->conf_ioqpairs = n->max_ioqpairs;
+
+        n->max_msix_qsize = n->params.msix_qsize +
+                            max_vfs * n->params.sriov_max_vi_per_vf;
+        n->conf_msix_qsize = n->max_msix_qsize;
+    }
+
+    total_vq = n->params.sriov_max_vq_per_vf * max_vfs;
+    total_vi = n->params.sriov_max_vi_per_vf * max_vfs;
 
-    n->max_msix_qsize = n->params.msix_qsize;
-    n->conf_msix_qsize = n->max_msix_qsize;
     n->sq = g_new0(NvmeSQueue *, n->max_ioqpairs + 1);
     n->cq = g_new0(NvmeCQueue *, n->max_ioqpairs + 1);
     n->temperature = NVME_TEMPERATURE;
@@ -6346,13 +6408,34 @@ static void nvme_init_state(NvmeCtrl *n)
     n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
     n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
 
-    list->numcntl = cpu_to_le16(n->params.sriov_max_vfs);
-    for (i = 0; i < n->params.sriov_max_vfs; i++) {
+    list->numcntl = cpu_to_le16(max_vfs);
+    for (i = 0; i < max_vfs; i++) {
         sctrl = &list->sec[i];
         sctrl->pcid = cpu_to_le16(n->cntlid);
     }
 
     cap->cntlid = cpu_to_le16(n->cntlid);
+    cap->crt = NVME_CRT_VQ | NVME_CRT_VI;
+
+    cap->vqfrt = cpu_to_le32(total_vq);
+    cap->vqrfap = cpu_to_le32(total_vq);
+    if (pci_is_vf(&n->parent_obj)) {
+        cap->vqprt = cpu_to_le16(n->conf_ioqpairs + 1);
+    } else {
+        cap->vqprt = cpu_to_le16(n->params.max_ioqpairs + 1);
+        cap->vqfrsm = cpu_to_le16(n->params.sriov_max_vq_per_vf);
+        cap->vqgran = cpu_to_le16(NVME_VF_RES_GRANULARITY);
+    }
+
+    cap->vifrt = cpu_to_le32(total_vi);
+    cap->virfap = cpu_to_le32(total_vi);
+    if (pci_is_vf(&n->parent_obj)) {
+        cap->viprt = cpu_to_le16(n->conf_msix_qsize);
+    } else {
+        cap->viprt = cpu_to_le16(n->params.msix_qsize);
+        cap->vifrsm = cpu_to_le16(n->params.sriov_max_vi_per_vf);
+        cap->vigran = cpu_to_le16(NVME_VF_RES_GRANULARITY);
+    }
 }
 
 static void nvme_init_cmb(NvmeCtrl *n, PCIDevice *pci_dev)
@@ -6448,11 +6531,13 @@ static void nvme_update_vfs(PCIDevice *pci_dev, uint16_t prev_num_vfs,
     }
 }
 
-static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset,
-                            uint64_t bar_size)
+static void nvme_init_sriov(NvmeCtrl *n, PCIDevice *pci_dev, uint16_t offset)
 {
     uint16_t vf_dev_id = n->params.use_intel_id ?
                          PCI_DEVICE_ID_INTEL_NVME : PCI_DEVICE_ID_REDHAT_NVME;
+    uint64_t bar_size = nvme_bar_size(n->params.sriov_max_vq_per_vf,
+                                      n->params.sriov_max_vi_per_vf,
+                                      NULL, NULL);
 
     pcie_sriov_pf_init(pci_dev, offset, "nvme", vf_dev_id,
                        n->params.sriov_max_vfs, n->params.sriov_max_vfs,
@@ -6550,7 +6635,7 @@ static int nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp)
     }
 
     if (!pci_is_vf(pci_dev) && n->params.sriov_max_vfs) {
-        nvme_init_sriov(n, pci_dev, 0x120, bar_size);
+        nvme_init_sriov(n, pci_dev, 0x120);
     }
 
     return 0;
@@ -6574,6 +6659,7 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
     NvmeIdCtrl *id = &n->id_ctrl;
     uint8_t *pci_conf = pci_dev->config;
     uint64_t cap = ldq_le_p(&n->bar.cap);
+    NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
 
     id->vid = cpu_to_le16(pci_get_word(pci_conf + PCI_VENDOR_ID));
     id->ssvid = cpu_to_le16(pci_get_word(pci_conf + PCI_SUBSYSTEM_VENDOR_ID));
@@ -6665,6 +6751,10 @@ static void nvme_init_ctrl(NvmeCtrl *n, PCIDevice *pci_dev)
 
     stl_le_p(&n->bar.vs, NVME_SPEC_VER);
     n->bar.intmc = n->bar.intms = 0;
+
+    if (pci_is_vf(&n->parent_obj) && !sctrl->scs) {
+        stl_le_p(&n->bar.csts, NVME_CSTS_FAILED);
+    }
 }
 
 static int nvme_init_subsys(NvmeCtrl *n, Error **errp)
@@ -6804,6 +6894,8 @@ static Property nvme_props[] = {
     DEFINE_PROP_BOOL("zoned.auto_transition", NvmeCtrl,
                      params.auto_transition_zones, true),
     DEFINE_PROP_UINT8("sriov_max_vfs", NvmeCtrl, params.sriov_max_vfs, 0),
+    DEFINE_PROP_UINT8("sriov_max_vi_per_vf", NvmeCtrl, params.sriov_max_vi_per_vf, 0),
+    DEFINE_PROP_UINT8("sriov_max_vq_per_vf", NvmeCtrl, params.sriov_max_vq_per_vf, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index a8eded4713..43609c979a 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -393,6 +393,8 @@ typedef struct NvmeParams {
     bool     auto_transition_zones;
     bool     legacy_cmb;
     uint8_t  sriov_max_vfs;
+    uint8_t  sriov_max_vq_per_vf;
+    uint8_t  sriov_max_vi_per_vf;
 } NvmeParams;
 
 typedef struct NvmeCtrl {
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 96595ea8f1..26672d0a31 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1488,6 +1488,11 @@ typedef struct QEMU_PACKED NvmePriCtrlCap {
     uint8_t     rsvd80[4016];
 } NvmePriCtrlCap;
 
+typedef enum NvmePriCtrlCapCrt {
+    NVME_CRT_VQ             = 1 << 0,
+    NVME_CRT_VI             = 1 << 1,
+} NvmePriCtrlCapCrt;
+
 typedef struct QEMU_PACKED NvmeSecCtrlEntry {
     uint16_t    scid;
     uint16_t    pcid;
-- 
2.25.1



Thread overview: 55+ messages
2021-10-07 16:23 [PATCH 00/15] hw/nvme: SR-IOV with Virtualization Enhancements Lukasz Maniak
2021-10-07 16:23 ` [PATCH 01/15] pcie: Set default and supported MaxReadReq to 512 Lukasz Maniak
2021-10-07 22:12   ` Michael S. Tsirkin
2021-10-26 14:36     ` Lukasz Maniak
2021-10-26 15:37       ` Knut Omang
2021-10-07 16:23 ` [PATCH 02/15] pcie: Add support for Single Root I/O Virtualization (SR/IOV) Lukasz Maniak
2021-10-07 16:23 ` [PATCH 03/15] pcie: Add some SR/IOV API documentation in docs/pcie_sriov.txt Lukasz Maniak
2021-10-07 16:23 ` [PATCH 04/15] pcie: Add callback preceding SR-IOV VFs update Lukasz Maniak
2021-10-12  7:25   ` Michael S. Tsirkin
2021-10-12 16:06     ` Lukasz Maniak
2021-10-13  9:10       ` Michael S. Tsirkin
2021-10-15 16:24         ` Lukasz Maniak
2021-10-15 17:30           ` Michael S. Tsirkin
2021-10-20 13:30             ` Lukasz Maniak
2021-10-07 16:23 ` [PATCH 05/15] hw/nvme: Add support for SR-IOV Lukasz Maniak
2021-10-20 19:07   ` Klaus Jensen
2021-10-21 14:33     ` Lukasz Maniak
2021-11-02 14:33   ` Klaus Jensen
2021-11-02 17:33     ` Lukasz Maniak
2021-11-04 14:30       ` Lukasz Maniak
2021-11-08  7:56         ` Klaus Jensen
2021-11-10 13:42           ` Lukasz Maniak
2021-11-10 16:39             ` Klaus Jensen
2021-10-07 16:23 ` [PATCH 06/15] hw/nvme: Add support for Primary Controller Capabilities Lukasz Maniak
2021-11-02 14:34   ` Klaus Jensen
2021-10-07 16:23 ` [PATCH 07/15] hw/nvme: Add support for Secondary Controller List Lukasz Maniak
2021-11-02 14:35   ` Klaus Jensen
2021-10-07 16:23 ` [PATCH 08/15] pcie: Add 1.2 version token for the Power Management Capability Lukasz Maniak
2021-10-07 16:24 ` [PATCH 09/15] hw/nvme: Implement the Function Level Reset Lukasz Maniak
2021-11-02 14:35   ` Klaus Jensen
2021-10-07 16:24 ` [PATCH 10/15] hw/nvme: Make max_ioqpairs and msix_qsize configurable in runtime Lukasz Maniak
2021-10-18 10:06   ` Philippe Mathieu-Daudé
2021-10-18 15:53     ` Łukasz Gieryk
2021-10-20 19:06   ` Klaus Jensen
2021-10-21 13:40     ` Łukasz Gieryk
2021-11-03 12:11       ` Klaus Jensen
2021-10-20 19:26   ` Klaus Jensen
2021-10-07 16:24 ` [PATCH 11/15] hw/nvme: Calculate BAR atributes in a function Lukasz Maniak
2021-10-18  9:52   ` Philippe Mathieu-Daudé
2021-10-07 16:24 ` Lukasz Maniak [this message]
2021-11-03 12:07   ` [PATCH 12/15] hw/nvme: Initialize capability structures for primary/secondary controllers Klaus Jensen
2021-11-04 15:48     ` Łukasz Gieryk
2021-11-05  8:46       ` Łukasz Gieryk
2021-11-05 14:04         ` Łukasz Gieryk
2021-11-08  8:25           ` Klaus Jensen
2021-11-08 13:57             ` Łukasz Gieryk
2021-11-09 12:22               ` Klaus Jensen
2021-10-07 16:24 ` [PATCH 13/15] pcie: Add helpers to the SR/IOV API Lukasz Maniak
2021-10-26 16:57   ` Knut Omang
2021-10-07 16:24 ` [PATCH 14/15] hw/nvme: Add support for the Virtualization Management command Lukasz Maniak
2021-10-07 16:24 ` [PATCH 15/15] docs: Add documentation for SR-IOV and Virtualization Enhancements Lukasz Maniak
2021-10-08  6:31 ` [PATCH 00/15] hw/nvme: SR-IOV with " Klaus Jensen
2021-10-26 18:20 ` Klaus Jensen
2021-10-27 16:49   ` Lukasz Maniak
2021-11-02  7:24     ` Klaus Jensen
