From: Klaus Jensen <its@irrelevant.dk>
To: Peter Maydell <peter.maydell@linaro.org>, qemu-devel@nongnu.org
Cc: "Stefan Hajnoczi" <stefanha@redhat.com>,
"Igor Mammedov" <imammedo@redhat.com>,
"Ani Sinha" <ani@anisinha.ca>, "Hanna Reitz" <hreitz@redhat.com>,
"Kevin Wolf" <kwolf@redhat.com>,
"Michael S. Tsirkin" <mst@redhat.com>,
"Klaus Jensen" <its@irrelevant.dk>,
qemu-block@nongnu.org, "Keith Busch" <kbusch@kernel.org>,
"Fam Zheng" <fam@euphon.net>,
"Philippe Mathieu-Daudé" <f4bug@amsat.org>,
"Marcel Apfelbaum" <marcel.apfelbaum@gmail.com>,
"Lukasz Maniak" <lukasz.maniak@linux.intel.com>,
"Klaus Jensen" <k.jensen@samsung.com>
Subject: [PULL 03/15] hw/nvme: Add support for Secondary Controller List
Date: Thu, 23 Jun 2022 23:34:30 +0200
Message-ID: <20220623213442.67789-4-its@irrelevant.dk>
In-Reply-To: <20220623213442.67789-1-its@irrelevant.dk>
From: Lukasz Maniak <lukasz.maniak@linux.intel.com>

Introduce handling for the Secondary Controller List (Identify command
with CNS value of 15h).

Secondary controller IDs are unique within the subsystem, so the
subsystem reserves them, up to the number given by sriov_max_vfs, when
the primary controller is initialized.

ID reservation requires an intermediate controller slot state, so a
reserved slot holds the sentinel address 0xFFFF instead of a controller
pointer.

A secondary controller is in the reserved state when it has no virtual
function assigned but its primary controller is realized.

Secondary controller reservations are released (the slots reset to
NULL) when their primary controller is unregistered.
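
A rough sketch of the slot lifecycle this introduces (states and
transitions inferred from the diff below; illustrative, not text from
the patch):

    /*
     * Each subsys->ctrls[cntlid] slot is in one of three states:
     *
     *   NULL              - slot free, cntlid unused
     *   SUBSYS_SLOT_RSVD  - cntlid reserved for a secondary controller
     *                       whose VF is not realized; sentinel 0xFFFF
     *   valid NvmeCtrl *  - controller registered and usable
     *
     * PF realize:   free slots -> SUBSYS_SLOT_RSVD (one per VF)
     * VF realize:   RSVD slot  -> NvmeCtrl *
     * VF unrealize: NvmeCtrl * -> SUBSYS_SLOT_RSVD
     * PF unrealize: RSVD slots -> NULL
     */
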
Signed-off-by: Lukasz Maniak <lukasz.maniak@linux.intel.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Klaus Jensen <k.jensen@samsung.com>
Signed-off-by: Klaus Jensen <k.jensen@samsung.com>
---
hw/nvme/ctrl.c | 35 +++++++++++++++++++++
hw/nvme/ns.c | 2 +-
hw/nvme/nvme.h | 18 +++++++++++
hw/nvme/subsys.c | 75 ++++++++++++++++++++++++++++++++++++++------
hw/nvme/trace-events | 1 +
include/block/nvme.h | 20 ++++++++++++
6 files changed, 141 insertions(+), 10 deletions(-)
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index d004b484092c..b031212758cb 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -4812,6 +4812,29 @@ static uint16_t nvme_identify_pri_ctrl_cap(NvmeCtrl *n, NvmeRequest *req)
sizeof(NvmePriCtrlCap), req);
}
+static uint16_t nvme_identify_sec_ctrl_list(NvmeCtrl *n, NvmeRequest *req)
+{
+ NvmeIdentify *c = (NvmeIdentify *)&req->cmd;
+ uint16_t pri_ctrl_id = le16_to_cpu(n->pri_ctrl_cap.cntlid);
+ uint16_t min_id = le16_to_cpu(c->ctrlid);
+ uint8_t num_sec_ctrl = n->sec_ctrl_list.numcntl;
+ NvmeSecCtrlList list = {0};
+ uint8_t i;
+
+ for (i = 0; i < num_sec_ctrl; i++) {
+ if (n->sec_ctrl_list.sec[i].scid >= min_id) {
+ list.numcntl = num_sec_ctrl - i;
+ memcpy(&list.sec, n->sec_ctrl_list.sec + i,
+ list.numcntl * sizeof(NvmeSecCtrlEntry));
+ break;
+ }
+ }
+
+ trace_pci_nvme_identify_sec_ctrl_list(pri_ctrl_id, list.numcntl);
+
+ return nvme_c2h(n, (uint8_t *)&list, sizeof(list), req);
+}
+
static uint16_t nvme_identify_ns_csi(NvmeCtrl *n, NvmeRequest *req,
bool active)
{
@@ -5030,6 +5053,8 @@ static uint16_t nvme_identify(NvmeCtrl *n, NvmeRequest *req)
return nvme_identify_ctrl_list(n, req, false);
case NVME_ID_CNS_PRIMARY_CTRL_CAP:
return nvme_identify_pri_ctrl_cap(n, req);
+ case NVME_ID_CNS_SECONDARY_CTRL_LIST:
+ return nvme_identify_sec_ctrl_list(n, req);
case NVME_ID_CNS_CS_NS:
return nvme_identify_ns_csi(n, req, true);
case NVME_ID_CNS_CS_NS_PRESENT:
@@ -6622,6 +6647,9 @@ static void nvme_check_constraints(NvmeCtrl *n, Error **errp)
static void nvme_init_state(NvmeCtrl *n)
{
NvmePriCtrlCap *cap = &n->pri_ctrl_cap;
+ NvmeSecCtrlList *list = &n->sec_ctrl_list;
+ NvmeSecCtrlEntry *sctrl;
+ int i;
/* add one to max_ioqpairs to account for the admin queue pair */
n->reg_size = pow2ceil(sizeof(NvmeBar) +
@@ -6633,6 +6661,13 @@ static void nvme_init_state(NvmeCtrl *n)
n->starttime_ms = qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL);
n->aer_reqs = g_new0(NvmeRequest *, n->params.aerl + 1);
+ list->numcntl = cpu_to_le16(n->params.sriov_max_vfs);
+ for (i = 0; i < n->params.sriov_max_vfs; i++) {
+ sctrl = &list->sec[i];
+ sctrl->pcid = cpu_to_le16(n->cntlid);
+ sctrl->vfn = cpu_to_le16(i + 1);
+ }
+
cap->cntlid = cpu_to_le16(n->cntlid);
}
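
The lookup above follows the spec semantics for CNS 15h: the returned
list begins with the first entry whose Secondary Controller Identifier
(SCID) is greater than or equal to the Controller Identifier in CDW10,
which works because the entries are kept in ascending SCID order. A
stand-alone sketch of that filtering rule (illustrative names, not QEMU
code):

    #include <stdint.h>
    #include <string.h>

    struct sec_entry { uint16_t scid; /* other fields omitted */ };

    /* Copy out the tail of 'in' starting at the first entry with
     * scid >= min_id; entries are sorted by ascending scid. */
    static size_t filter_sec_list(const struct sec_entry *in, size_t n,
                                  uint16_t min_id, struct sec_entry *out)
    {
        size_t i;

        for (i = 0; i < n; i++) {
            if (in[i].scid >= min_id) {
                memcpy(out, &in[i], (n - i) * sizeof(*in));
                return n - i;
            }
        }

        return 0;
    }

On a Linux host, this Identify can typically be exercised with nvme-cli
via something like "nvme list-secondary /dev/nvme0" (the exact command
name depends on the nvme-cli version).
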
diff --git a/hw/nvme/ns.c b/hw/nvme/ns.c
index 1b9c9d11567f..870c3ca1a2f0 100644
--- a/hw/nvme/ns.c
+++ b/hw/nvme/ns.c
@@ -597,7 +597,7 @@ static void nvme_ns_realize(DeviceState *dev, Error **errp)
for (i = 0; i < ARRAY_SIZE(subsys->ctrls); i++) {
NvmeCtrl *ctrl = subsys->ctrls[i];
- if (ctrl) {
+ if (ctrl && ctrl != SUBSYS_SLOT_RSVD) {
nvme_attach_ns(ctrl, ns);
}
}
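
With reserved slots now holding the non-NULL sentinel SUBSYS_SLOT_RSVD,
any loop over subsys->ctrls must distinguish the sentinel from a live
NvmeCtrl pointer before dereferencing it, which is what the added check
does here.
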
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 6ef458d3bc24..b66421cdf9e8 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -43,6 +43,7 @@ typedef struct NvmeBus {
#define TYPE_NVME_SUBSYS "nvme-subsys"
#define NVME_SUBSYS(obj) \
OBJECT_CHECK(NvmeSubsystem, (obj), TYPE_NVME_SUBSYS)
+#define SUBSYS_SLOT_RSVD (void *)0xFFFF
typedef struct NvmeSubsystem {
DeviceState parent_obj;
@@ -68,6 +69,10 @@ static inline NvmeCtrl *nvme_subsys_ctrl(NvmeSubsystem *subsys,
return NULL;
}
+ if (subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD) {
+ return NULL;
+ }
+
return subsys->ctrls[cntlid];
}
@@ -480,6 +485,7 @@ typedef struct NvmeCtrl {
} features;
NvmePriCtrlCap pri_ctrl_cap;
+ NvmeSecCtrlList sec_ctrl_list;
} NvmeCtrl;
static inline NvmeNamespace *nvme_ns(NvmeCtrl *n, uint32_t nsid)
@@ -514,6 +520,18 @@ static inline uint16_t nvme_cid(NvmeRequest *req)
return le16_to_cpu(req->cqe.cid);
}
+static inline NvmeSecCtrlEntry *nvme_sctrl(NvmeCtrl *n)
+{
+ PCIDevice *pci_dev = &n->parent_obj;
+ NvmeCtrl *pf = NVME(pcie_sriov_get_pf(pci_dev));
+
+ if (pci_is_vf(pci_dev)) {
+ return &pf->sec_ctrl_list.sec[pcie_sriov_vf_number(pci_dev)];
+ }
+
+ return NULL;
+}
+
void nvme_attach_ns(NvmeCtrl *n, NvmeNamespace *ns);
uint16_t nvme_bounce_data(NvmeCtrl *n, void *ptr, uint32_t len,
NvmeTxDirection dir, NvmeRequest *req);
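
For nvme_sctrl() above, the index/VF-number relationship is set up in
nvme_init_state(): sec[i].vfn = i + 1, i.e. the per-entry VF Number is
1-based while pcie_sriov_vf_number() is 0-based. Sketched (illustrative
comment, not part of the patch):

    /*
     * For a virtual function:
     *   idx   = pcie_sriov_vf_number(pci_dev);    // 0-based
     *   entry = &pf->sec_ctrl_list.sec[idx];
     *   le16_to_cpu(entry->vfn) == idx + 1        // VFN, 1-based
     */
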
diff --git a/hw/nvme/subsys.c b/hw/nvme/subsys.c
index 691a90d20947..9d2643678b53 100644
--- a/hw/nvme/subsys.c
+++ b/hw/nvme/subsys.c
@@ -11,20 +11,71 @@
#include "nvme.h"
+static int nvme_subsys_reserve_cntlids(NvmeCtrl *n, int start, int num)
+{
+ NvmeSubsystem *subsys = n->subsys;
+ NvmeSecCtrlList *list = &n->sec_ctrl_list;
+ NvmeSecCtrlEntry *sctrl;
+ int i, cnt = 0;
+
+ for (i = start; i < ARRAY_SIZE(subsys->ctrls) && cnt < num; i++) {
+ if (!subsys->ctrls[i]) {
+ sctrl = &list->sec[cnt];
+ sctrl->scid = cpu_to_le16(i);
+ subsys->ctrls[i] = SUBSYS_SLOT_RSVD;
+ cnt++;
+ }
+ }
+
+ return cnt;
+}
+
+static void nvme_subsys_unreserve_cntlids(NvmeCtrl *n)
+{
+ NvmeSubsystem *subsys = n->subsys;
+ NvmeSecCtrlList *list = &n->sec_ctrl_list;
+ NvmeSecCtrlEntry *sctrl;
+ int i, cntlid;
+
+ for (i = 0; i < n->params.sriov_max_vfs; i++) {
+ sctrl = &list->sec[i];
+ cntlid = le16_to_cpu(sctrl->scid);
+
+ if (cntlid) {
+ assert(subsys->ctrls[cntlid] == SUBSYS_SLOT_RSVD);
+ subsys->ctrls[cntlid] = NULL;
+ sctrl->scid = 0;
+ }
+ }
+}
+
int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
{
NvmeSubsystem *subsys = n->subsys;
- int cntlid, nsid;
+ NvmeSecCtrlEntry *sctrl = nvme_sctrl(n);
+ int cntlid, nsid, num_rsvd, num_vfs = n->params.sriov_max_vfs;
- for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) {
- if (!subsys->ctrls[cntlid]) {
- break;
+ if (pci_is_vf(&n->parent_obj)) {
+ cntlid = le16_to_cpu(sctrl->scid);
+ } else {
+ for (cntlid = 0; cntlid < ARRAY_SIZE(subsys->ctrls); cntlid++) {
+ if (!subsys->ctrls[cntlid]) {
+ break;
+ }
}
- }
- if (cntlid == ARRAY_SIZE(subsys->ctrls)) {
- error_setg(errp, "no more free controller id");
- return -1;
+ if (cntlid == ARRAY_SIZE(subsys->ctrls)) {
+ error_setg(errp, "no more free controller id");
+ return -1;
+ }
+
+ num_rsvd = nvme_subsys_reserve_cntlids(n, cntlid + 1, num_vfs);
+ if (num_rsvd != num_vfs) {
+ nvme_subsys_unreserve_cntlids(n);
+ error_setg(errp,
+ "no more free controller ids for secondary controllers");
+ return -1;
+ }
}
if (!subsys->serial) {
@@ -48,7 +99,13 @@ int nvme_subsys_register_ctrl(NvmeCtrl *n, Error **errp)
void nvme_subsys_unregister_ctrl(NvmeSubsystem *subsys, NvmeCtrl *n)
{
- subsys->ctrls[n->cntlid] = NULL;
+ if (pci_is_vf(&n->parent_obj)) {
+ subsys->ctrls[n->cntlid] = SUBSYS_SLOT_RSVD;
+ } else {
+ subsys->ctrls[n->cntlid] = NULL;
+ nvme_subsys_unreserve_cntlids(n);
+ }
+
n->cntlid = -1;
}
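
The reserve/unreserve pair above is all-or-nothing: the PF claims its
own cntlid first, then one additional cntlid per potential VF, and any
partial reservation is rolled back on failure. A stand-alone sketch of
that scheme (illustrative, not QEMU code; here the rollback is folded
into the reserving function itself):

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    #define NUM_SLOTS 8
    #define SLOT_RSVD ((void *)0xFFFF)

    static void *slots[NUM_SLOTS]; /* NULL = free */

    /* Reserve 'num' free slots starting at 'start'; record their ids.
     * On shortage, roll back and fail, leaving the array untouched. */
    static int reserve_ids(size_t start, size_t num, uint16_t *ids)
    {
        size_t i, cnt = 0;

        for (i = start; i < NUM_SLOTS && cnt < num; i++) {
            if (!slots[i]) {
                slots[i] = SLOT_RSVD;
                ids[cnt++] = (uint16_t)i;
            }
        }

        if (cnt < num) {
            while (cnt--) { /* roll back partial reservation */
                assert(slots[ids[cnt]] == SLOT_RSVD);
                slots[ids[cnt]] = NULL;
            }
            return -1;
        }

        return 0;
    }
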
diff --git a/hw/nvme/trace-events b/hw/nvme/trace-events
index 1834b17cf214..889bbb3101e4 100644
--- a/hw/nvme/trace-events
+++ b/hw/nvme/trace-events
@@ -57,6 +57,7 @@ pci_nvme_identify_ctrl_csi(uint8_t csi) "identify controller, csi=0x%"PRIx8""
pci_nvme_identify_ns(uint32_t ns) "nsid %"PRIu32""
pci_nvme_identify_ctrl_list(uint8_t cns, uint16_t cntid) "cns 0x%"PRIx8" cntid %"PRIu16""
pci_nvme_identify_pri_ctrl_cap(uint16_t cntlid) "identify primary controller capabilities cntlid=%"PRIu16""
+pci_nvme_identify_sec_ctrl_list(uint16_t cntlid, uint8_t numcntl) "identify secondary controller list cntlid=%"PRIu16" numcntl=%"PRIu8""
pci_nvme_identify_ns_csi(uint32_t ns, uint8_t csi) "nsid=%"PRIu32", csi=0x%"PRIx8""
pci_nvme_identify_nslist(uint32_t ns) "nsid %"PRIu32""
pci_nvme_identify_nslist_csi(uint16_t ns, uint8_t csi) "nsid=%"PRIu16", csi=0x%"PRIx8""
diff --git a/include/block/nvme.h b/include/block/nvme.h
index 524a04fb94ec..94efd32578cb 100644
--- a/include/block/nvme.h
+++ b/include/block/nvme.h
@@ -1034,6 +1034,7 @@ enum NvmeIdCns {
NVME_ID_CNS_NS_ATTACHED_CTRL_LIST = 0x12,
NVME_ID_CNS_CTRL_LIST = 0x13,
NVME_ID_CNS_PRIMARY_CTRL_CAP = 0x14,
+ NVME_ID_CNS_SECONDARY_CTRL_LIST = 0x15,
NVME_ID_CNS_CS_NS_PRESENT_LIST = 0x1a,
NVME_ID_CNS_CS_NS_PRESENT = 0x1b,
NVME_ID_CNS_IO_COMMAND_SET = 0x1c,
@@ -1575,6 +1576,23 @@ typedef struct QEMU_PACKED NvmePriCtrlCap {
uint8_t rsvd80[4016];
} NvmePriCtrlCap;
+typedef struct QEMU_PACKED NvmeSecCtrlEntry {
+ uint16_t scid;
+ uint16_t pcid;
+ uint8_t scs;
+ uint8_t rsvd5[3];
+ uint16_t vfn;
+ uint16_t nvq;
+ uint16_t nvi;
+ uint8_t rsvd14[18];
+} NvmeSecCtrlEntry;
+
+typedef struct QEMU_PACKED NvmeSecCtrlList {
+ uint8_t numcntl;
+ uint8_t rsvd1[31];
+ NvmeSecCtrlEntry sec[127];
+} NvmeSecCtrlList;
+
static inline void _nvme_check_size(void)
{
QEMU_BUILD_BUG_ON(sizeof(NvmeBar) != 4096);
@@ -1611,5 +1629,7 @@ static inline void _nvme_check_size(void)
QEMU_BUILD_BUG_ON(sizeof(NvmeZoneDescr) != 64);
QEMU_BUILD_BUG_ON(sizeof(NvmeDifTuple) != 16);
QEMU_BUILD_BUG_ON(sizeof(NvmePriCtrlCap) != 4096);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlEntry) != 32);
+ QEMU_BUILD_BUG_ON(sizeof(NvmeSecCtrlList) != 4096);
}
#endif
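
The two new build-time checks above follow directly from the declared
layouts: an entry is 2 + 2 + 1 + 3 + 2 + 2 + 2 + 18 = 32 bytes, and the
list is 1 + 31 + 127 * 32 = 4096 bytes, i.e. exactly one page-sized
Identify data structure.
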
--
2.36.1
Thread overview: 17+ messages
2022-06-23 21:34 [PULL 00/15] hw/nvme updates Klaus Jensen
2022-06-23 21:34 ` [PULL 01/15] hw/nvme: Add support for SR-IOV Klaus Jensen
2022-06-23 21:34 ` [PULL 02/15] hw/nvme: Add support for Primary Controller Capabilities Klaus Jensen
2022-06-23 21:34 ` [PULL 03/15] hw/nvme: Add support for Secondary Controller List Klaus Jensen [this message]
2022-06-23 21:34 ` [PULL 04/15] hw/nvme: Implement the Function Level Reset Klaus Jensen
2022-06-23 21:34 ` [PULL 05/15] hw/nvme: Make max_ioqpairs and msix_qsize configurable in runtime Klaus Jensen
2022-06-23 21:34 ` [PULL 06/15] hw/nvme: Remove reg_size variable and update BAR0 size calculation Klaus Jensen
2022-06-23 21:34 ` [PULL 07/15] hw/nvme: Calculate BAR attributes in a function Klaus Jensen
2022-06-23 21:34 ` [PULL 08/15] hw/nvme: Initialize capability structures for primary/secondary controllers Klaus Jensen
2022-06-23 21:34 ` [PULL 09/15] hw/nvme: Add support for the Virtualization Management command Klaus Jensen
2022-06-23 21:34 ` [PULL 10/15] docs: Add documentation for SR-IOV and Virtualization Enhancements Klaus Jensen
2022-06-23 21:34 ` [PULL 11/15] hw/nvme: Update the initialization place for the AER queue Klaus Jensen
2022-06-23 21:34 ` [PULL 12/15] hw/acpi: Make the PCI hot-plug aware of SR-IOV Klaus Jensen
2022-06-23 21:34 ` [PULL 13/15] hw/nvme: clean up CC register write logic Klaus Jensen
2022-06-23 21:34 ` [PULL 14/15] Revert "hw/block/nvme: add support for sgl bit bucket descriptor" Klaus Jensen
2022-06-23 21:34 ` [PULL 15/15] hw/nvme: clear aen mask on reset Klaus Jensen
2022-06-23 23:28 ` [PULL 00/15] hw/nvme updates Richard Henderson