From: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
To: Yuval Shaia <yuval.shaia@oracle.com>,
dmitry.fleytman@gmail.com, jasowang@redhat.com,
eblake@redhat.com, armbru@redhat.com, pbonzini@redhat.com,
qemu-devel@nongnu.org, shamir.rabinovitch@oracle.com,
cohuck@redhat.com
Subject: Re: [Qemu-devel] [PATCH v5 11/24] hw/pvrdma: Add support to allow guest to configure GID table
Date: Sun, 25 Nov 2018 09:29:19 +0200 [thread overview]
Message-ID: <3285f590-f388-dab4-b87c-749e65d460c6@gmail.com> (raw)
In-Reply-To: <20181122121402.13764-12-yuval.shaia@oracle.com>
On 11/22/18 2:13 PM, Yuval Shaia wrote:
> The control over the RDMA device's GID table is done by updating the
> device's Ethernet function addresses.
> Usually the first GID entry is determined by the MAC address, the second
> by the first IPv6 address and the third by the IPv4 address. Other
> entries can be added by adding more IP addresses. The opposite is the
> same, i.e. whenever an address is removed, the corresponding GID entry
> is removed.
>
> The process is done by the network and RDMA stacks. Whenever an address
> is added the ib_core driver is notified and calls the device driver
> add_gid function which in turn updates the device.
>
> To support this in pvrdma device we need to hook into the create_bind
> and destroy_bind HW commands triggered by pvrdma driver in guest.
> Whenever a change is made to the pvrdma port's GID table a special QMP
> message is sent to be processed by libvirt to update the address of the
> backend Ethernet device.
>
> Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com>
> ---
> hw/rdma/rdma_backend.c | 336 +++++++++++++++++++++++++-----------
> hw/rdma/rdma_backend.h | 22 +--
> hw/rdma/rdma_backend_defs.h | 11 +-
> hw/rdma/rdma_rm.c | 104 ++++++++++-
> hw/rdma/rdma_rm.h | 17 +-
> hw/rdma/rdma_rm_defs.h | 9 +-
> hw/rdma/rdma_utils.h | 15 ++
> hw/rdma/vmw/pvrdma.h | 2 +-
> hw/rdma/vmw/pvrdma_cmd.c | 55 +++---
> hw/rdma/vmw/pvrdma_main.c | 25 +--
> hw/rdma/vmw/pvrdma_qp_ops.c | 20 +++
> 11 files changed, 453 insertions(+), 163 deletions(-)
>
> diff --git a/hw/rdma/rdma_backend.c b/hw/rdma/rdma_backend.c
> index 7c220a5798..8b5a111bf4 100644
> --- a/hw/rdma/rdma_backend.c
> +++ b/hw/rdma/rdma_backend.c
> @@ -15,15 +15,18 @@
>
> #include "qemu/osdep.h"
> #include "qemu/error-report.h"
> +#include "sysemu/sysemu.h"
> #include "qapi/error.h"
> #include "qapi/qmp/qlist.h"
> #include "qapi/qmp/qnum.h"
> +#include "qapi/qapi-events-rdma.h"
>
> #include <infiniband/verbs.h>
> #include <infiniband/umad_types.h>
> #include <infiniband/umad.h>
> #include <rdma/rdma_user_cm.h>
>
> +#include "contrib/rdmacm-mux/rdmacm-mux.h"
> #include "trace.h"
> #include "rdma_utils.h"
> #include "rdma_rm.h"
> @@ -160,6 +163,71 @@ static void *comp_handler_thread(void *arg)
> return NULL;
> }
>
> +static inline void disable_rdmacm_mux_async(RdmaBackendDev *backend_dev)
> +{
> + atomic_set(&backend_dev->rdmacm_mux.can_receive, 0);
> +}
> +
> +static inline void enable_rdmacm_mux_async(RdmaBackendDev *backend_dev)
> +{
> + atomic_set(&backend_dev->rdmacm_mux.can_receive, sizeof(RdmaCmMuxMsg));
Why is sizeof used to set the can_receive field?
> +}
> +
> +static inline int rdmacm_mux_can_process_async(RdmaBackendDev *backend_dev)
> +{
> + return atomic_read(&backend_dev->rdmacm_mux.can_receive);
> +}
> +
> +static int check_mux_op_status(CharBackend *mad_chr_be)
> +{
> + RdmaCmMuxMsg msg = {0};
> + int ret;
> +
> + pr_dbg("Reading response\n");
> + ret = qemu_chr_fe_read_all(mad_chr_be, (uint8_t *)&msg, sizeof(msg));
> + if (ret != sizeof(msg)) {
> + pr_dbg("Invalid message size %d, expecting %ld\n", ret, sizeof(msg));
> + return -EIO;
> + }
> +
> + if (msg.hdr.msg_type != RDMACM_MUX_MSG_TYPE_RESP) {
> + pr_dbg("Invalid message type %d\n", msg.hdr.msg_type);
> + return -EIO;
> + }
> +
> + if (msg.hdr.err_code != RDMACM_MUX_ERR_CODE_OK) {
> + pr_dbg("Operation failed in mux, error code %d\n", msg.hdr.err_code);
> + return -EIO;
> + }
> +
> + return 0;
> +}
> +
> +static int exec_rdmacm_mux_req(RdmaBackendDev *backend_dev, RdmaCmMuxMsg *msg)
> +{
> + int rc = 0;
> +
> + pr_dbg("Executing request %d\n", msg->hdr.op_code);
> +
> + msg->hdr.msg_type = RDMACM_MUX_MSG_TYPE_REQ;
> + disable_rdmacm_mux_async(backend_dev);
> + rc = qemu_chr_fe_write(backend_dev->rdmacm_mux.chr_be,
> + (const uint8_t *)msg, sizeof(*msg));
> + enable_rdmacm_mux_async(backend_dev);
> + if (rc != sizeof(*msg)) {
> + pr_dbg("Fail to send request to rdmacm_mux (rc=%d)\n", rc);
> + return -EIO;
> + }
> +
> + rc = check_mux_op_status(backend_dev->rdmacm_mux.chr_be);
> + if (rc) {
> + pr_dbg("Fail to execute rdmacm_mux request %d (rc=%d)\n",
> + msg->hdr.op_code, rc);
> + }
> +
> + return 0;
> +}
> +
> static void stop_backend_thread(RdmaBackendThread *thread)
> {
> thread->run = false;
> @@ -300,11 +368,11 @@ static int build_host_sge_array(RdmaDeviceResources *rdma_dev_res,
> return 0;
> }
>
> -static int mad_send(RdmaBackendDev *backend_dev, struct ibv_sge *sge,
> - uint32_t num_sge)
> +static int mad_send(RdmaBackendDev *backend_dev, uint8_t sgid_idx,
> + union ibv_gid *sgid, struct ibv_sge *sge, uint32_t num_sge)
> {
> - struct backend_umad umad = {0};
> - char *hdr, *msg;
> + RdmaCmMuxMsg msg = {0};
> + char *hdr, *data;
> int ret;
>
> pr_dbg("num_sge=%d\n", num_sge);
> @@ -313,26 +381,31 @@ static int mad_send(RdmaBackendDev *backend_dev, struct ibv_sge *sge,
> return -EINVAL;
> }
>
> - umad.hdr.length = sge[0].length + sge[1].length;
> - pr_dbg("msg_len=%d\n", umad.hdr.length);
> + msg.hdr.op_code = RDMACM_MUX_OP_CODE_MAD;
> + memcpy(msg.hdr.sgid.raw, sgid->raw, sizeof(msg.hdr.sgid));
>
> - if (umad.hdr.length > sizeof(umad.mad)) {
> + msg.umad_len = sge[0].length + sge[1].length;
> + pr_dbg("umad_len=%d\n", msg.umad_len);
> +
> + if (msg.umad_len > sizeof(msg.umad.mad)) {
> return -ENOMEM;
> }
>
> - umad.hdr.addr.qpn = htobe32(1);
> - umad.hdr.addr.grh_present = 1;
> - umad.hdr.addr.gid_index = backend_dev->backend_gid_idx;
> - memcpy(umad.hdr.addr.gid, backend_dev->gid.raw, sizeof(umad.hdr.addr.gid));
> - umad.hdr.addr.hop_limit = 1;
> + msg.umad.hdr.addr.qpn = htobe32(1);
> + msg.umad.hdr.addr.grh_present = 1;
> + pr_dbg("sgid_idx=%d\n", sgid_idx);
> + pr_dbg("sgid=0x%llx\n", sgid->global.interface_id);
> + msg.umad.hdr.addr.gid_index = sgid_idx;
> + memcpy(msg.umad.hdr.addr.gid, sgid->raw, sizeof(msg.umad.hdr.addr.gid));
> + msg.umad.hdr.addr.hop_limit = 1;
Why is hop_limit set to 1?
>
> hdr = rdma_pci_dma_map(backend_dev->dev, sge[0].addr, sge[0].length);
> if (!hdr) {
> pr_dbg("Fail to map to sge[0]\n");
> return -ENOMEM;
> }
> - msg = rdma_pci_dma_map(backend_dev->dev, sge[1].addr, sge[1].length);
> - if (!msg) {
> + data = rdma_pci_dma_map(backend_dev->dev, sge[1].addr, sge[1].length);
> + if (!data) {
> pr_dbg("Fail to map to sge[1]\n");
> rdma_pci_dma_unmap(backend_dev->dev, hdr, sge[0].length);
> return -ENOMEM;
> @@ -341,25 +414,27 @@ static int mad_send(RdmaBackendDev *backend_dev, struct ibv_sge *sge,
> pr_dbg_buf("mad_hdr", hdr, sge[0].length);
> pr_dbg_buf("mad_data", data, sge[1].length);
>
> - memcpy(&umad.mad[0], hdr, sge[0].length);
> - memcpy(&umad.mad[sge[0].length], msg, sge[1].length);
> + memcpy(&msg.umad.mad[0], hdr, sge[0].length);
> + memcpy(&msg.umad.mad[sge[0].length], data, sge[1].length);
>
> - rdma_pci_dma_unmap(backend_dev->dev, msg, sge[1].length);
> + rdma_pci_dma_unmap(backend_dev->dev, data, sge[1].length);
> rdma_pci_dma_unmap(backend_dev->dev, hdr, sge[0].length);
>
> - ret = qemu_chr_fe_write(backend_dev->mad_chr_be, (const uint8_t *)&umad,
> - sizeof(umad));
> -
> - pr_dbg("qemu_chr_fe_write=%d\n", ret);
> + ret = exec_rdmacm_mux_req(backend_dev, &msg);
> + if (ret) {
> + pr_dbg("Fail to send MAD to rdma_umadmux (%d)\n", ret);
> + return -EIO;
> + }
>
> - return (ret != sizeof(umad));
> + return 0;
> }
>
> void rdma_backend_post_send(RdmaBackendDev *backend_dev,
> RdmaBackendQP *qp, uint8_t qp_type,
> struct ibv_sge *sge, uint32_t num_sge,
> - union ibv_gid *dgid, uint32_t dqpn,
> - uint32_t dqkey, void *ctx)
> + uint8_t sgid_idx, union ibv_gid *sgid,
> + union ibv_gid *dgid, uint32_t dqpn, uint32_t dqkey,
> + void *ctx)
> {
> BackendCtx *bctx;
> struct ibv_sge new_sge[MAX_SGE];
> @@ -373,7 +448,7 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
> comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_QP0, ctx);
> } else if (qp_type == IBV_QPT_GSI) {
> pr_dbg("QP1\n");
> - rc = mad_send(backend_dev, sge, num_sge);
> + rc = mad_send(backend_dev, sgid_idx, sgid, sge, num_sge);
> if (rc) {
> comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_MAD_SEND, ctx);
> } else {
> @@ -409,8 +484,7 @@ void rdma_backend_post_send(RdmaBackendDev *backend_dev,
> }
>
> if (qp_type == IBV_QPT_UD) {
> - wr.wr.ud.ah = create_ah(backend_dev, qp->ibpd,
> - backend_dev->backend_gid_idx, dgid);
> + wr.wr.ud.ah = create_ah(backend_dev, qp->ibpd, sgid_idx, dgid);
> if (!wr.wr.ud.ah) {
> comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_FAIL_BACKEND, ctx);
> goto out_dealloc_cqe_ctx;
> @@ -715,9 +789,9 @@ int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
> }
>
> int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
> - uint8_t qp_type, union ibv_gid *dgid,
> - uint32_t dqpn, uint32_t rq_psn, uint32_t qkey,
> - bool use_qkey)
> + uint8_t qp_type, uint8_t sgid_idx,
> + union ibv_gid *dgid, uint32_t dqpn,
> + uint32_t rq_psn, uint32_t qkey, bool use_qkey)
> {
> struct ibv_qp_attr attr = {0};
> union ibv_gid ibv_gid = {
> @@ -729,13 +803,15 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
> attr.qp_state = IBV_QPS_RTR;
> attr_mask = IBV_QP_STATE;
>
> + qp->sgid_idx = sgid_idx;
> +
> switch (qp_type) {
> case IBV_QPT_RC:
> pr_dbg("dgid=0x%" PRIx64 ",%" PRIx64 "\n",
> be64_to_cpu(ibv_gid.global.subnet_prefix),
> be64_to_cpu(ibv_gid.global.interface_id));
> pr_dbg("dqpn=0x%x\n", dqpn);
> - pr_dbg("sgid_idx=%d\n", backend_dev->backend_gid_idx);
> + pr_dbg("sgid_idx=%d\n", qp->sgid_idx);
> pr_dbg("sport_num=%d\n", backend_dev->port_num);
> pr_dbg("rq_psn=0x%x\n", rq_psn);
>
> @@ -747,7 +823,7 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
> attr.ah_attr.is_global = 1;
> attr.ah_attr.grh.hop_limit = 1;
> attr.ah_attr.grh.dgid = ibv_gid;
> - attr.ah_attr.grh.sgid_index = backend_dev->backend_gid_idx;
> + attr.ah_attr.grh.sgid_index = qp->sgid_idx;
> attr.rq_psn = rq_psn;
>
> attr_mask |= IBV_QP_AV | IBV_QP_PATH_MTU | IBV_QP_DEST_QPN |
> @@ -756,8 +832,8 @@ int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
> break;
>
> case IBV_QPT_UD:
> + pr_dbg("qkey=0x%x\n", qkey);
> if (use_qkey) {
> - pr_dbg("qkey=0x%x\n", qkey);
> attr.qkey = qkey;
> attr_mask |= IBV_QP_QKEY;
> }
> @@ -873,29 +949,19 @@ static inline void build_mad_hdr(struct ibv_grh *grh, union ibv_gid *sgid,
> grh->dgid = *my_gid;
>
> pr_dbg("paylen=%d (net=0x%x)\n", paylen, grh->paylen);
> - pr_dbg("my_gid=0x%llx\n", my_gid->global.interface_id);
> - pr_dbg("gid=0x%llx\n", sgid->global.interface_id);
> + pr_dbg("dgid=0x%llx\n", my_gid->global.interface_id);
> + pr_dbg("sgid=0x%llx\n", sgid->global.interface_id);
> }
>
> -static inline int mad_can_receieve(void *opaque)
> +static void process_incoming_mad_req(RdmaBackendDev *backend_dev,
> + RdmaCmMuxMsg *msg)
> {
> - return sizeof(struct backend_umad);
> -}
> -
> -static void mad_read(void *opaque, const uint8_t *buf, int size)
> -{
> - RdmaBackendDev *backend_dev = (RdmaBackendDev *)opaque;
> QObject *o_ctx_id;
> unsigned long cqe_ctx_id;
> BackendCtx *bctx;
> char *mad;
> - struct backend_umad *umad;
>
> - assert(size != sizeof(umad));
> - umad = (struct backend_umad *)buf;
> -
> - pr_dbg("Got %d bytes\n", size);
> - pr_dbg("umad->hdr.length=%d\n", umad->hdr.length);
> + pr_dbg("umad_len=%d\n", msg->umad_len);
>
> #ifdef PVRDMA_DEBUG
> struct umad_hdr *hdr = (struct umad_hdr *)&msg->umad.mad;
> @@ -925,15 +991,16 @@ static void mad_read(void *opaque, const uint8_t *buf, int size)
>
> mad = rdma_pci_dma_map(backend_dev->dev, bctx->sge.addr,
> bctx->sge.length);
> - if (!mad || bctx->sge.length < umad->hdr.length + MAD_HDR_SIZE) {
> + if (!mad || bctx->sge.length < msg->umad_len + MAD_HDR_SIZE) {
> comp_handler(IBV_WC_GENERAL_ERR, VENDOR_ERR_INV_MAD_BUFF,
> bctx->up_ctx);
> } else {
> + pr_dbg_buf("mad", msg->umad.mad, msg->umad_len);
> memset(mad, 0, bctx->sge.length);
> build_mad_hdr((struct ibv_grh *)mad,
> - (union ibv_gid *)&umad->hdr.addr.gid,
> - &backend_dev->gid, umad->hdr.length);
> - memcpy(&mad[MAD_HDR_SIZE], umad->mad, umad->hdr.length);
> + (union ibv_gid *)&msg->umad.hdr.addr.gid, &msg->hdr.sgid,
> + msg->umad_len);
> + memcpy(&mad[MAD_HDR_SIZE], msg->umad.mad, msg->umad_len);
> rdma_pci_dma_unmap(backend_dev->dev, mad, bctx->sge.length);
>
> comp_handler(IBV_WC_SUCCESS, 0, bctx->up_ctx);
> @@ -943,30 +1010,51 @@ static void mad_read(void *opaque, const uint8_t *buf, int size)
> rdma_rm_dealloc_cqe_ctx(backend_dev->rdma_dev_res, cqe_ctx_id);
> }
>
> -static int mad_init(RdmaBackendDev *backend_dev)
> +static inline int rdmacm_mux_can_receive(void *opaque)
> {
> - struct backend_umad umad = {0};
> - int ret;
> + RdmaBackendDev *backend_dev = (RdmaBackendDev *)opaque;
>
> - if (!qemu_chr_fe_backend_connected(backend_dev->mad_chr_be)) {
> - pr_dbg("Missing chardev for MAD multiplexer\n");
> - return -EIO;
> + return rdmacm_mux_can_process_async(backend_dev);
> +}
> +
> +static void rdmacm_mux_read(void *opaque, const uint8_t *buf, int size)
> +{
> + RdmaBackendDev *backend_dev = (RdmaBackendDev *)opaque;
> + RdmaCmMuxMsg *msg = (RdmaCmMuxMsg *)buf;
> +
> + pr_dbg("Got %d bytes\n", size);
> + pr_dbg("msg_type=%d\n", msg->hdr.msg_type);
> + pr_dbg("op_code=%d\n", msg->hdr.op_code);
> +
> + if (msg->hdr.msg_type != RDMACM_MUX_MSG_TYPE_REQ &&
> + msg->hdr.op_code != RDMACM_MUX_OP_CODE_MAD) {
> + pr_dbg("Error: Not a MAD request, skipping\n");
> + return;
Is there no error flow in rdmacm_mux_read? What happens at the caller site on error?
> }
> + process_incoming_mad_req(backend_dev, msg);
> +}
> +
> +static int mad_init(RdmaBackendDev *backend_dev, CharBackend *mad_chr_be)
> +{
> + int ret;
>
> - qemu_chr_fe_set_handlers(backend_dev->mad_chr_be, mad_can_receieve,
> - mad_read, NULL, NULL, backend_dev, NULL, true);
> + backend_dev->rdmacm_mux.chr_be = mad_chr_be;
>
> - /* Register ourself */
> - memcpy(umad.hdr.addr.gid, backend_dev->gid.raw, sizeof(umad.hdr.addr.gid));
> - ret = qemu_chr_fe_write(backend_dev->mad_chr_be, (const uint8_t *)&umad,
> - sizeof(umad.hdr));
> - if (ret != sizeof(umad.hdr)) {
> - pr_dbg("Fail to register to rdma_umadmux (%d)\n", ret);
> + ret = qemu_chr_fe_backend_connected(backend_dev->rdmacm_mux.chr_be);
> + if (!ret) {
> + pr_dbg("Missing chardev for MAD multiplexer\n");
> + return -EIO;
> }
>
> qemu_mutex_init(&backend_dev->recv_mads_list.lock);
> backend_dev->recv_mads_list.list = qlist_new();
>
> + enable_rdmacm_mux_async(backend_dev);
> +
> + qemu_chr_fe_set_handlers(backend_dev->rdmacm_mux.chr_be,
> + rdmacm_mux_can_receive, rdmacm_mux_read, NULL,
> + NULL, backend_dev, NULL, true);
> +
> return 0;
> }
>
> @@ -978,6 +1066,8 @@ static void mad_stop(RdmaBackendDev *backend_dev)
>
> pr_dbg("Closing MAD\n");
>
> + disable_rdmacm_mux_async(backend_dev);
> +
> /* Clear MAD buffers list */
> qemu_mutex_lock(&backend_dev->recv_mads_list.lock);
> do {
> @@ -1000,23 +1090,94 @@ static void mad_fini(RdmaBackendDev *backend_dev)
> qemu_mutex_destroy(&backend_dev->recv_mads_list.lock);
> }
>
> +int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev,
> + union ibv_gid *gid)
> +{
> + union ibv_gid sgid;
> + int ret;
> + int i = 0;
> +
> + pr_dbg("0x%llx, 0x%llx\n",
> + (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix),
> + (long long unsigned int)be64_to_cpu(gid->global.interface_id));
> +
> + do {
> + ret = ibv_query_gid(backend_dev->context, backend_dev->port_num, i,
> + &sgid);
> + i++;
> + } while (!ret && (memcmp(&sgid, gid, sizeof(*gid))));
> +
> + pr_dbg("gid_index=%d\n", i - 1);
> +
> + return ret ? ret : i - 1;
> +}
> +
> +int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname,
> + union ibv_gid *gid)
> +{
> + RdmaCmMuxMsg msg = {0};
> + int ret;
> +
> + pr_dbg("0x%llx, 0x%llx\n",
> + (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix),
> + (long long unsigned int)be64_to_cpu(gid->global.interface_id));
> +
> + msg.hdr.op_code = RDMACM_MUX_OP_CODE_REG;
> + memcpy(msg.hdr.sgid.raw, gid->raw, sizeof(msg.hdr.sgid));
> +
> + ret = exec_rdmacm_mux_req(backend_dev, &msg);
> + if (ret) {
> + pr_dbg("Fail to register GID to rdma_umadmux (%d)\n", ret);
> + return -EIO;
> + }
> +
> + qapi_event_send_rdma_gid_status_changed(ifname, true,
> + gid->global.subnet_prefix,
> + gid->global.interface_id);
> +
> + return ret;
> +}
> +
> +int rdma_backend_del_gid(RdmaBackendDev *backend_dev, const char *ifname,
> + union ibv_gid *gid)
> +{
> + RdmaCmMuxMsg msg = {0};
> + int ret;
> +
> + pr_dbg("0x%llx, 0x%llx\n",
> + (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix),
> + (long long unsigned int)be64_to_cpu(gid->global.interface_id));
> +
> + msg.hdr.op_code = RDMACM_MUX_OP_CODE_UNREG;
> + memcpy(msg.hdr.sgid.raw, gid->raw, sizeof(msg.hdr.sgid));
> +
> + ret = exec_rdmacm_mux_req(backend_dev, &msg);
> + if (ret) {
> + pr_dbg("Fail to unregister GID from rdma_umadmux (%d)\n", ret);
> + return -EIO;
> + }
> +
> + qapi_event_send_rdma_gid_status_changed(ifname, false,
> + gid->global.subnet_prefix,
> + gid->global.interface_id);
> +
> + return 0;
> +}
> +
> int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
> RdmaDeviceResources *rdma_dev_res,
> const char *backend_device_name, uint8_t port_num,
> - uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr,
> - CharBackend *mad_chr_be, Error **errp)
> + struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be,
> + Error **errp)
> {
> int i;
> int ret = 0;
> int num_ibv_devices;
> struct ibv_device **dev_list;
> - struct ibv_port_attr port_attr;
>
> memset(backend_dev, 0, sizeof(*backend_dev));
>
> backend_dev->dev = pdev;
> - backend_dev->mad_chr_be = mad_chr_be;
> - backend_dev->backend_gid_idx = backend_gid_idx;
> backend_dev->port_num = port_num;
> backend_dev->rdma_dev_res = rdma_dev_res;
>
> @@ -1053,9 +1214,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
> backend_dev->ib_dev = *dev_list;
> }
>
> - pr_dbg("Using backend device %s, port %d, gid_idx %d\n",
> - ibv_get_device_name(backend_dev->ib_dev),
> - backend_dev->port_num, backend_dev->backend_gid_idx);
> + pr_dbg("Using backend device %s, port %d\n",
> + ibv_get_device_name(backend_dev->ib_dev), backend_dev->port_num);
>
> backend_dev->context = ibv_open_device(backend_dev->ib_dev);
> if (!backend_dev->context) {
> @@ -1072,20 +1232,6 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
> }
> pr_dbg("dev->backend_dev.channel=%p\n", backend_dev->channel);
>
> - ret = ibv_query_port(backend_dev->context, backend_dev->port_num,
> - &port_attr);
> - if (ret) {
> - error_setg(errp, "Error %d from ibv_query_port", ret);
> - ret = -EIO;
> - goto out_destroy_comm_channel;
> - }
> -
> - if (backend_dev->backend_gid_idx >= port_attr.gid_tbl_len) {
> - error_setg(errp, "Invalid backend_gid_idx, should be less than %d",
> - port_attr.gid_tbl_len);
> - goto out_destroy_comm_channel;
> - }
> -
> ret = init_device_caps(backend_dev, dev_attr);
> if (ret) {
> error_setg(errp, "Failed to initialize device capabilities");
> @@ -1093,20 +1239,8 @@ int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
> goto out_destroy_comm_channel;
> }
>
> - ret = ibv_query_gid(backend_dev->context, backend_dev->port_num,
> - backend_dev->backend_gid_idx, &backend_dev->gid);
> - if (ret) {
> - error_setg(errp, "Failed to query gid %d",
> - backend_dev->backend_gid_idx);
> - ret = -EIO;
> - goto out_destroy_comm_channel;
> - }
> - pr_dbg("subnet_prefix=0x%" PRIx64 "\n",
> - be64_to_cpu(backend_dev->gid.global.subnet_prefix));
> - pr_dbg("interface_id=0x%" PRIx64 "\n",
> - be64_to_cpu(backend_dev->gid.global.interface_id));
>
> - ret = mad_init(backend_dev);
> + ret = mad_init(backend_dev, mad_chr_be);
> if (ret) {
> error_setg(errp, "Fail to initialize mad");
> ret = -EIO;
> diff --git a/hw/rdma/rdma_backend.h b/hw/rdma/rdma_backend.h
> index fc83330251..59ad2b874b 100644
> --- a/hw/rdma/rdma_backend.h
> +++ b/hw/rdma/rdma_backend.h
> @@ -28,11 +28,6 @@ enum ibv_special_qp_type {
> IBV_QPT_GSI = 1,
> };
>
> -static inline union ibv_gid *rdma_backend_gid(RdmaBackendDev *dev)
> -{
> - return &dev->gid;
> -}
> -
> static inline uint32_t rdma_backend_qpn(const RdmaBackendQP *qp)
> {
> return qp->ibqp ? qp->ibqp->qp_num : 1;
> @@ -51,9 +46,15 @@ static inline uint32_t rdma_backend_mr_rkey(const RdmaBackendMR *mr)
> int rdma_backend_init(RdmaBackendDev *backend_dev, PCIDevice *pdev,
> RdmaDeviceResources *rdma_dev_res,
> const char *backend_device_name, uint8_t port_num,
> - uint8_t backend_gid_idx, struct ibv_device_attr *dev_attr,
> - CharBackend *mad_chr_be, Error **errp);
> + struct ibv_device_attr *dev_attr, CharBackend *mad_chr_be,
> + Error **errp);
> void rdma_backend_fini(RdmaBackendDev *backend_dev);
> +int rdma_backend_add_gid(RdmaBackendDev *backend_dev, const char *ifname,
> + union ibv_gid *gid);
> +int rdma_backend_del_gid(RdmaBackendDev *backend_dev, const char *ifname,
> + union ibv_gid *gid);
> +int rdma_backend_get_gid_index(RdmaBackendDev *backend_dev,
> + union ibv_gid *gid);
> void rdma_backend_start(RdmaBackendDev *backend_dev);
> void rdma_backend_stop(RdmaBackendDev *backend_dev);
> void rdma_backend_register_comp_handler(void (*handler)(int status,
> @@ -82,9 +83,9 @@ int rdma_backend_create_qp(RdmaBackendQP *qp, uint8_t qp_type,
> int rdma_backend_qp_state_init(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
> uint8_t qp_type, uint32_t qkey);
> int rdma_backend_qp_state_rtr(RdmaBackendDev *backend_dev, RdmaBackendQP *qp,
> - uint8_t qp_type, union ibv_gid *dgid,
> - uint32_t dqpn, uint32_t rq_psn, uint32_t qkey,
> - bool use_qkey);
> + uint8_t qp_type, uint8_t sgid_idx,
> + union ibv_gid *dgid, uint32_t dqpn,
> + uint32_t rq_psn, uint32_t qkey, bool use_qkey);
> int rdma_backend_qp_state_rts(RdmaBackendQP *qp, uint8_t qp_type,
> uint32_t sq_psn, uint32_t qkey, bool use_qkey);
> int rdma_backend_query_qp(RdmaBackendQP *qp, struct ibv_qp_attr *attr,
> @@ -94,6 +95,7 @@ void rdma_backend_destroy_qp(RdmaBackendQP *qp);
> void rdma_backend_post_send(RdmaBackendDev *backend_dev,
> RdmaBackendQP *qp, uint8_t qp_type,
> struct ibv_sge *sge, uint32_t num_sge,
> + uint8_t sgid_idx, union ibv_gid *sgid,
> union ibv_gid *dgid, uint32_t dqpn, uint32_t dqkey,
> void *ctx);
> void rdma_backend_post_recv(RdmaBackendDev *backend_dev,
> diff --git a/hw/rdma/rdma_backend_defs.h b/hw/rdma/rdma_backend_defs.h
> index 2a7e667075..1e5c3dd3bf 100644
> --- a/hw/rdma/rdma_backend_defs.h
> +++ b/hw/rdma/rdma_backend_defs.h
> @@ -19,6 +19,7 @@
> #include "qemu/thread.h"
> #include "chardev/char-fe.h"
> #include <infiniband/verbs.h>
> +#include "contrib/rdmacm-mux/rdmacm-mux.h"
>
> typedef struct RdmaDeviceResources RdmaDeviceResources;
>
> @@ -34,19 +35,22 @@ typedef struct RecvMadList {
> QList *list;
> } RecvMadList;
>
> +typedef struct RdmaCmMux {
> + CharBackend *chr_be;
> + int can_receive;
> +} RdmaCmMux;
> +
> typedef struct RdmaBackendDev {
> struct ibv_device_attr dev_attr;
> RdmaBackendThread comp_thread;
> - union ibv_gid gid;
> PCIDevice *dev;
> RdmaDeviceResources *rdma_dev_res;
> struct ibv_device *ib_dev;
> struct ibv_context *context;
> struct ibv_comp_channel *channel;
> uint8_t port_num;
> - uint8_t backend_gid_idx;
> RecvMadList recv_mads_list;
> - CharBackend *mad_chr_be;
> + RdmaCmMux rdmacm_mux;
> } RdmaBackendDev;
>
> typedef struct RdmaBackendPD {
> @@ -66,6 +70,7 @@ typedef struct RdmaBackendCQ {
> typedef struct RdmaBackendQP {
> struct ibv_pd *ibpd;
> struct ibv_qp *ibqp;
> + uint8_t sgid_idx;
> } RdmaBackendQP;
>
> #endif
> diff --git a/hw/rdma/rdma_rm.c b/hw/rdma/rdma_rm.c
> index 4f10fcabcc..250254561c 100644
> --- a/hw/rdma/rdma_rm.c
> +++ b/hw/rdma/rdma_rm.c
> @@ -391,7 +391,7 @@ out_dealloc_qp:
> }
>
> int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
> - uint32_t qp_handle, uint32_t attr_mask,
> + uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
> union ibv_gid *dgid, uint32_t dqpn,
> enum ibv_qp_state qp_state, uint32_t qkey,
> uint32_t rq_psn, uint32_t sq_psn)
> @@ -400,6 +400,7 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
> int ret;
>
> pr_dbg("qpn=0x%x\n", qp_handle);
> + pr_dbg("qkey=0x%x\n", qkey);
>
> qp = rdma_rm_get_qp(dev_res, qp_handle);
> if (!qp) {
> @@ -430,9 +431,19 @@ int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
> }
>
> if (qp->qp_state == IBV_QPS_RTR) {
> + /* Get backend gid index */
> + pr_dbg("Guest sgid_idx=%d\n", sgid_idx);
> + sgid_idx = rdma_rm_get_backend_gid_index(dev_res, backend_dev,
> + sgid_idx);
> + if (sgid_idx <= 0) { /* TODO check also less than bk.max_sgid */
> + pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n", sgid_idx);
> + return -EIO;
> + }
> +
> ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
> - qp->qp_type, dgid, dqpn, rq_psn,
> - qkey, attr_mask & IBV_QP_QKEY);
> + qp->qp_type, sgid_idx, dgid, dqpn,
> + rq_psn, qkey,
> + attr_mask & IBV_QP_QKEY);
> if (ret) {
> return -EIO;
> }
> @@ -523,11 +534,91 @@ void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
> res_tbl_dealloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);
> }
>
> +int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
> + const char *ifname, union ibv_gid *gid, int gid_idx)
> +{
> + int rc;
> +
> + rc = rdma_backend_add_gid(backend_dev, ifname, gid);
> + if (rc) {
> + pr_dbg("Fail to add gid\n");
> + return -EINVAL;
> + }
> +
> + memcpy(&dev_res->ports[0].gid_tbl[gid_idx].gid, gid, sizeof(*gid));
> +
> + return 0;
> +}
> +
> +int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
> + const char *ifname, int gid_idx)
> +{
> + int rc;
> +
> + rc = rdma_backend_del_gid(backend_dev, ifname,
> + &dev_res->ports[0].gid_tbl[gid_idx].gid);
> + if (rc) {
> + pr_dbg("Fail to delete gid\n");
> + return -EINVAL;
> + }
> +
> + memset(dev_res->ports[0].gid_tbl[gid_idx].gid.raw, 0,
> + sizeof(dev_res->ports[0].gid_tbl[gid_idx].gid));
> + dev_res->ports[0].gid_tbl[gid_idx].backend_gid_index = -1;
> +
> + return 0;
> +}
> +
> +int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res,
> + RdmaBackendDev *backend_dev, int sgid_idx)
> +{
> + if (unlikely(sgid_idx < 0 || sgid_idx > MAX_PORT_GIDS)) {
> + pr_dbg("Got invalid sgid_idx %d\n", sgid_idx);
> + return -EINVAL;
> + }
> +
> + if (unlikely(dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index == -1)) {
> + dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index =
> + rdma_backend_get_gid_index(backend_dev,
> + &dev_res->ports[0].gid_tbl[sgid_idx].gid);
> + }
> +
> + pr_dbg("backend_gid_index=%d\n",
> + dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index);
> +
> + return dev_res->ports[0].gid_tbl[sgid_idx].backend_gid_index;
> +}
> +
> static void destroy_qp_hash_key(gpointer data)
> {
> g_bytes_unref(data);
> }
>
> +static void init_ports(RdmaDeviceResources *dev_res)
> +{
> + int i, j;
> +
> + memset(dev_res->ports, 0, sizeof(dev_res->ports));
> +
> + for (i = 0; i < MAX_PORTS; i++) {
> + dev_res->ports[i].state = IBV_PORT_DOWN;
> + for (j = 0; j < MAX_PORT_GIDS; j++) {
> + dev_res->ports[i].gid_tbl[j].backend_gid_index = -1;
> + }
> + }
> +}
> +
> +static void fini_ports(RdmaDeviceResources *dev_res,
> + RdmaBackendDev *backend_dev, const char *ifname)
> +{
> + int i;
> +
> + dev_res->ports[0].state = IBV_PORT_DOWN;
> + for (i = 0; i < MAX_PORT_GIDS; i++) {
> + rdma_rm_del_gid(dev_res, backend_dev, ifname, i);
> + }
> +}
> +
> int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
> Error **errp)
> {
> @@ -545,11 +636,16 @@ int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
> dev_attr->max_qp_wr, sizeof(void *));
> res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
>
> + init_ports(dev_res);
> +
> return 0;
> }
>
> -void rdma_rm_fini(RdmaDeviceResources *dev_res)
> +void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
> + const char *ifname)
> {
> + fini_ports(dev_res, backend_dev, ifname);
> +
> res_tbl_free(&dev_res->uc_tbl);
> res_tbl_free(&dev_res->cqe_ctx_tbl);
> res_tbl_free(&dev_res->qp_tbl);
> diff --git a/hw/rdma/rdma_rm.h b/hw/rdma/rdma_rm.h
> index b4e04cc7b4..a7169b4e89 100644
> --- a/hw/rdma/rdma_rm.h
> +++ b/hw/rdma/rdma_rm.h
> @@ -22,7 +22,8 @@
>
> int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr,
> Error **errp);
> -void rdma_rm_fini(RdmaDeviceResources *dev_res);
> +void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
> + const char *ifname);
>
> int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
> uint32_t *pd_handle, uint32_t ctx_handle);
> @@ -55,7 +56,7 @@ int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
> uint32_t recv_cq_handle, void *opaque, uint32_t *qpn);
> RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn);
> int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
> - uint32_t qp_handle, uint32_t attr_mask,
> + uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
> union ibv_gid *dgid, uint32_t dqpn,
> enum ibv_qp_state qp_state, uint32_t qkey,
> uint32_t rq_psn, uint32_t sq_psn);
> @@ -69,4 +70,16 @@ int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
> void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);
> void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id);
>
> +int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
> + const char *ifname, union ibv_gid *gid, int gid_idx);
> +int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
> + const char *ifname, int gid_idx);
> +int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res,
> + RdmaBackendDev *backend_dev, int sgid_idx);
> +static inline union ibv_gid *rdma_rm_get_gid(RdmaDeviceResources *dev_res,
> + int sgid_idx)
> +{
> + return &dev_res->ports[0].gid_tbl[sgid_idx].gid;
> +}
> +
> #endif
> diff --git a/hw/rdma/rdma_rm_defs.h b/hw/rdma/rdma_rm_defs.h
> index 9b399063d3..7b3435f991 100644
> --- a/hw/rdma/rdma_rm_defs.h
> +++ b/hw/rdma/rdma_rm_defs.h
> @@ -19,7 +19,7 @@
> #include "rdma_backend_defs.h"
>
> #define MAX_PORTS 1
> -#define MAX_PORT_GIDS 1
> +#define MAX_PORT_GIDS 255
> #define MAX_GIDS MAX_PORT_GIDS
> #define MAX_PORT_PKEYS 1
> #define MAX_PKEYS MAX_PORT_PKEYS
> @@ -86,8 +86,13 @@ typedef struct RdmaRmQP {
> enum ibv_qp_state qp_state;
> } RdmaRmQP;
>
> +typedef struct RdmaRmGid {
> + union ibv_gid gid;
> + int backend_gid_index;
> +} RdmaRmGid;
> +
> typedef struct RdmaRmPort {
> - union ibv_gid gid_tbl[MAX_PORT_GIDS];
> + RdmaRmGid gid_tbl[MAX_PORT_GIDS];
> enum ibv_port_state state;
> } RdmaRmPort;
>
> diff --git a/hw/rdma/rdma_utils.h b/hw/rdma/rdma_utils.h
> index 04c7c2ef5b..989db249ef 100644
> --- a/hw/rdma/rdma_utils.h
> +++ b/hw/rdma/rdma_utils.h
> @@ -20,6 +20,7 @@
> #include "qemu/osdep.h"
> #include "hw/pci/pci.h"
> #include "sysemu/dma.h"
> +#include "stdio.h"
>
> #define pr_info(fmt, ...) \
> fprintf(stdout, "%s: %-20s (%3d): " fmt, "rdma", __func__, __LINE__,\
> @@ -40,9 +41,23 @@ extern unsigned long pr_dbg_cnt;
> #define pr_dbg(fmt, ...) \
> fprintf(stdout, "%lx %ld: %-20s (%3d): " fmt, pthread_self(), pr_dbg_cnt++, \
> __func__, __LINE__, ## __VA_ARGS__)
> +
> +#define pr_dbg_buf(title, buf, len) \
> +{ \
> + char *b = g_malloc0(len * 3 + 1); \
> + char b1[4]; \
> + for (int i = 0; i < len; i++) { \
> + sprintf(b1, "%.2X ", buf[i] & 0x000000FF); \
> + strcat(b, b1); \
> + } \
> + pr_dbg("%s (%d): %s\n", title, len, b); \
> + g_free(b); \
> +}
> +
> #else
> #define init_pr_dbg(void)
> #define pr_dbg(fmt, ...)
> +#define pr_dbg_buf(title, buf, len)
> #endif
>
> void *rdma_pci_dma_map(PCIDevice *dev, dma_addr_t addr, dma_addr_t plen);
> diff --git a/hw/rdma/vmw/pvrdma.h b/hw/rdma/vmw/pvrdma.h
> index 15c3f28b86..b019cb843a 100644
> --- a/hw/rdma/vmw/pvrdma.h
> +++ b/hw/rdma/vmw/pvrdma.h
> @@ -79,8 +79,8 @@ typedef struct PVRDMADev {
> int interrupt_mask;
> struct ibv_device_attr dev_attr;
> uint64_t node_guid;
> + char *backend_eth_device_name;
> char *backend_device_name;
> - uint8_t backend_gid_idx;
> uint8_t backend_port_num;
> RdmaBackendDev backend_dev;
> RdmaDeviceResources rdma_dev_res;
> diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
> index 57d6f41ae6..a334f6205e 100644
> --- a/hw/rdma/vmw/pvrdma_cmd.c
> +++ b/hw/rdma/vmw/pvrdma_cmd.c
> @@ -504,13 +504,16 @@ static int modify_qp(PVRDMADev *dev, union pvrdma_cmd_req *req,
> rsp->hdr.response = cmd->hdr.response;
> rsp->hdr.ack = PVRDMA_CMD_MODIFY_QP_RESP;
>
> - rsp->hdr.err = rdma_rm_modify_qp(&dev->rdma_dev_res, &dev->backend_dev,
> - cmd->qp_handle, cmd->attr_mask,
> - (union ibv_gid *)&cmd->attrs.ah_attr.grh.dgid,
> - cmd->attrs.dest_qp_num,
> - (enum ibv_qp_state)cmd->attrs.qp_state,
> - cmd->attrs.qkey, cmd->attrs.rq_psn,
> - cmd->attrs.sq_psn);
> + /* No need to verify sgid_index since it is u8 */
> +
> + rsp->hdr.err =
> + rdma_rm_modify_qp(&dev->rdma_dev_res, &dev->backend_dev, cmd->qp_handle,
> + cmd->attr_mask, cmd->attrs.ah_attr.grh.sgid_index,
> + (union ibv_gid *)&cmd->attrs.ah_attr.grh.dgid,
> + cmd->attrs.dest_qp_num,
> + (enum ibv_qp_state)cmd->attrs.qp_state,
> + cmd->attrs.qkey, cmd->attrs.rq_psn,
> + cmd->attrs.sq_psn);
>
> pr_dbg("ret=%d\n", rsp->hdr.err);
> return rsp->hdr.err;
> @@ -570,10 +573,8 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
> union pvrdma_cmd_resp *rsp)
> {
> struct pvrdma_cmd_create_bind *cmd = &req->create_bind;
> -#ifdef PVRDMA_DEBUG
> - __be64 *subnet = (__be64 *)&cmd->new_gid[0];
> - __be64 *if_id = (__be64 *)&cmd->new_gid[8];
> -#endif
> + int rc;
> + union ibv_gid *gid = (union ibv_gid *)&cmd->new_gid;
>
> pr_dbg("index=%d\n", cmd->index);
>
> @@ -582,19 +583,24 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
> }
>
> pr_dbg("gid[%d]=0x%llx,0x%llx\n", cmd->index,
> - (long long unsigned int)be64_to_cpu(*subnet),
> - (long long unsigned int)be64_to_cpu(*if_id));
> + (long long unsigned int)be64_to_cpu(gid->global.subnet_prefix),
> + (long long unsigned int)be64_to_cpu(gid->global.interface_id));
>
> - /* Driver forces to one port only */
> - memcpy(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw, &cmd->new_gid,
> - sizeof(cmd->new_gid));
> + rc = rdma_rm_add_gid(&dev->rdma_dev_res, &dev->backend_dev,
> + dev->backend_eth_device_name, gid, cmd->index);
> + if (rc < 0) {
> + return -EINVAL;
> + }
>
> /* TODO: Since drivers stores node_guid at load_dsr phase then this
> * assignment is not relevant, i need to figure out a way how to
> * retrieve MAC of our netdev */
> - dev->node_guid = dev->rdma_dev_res.ports[0].gid_tbl[0].global.interface_id;
> - pr_dbg("dev->node_guid=0x%llx\n",
> - (long long unsigned int)be64_to_cpu(dev->node_guid));
> + if (!cmd->index) {
> + dev->node_guid =
> + dev->rdma_dev_res.ports[0].gid_tbl[0].gid.global.interface_id;
> + pr_dbg("dev->node_guid=0x%llx\n",
> + (long long unsigned int)be64_to_cpu(dev->node_guid));
> + }
>
> return 0;
> }
> @@ -602,6 +608,8 @@ static int create_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
> static int destroy_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
> union pvrdma_cmd_resp *rsp)
> {
> + int rc;
> +
> struct pvrdma_cmd_destroy_bind *cmd = &req->destroy_bind;
>
> pr_dbg("index=%d\n", cmd->index);
> @@ -610,8 +618,13 @@ static int destroy_bind(PVRDMADev *dev, union pvrdma_cmd_req *req,
> return -EINVAL;
> }
>
> - memset(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw, 0,
> - sizeof(dev->rdma_dev_res.ports[0].gid_tbl[cmd->index].raw));
> + rc = rdma_rm_del_gid(&dev->rdma_dev_res, &dev->backend_dev,
> + dev->backend_eth_device_name, cmd->index);
> +
> + if (rc < 0) {
> + rsp->hdr.err = rc;
> + goto out;
> + }
>
> return 0;
> }
> diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
> index fc2abd34af..ac8c092db0 100644
> --- a/hw/rdma/vmw/pvrdma_main.c
> +++ b/hw/rdma/vmw/pvrdma_main.c
> @@ -36,9 +36,9 @@
> #include "pvrdma_qp_ops.h"
>
> static Property pvrdma_dev_properties[] = {
> - DEFINE_PROP_STRING("backend-dev", PVRDMADev, backend_device_name),
> - DEFINE_PROP_UINT8("backend-port", PVRDMADev, backend_port_num, 1),
> - DEFINE_PROP_UINT8("backend-gid-idx", PVRDMADev, backend_gid_idx, 0),
> + DEFINE_PROP_STRING("netdev", PVRDMADev, backend_eth_device_name),
> + DEFINE_PROP_STRING("ibdev", PVRDMADev, backend_device_name),
> + DEFINE_PROP_UINT8("ibport", PVRDMADev, backend_port_num, 1),
> DEFINE_PROP_UINT64("dev-caps-max-mr-size", PVRDMADev, dev_attr.max_mr_size,
> MAX_MR_SIZE),
> DEFINE_PROP_INT32("dev-caps-max-qp", PVRDMADev, dev_attr.max_qp, MAX_QP),
> @@ -276,17 +276,6 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
> pr_dbg("Initialized\n");
> }
>
> -static void init_ports(PVRDMADev *dev, Error **errp)
> -{
> - int i;
> -
> - memset(dev->rdma_dev_res.ports, 0, sizeof(dev->rdma_dev_res.ports));
> -
> - for (i = 0; i < MAX_PORTS; i++) {
> - dev->rdma_dev_res.ports[i].state = IBV_PORT_DOWN;
> - }
> -}
> -
> static void uninit_msix(PCIDevice *pdev, int used_vectors)
> {
> PVRDMADev *dev = PVRDMA_DEV(pdev);
> @@ -335,7 +324,8 @@ static void pvrdma_fini(PCIDevice *pdev)
>
> pvrdma_qp_ops_fini();
>
> - rdma_rm_fini(&dev->rdma_dev_res);
> + rdma_rm_fini(&dev->rdma_dev_res, &dev->backend_dev,
> + dev->backend_eth_device_name);
>
> rdma_backend_fini(&dev->backend_dev);
>
> @@ -612,8 +602,7 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
>
> rc = rdma_backend_init(&dev->backend_dev, pdev, &dev->rdma_dev_res,
> dev->backend_device_name, dev->backend_port_num,
> - dev->backend_gid_idx, &dev->dev_attr, &dev->mad_chr,
> - errp);
> + &dev->dev_attr, &dev->mad_chr, errp);
> if (rc) {
> goto out;
> }
> @@ -623,8 +612,6 @@ static void pvrdma_realize(PCIDevice *pdev, Error **errp)
> goto out;
> }
>
> - init_ports(dev, errp);
> -
> rc = pvrdma_qp_ops_init();
> if (rc) {
> goto out;
> diff --git a/hw/rdma/vmw/pvrdma_qp_ops.c b/hw/rdma/vmw/pvrdma_qp_ops.c
> index 3388be1926..2130824098 100644
> --- a/hw/rdma/vmw/pvrdma_qp_ops.c
> +++ b/hw/rdma/vmw/pvrdma_qp_ops.c
> @@ -131,6 +131,8 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
> RdmaRmQP *qp;
> PvrdmaSqWqe *wqe;
> PvrdmaRing *ring;
> + int sgid_idx;
> + union ibv_gid *sgid;
>
> pr_dbg("qp_handle=0x%x\n", qp_handle);
>
> @@ -156,8 +158,26 @@ int pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
> comp_ctx->cqe.qp = qp_handle;
> comp_ctx->cqe.opcode = IBV_WC_SEND;
>
> + sgid = rdma_rm_get_gid(&dev->rdma_dev_res, wqe->hdr.wr.ud.av.gid_index);
> + if (!sgid) {
> + pr_dbg("Fail to get gid for idx %d\n", wqe->hdr.wr.ud.av.gid_index);
> + return -EIO;
> + }
> + pr_dbg("sgid_id=%d, sgid=0x%llx\n", wqe->hdr.wr.ud.av.gid_index,
> + sgid->global.interface_id);
> +
> + sgid_idx = rdma_rm_get_backend_gid_index(&dev->rdma_dev_res,
> + &dev->backend_dev,
> + wqe->hdr.wr.ud.av.gid_index);
> + if (sgid_idx <= 0) {
> + pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n",
> + wqe->hdr.wr.ud.av.gid_index);
> + return -EIO;
> + }
> +
> rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type,
> (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
> + sgid_idx, sgid,
> (union ibv_gid *)wqe->hdr.wr.ud.av.dgid,
> wqe->hdr.wr.ud.remote_qpn,
> wqe->hdr.wr.ud.remote_qkey, comp_ctx);
Thanks,
Marcel
next prev parent reply other threads:[~2018-11-25 7:29 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-11-22 12:13 [Qemu-devel] [PATCH v5 00/24] Add support for RDMA MAD Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 01/24] contrib/rdmacm-mux: Add implementation of RDMA User MAD multiplexer Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 02/24] hw/rdma: Add ability to force notification without re-arm Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 03/24] hw/rdma: Return qpn 1 if ibqp is NULL Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 04/24] hw/rdma: Abort send-op if fail to create addr handler Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 05/24] hw/rdma: Add support for MAD packets Yuval Shaia
2018-11-25 7:05 ` Marcel Apfelbaum
2018-11-25 7:27 ` Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 06/24] hw/pvrdma: Make function reset_device return void Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 07/24] hw/pvrdma: Make default pkey 0xFFFF Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 08/24] hw/pvrdma: Set the correct opcode for recv completion Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 09/24] hw/pvrdma: Set the correct opcode for send completion Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 10/24] qapi: Define new QMP message for pvrdma Yuval Shaia
2018-11-26 10:01 ` Markus Armbruster
2018-11-26 10:08 ` Yuval Shaia
2018-11-26 20:43 ` Eric Blake
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 11/24] hw/pvrdma: Add support to allow guest to configure GID table Yuval Shaia
2018-11-25 7:29 ` Marcel Apfelbaum [this message]
2018-11-25 9:10 ` Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 12/24] vmxnet3: Move some definitions to header file Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 13/24] hw/pvrdma: Make sure PCI function 0 is vmxnet3 Yuval Shaia
2018-11-25 7:31 ` Marcel Apfelbaum
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 14/24] hw/rdma: Initialize node_guid from vmxnet3 mac address Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 15/24] hw/pvrdma: Make device state depend on Ethernet function state Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 16/24] hw/pvrdma: Fill all CQE fields Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 17/24] hw/pvrdma: Fill error code in command's response Yuval Shaia
2018-11-25 7:40 ` Marcel Apfelbaum
2018-11-25 11:53 ` Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 18/24] hw/rdma: Remove unneeded code that handles more that one port Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 19/24] vl: Introduce shutdown_notifiers Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 20/24] hw/pvrdma: Clean device's resource when system is shutdown Yuval Shaia
2018-11-22 12:13 ` [Qemu-devel] [PATCH v5 21/24] hw/rdma: Do not use bitmap_zero_extend to free bitmap Yuval Shaia
2018-11-22 12:14 ` [Qemu-devel] [PATCH v5 22/24] hw/rdma: Do not call rdma_backend_del_gid on an empty gid Yuval Shaia
2018-11-22 12:14 ` [Qemu-devel] [PATCH v5 23/24] hw/pvrdma: Do not clean resources on shutdown Yuval Shaia
2018-11-25 7:30 ` Yuval Shaia
2018-11-25 7:41 ` Marcel Apfelbaum
2018-11-22 12:14 ` [Qemu-devel] [PATCH v5 24/24] docs: Update pvrdma device documentation Yuval Shaia
[not found] ` <8b89bfaf-be29-e043-32fa-9615fb4ea0f7@gmail.com>
2018-11-26 10:34 ` Marcel Apfelbaum
2018-11-26 13:05 ` Yuval Shaia
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3285f590-f388-dab4-b87c-749e65d460c6@gmail.com \
--to=marcel.apfelbaum@gmail.com \
--cc=armbru@redhat.com \
--cc=cohuck@redhat.com \
--cc=dmitry.fleytman@gmail.com \
--cc=eblake@redhat.com \
--cc=jasowang@redhat.com \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=shamir.rabinovitch@oracle.com \
--cc=yuval.shaia@oracle.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).