* [RFC v2 07/12] qedr: Add support for memory registeration verbs
From: Ram Amrani @ 2016-09-20 10:35 UTC (permalink / raw)
To: davem-fT/PcQaiUtIeIZ0/mPfg9Q, dledford-H+wXaHxf7aLQT0dZR+AlfA
Cc: Ariel.Elior-YGCgFSpz5w/QT0dZR+AlfA,
Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA,
Yuval.Mintz-YGCgFSpz5w/QT0dZR+AlfA,
rajesh.borundia-YGCgFSpz5w/QT0dZR+AlfA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
Ram amrani
In-Reply-To: <1474367764-9555-1-git-send-email-Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
From: Ram amrani <Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
Add support for user, dma and memory regions registration.
Signed-off-by: Rajesh Borundia <rajesh.borundia-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
Signed-off-by: Ram Amrani <Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qedr/main.c | 11 +-
drivers/infiniband/hw/qedr/qedr.h | 40 ++++
drivers/infiniband/hw/qedr/verbs.c | 365 +++++++++++++++++++++++++++++
drivers/infiniband/hw/qedr/verbs.h | 14 ++
drivers/net/ethernet/qlogic/qed/qed_roce.c | 240 +++++++++++++++++++
include/linux/qed/qed_roce_if.h | 6 +
6 files changed, 674 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 10ad9ed..66d33fa 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -103,7 +103,9 @@ static int qedr_register_device(struct qedr_dev *dev)
QEDR_UVERBS(CREATE_QP) |
QEDR_UVERBS(MODIFY_QP) |
QEDR_UVERBS(QUERY_QP) |
- QEDR_UVERBS(DESTROY_QP);
+ QEDR_UVERBS(DESTROY_QP) |
+ QEDR_UVERBS(REG_MR) |
+ QEDR_UVERBS(DEREG_MR);
dev->ibdev.phys_port_cnt = 1;
dev->ibdev.num_comp_vectors = dev->num_cnq;
@@ -135,8 +137,13 @@ static int qedr_register_device(struct qedr_dev *dev)
dev->ibdev.query_pkey = qedr_query_pkey;
+ dev->ibdev.get_dma_mr = qedr_get_dma_mr;
+ dev->ibdev.dereg_mr = qedr_dereg_mr;
+ dev->ibdev.reg_user_mr = qedr_reg_user_mr;
+ dev->ibdev.alloc_mr = qedr_alloc_mr;
+ dev->ibdev.map_mr_sg = qedr_map_mr_sg;
+
dev->ibdev.dma_device = &dev->pdev->dev;
-
dev->ibdev.get_link_layer = qedr_link_layer;
dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 05017be..c16844b 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -373,6 +373,41 @@ struct qedr_qp {
struct qedr_userq urq;
};
+struct qedr_ah {
+ struct ib_ah ibah;
+ struct ib_ah_attr attr;
+};
+
+enum qedr_mr_type {
+ QEDR_MR_USER,
+ QEDR_MR_KERNEL,
+ QEDR_MR_DMA,
+ QEDR_MR_FRMR
+};
+
+struct mr_info {
+ struct qedr_pbl *pbl_table;
+ struct qedr_pbl_info pbl_info;
+ struct list_head free_pbl_list;
+ struct list_head inuse_pbl_list;
+ u32 completed;
+ u32 completed_handled;
+};
+
+struct qedr_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+
+ struct qed_rdma_register_tid_in_params hw_mr;
+ enum qedr_mr_type type;
+
+ struct qedr_dev *dev;
+ struct mr_info info;
+
+ u64 *pages;
+ u32 npages;
+};
+
static inline int qedr_get_dmac(struct qedr_dev *dev,
struct ib_ah_attr *ah_attr, u8 *mac_addr)
{
@@ -417,4 +452,9 @@ static inline struct qedr_qp *get_qedr_qp(struct ib_qp *ibqp)
{
return container_of(ibqp, struct qedr_qp, ibqp);
}
+
+static inline struct qedr_mr *get_qedr_mr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct qedr_mr, ibmr);
+}
#endif
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index 616d656..feff3d2 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -2051,3 +2051,368 @@ int qedr_destroy_qp(struct ib_qp *ibqp)
return rc;
}
+
+static void free_mr_info(struct qedr_dev *dev, struct mr_info *info)
+{
+ struct qedr_pbl *pbl, *tmp;
+
+ if (info->pbl_table)
+ list_add_tail(&info->pbl_table->list_entry,
+ &info->free_pbl_list);
+
+ if (!list_empty(&info->inuse_pbl_list))
+ list_splice(&info->inuse_pbl_list, &info->free_pbl_list);
+
+ list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) {
+ list_del(&pbl->list_entry);
+ qedr_free_pbl(dev, &info->pbl_info, pbl);
+ }
+}
+
+static int init_mr_info(struct qedr_dev *dev, struct mr_info *info,
+ size_t page_list_len, bool two_layered)
+{
+ struct qedr_pbl *tmp;
+ int rc;
+
+ INIT_LIST_HEAD(&info->free_pbl_list);
+ INIT_LIST_HEAD(&info->inuse_pbl_list);
+
+ rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info,
+ page_list_len, two_layered);
+ if (rc)
+ goto done;
+
+ info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
+ if (!info->pbl_table) {
+ rc = -ENOMEM;
+ goto done;
+ }
+
+ DP_VERBOSE(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n",
+ &info->pbl_table->pa);
+
+ /* in usual case we use 2 PBLs, so we add one to free
+ * list and allocating another one
+ */
+ tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL);
+ if (!tmp) {
+ DP_VERBOSE(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n");
+ goto done;
+ }
+
+ list_add_tail(&tmp->list_entry, &info->free_pbl_list);
+
+ DP_VERBOSE(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa);
+
+done:
+ if (rc)
+ free_mr_info(dev, info);
+
+ return rc;
+}
+
+struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
+ u64 usr_addr, int acc, struct ib_udata *udata)
+{
+ struct qedr_dev *dev = get_qedr_dev(ibpd->device);
+ struct qedr_mr *mr;
+ struct qedr_pd *pd;
+ int rc = -ENOMEM;
+
+ pd = get_qedr_pd(ibpd);
+ DP_VERBOSE(dev, QEDR_MSG_MR,
+ "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n",
+ pd->pd_id, start, len, usr_addr, acc);
+
+ if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
+ return ERR_PTR(-EINVAL);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(rc);
+
+ mr->type = QEDR_MR_USER;
+
+ mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0);
+ if (IS_ERR(mr->umem)) {
+ rc = -EFAULT;
+ goto err0;
+ }
+
+ rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1);
+ if (rc)
+ goto err1;
+
+ qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table,
+ &mr->info.pbl_info);
+
+ rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
+ if (rc) {
+ DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
+ goto err1;
+ }
+
+ /* Index only, 18 bit long, lkey = itid << 8 | key */
+ mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
+ mr->hw_mr.key = 0;
+ mr->hw_mr.pd = pd->pd_id;
+ mr->hw_mr.local_read = 1;
+ mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
+ mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
+ mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+ mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
+ mr->hw_mr.mw_bind = false;
+ mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa;
+ mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
+ mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
+ mr->hw_mr.page_size_log = ilog2(mr->umem->page_size);
+ mr->hw_mr.fbo = ib_umem_offset(mr->umem);
+ mr->hw_mr.length = len;
+ mr->hw_mr.vaddr = usr_addr;
+ mr->hw_mr.zbva = false;
+ mr->hw_mr.phy_mr = false;
+ mr->hw_mr.dma_mr = false;
+
+ rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
+ if (rc) {
+ DP_ERR(dev, "roce register tid returned an error %d\n", rc);
+ goto err2;
+ }
+
+ mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
+ if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
+ mr->hw_mr.remote_atomic)
+ mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
+
+ DP_VERBOSE(dev, QEDR_MSG_MR, "register user mr lkey: %x\n",
+ mr->ibmr.lkey);
+ return &mr->ibmr;
+
+err2:
+ dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
+err1:
+ qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
+err0:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+int qedr_dereg_mr(struct ib_mr *ib_mr)
+{
+ struct qedr_mr *mr = get_qedr_mr(ib_mr);
+ struct qedr_dev *dev = get_qedr_dev(ib_mr->device);
+ int rc = 0;
+
+ rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid);
+ if (rc)
+ return rc;
+
+ dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
+
+ if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR))
+ qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table);
+
+ /* it could be user registered memory. */
+ if (mr->umem)
+ ib_umem_release(mr->umem);
+
+ kfree(mr);
+
+ return rc;
+}
+
+struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, int max_page_list_len)
+{
+ struct qedr_pd *pd = get_qedr_pd(ibpd);
+ struct qedr_dev *dev = get_qedr_dev(ibpd->device);
+ struct qedr_mr *mr;
+ int rc = -ENOMEM;
+
+ DP_VERBOSE(dev, QEDR_MSG_MR,
+ "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id,
+ max_page_list_len);
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(rc);
+
+ mr->dev = dev;
+ mr->type = QEDR_MR_FRMR;
+
+ rc = init_mr_info(dev, &mr->info, max_page_list_len, 1);
+ if (rc)
+ goto err0;
+
+ rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
+ if (rc) {
+ DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
+ goto err0;
+ }
+
+ /* Index only, 18 bit long, lkey = itid << 8 | key */
+ mr->hw_mr.tid_type = QED_RDMA_TID_FMR;
+ mr->hw_mr.key = 0;
+ mr->hw_mr.pd = pd->pd_id;
+ mr->hw_mr.local_read = 1;
+ mr->hw_mr.local_write = 0;
+ mr->hw_mr.remote_read = 0;
+ mr->hw_mr.remote_write = 0;
+ mr->hw_mr.remote_atomic = 0;
+ mr->hw_mr.mw_bind = false;
+ mr->hw_mr.pbl_ptr = 0;
+ mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered;
+ mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size);
+ mr->hw_mr.fbo = 0;
+ mr->hw_mr.length = 0;
+ mr->hw_mr.vaddr = 0;
+ mr->hw_mr.zbva = false;
+ mr->hw_mr.phy_mr = true;
+ mr->hw_mr.dma_mr = false;
+
+ rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
+ if (rc) {
+ DP_ERR(dev, "roce register tid returned an error %d\n", rc);
+ goto err1;
+ }
+
+ mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
+ mr->ibmr.rkey = mr->ibmr.lkey;
+
+ DP_VERBOSE(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey);
+ return mr;
+
+err1:
+ dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
+err0:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
+
+struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd,
+ enum ib_mr_type mr_type, u32 max_num_sg)
+{
+ struct qedr_dev *dev;
+ struct qedr_mr *mr;
+
+ if (mr_type != IB_MR_TYPE_MEM_REG)
+ return ERR_PTR(-EINVAL);
+
+ mr = __qedr_alloc_mr(ibpd, max_num_sg);
+
+ if (IS_ERR(mr))
+ return ERR_PTR(-EINVAL);
+
+ dev = mr->dev;
+
+ return &mr->ibmr;
+}
+
+static int qedr_set_page(struct ib_mr *ibmr, u64 addr)
+{
+ struct qedr_mr *mr = get_qedr_mr(ibmr);
+ struct qedr_pbl *pbl_table;
+ struct regpair *pbe;
+ u32 pbes_in_page;
+
+ if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) {
+ DP_ERR(mr->dev, "qedr_set_page failes when %d\n", mr->npages);
+ return -ENOMEM;
+ }
+
+ DP_VERBOSE(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n",
+ mr->npages, addr);
+
+ pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64);
+ pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page);
+ pbe = (struct regpair *)pbl_table->va;
+ pbe += mr->npages % pbes_in_page;
+ pbe->lo = cpu_to_le32((u32)addr);
+ pbe->hi = cpu_to_le32((u32)upper_32_bits(addr));
+
+ mr->npages++;
+
+ return 0;
+}
+
+static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info)
+{
+ int work = info->completed - info->completed_handled - 1;
+
+ DP_VERBOSE(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work);
+ while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) {
+ struct qedr_pbl *pbl;
+
+ /* Free all the page list that are possible to be freed
+ * (all the ones that were invalidated), under the assumption
+ * that if an FMR was completed successfully that means that
+ * if there was an invalidate operation before it also ended
+ */
+ pbl = list_first_entry(&info->inuse_pbl_list,
+ struct qedr_pbl, list_entry);
+ list_del(&pbl->list_entry);
+ list_add_tail(&pbl->list_entry, &info->free_pbl_list);
+ info->completed_handled++;
+ }
+}
+
+int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset)
+{
+ struct qedr_mr *mr = get_qedr_mr(ibmr);
+
+ mr->npages = 0;
+
+ handle_completed_mrs(mr->dev, &mr->info);
+ return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page);
+}
+
+struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc)
+{
+ struct qedr_dev *dev = get_qedr_dev(ibpd->device);
+ struct qedr_pd *pd = get_qedr_pd(ibpd);
+ struct qedr_mr *mr;
+ int rc;
+
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr)
+ return ERR_PTR(-ENOMEM);
+
+ mr->type = QEDR_MR_DMA;
+
+ rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid);
+ if (rc) {
+ DP_ERR(dev, "roce alloc tid returned an error %d\n", rc);
+ goto err1;
+ }
+
+ /* index only, 18 bit long, lkey = itid << 8 | key */
+ mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR;
+ mr->hw_mr.pd = pd->pd_id;
+ mr->hw_mr.local_read = 1;
+ mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
+ mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
+ mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
+ mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
+ mr->hw_mr.dma_mr = true;
+
+ rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr);
+ if (rc) {
+ DP_ERR(dev, "roce register tid returned an error %d\n", rc);
+ goto err2;
+ }
+
+ mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
+ if (mr->hw_mr.remote_write || mr->hw_mr.remote_read ||
+ mr->hw_mr.remote_atomic)
+ mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key;
+
+ DP_VERBOSE(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey);
+ return &mr->ibmr;
+
+err2:
+ dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid);
+err1:
+ kfree(mr);
+ return ERR_PTR(rc);
+}
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index 80dbac1..e7cb3fd 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -44,4 +44,18 @@ int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr,
int qp_attr_mask, struct ib_qp_init_attr *);
int qedr_destroy_qp(struct ib_qp *ibqp);
+struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr);
+int qedr_destroy_ah(struct ib_ah *ibah);
+
+int qedr_dereg_mr(struct ib_mr *);
+struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc);
+
+struct ib_mr *qedr_reg_user_mr(struct ib_pd *, u64 start, u64 length,
+ u64 virt, int acc, struct ib_udata *);
+
+int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
+ int sg_nents, unsigned int *sg_offset);
+
+struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
+ u32 max_num_sg);
#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index cf92ff7..04e33ee 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -690,6 +690,17 @@ struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt)
return p_hwfn->p_rdma_info->dev;
}
+void qed_rdma_free_tid(void *rdma_cxt, u32 itid)
+{
+ struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid);
+
+ spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+ qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tid_map, itid);
+ spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
int qed_rdma_alloc_tid(void *rdma_cxt, u32 *itid)
{
struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
@@ -2321,6 +2332,231 @@ int qed_rdma_modify_qp(void *rdma_cxt,
return rc;
}
+int qed_rdma_register_tid(void *rdma_cxt,
+ struct qed_rdma_register_tid_in_params *params)
+{
+ struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+ struct rdma_register_tid_ramrod_data *p_ramrod;
+ struct qed_sp_init_data init_data;
+ struct qed_spq_entry *p_ent;
+ enum rdma_tid_type tid_type;
+ u8 fw_return_code;
+ int rc;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", params->itid);
+
+ /* Get SPQ entry */
+ memset(&init_data, 0, sizeof(init_data));
+ init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+ init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+ rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_REGISTER_MR,
+ p_hwfn->p_rdma_info->proto, &init_data);
+ if (rc) {
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc);
+ return rc;
+ }
+
+ if (p_hwfn->p_rdma_info->last_tid < params->itid)
+ p_hwfn->p_rdma_info->last_tid = params->itid;
+
+ p_ramrod = &p_ent->ramrod.rdma_register_tid;
+
+ p_ramrod->flags = 0;
+ SET_FIELD(p_ramrod->flags,
+ RDMA_REGISTER_TID_RAMROD_DATA_TWO_LEVEL_PBL,
+ params->pbl_two_level);
+
+ SET_FIELD(p_ramrod->flags,
+ RDMA_REGISTER_TID_RAMROD_DATA_ZERO_BASED, params->zbva);
+
+ SET_FIELD(p_ramrod->flags,
+ RDMA_REGISTER_TID_RAMROD_DATA_PHY_MR, params->phy_mr);
+
+ /* Don't initialize D/C field, as it may override other bits. */
+ if (!(params->tid_type == QED_RDMA_TID_FMR) && !(params->dma_mr))
+ SET_FIELD(p_ramrod->flags,
+ RDMA_REGISTER_TID_RAMROD_DATA_PAGE_SIZE_LOG,
+ params->page_size_log - 12);
+
+ SET_FIELD(p_ramrod->flags,
+ RDMA_REGISTER_TID_RAMROD_DATA_MAX_ID,
+ p_hwfn->p_rdma_info->last_tid);
+
+ SET_FIELD(p_ramrod->flags,
+ RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_READ,
+ params->remote_read);
+
+ SET_FIELD(p_ramrod->flags,
+ RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_WRITE,
+ params->remote_write);
+
+ SET_FIELD(p_ramrod->flags,
+ RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_ATOMIC,
+ params->remote_atomic);
+
+ SET_FIELD(p_ramrod->flags,
+ RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_WRITE,
+ params->local_write);
+
+ SET_FIELD(p_ramrod->flags,
+ RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_READ, params->local_read);
+
+ SET_FIELD(p_ramrod->flags,
+ RDMA_REGISTER_TID_RAMROD_DATA_ENABLE_MW_BIND,
+ params->mw_bind);
+
+ SET_FIELD(p_ramrod->flags1,
+ RDMA_REGISTER_TID_RAMROD_DATA_PBL_PAGE_SIZE_LOG,
+ params->pbl_page_size_log - 12);
+
+ SET_FIELD(p_ramrod->flags2,
+ RDMA_REGISTER_TID_RAMROD_DATA_DMA_MR, params->dma_mr);
+
+ switch (params->tid_type) {
+ case QED_RDMA_TID_REGISTERED_MR:
+ tid_type = RDMA_TID_REGISTERED_MR;
+ break;
+ case QED_RDMA_TID_FMR:
+ tid_type = RDMA_TID_FMR;
+ break;
+ case QED_RDMA_TID_MW_TYPE1:
+ tid_type = RDMA_TID_MW_TYPE1;
+ break;
+ case QED_RDMA_TID_MW_TYPE2A:
+ tid_type = RDMA_TID_MW_TYPE2A;
+ break;
+ default:
+ rc = -EINVAL;
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc);
+ return rc;
+ }
+ SET_FIELD(p_ramrod->flags1,
+ RDMA_REGISTER_TID_RAMROD_DATA_TID_TYPE, tid_type);
+
+ p_ramrod->itid = cpu_to_le32(params->itid);
+ p_ramrod->key = params->key;
+ p_ramrod->pd = cpu_to_le16(params->pd);
+ p_ramrod->length_hi = (u8)(params->length >> 32);
+ p_ramrod->length_lo = DMA_LO_LE(params->length);
+ if (params->zbva) {
+ /* Lower 32 bits of the registered MR address.
+ * In case of zero based MR, will hold FBO
+ */
+ p_ramrod->va.hi = 0;
+ p_ramrod->va.lo = cpu_to_le32(params->fbo);
+ } else {
+ DMA_REGPAIR_LE(p_ramrod->va, params->vaddr);
+ }
+ DMA_REGPAIR_LE(p_ramrod->pbl_base, params->pbl_ptr);
+
+ /* DIF */
+ if (params->dif_enabled) {
+ SET_FIELD(p_ramrod->flags2,
+ RDMA_REGISTER_TID_RAMROD_DATA_DIF_ON_HOST_FLG, 1);
+ DMA_REGPAIR_LE(p_ramrod->dif_error_addr,
+ params->dif_error_addr);
+ DMA_REGPAIR_LE(p_ramrod->dif_runt_addr, params->dif_runt_addr);
+ }
+
+ rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code);
+
+ if (fw_return_code != RDMA_RETURN_OK) {
+ DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code);
+ return -EINVAL;
+ }
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Register TID, rc = %d\n", rc);
+ return rc;
+}
+
+int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid)
+{
+ struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+ struct rdma_deregister_tid_ramrod_data *p_ramrod;
+ struct qed_sp_init_data init_data;
+ struct qed_spq_entry *p_ent;
+ struct qed_ptt *p_ptt;
+ u8 fw_return_code;
+ int rc;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid);
+
+ /* Get SPQ entry */
+ memset(&init_data, 0, sizeof(init_data));
+ init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+ init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+ rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_DEREGISTER_MR,
+ p_hwfn->p_rdma_info->proto, &init_data);
+ if (rc) {
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc);
+ return rc;
+ }
+
+ p_ramrod = &p_ent->ramrod.rdma_deregister_tid;
+ p_ramrod->itid = cpu_to_le32(itid);
+
+ rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code);
+ if (rc) {
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc);
+ return rc;
+ }
+
+ if (fw_return_code == RDMA_RETURN_DEREGISTER_MR_BAD_STATE_ERR) {
+ DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code);
+ return -EINVAL;
+ } else if (fw_return_code == RDMA_RETURN_NIG_DRAIN_REQ) {
+ /* Bit indicating that the TID is in use and a nig drain is
+ * required before sending the ramrod again
+ */
+ p_ptt = qed_ptt_acquire(p_hwfn);
+ if (!p_ptt) {
+ rc = -EBUSY;
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "Failed to acquire PTT\n");
+ return rc;
+ }
+
+ rc = qed_mcp_drain(p_hwfn, p_ptt);
+ if (rc) {
+ qed_ptt_release(p_hwfn, p_ptt);
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "Drain failed\n");
+ return rc;
+ }
+
+ qed_ptt_release(p_hwfn, p_ptt);
+
+ /* Resend the ramrod */
+ rc = qed_sp_init_request(p_hwfn, &p_ent,
+ RDMA_RAMROD_DEREGISTER_MR,
+ p_hwfn->p_rdma_info->proto,
+ &init_data);
+ if (rc) {
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "Failed to init sp-element\n");
+ return rc;
+ }
+
+ rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code);
+ if (rc) {
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+ "Ramrod failed\n");
+ return rc;
+ }
+
+ if (fw_return_code != RDMA_RETURN_OK) {
+ DP_NOTICE(p_hwfn, "fw_return_code = %d\n",
+ fw_return_code);
+ return rc;
+ }
+ }
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "De-registered TID, rc = %d\n", rc);
+ return rc;
+}
+
static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev)
{
return QED_LEADING_HWFN(cdev);
@@ -2419,6 +2655,10 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = {
.rdma_modify_qp = &qed_rdma_modify_qp,
.rdma_query_qp = &qed_rdma_query_qp,
.rdma_destroy_qp = &qed_rdma_destroy_qp,
+ .rdma_alloc_tid = &qed_rdma_alloc_tid,
+ .rdma_free_tid = &qed_rdma_free_tid,
+ .rdma_register_tid = &qed_rdma_register_tid,
+ .rdma_deregister_tid = &qed_rdma_deregister_tid,
};
const struct qed_rdma_ops *qed_get_rdma_ops()
diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h
index 02321e3..0b6df6e 100644
--- a/include/linux/qed/qed_roce_if.h
+++ b/include/linux/qed/qed_roce_if.h
@@ -512,6 +512,12 @@ struct qed_rdma_ops {
int (*rdma_query_qp)(void *rdma_cxt, struct qed_rdma_qp *qp,
struct qed_rdma_query_qp_out_params *oparams);
int (*rdma_destroy_qp)(void *rdma_cxt, struct qed_rdma_qp *qp);
+ int
+ (*rdma_register_tid)(void *rdma_cxt,
+ struct qed_rdma_register_tid_in_params *iparams);
+ int (*rdma_deregister_tid)(void *rdma_cxt, u32 itid);
+ int (*rdma_alloc_tid)(void *rdma_cxt, u32 *itid);
+ void (*rdma_free_tid)(void *rdma_cxt, u32 itid);
};
const struct qed_rdma_ops *qed_get_rdma_ops(void);
--
1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [RFC v2 05/12] qedr: Add support for PD,PKEY and CQ verbs
From: Ram Amrani @ 2016-09-20 10:35 UTC (permalink / raw)
To: davem-fT/PcQaiUtIeIZ0/mPfg9Q, dledford-H+wXaHxf7aLQT0dZR+AlfA
Cc: Ariel.Elior-YGCgFSpz5w/QT0dZR+AlfA,
Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA,
Yuval.Mintz-YGCgFSpz5w/QT0dZR+AlfA,
rajesh.borundia-YGCgFSpz5w/QT0dZR+AlfA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
Ram amrani
In-Reply-To: <1474367764-9555-1-git-send-email-Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
From: Ram amrani <Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
Add support for protection domain and completion queue verbs.
Signed-off-by: Rajesh Borundia <rajesh.borundia-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
Signed-off-by: Ram Amrani <Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qedr/main.c | 18 +-
drivers/infiniband/hw/qedr/qedr.h | 78 +++++
drivers/infiniband/hw/qedr/qedr_hsi_rdma.h | 79 +++++
drivers/infiniband/hw/qedr/verbs.c | 543 +++++++++++++++++++++++++++++
drivers/infiniband/hw/qedr/verbs.h | 14 +
drivers/net/ethernet/qlogic/qed/qed_roce.c | 313 +++++++++++++++++
drivers/net/ethernet/qlogic/qed/qed_roce.h | 20 ++
include/linux/qed/qed_roce_if.h | 29 ++
include/uapi/rdma/providers/qedr-abi.h | 19 +
9 files changed, 1112 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index aa2832f..c99dd6a 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -93,7 +93,13 @@ static int qedr_register_device(struct qedr_dev *dev)
dev->ibdev.uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) |
QEDR_UVERBS(QUERY_DEVICE) |
- QEDR_UVERBS(QUERY_PORT);
+ QEDR_UVERBS(QUERY_PORT) |
+ QEDR_UVERBS(ALLOC_PD) |
+ QEDR_UVERBS(DEALLOC_PD) |
+ QEDR_UVERBS(CREATE_CQ) |
+ QEDR_UVERBS(RESIZE_CQ) |
+ QEDR_UVERBS(DESTROY_CQ) |
+ QEDR_UVERBS(REQ_NOTIFY_CQ);
dev->ibdev.phys_port_cnt = 1;
dev->ibdev.num_comp_vectors = dev->num_cnq;
@@ -110,6 +116,16 @@ static int qedr_register_device(struct qedr_dev *dev)
dev->ibdev.dealloc_ucontext = qedr_dealloc_ucontext;
dev->ibdev.mmap = qedr_mmap;
+ dev->ibdev.alloc_pd = qedr_alloc_pd;
+ dev->ibdev.dealloc_pd = qedr_dealloc_pd;
+
+ dev->ibdev.create_cq = qedr_create_cq;
+ dev->ibdev.destroy_cq = qedr_destroy_cq;
+ dev->ibdev.resize_cq = qedr_resize_cq;
+ dev->ibdev.req_notify_cq = qedr_arm_cq;
+
+ dev->ibdev.query_pkey = qedr_query_pkey;
+
dev->ibdev.dma_device = &dev->pdev->dev;
dev->ibdev.get_link_layer = qedr_link_layer;
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 9fde4ee..dd974d5 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -44,8 +44,12 @@
#define QEDR_NODE_DESC "QLogic 579xx RoCE HCA"
#define DP_NAME(dev) ((dev)->ibdev.name)
+#define QEDR_CQ_MAGIC_NUMBER (0x11223344)
+
enum DP_QEDR_MODULE {
QEDR_MSG_INIT = 0x10000,
+ QEDR_MSG_CQ = 0x20000,
+ QEDR_MSG_MR = 0x100000,
QEDR_MSG_MISC = 0x400000,
};
@@ -179,6 +183,12 @@ struct qedr_dev {
#define QEDR_ROCE_PKEY_TABLE_LEN 1
#define QEDR_ROCE_PKEY_DEFAULT 0xffff
+struct qedr_pbl {
+ struct list_head list_entry;
+ void *va;
+ dma_addr_t pa;
+};
+
struct qedr_ucontext {
struct ib_ucontext ibucontext;
struct qedr_dev *dev;
@@ -194,6 +204,64 @@ struct qedr_ucontext {
struct mutex mm_list_lock;
};
+union db_prod64 {
+ struct rdma_pwm_val32_data data;
+ u64 raw;
+};
+
+enum qedr_cq_type {
+ QEDR_CQ_TYPE_GSI,
+ QEDR_CQ_TYPE_KERNEL,
+ QEDR_CQ_TYPE_USER
+};
+
+struct qedr_pbl_info {
+ u32 num_pbls;
+ u32 num_pbes;
+ u32 pbl_size;
+ u32 pbe_size;
+ bool two_layered;
+};
+
+struct qedr_userq {
+ struct ib_umem *umem;
+ struct qedr_pbl_info pbl_info;
+ struct qedr_pbl *pbl_tbl;
+ u64 buf_addr;
+ size_t buf_len;
+};
+
+struct qedr_cq {
+ struct ib_cq ibcq;
+
+ enum qedr_cq_type cq_type;
+ u32 sig;
+
+ u16 icid;
+
+ /* Lock to protect multiplem CQ's */
+ spinlock_t cq_lock;
+ u8 arm_flags;
+ struct qed_chain pbl;
+
+ void __iomem *db_addr;
+ union db_prod64 db;
+
+ u8 pbl_toggle;
+ union rdma_cqe *latest_cqe;
+ union rdma_cqe *toggle_cqe;
+
+ u32 cq_cons;
+
+ struct qedr_userq q;
+};
+
+struct qedr_pd {
+ struct ib_pd ibpd;
+ u32 pd_id;
+ struct qedr_ucontext *uctx;
+};
+
struct qedr_mm {
struct {
u64 phy_addr;
@@ -213,4 +281,14 @@ static inline struct qedr_dev *get_qedr_dev(struct ib_device *ibdev)
return container_of(ibdev, struct qedr_dev, ibdev);
}
+static inline struct qedr_pd *get_qedr_pd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct qedr_pd, ibpd);
+}
+
+static inline struct qedr_cq *get_qedr_cq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct qedr_cq, ibcq);
+}
+
#endif
diff --git a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
index 3e508fb..84f6520 100644
--- a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
+++ b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h
@@ -47,6 +47,19 @@ struct rdma_cqe_responder {
__le32 imm_data_hi;
__le16 rq_cons;
u8 flags;
+#define RDMA_CQE_RESPONDER_TOGGLE_BIT_MASK 0x1
+#define RDMA_CQE_RESPONDER_TOGGLE_BIT_SHIFT 0
+#define RDMA_CQE_RESPONDER_TYPE_MASK 0x3
+#define RDMA_CQE_RESPONDER_TYPE_SHIFT 1
+#define RDMA_CQE_RESPONDER_INV_FLG_MASK 0x1
+#define RDMA_CQE_RESPONDER_INV_FLG_SHIFT 3
+#define RDMA_CQE_RESPONDER_IMM_FLG_MASK 0x1
+#define RDMA_CQE_RESPONDER_IMM_FLG_SHIFT 4
+#define RDMA_CQE_RESPONDER_RDMA_FLG_MASK 0x1
+#define RDMA_CQE_RESPONDER_RDMA_FLG_SHIFT 5
+#define RDMA_CQE_RESPONDER_RESERVED2_MASK 0x3
+#define RDMA_CQE_RESPONDER_RESERVED2_SHIFT 6
+ u8 status;
};
struct rdma_cqe_requester {
@@ -58,6 +71,12 @@ struct rdma_cqe_requester {
__le32 reserved3;
__le16 reserved4;
u8 flags;
+#define RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK 0x1
+#define RDMA_CQE_REQUESTER_TOGGLE_BIT_SHIFT 0
+#define RDMA_CQE_REQUESTER_TYPE_MASK 0x3
+#define RDMA_CQE_REQUESTER_TYPE_SHIFT 1
+#define RDMA_CQE_REQUESTER_RESERVED5_MASK 0x1F
+#define RDMA_CQE_REQUESTER_RESERVED5_SHIFT 3
u8 status;
};
@@ -66,6 +85,12 @@ struct rdma_cqe_common {
struct regpair qp_handle;
__le16 reserved1[7];
u8 flags;
+#define RDMA_CQE_COMMON_TOGGLE_BIT_MASK 0x1
+#define RDMA_CQE_COMMON_TOGGLE_BIT_SHIFT 0
+#define RDMA_CQE_COMMON_TYPE_MASK 0x3
+#define RDMA_CQE_COMMON_TYPE_SHIFT 1
+#define RDMA_CQE_COMMON_RESERVED2_MASK 0x1F
+#define RDMA_CQE_COMMON_RESERVED2_SHIFT 3
u8 status;
};
@@ -76,6 +101,45 @@ union rdma_cqe {
struct rdma_cqe_common cmn;
};
+/* * CQE requester status enumeration */
+enum rdma_cqe_requester_status_enum {
+ RDMA_CQE_REQ_STS_OK,
+ RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR,
+ RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR,
+ RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR,
+ RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR,
+ RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR,
+ RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR,
+ RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR,
+ RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR,
+ RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR,
+ RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR,
+ RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR,
+ MAX_RDMA_CQE_REQUESTER_STATUS_ENUM
+};
+
+/* CQE responder status enumeration */
+enum rdma_cqe_responder_status_enum {
+ RDMA_CQE_RESP_STS_OK,
+ RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR,
+ RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR,
+ RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR,
+ RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR,
+ RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR,
+ RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR,
+ RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR,
+ MAX_RDMA_CQE_RESPONDER_STATUS_ENUM
+};
+
+/* CQE type enumeration */
+enum rdma_cqe_type {
+ RDMA_CQE_TYPE_REQUESTER,
+ RDMA_CQE_TYPE_RESPONDER_RQ,
+ RDMA_CQE_TYPE_RESPONDER_SRQ,
+ RDMA_CQE_TYPE_INVALID,
+ MAX_RDMA_CQE_TYPE
+};
+
struct rdma_sq_sge {
__le32 length;
struct regpair addr;
@@ -93,4 +157,19 @@ struct rdma_srq_sge {
__le32 length;
__le32 l_key;
};
+
+/* Rdma doorbell data for CQ */
+struct rdma_pwm_val32_data {
+ __le16 icid;
+ u8 agg_flags;
+ u8 params;
+#define RDMA_PWM_VAL32_DATA_AGG_CMD_MASK 0x3
+#define RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT 0
+#define RDMA_PWM_VAL32_DATA_BYPASS_EN_MASK 0x1
+#define RDMA_PWM_VAL32_DATA_BYPASS_EN_SHIFT 2
+#define RDMA_PWM_VAL32_DATA_RESERVED_MASK 0x1F
+#define RDMA_PWM_VAL32_DATA_RESERVED_SHIFT 3
+ __le32 value;
+};
+
#endif /* __QED_HSI_RDMA__ */
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index c37158c..e5c4aa1 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -25,6 +25,17 @@
#include "verbs.h"
#include <rdma/providers/qedr-abi.h>
+#define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT)
+
+int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
+{
+ if (index > QEDR_ROCE_PKEY_TABLE_LEN)
+ return -EINVAL;
+
+ *pkey = QEDR_ROCE_PKEY_DEFAULT;
+ return 0;
+}
+
int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
union ib_gid *sgid)
{
@@ -415,3 +426,535 @@ int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
DP_VERBOSE(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
return rc;
}
+
+struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context, struct ib_udata *udata)
+{
+ struct qedr_dev *dev = get_qedr_dev(ibdev);
+ struct qedr_ucontext *uctx = NULL;
+ struct qedr_alloc_pd_uresp uresp;
+ struct qedr_pd *pd;
+ u16 pd_id;
+ int rc;
+
+ DP_VERBOSE(dev, QEDR_MSG_INIT, "Function called from: %s\n",
+ (udata && context) ? "User Lib" : "Kernel");
+ if (udata && context)
+ uctx = get_qedr_ucontext(context);
+
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+ if (!pd) {
+ DP_ERR(dev, "failed to alloce PD\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ if (!dev->rdma_ctx) {
+ DP_ERR(dev, "invlaid RDMA context\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id);
+
+ uresp.pd_id = pd_id;
+ pd->pd_id = pd_id;
+
+ if (uctx) {
+ rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (rc)
+ DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id);
+ uctx->pd = pd;
+ pd->uctx = uctx;
+ }
+
+ return &pd->ibpd;
+}
+
+int qedr_dealloc_pd(struct ib_pd *ibpd)
+{
+ struct qedr_dev *dev = get_qedr_dev(ibpd->device);
+ struct qedr_pd *pd = get_qedr_pd(ibpd);
+
+ if (!pd)
+ pr_err("Invalid PD received in dealloc_pd\n");
+
+ DP_VERBOSE(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id);
+ dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id);
+
+ kfree(pd);
+
+ return 0;
+}
+
+static void qedr_free_pbl(struct qedr_dev *dev,
+ struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl)
+{
+ struct pci_dev *pdev = dev->pdev;
+ int i;
+
+ for (i = 0; i < pbl_info->num_pbls; i++) {
+ if (!pbl[i].va)
+ continue;
+ dma_free_coherent(&pdev->dev, pbl_info->pbl_size,
+ pbl[i].va, pbl[i].pa);
+ }
+
+ kfree(pbl);
+}
+
+#define MIN_FW_PBL_PAGE_SIZE (4 * 1024)
+#define MAX_FW_PBL_PAGE_SIZE (64 * 1024)
+
+#define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64))
+#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE)
+#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE)
+
+static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev,
+ struct qedr_pbl_info *pbl_info,
+ gfp_t flags)
+{
+ struct pci_dev *pdev = dev->pdev;
+ struct qedr_pbl *pbl_table;
+ dma_addr_t *pbl_main_tbl;
+ dma_addr_t pa;
+ int rc = 0;
+ void *va;
+ int i;
+
+ pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags);
+
+ if (!pbl_table) {
+ DP_ERR(dev, "pbl table is NULL\n");
+ return NULL;
+ }
+
+ for (i = 0; i < pbl_info->num_pbls; i++) {
+ va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size,
+ &pa, flags);
+ if (!va) {
+ DP_ERR(dev, "Failed to allocate pbl#%d\n", i);
+ rc = -ENOMEM;
+ goto err;
+ }
+ memset(va, 0, pbl_info->pbl_size);
+ pbl_table[i].va = va;
+ pbl_table[i].pa = pa;
+ }
+
+ /* Two-Layer PBLs, if we have more than one pbl we need to initialize
+ * the first one with physical pointers to all of the rest
+ */
+ pbl_main_tbl = (dma_addr_t *)pbl_table[0].va;
+ for (i = 0; i < pbl_info->num_pbls - 1; i++)
+ pbl_main_tbl[i] = pbl_table[i + 1].pa;
+
+ return pbl_table;
+
+err:
+ qedr_free_pbl(dev, pbl_info, pbl_table);
+ return NULL;
+}
+
+static int qedr_prepare_pbl_tbl(struct qedr_dev *dev,
+ struct qedr_pbl_info *pbl_info,
+ u32 num_pbes, int two_layer_capable)
+{
+ u32 pbl_capacity;
+ u32 pbl_size;
+ u32 num_pbls;
+
+ if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) {
+ if (num_pbes > MAX_PBES_TWO_LAYER) {
+ DP_ERR(dev, "prepare pbl table: too many pages %d\n",
+ num_pbes);
+ return -EINVAL;
+ }
+
+ /* calculate required pbl page size */
+ pbl_size = MIN_FW_PBL_PAGE_SIZE;
+ pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) *
+ NUM_PBES_ON_PAGE(pbl_size);
+
+ while (pbl_capacity < num_pbes) {
+ pbl_size *= 2;
+ pbl_capacity = pbl_size / sizeof(u64);
+ pbl_capacity = pbl_capacity * pbl_capacity;
+ }
+
+ num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size));
+ num_pbls++; /* One for the layer0 ( points to the pbls) */
+ pbl_info->two_layered = true;
+ } else {
+ /* One layered PBL */
+ num_pbls = 1;
+ pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE,
+ roundup_pow_of_two((num_pbes * sizeof(u64))));
+ pbl_info->two_layered = false;
+ }
+
+ pbl_info->num_pbls = num_pbls;
+ pbl_info->pbl_size = pbl_size;
+ pbl_info->num_pbes = num_pbes;
+
+ DP_VERBOSE(dev, QEDR_MSG_MR,
+ "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n",
+ pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size);
+
+ return 0;
+}
+
+static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem,
+ struct qedr_pbl *pbl,
+ struct qedr_pbl_info *pbl_info)
+{
+ int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0;
+ struct qedr_pbl *pbl_tbl;
+ struct scatterlist *sg;
+ struct regpair *pbe;
+ int entry;
+ u32 addr;
+
+ if (!pbl_info->num_pbes)
+ return;
+
+ /* If we have a two layered pbl, the first pbl points to the rest
+ * of the pbls and the first entry lays on the second pbl in the table
+ */
+ if (pbl_info->two_layered)
+ pbl_tbl = &pbl[1];
+ else
+ pbl_tbl = pbl;
+
+ pbe = (struct regpair *)pbl_tbl->va;
+ if (!pbe) {
+ DP_ERR(dev, "pbe is NULL\n");
+ return;
+ }
+
+ pbe_cnt = 0;
+
+ shift = ilog2(umem->page_size);
+
+ for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+ pages = sg_dma_len(sg) >> shift;
+ for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) {
+ /* store the page address in pbe */
+ pbe->lo = cpu_to_le32(sg_dma_address(sg) +
+ umem->page_size * pg_cnt);
+ addr = upper_32_bits(sg_dma_address(sg) +
+ umem->page_size * pg_cnt);
+ pbe->hi = cpu_to_le32(addr);
+ pbe_cnt++;
+ total_num_pbes++;
+ pbe++;
+
+ if (total_num_pbes == pbl_info->num_pbes)
+ return;
+
+ /* If the given pbl is full storing the pbes,
+ * move to next pbl.
+ */
+ if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) {
+ pbl_tbl++;
+ pbe = (struct regpair *)pbl_tbl->va;
+ pbe_cnt = 0;
+ }
+ }
+ }
+}
+
+static int qedr_copy_cq_uresp(struct qedr_dev *dev,
+ struct qedr_cq *cq, struct ib_udata *udata)
+{
+ struct qedr_create_cq_uresp uresp;
+ int rc;
+
+ memset(&uresp, 0, sizeof(uresp));
+
+ uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
+ uresp.icid = cq->icid;
+
+ rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (rc)
+ DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid);
+
+ return rc;
+}
+
+static void consume_cqe(struct qedr_cq *cq)
+{
+ if (cq->latest_cqe == cq->toggle_cqe)
+ cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
+
+ cq->latest_cqe = qed_chain_consume(&cq->pbl);
+}
+
+static inline int qedr_align_cq_entries(int entries)
+{
+ u64 size, aligned_size;
+
+ /* We allocate an extra entry that we don't report to the FW. */
+ size = (entries + 1) * QEDR_CQE_SIZE;
+ aligned_size = ALIGN(size, PAGE_SIZE);
+
+ return aligned_size / QEDR_CQE_SIZE;
+}
+
+static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx,
+ struct qedr_dev *dev,
+ struct qedr_userq *q,
+ u64 buf_addr, size_t buf_len,
+ int access, int dmasync)
+{
+ int page_cnt;
+ int rc;
+
+ q->buf_addr = buf_addr;
+ q->buf_len = buf_len;
+ q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync);
+ if (IS_ERR(q->umem)) {
+ DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n",
+ PTR_ERR(q->umem));
+ return PTR_ERR(q->umem);
+ }
+
+ page_cnt = ib_umem_page_count(q->umem);
+ rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, 0);
+ if (rc)
+ goto err0;
+
+ q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL);
+ if (!q->pbl_tbl)
+ goto err0;
+
+ qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info);
+
+ return 0;
+
+err0:
+ ib_umem_release(q->umem);
+
+ return rc;
+}
+
+static inline void qedr_init_cq_params(struct qedr_cq *cq,
+ struct qedr_ucontext *ctx,
+ struct qedr_dev *dev, int vector,
+ int chain_entries, int page_cnt,
+ u64 pbl_ptr,
+ struct qed_rdma_create_cq_in_params
+ *params)
+{
+ memset(params, 0, sizeof(*params));
+ params->cq_handle_hi = upper_32_bits((uintptr_t)cq);
+ params->cq_handle_lo = lower_32_bits((uintptr_t)cq);
+ params->cnq_id = vector;
+ params->cq_size = chain_entries - 1;
+ params->dpi = (ctx) ? ctx->dpi : dev->dpi;
+ params->pbl_num_pages = page_cnt;
+ params->pbl_ptr = pbl_ptr;
+ params->pbl_two_level = 0;
+}
+
+static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags)
+{
+ /* Flush data before signalling doorbell */
+ wmb();
+ cq->db.data.agg_flags = flags;
+ cq->db.data.value = cpu_to_le32(cons);
+ writeq(cq->db.raw, cq->db_addr);
+
+ /* Make sure write would stick */
+ mmiowb();
+}
+
+int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
+{
+ struct qedr_cq *cq = get_qedr_cq(ibcq);
+ unsigned long sflags;
+
+ if (cq->cq_type == QEDR_CQ_TYPE_GSI)
+ return 0;
+
+ spin_lock_irqsave(&cq->cq_lock, sflags);
+
+ cq->arm_flags = 0;
+
+ if (flags & IB_CQ_SOLICITED)
+ cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
+
+ if (flags & IB_CQ_NEXT_COMP)
+ cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD;
+
+ doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags);
+
+ spin_unlock_irqrestore(&cq->cq_lock, sflags);
+
+ return 0;
+}
+
+struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *ib_ctx, struct ib_udata *udata)
+{
+ struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx);
+ struct qed_rdma_destroy_cq_out_params destroy_oparams;
+ struct qed_rdma_destroy_cq_in_params destroy_iparams;
+ struct qedr_dev *dev = get_qedr_dev(ibdev);
+ struct qed_rdma_create_cq_in_params params;
+ struct qedr_create_cq_ureq ureq;
+ int vector = attr->comp_vector;
+ int entries = attr->cqe;
+ struct qedr_cq *cq;
+ int chain_entries;
+ int page_cnt;
+ u64 pbl_ptr;
+ u16 icid;
+ int rc;
+
+ DP_VERBOSE(dev, QEDR_MSG_INIT,
+ "create_cq: called from %s. entries=%d, vector=%d\n",
+ udata ? "User Lib" : "Kernel", entries, vector);
+
+ if (entries > QEDR_MAX_CQES) {
+ DP_ERR(dev,
+ "create cq: the number of entries %d is too high. Must be equal or below %d.\n",
+ entries, QEDR_MAX_CQES);
+ return ERR_PTR(-EINVAL);
+ }
+
+ chain_entries = qedr_align_cq_entries(entries);
+ chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES);
+
+ cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+ if (!cq)
+ return ERR_PTR(-ENOMEM);
+
+ if (udata) {
+ memset(&ureq, 0, sizeof(ureq));
+ if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) {
+ DP_ERR(dev,
+ "create cq: problem copying data from user space\n");
+ goto err0;
+ }
+
+ if (!ureq.len) {
+ DP_ERR(dev,
+ "create cq: cannot create a cq with 0 entries\n");
+ goto err0;
+ }
+
+ cq->cq_type = QEDR_CQ_TYPE_USER;
+
+ rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr, ureq.len,
+ IB_ACCESS_LOCAL_WRITE, 1);
+ if (rc)
+ goto err0;
+
+ pbl_ptr = cq->q.pbl_tbl->pa;
+ page_cnt = cq->q.pbl_info.num_pbes;
+ } else {
+ cq->cq_type = QEDR_CQ_TYPE_KERNEL;
+
+ rc = dev->ops->common->chain_alloc(dev->cdev,
+ QED_CHAIN_USE_TO_CONSUME,
+ QED_CHAIN_MODE_PBL,
+ QED_CHAIN_CNT_TYPE_U32,
+ chain_entries,
+ sizeof(union rdma_cqe),
+ &cq->pbl);
+ if (rc)
+ goto err1;
+
+ page_cnt = qed_chain_get_page_cnt(&cq->pbl);
+ pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl);
+ }
+
+ qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt,
+ pbl_ptr, ¶ms);
+
+ rc = dev->ops->rdma_create_cq(dev->rdma_ctx, ¶ms, &icid);
+ if (rc)
+ goto err2;
+
+ cq->icid = icid;
+ cq->sig = QEDR_CQ_MAGIC_NUMBER;
+ spin_lock_init(&cq->cq_lock);
+
+ if (ib_ctx) {
+ rc = qedr_copy_cq_uresp(dev, cq, udata);
+ if (rc)
+ goto err3;
+ } else {
+ /* Generate doorbell address. */
+ cq->db_addr = dev->db_addr +
+ DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT);
+ cq->db.data.icid = cq->icid;
+ cq->db.data.params = DB_AGG_CMD_SET <<
+ RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
+
+ /* point to the very last element, passing it we will toggle */
+ cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl);
+ cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
+ cq->latest_cqe = NULL;
+ consume_cqe(cq);
+ cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl);
+ }
+
+ DP_VERBOSE(dev, QEDR_MSG_CQ,
+ "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n",
+ cq->icid, cq, params.cq_size);
+
+ return &cq->ibcq;
+
+err3:
+ destroy_iparams.icid = cq->icid;
+ dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams,
+ &destroy_oparams);
+err2:
+ if (udata)
+ qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
+ else
+ dev->ops->common->chain_free(dev->cdev, &cq->pbl);
+err1:
+ if (udata)
+ ib_umem_release(cq->q.umem);
+err0:
+ kfree(cq);
+ return ERR_PTR(-EINVAL);
+}
+
+int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata)
+{
+ struct qedr_dev *dev = get_qedr_dev(ibcq->device);
+ struct qedr_cq *cq = get_qedr_cq(ibcq);
+
+ DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq);
+
+ return 0;
+}
+
+int qedr_destroy_cq(struct ib_cq *ibcq)
+{
+ struct qedr_dev *dev = get_qedr_dev(ibcq->device);
+ struct qed_rdma_destroy_cq_out_params oparams;
+ struct qed_rdma_destroy_cq_in_params iparams;
+ struct qedr_cq *cq = get_qedr_cq(ibcq);
+
+ DP_VERBOSE(dev, QEDR_MSG_CQ, "destroy cq: cq_id %d", cq->icid);
+
+ /* GSIs CQs are handled by driver, so they don't exist in the FW */
+ if (cq->cq_type != QEDR_CQ_TYPE_GSI) {
+ iparams.icid = cq->icid;
+ dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams);
+ dev->ops->common->chain_free(dev->cdev, &cq->pbl);
+ }
+
+ if (ibcq->uobject && ibcq->uobject->context) {
+ qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl);
+ ib_umem_release(cq->q.umem);
+ }
+
+ kfree(cq);
+
+ return 0;
+}
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
index f731063..d5e1017 100644
--- a/drivers/infiniband/hw/qedr/verbs.h
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -14,6 +14,8 @@ int qedr_query_port(struct ib_device *, u8 port, struct ib_port_attr *props);
int qedr_query_gid(struct ib_device *, u8 port, int index, union ib_gid *gid);
+int qedr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey);
+
struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *, struct ib_udata *);
int qedr_dealloc_ucontext(struct ib_ucontext *);
@@ -23,4 +25,16 @@ int qedr_del_gid(struct ib_device *device, u8 port_num,
int qedr_add_gid(struct ib_device *device, u8 port_num,
unsigned int index, const union ib_gid *gid,
const struct ib_gid_attr *attr, void **context);
+struct ib_pd *qedr_alloc_pd(struct ib_device *,
+ struct ib_ucontext *, struct ib_udata *);
+int qedr_dealloc_pd(struct ib_pd *pd);
+
+struct ib_cq *qedr_create_cq(struct ib_device *ibdev,
+ const struct ib_cq_init_attr *attr,
+ struct ib_ucontext *ib_ctx,
+ struct ib_udata *udata);
+int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *);
+int qedr_destroy_cq(struct ib_cq *);
+int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags);
+
#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index 9cdb472..7b489f0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -805,6 +805,315 @@ static int qed_rdma_get_int(struct qed_dev *cdev, struct qed_int_info *info)
return 0;
}
+int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd)
+{
+ struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+ u32 returned_id;
+ int rc;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Alloc PD\n");
+
+ /* Allocates an unused protection domain */
+ spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+ rc = qed_rdma_bmap_alloc_id(p_hwfn,
+ &p_hwfn->p_rdma_info->pd_map, &returned_id);
+ spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+
+ *pd = (u16)returned_id;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Alloc PD - done, rc = %d\n", rc);
+ return rc;
+}
+
+void qed_rdma_free_pd(void *rdma_cxt, u16 pd)
+{
+ struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "pd = %08x\n", pd);
+
+ /* Returns a previously allocated protection domain for reuse */
+ spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+ qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->pd_map, pd);
+ spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
+static enum qed_rdma_toggle_bit
+qed_rdma_toggle_bit_create_resize_cq(struct qed_hwfn *p_hwfn, u16 icid)
+{
+ struct qed_rdma_info *p_info = p_hwfn->p_rdma_info;
+ enum qed_rdma_toggle_bit toggle_bit;
+ u32 bmap_id;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", icid);
+
+ /* the function toggle the bit that is related to a given icid
+ * and returns the new toggle bit's value
+ */
+ bmap_id = icid - qed_cxt_get_proto_cid_start(p_hwfn, p_info->proto);
+
+ spin_lock_bh(&p_info->lock);
+ toggle_bit = !test_and_change_bit(bmap_id,
+ p_info->toggle_bits.bitmap);
+ spin_unlock_bh(&p_info->lock);
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QED_RDMA_TOGGLE_BIT_= %d\n",
+ toggle_bit);
+
+ return toggle_bit;
+}
+
+int qed_rdma_create_cq(void *rdma_cxt,
+ struct qed_rdma_create_cq_in_params *params, u16 *icid)
+{
+ struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+ struct qed_rdma_info *p_info = p_hwfn->p_rdma_info;
+ struct rdma_create_cq_ramrod_data *p_ramrod;
+ enum qed_rdma_toggle_bit toggle_bit;
+ struct qed_sp_init_data init_data;
+ struct qed_spq_entry *p_ent;
+ u32 returned_id, start_cid;
+ int rc;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "cq_handle = %08x%08x\n",
+ params->cq_handle_hi, params->cq_handle_lo);
+
+ /* Allocate icid */
+ spin_lock_bh(&p_info->lock);
+ rc = qed_rdma_bmap_alloc_id(p_hwfn,
+ &p_info->cq_map, &returned_id);
+ spin_unlock_bh(&p_info->lock);
+
+ if (rc) {
+ DP_NOTICE(p_hwfn, "Can't create CQ, rc = %d\n", rc);
+ return rc;
+ }
+
+ start_cid = qed_cxt_get_proto_cid_start(p_hwfn,
+ p_info->proto);
+ *icid = returned_id + start_cid;
+
+ /* Check if icid requires a page allocation */
+ rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, *icid);
+ if (rc)
+ goto err;
+
+ /* Get SPQ entry */
+ memset(&init_data, 0, sizeof(init_data));
+ init_data.cid = *icid;
+ init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+ init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+ /* Send create CQ ramrod */
+ rc = qed_sp_init_request(p_hwfn, &p_ent,
+ RDMA_RAMROD_CREATE_CQ,
+ p_info->proto, &init_data);
+ if (rc)
+ goto err;
+
+ p_ramrod = &p_ent->ramrod.rdma_create_cq;
+
+ p_ramrod->cq_handle.hi = cpu_to_le32(params->cq_handle_hi);
+ p_ramrod->cq_handle.lo = cpu_to_le32(params->cq_handle_lo);
+ p_ramrod->dpi = cpu_to_le16(params->dpi);
+ p_ramrod->is_two_level_pbl = params->pbl_two_level;
+ p_ramrod->max_cqes = cpu_to_le32(params->cq_size);
+ p_ramrod->pbl_addr.hi = DMA_HI_LE(params->pbl_ptr);
+ p_ramrod->pbl_addr.lo = DMA_LO_LE(params->pbl_ptr);
+ p_ramrod->pbl_num_pages = cpu_to_le16(params->pbl_num_pages);
+ p_ramrod->cnq_id = (u8)RESC_START(p_hwfn, QED_RDMA_CNQ_RAM) +
+ params->cnq_id;
+ p_ramrod->int_timeout = params->int_timeout;
+
+ /* toggle the bit for every resize or create cq for a given icid */
+ toggle_bit = qed_rdma_toggle_bit_create_resize_cq(p_hwfn, *icid);
+
+ p_ramrod->toggle_bit = toggle_bit;
+
+ rc = qed_spq_post(p_hwfn, p_ent, NULL);
+ if (rc) {
+ /* restore toggle bit */
+ qed_rdma_toggle_bit_create_resize_cq(p_hwfn, *icid);
+ goto err;
+ }
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Created CQ, rc = %d\n", rc);
+ return rc;
+
+err:
+ /* release allocated icid */
+ qed_bmap_release_id(p_hwfn, &p_info->cq_map, returned_id);
+ DP_NOTICE(p_hwfn, "Create CQ failed, rc = %d\n", rc);
+
+ return rc;
+}
+
+int qed_rdma_resize_cq(void *rdma_cxt,
+ struct qed_rdma_resize_cq_in_params *in_params,
+ struct qed_rdma_resize_cq_out_params *out_params)
+{
+ struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+ struct rdma_resize_cq_output_params *p_ramrod_res;
+ struct rdma_resize_cq_ramrod_data *p_ramrod;
+ enum qed_rdma_toggle_bit toggle_bit;
+ struct qed_sp_init_data init_data;
+ struct qed_spq_entry *p_ent;
+ dma_addr_t ramrod_res_phys;
+ u8 fw_return_code;
+ int rc = -ENOMEM;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", in_params->icid);
+
+ p_ramrod_res =
+ (struct rdma_resize_cq_output_params *)
+ dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+ sizeof(struct rdma_resize_cq_output_params),
+ &ramrod_res_phys, GFP_KERNEL);
+ if (!p_ramrod_res) {
+ DP_NOTICE(p_hwfn,
+ "qed resize cq failed: cannot allocate memory (ramrod)\n");
+ return rc;
+ }
+
+ /* Get SPQ entry */
+ memset(&init_data, 0, sizeof(init_data));
+ init_data.cid = in_params->icid;
+ init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+ init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+ rc = qed_sp_init_request(p_hwfn, &p_ent,
+ RDMA_RAMROD_RESIZE_CQ,
+ p_hwfn->p_rdma_info->proto, &init_data);
+ if (rc)
+ goto err;
+
+ p_ramrod = &p_ent->ramrod.rdma_resize_cq;
+
+ p_ramrod->flags = 0;
+
+ /* toggle the bit for every resize or create cq for a given icid */
+ toggle_bit = qed_rdma_toggle_bit_create_resize_cq(p_hwfn,
+ in_params->icid);
+
+ SET_FIELD(p_ramrod->flags,
+ RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT, toggle_bit);
+
+ SET_FIELD(p_ramrod->flags,
+ RDMA_RESIZE_CQ_RAMROD_DATA_IS_TWO_LEVEL_PBL,
+ in_params->pbl_two_level);
+
+ p_ramrod->pbl_log_page_size = in_params->pbl_page_size_log - 12;
+ p_ramrod->pbl_num_pages = cpu_to_le16(in_params->pbl_num_pages);
+ p_ramrod->max_cqes = cpu_to_le32(in_params->cq_size);
+ p_ramrod->pbl_addr.hi = DMA_HI_LE(in_params->pbl_ptr);
+ p_ramrod->pbl_addr.lo = DMA_LO_LE(in_params->pbl_ptr);
+
+ p_ramrod->output_params_addr.hi = DMA_HI_LE(ramrod_res_phys);
+ p_ramrod->output_params_addr.lo = DMA_LO_LE(ramrod_res_phys);
+
+ rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code);
+ if (rc)
+ goto err;
+
+ if (fw_return_code != RDMA_RETURN_OK) {
+ DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code);
+ DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code);
+ rc = -EINVAL;
+ goto err;
+ }
+
+ out_params->prod = le32_to_cpu(p_ramrod_res->old_cq_prod);
+ out_params->cons = le32_to_cpu(p_ramrod_res->old_cq_cons);
+
+ dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+ sizeof(struct rdma_resize_cq_output_params),
+ p_ramrod_res, ramrod_res_phys);
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Resized CQ, rc = %d\n", rc);
+
+ return rc;
+
+err: dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+ sizeof(struct rdma_resize_cq_output_params),
+ p_ramrod_res, ramrod_res_phys);
+ DP_NOTICE(p_hwfn, "Resized CQ, Failed - rc = %d\n", rc);
+
+ return rc;
+}
+
+int qed_rdma_destroy_cq(void *rdma_cxt,
+ struct qed_rdma_destroy_cq_in_params *in_params,
+ struct qed_rdma_destroy_cq_out_params *out_params)
+{
+ struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+ struct rdma_destroy_cq_output_params *p_ramrod_res;
+ struct rdma_destroy_cq_ramrod_data *p_ramrod;
+ struct qed_sp_init_data init_data;
+ struct qed_spq_entry *p_ent;
+ dma_addr_t ramrod_res_phys;
+ int rc = -ENOMEM;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", in_params->icid);
+
+ p_ramrod_res =
+ (struct rdma_destroy_cq_output_params *)
+ dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+ sizeof(struct rdma_destroy_cq_output_params),
+ &ramrod_res_phys, GFP_KERNEL);
+ if (!p_ramrod_res) {
+ DP_NOTICE(p_hwfn,
+ "qed destroy cq failed: cannot allocate memory (ramrod)\n");
+ return rc;
+ }
+
+ /* Get SPQ entry */
+ memset(&init_data, 0, sizeof(init_data));
+ init_data.cid = in_params->icid;
+ init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+ init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
+
+ /* Send destroy CQ ramrod */
+ rc = qed_sp_init_request(p_hwfn, &p_ent,
+ RDMA_RAMROD_DESTROY_CQ,
+ p_hwfn->p_rdma_info->proto, &init_data);
+ if (rc)
+ goto err;
+
+ p_ramrod = &p_ent->ramrod.rdma_destroy_cq;
+ p_ramrod->output_params_addr.hi = DMA_HI_LE(ramrod_res_phys);
+ p_ramrod->output_params_addr.lo = DMA_LO_LE(ramrod_res_phys);
+
+ rc = qed_spq_post(p_hwfn, p_ent, NULL);
+ if (rc)
+ goto err;
+
+ out_params->num_cq_notif = le16_to_cpu(p_ramrod_res->cnq_num);
+
+ dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+ sizeof(struct rdma_destroy_cq_output_params),
+ p_ramrod_res, ramrod_res_phys);
+
+ /* Free icid */
+ spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+
+ qed_bmap_release_id(p_hwfn,
+ &p_hwfn->p_rdma_info->cq_map,
+ (in_params->icid -
+ qed_cxt_get_proto_cid_start(p_hwfn,
+ p_hwfn->
+ p_rdma_info->proto)));
+
+ spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Destroyed CQ, rc = %d\n", rc);
+ return rc;
+
+err: dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+ sizeof(struct rdma_destroy_cq_output_params),
+ p_ramrod_res, ramrod_res_phys);
+
+ return rc;
+}
+
static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev)
{
return QED_LEADING_HWFN(cdev);
@@ -895,6 +1204,10 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = {
.rdma_set_rdma_int = &qed_rdma_set_int,
.rdma_get_min_cnq_msix = &qed_rdma_get_min_cnq_msix,
.rdma_cnq_prod_update = &qed_rdma_cnq_prod_update,
+ .rdma_alloc_pd = &qed_rdma_alloc_pd,
+ .rdma_dealloc_pd = &qed_rdma_free_pd,
+ .rdma_create_cq = &qed_rdma_create_cq,
+ .rdma_destroy_cq = &qed_rdma_destroy_cq,
};
const struct qed_rdma_ops *qed_get_rdma_ops()
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h
index c94c39e..6802cd0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h
@@ -94,6 +94,26 @@ struct qed_rdma_info {
enum protocol_type proto;
};
+struct qed_rdma_resize_cq_in_params {
+ u16 icid;
+ u32 cq_size;
+ bool pbl_two_level;
+ u64 pbl_ptr;
+ u16 pbl_num_pages;
+ u8 pbl_page_size_log;
+};
+
+struct qed_rdma_resize_cq_out_params {
+ u32 prod;
+ u32 cons;
+};
+
+struct qed_rdma_resize_cnq_in_params {
+ u32 cnq_id;
+ u32 pbl_page_size_log;
+ u64 pbl_ptr;
+};
+
int
qed_rdma_add_user(void *rdma_cxt,
struct qed_rdma_add_user_out_params *out_params);
diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h
index e99470c..b559b1c 100644
--- a/include/linux/qed/qed_roce_if.h
+++ b/include/linux/qed/qed_roce_if.h
@@ -263,6 +263,19 @@ struct qed_rdma_register_tid_in_params {
u64 dif_runt_addr;
};
+struct qed_rdma_create_cq_in_params {
+ u32 cq_handle_lo;
+ u32 cq_handle_hi;
+ u32 cq_size;
+ u16 dpi;
+ bool pbl_two_level;
+ u64 pbl_ptr;
+ u16 pbl_num_pages;
+ u8 pbl_page_size_log;
+ u8 cnq_id;
+ u16 int_timeout;
+};
+
struct qed_rdma_create_srq_in_params {
u64 pbl_base_addr;
u64 prod_pair_addr;
@@ -271,6 +284,14 @@ struct qed_rdma_create_srq_in_params {
u16 page_size;
};
+struct qed_rdma_destroy_cq_in_params {
+ u16 icid;
+};
+
+struct qed_rdma_destroy_cq_out_params {
+ u16 num_cq_notif;
+};
+
struct qed_rdma_create_srq_out_params {
u16 srq_id;
};
@@ -339,6 +360,14 @@ struct qed_rdma_ops {
int (*rdma_get_rdma_int)(struct qed_dev *cdev,
struct qed_int_info *info);
int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt);
+ int (*rdma_alloc_pd)(void *rdma_cxt, u16 *pd);
+ void (*rdma_dealloc_pd)(void *rdma_cxt, u16 pd);
+ int (*rdma_create_cq)(void *rdma_cxt,
+ struct qed_rdma_create_cq_in_params *params,
+ u16 *icid);
+ int (*rdma_destroy_cq)(void *rdma_cxt,
+ struct qed_rdma_destroy_cq_in_params *iparams,
+ struct qed_rdma_destroy_cq_out_params *oparams);
};
const struct qed_rdma_ops *qed_get_rdma_ops(void);
diff --git a/include/uapi/rdma/providers/qedr-abi.h b/include/uapi/rdma/providers/qedr-abi.h
index 45288dc..16f3f60 100644
--- a/include/uapi/rdma/providers/qedr-abi.h
+++ b/include/uapi/rdma/providers/qedr-abi.h
@@ -24,4 +24,23 @@ struct qedr_alloc_ucontext_resp {
u32 sges_per_srq_wr;
int max_cqes;
};
+
+struct qedr_alloc_pd_ureq {
+ u64 rsvd1;
+};
+
+struct qedr_alloc_pd_uresp {
+ u32 pd_id;
+};
+
+struct qedr_create_cq_ureq {
+ uint64_t addr;
+ size_t len;
+};
+
+struct qedr_create_cq_uresp {
+ u32 db_offset;
+ u16 icid;
+};
+
#endif /* __QEDR_USER_H__ */
--
1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [RFC v2 04/12] qedr: Add support for user context verbs
From: Ram Amrani @ 2016-09-20 10:35 UTC (permalink / raw)
To: davem-fT/PcQaiUtIeIZ0/mPfg9Q, dledford-H+wXaHxf7aLQT0dZR+AlfA
Cc: Ariel.Elior-YGCgFSpz5w/QT0dZR+AlfA,
Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA,
Yuval.Mintz-YGCgFSpz5w/QT0dZR+AlfA,
rajesh.borundia-YGCgFSpz5w/QT0dZR+AlfA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
Ram amrani
In-Reply-To: <1474367764-9555-1-git-send-email-Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
From: Ram amrani <Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
Add support for ucontext, query port, add and del gid verbs.
Signed-off-by: Rajesh Borundia <rajesh.borundia-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
Signed-off-by: Ram Amrani <Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qedr/Makefile | 2 +-
drivers/infiniband/hw/qedr/main.c | 25 ++
drivers/infiniband/hw/qedr/qedr.h | 34 +++
drivers/infiniband/hw/qedr/verbs.c | 417 +++++++++++++++++++++++++++++
drivers/infiniband/hw/qedr/verbs.h | 26 ++
drivers/net/ethernet/qlogic/qed/qed.h | 2 +-
drivers/net/ethernet/qlogic/qed/qed_dev.c | 1 +
drivers/net/ethernet/qlogic/qed/qed_roce.c | 19 +-
drivers/net/ethernet/qlogic/qed/qed_roce.h | 1 +
include/linux/qed/qed_roce_if.h | 1 +
include/linux/qed/rdma_common.h | 1 +
include/uapi/rdma/providers/qedr-abi.h | 27 ++
12 files changed, 553 insertions(+), 3 deletions(-)
create mode 100644 drivers/infiniband/hw/qedr/verbs.c
create mode 100644 drivers/infiniband/hw/qedr/verbs.h
create mode 100644 include/uapi/rdma/providers/qedr-abi.h
diff --git a/drivers/infiniband/hw/qedr/Makefile b/drivers/infiniband/hw/qedr/Makefile
index 3a5b7a2..b10f2b1 100644
--- a/drivers/infiniband/hw/qedr/Makefile
+++ b/drivers/infiniband/hw/qedr/Makefile
@@ -1,3 +1,3 @@
obj-$(CONFIG_INFINIBAND_QEDR) := qedr.o
-qedr-y := main.o
+qedr-y := main.o verbs.o
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 70ad9e6..aa2832f 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -32,6 +32,7 @@
#include <linux/module.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_addr.h>
+#include <rdma/ib_user_verbs.h>
#include <linux/netdevice.h>
#include <linux/iommu.h>
#include <net/addrconf.h>
@@ -39,6 +40,8 @@
#include <linux/qed/qed_chain.h>
#include <linux/qed/qed_if.h>
#include "qedr.h"
+#include "verbs.h"
+#include <rdma/providers/qedr-abi.h>
MODULE_DESCRIPTION("QLogic 40G/100G ROCE Driver");
MODULE_AUTHOR("QLogic Corporation");
@@ -86,7 +89,29 @@ static int qedr_register_device(struct qedr_dev *dev)
memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC));
dev->ibdev.owner = THIS_MODULE;
+ dev->ibdev.uverbs_abi_ver = QEDR_ABI_VERSION;
+ dev->ibdev.uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) |
+ QEDR_UVERBS(QUERY_DEVICE) |
+ QEDR_UVERBS(QUERY_PORT);
+
+ dev->ibdev.phys_port_cnt = 1;
+ dev->ibdev.num_comp_vectors = dev->num_cnq;
+ dev->ibdev.node_type = RDMA_NODE_IB_CA;
+
+ dev->ibdev.query_device = qedr_query_device;
+ dev->ibdev.query_port = qedr_query_port;
+
+ dev->ibdev.query_gid = qedr_query_gid;
+ dev->ibdev.add_gid = qedr_add_gid;
+ dev->ibdev.del_gid = qedr_del_gid;
+
+ dev->ibdev.alloc_ucontext = qedr_alloc_ucontext;
+ dev->ibdev.dealloc_ucontext = qedr_dealloc_ucontext;
+ dev->ibdev.mmap = qedr_mmap;
+
+ dev->ibdev.dma_device = &dev->pdev->dev;
+
dev->ibdev.get_link_layer = qedr_link_layer;
dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str;
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
index 0484d34..9fde4ee 100644
--- a/drivers/infiniband/hw/qedr/qedr.h
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -46,6 +46,7 @@
enum DP_QEDR_MODULE {
QEDR_MSG_INIT = 0x10000,
+ QEDR_MSG_MISC = 0x400000,
};
struct qedr_dev;
@@ -174,6 +175,39 @@ struct qedr_dev {
#define QEDR_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME)
+#define QEDR_ROCE_PKEY_MAX 1
+#define QEDR_ROCE_PKEY_TABLE_LEN 1
+#define QEDR_ROCE_PKEY_DEFAULT 0xffff
+
+struct qedr_ucontext {
+ struct ib_ucontext ibucontext;
+ struct qedr_dev *dev;
+ struct qedr_pd *pd;
+ u64 dpi_addr;
+ u64 dpi_phys_addr;
+ u32 dpi_size;
+ u16 dpi;
+
+ struct list_head mm_head;
+
+ /* Lock to protect mm list */
+ struct mutex mm_list_lock;
+};
+
+struct qedr_mm {
+ struct {
+ u64 phy_addr;
+ unsigned long len;
+ } key;
+ struct list_head entry;
+};
+
+static inline
+struct qedr_ucontext *get_qedr_ucontext(struct ib_ucontext *ibucontext)
+{
+ return container_of(ibucontext, struct qedr_ucontext, ibucontext);
+}
+
static inline struct qedr_dev *get_qedr_dev(struct ib_device *ibdev)
{
return container_of(ibdev, struct qedr_dev, ibdev);
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
new file mode 100644
index 0000000..c37158c
--- /dev/null
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -0,0 +1,417 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/crc32.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/udp.h>
+#include <linux/iommu.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/iw_cm.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_cache.h>
+
+#include "qedr_hsi.h"
+#include <linux/qed/qed_if.h>
+#include "qedr.h"
+#include "verbs.h"
+#include <rdma/providers/qedr-abi.h>
+
+int qedr_query_gid(struct ib_device *ibdev, u8 port, int index,
+ union ib_gid *sgid)
+{
+ struct qedr_dev *dev = get_qedr_dev(ibdev);
+ int rc = 0;
+
+ if (!rdma_cap_roce_gid_table(ibdev, port))
+ return -ENODEV;
+
+ rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL);
+ if (rc == -EAGAIN) {
+ memcpy(sgid, &zgid, sizeof(*sgid));
+ return 0;
+ }
+
+ DP_VERBOSE(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index,
+ sgid->global.interface_id, sgid->global.subnet_prefix);
+
+ return rc;
+}
+
+int qedr_add_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr, void **context)
+{
+ if (!rdma_cap_roce_gid_table(device, port_num))
+ return -EINVAL;
+
+ if (port_num > QEDR_MAX_PORT)
+ return -EINVAL;
+
+ if (!context)
+ return -EINVAL;
+
+ return 0;
+}
+
+int qedr_del_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, void **context)
+{
+ if (!rdma_cap_roce_gid_table(device, port_num))
+ return -EINVAL;
+
+ if (port_num > QEDR_MAX_PORT)
+ return -EINVAL;
+
+ if (!context)
+ return -EINVAL;
+
+ return 0;
+}
+
+int qedr_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *attr, struct ib_udata *udata)
+{
+ struct qedr_dev *dev = get_qedr_dev(ibdev);
+ struct qedr_device_attr *qattr = &dev->attr;
+
+ if (!dev->rdma_ctx) {
+ DP_ERR(dev,
+ "qedr_query_device called with invalid params rdma_ctx=%p\n",
+ dev->rdma_ctx);
+ return -EINVAL;
+ }
+
+ memset(attr, 0, sizeof(*attr));
+
+ attr->fw_ver = qattr->fw_ver;
+ attr->sys_image_guid = qattr->sys_image_guid;
+ attr->max_mr_size = qattr->max_mr_size;
+ attr->page_size_cap = qattr->page_size_caps;
+ attr->vendor_id = qattr->vendor_id;
+ attr->vendor_part_id = qattr->vendor_part_id;
+ attr->hw_ver = qattr->hw_ver;
+ attr->max_qp = qattr->max_qp;
+ attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe);
+ attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
+ IB_DEVICE_RC_RNR_NAK_GEN |
+ IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
+
+ attr->max_sge = qattr->max_sge;
+ attr->max_sge_rd = qattr->max_sge;
+ attr->max_cq = qattr->max_cq;
+ attr->max_cqe = qattr->max_cqe;
+ attr->max_mr = qattr->max_mr;
+ attr->max_mw = qattr->max_mw;
+ attr->max_pd = qattr->max_pd;
+ attr->atomic_cap = dev->atomic_cap;
+ attr->max_fmr = qattr->max_fmr;
+ attr->max_map_per_fmr = 16;
+ attr->max_qp_init_rd_atom =
+ 1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1);
+ attr->max_qp_rd_atom =
+ min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1),
+ attr->max_qp_init_rd_atom);
+
+ attr->max_srq = qattr->max_srq;
+ attr->max_srq_sge = qattr->max_srq_sge;
+ attr->max_srq_wr = qattr->max_srq_wr;
+
+ attr->local_ca_ack_delay = qattr->dev_ack_delay;
+ attr->max_fast_reg_page_list_len = qattr->max_mr / 8;
+ attr->max_pkeys = QEDR_ROCE_PKEY_MAX;
+ attr->max_ah = qattr->max_ah;
+
+ return 0;
+}
+
+#define QEDR_SPEED_SDR (1)
+#define QEDR_SPEED_DDR (2)
+#define QEDR_SPEED_QDR (4)
+#define QEDR_SPEED_FDR10 (8)
+#define QEDR_SPEED_FDR (16)
+#define QEDR_SPEED_EDR (32)
+
+static inline void get_link_speed_and_width(int speed, u8 *ib_speed,
+ u8 *ib_width)
+{
+ switch (speed) {
+ case 1000:
+ *ib_speed = QEDR_SPEED_SDR;
+ *ib_width = IB_WIDTH_1X;
+ break;
+ case 10000:
+ *ib_speed = QEDR_SPEED_QDR;
+ *ib_width = IB_WIDTH_1X;
+ break;
+
+ case 20000:
+ *ib_speed = QEDR_SPEED_DDR;
+ *ib_width = IB_WIDTH_4X;
+ break;
+
+ case 25000:
+ *ib_speed = QEDR_SPEED_EDR;
+ *ib_width = IB_WIDTH_1X;
+ break;
+
+ case 40000:
+ *ib_speed = QEDR_SPEED_QDR;
+ *ib_width = IB_WIDTH_4X;
+ break;
+
+ case 50000:
+ *ib_speed = QEDR_SPEED_QDR;
+ *ib_width = IB_WIDTH_4X;
+ break;
+
+ case 100000:
+ *ib_speed = QEDR_SPEED_EDR;
+ *ib_width = IB_WIDTH_4X;
+ break;
+
+ default:
+ /* Unsupported */
+ *ib_speed = QEDR_SPEED_SDR;
+ *ib_width = IB_WIDTH_1X;
+ }
+}
+
+int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
+{
+ struct qedr_dev *dev;
+ struct qed_rdma_port *rdma_port;
+
+ dev = get_qedr_dev(ibdev);
+ if (port > 1) {
+ DP_ERR(dev, "invalid_port=0x%x\n", port);
+ return -EINVAL;
+ }
+
+ if (!dev->rdma_ctx) {
+ DP_ERR(dev, "rdma_ctx is NULL\n");
+ return -EINVAL;
+ }
+
+ rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
+ memset(attr, 0, sizeof(*attr));
+
+ if (rdma_port->port_state == QED_RDMA_PORT_UP) {
+ attr->state = IB_PORT_ACTIVE;
+ attr->phys_state = 5;
+ } else {
+ attr->state = IB_PORT_DOWN;
+ attr->phys_state = 3;
+ }
+ attr->max_mtu = IB_MTU_4096;
+ attr->active_mtu = iboe_get_mtu(dev->ndev->mtu);
+ attr->lid = 0;
+ attr->lmc = 0;
+ attr->sm_lid = 0;
+ attr->sm_sl = 0;
+ attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
+ attr->gid_tbl_len = QEDR_MAX_SGID;
+ attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN;
+ attr->bad_pkey_cntr = rdma_port->pkey_bad_counter;
+ attr->qkey_viol_cntr = 0;
+ get_link_speed_and_width(rdma_port->link_speed,
+ &attr->active_speed, &attr->active_width);
+ attr->max_msg_sz = rdma_port->max_msg_size;
+ attr->max_vl_num = 4;
+
+ return 0;
+}
+
+static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
+ unsigned long len)
+{
+ struct qedr_mm *mm;
+
+ mm = kzalloc(sizeof(*mm), GFP_KERNEL);
+ if (!mm)
+ return -ENOMEM;
+ mm->key.phy_addr = phy_addr;
+ /* This function might be called with a length which is not a multiple
+ * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel
+ * forces this granularity by increasing the requested size if needed.
+ * When qedr_mmap is called, it will search the list with the updated
+ * length as a key. To prevent search failures, the length is rounded up
+ * in advance to PAGE_SIZE.
+ */
+ mm->key.len = roundup(len, PAGE_SIZE);
+ INIT_LIST_HEAD(&mm->entry);
+
+ mutex_lock(&uctx->mm_list_lock);
+ list_add(&mm->entry, &uctx->mm_head);
+ mutex_unlock(&uctx->mm_list_lock);
+
+ DP_VERBOSE(uctx->dev, QEDR_MSG_MISC,
+ "added (addr=0x%llx,len=0x%lx) for ctx=%p\n",
+ (unsigned long long)mm->key.phy_addr,
+ (unsigned long)mm->key.len, uctx);
+
+ return 0;
+}
+
+static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr,
+ unsigned long len)
+{
+ bool found = false;
+ struct qedr_mm *mm;
+
+ mutex_lock(&uctx->mm_list_lock);
+ list_for_each_entry(mm, &uctx->mm_head, entry) {
+ if (len != mm->key.len || phy_addr != mm->key.phy_addr)
+ continue;
+
+ found = true;
+ break;
+ }
+ mutex_unlock(&uctx->mm_list_lock);
+ DP_VERBOSE(uctx->dev, QEDR_MSG_MISC,
+ "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n",
+ mm->key.phy_addr, mm->key.len, uctx, found);
+
+ return found;
+}
+
+struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ int rc;
+ struct qedr_ucontext *ctx;
+ struct qedr_alloc_ucontext_resp uresp;
+ struct qedr_dev *dev = get_qedr_dev(ibdev);
+ struct qed_rdma_add_user_out_params oparams;
+
+ if (!udata)
+ return ERR_PTR(-EFAULT);
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+
+ rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams);
+ if (rc) {
+ DP_ERR(dev,
+ "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n",
+ rc);
+ goto err;
+ }
+
+ ctx->dpi = oparams.dpi;
+ ctx->dpi_addr = oparams.dpi_addr;
+ ctx->dpi_phys_addr = oparams.dpi_phys_addr;
+ ctx->dpi_size = oparams.dpi_size;
+ INIT_LIST_HEAD(&ctx->mm_head);
+ mutex_init(&ctx->mm_list_lock);
+
+ memset(&uresp, 0, sizeof(uresp));
+
+ uresp.db_pa = ctx->dpi_phys_addr;
+ uresp.db_size = ctx->dpi_size;
+ uresp.max_send_wr = dev->attr.max_sqe;
+ uresp.max_recv_wr = dev->attr.max_rqe;
+ uresp.max_srq_wr = dev->attr.max_srq_wr;
+ uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE;
+ uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE;
+ uresp.sges_per_srq_wr = dev->attr.max_srq_sge;
+ uresp.max_cqes = QEDR_MAX_CQES;
+
+ rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
+ if (rc)
+ goto err;
+
+ ctx->dev = dev;
+
+ rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size);
+ if (rc)
+ goto err;
+
+ DP_VERBOSE(dev, QEDR_MSG_INIT, "Allocating user context %p\n",
+ &ctx->ibucontext);
+ return &ctx->ibucontext;
+
+err:
+ kfree(ctx);
+ return ERR_PTR(rc);
+}
+
+int qedr_dealloc_ucontext(struct ib_ucontext *ibctx)
+{
+ struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx);
+ struct qedr_mm *mm, *tmp;
+ int status = 0;
+
+ DP_VERBOSE(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n",
+ uctx);
+ uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi);
+
+ list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
+ DP_VERBOSE(uctx->dev, QEDR_MSG_MISC,
+ "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n",
+ mm->key.phy_addr, mm->key.len, uctx);
+ list_del(&mm->entry);
+ kfree(mm);
+ }
+
+ kfree(uctx);
+ return status;
+}
+
+int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+{
+ struct qedr_ucontext *ucontext = get_qedr_ucontext(context);
+ struct qedr_dev *dev = get_qedr_dev(context->device);
+ unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
+ u64 unmapped_db = dev->db_phys_addr;
+ unsigned long len = (vma->vm_end - vma->vm_start);
+ int rc = 0;
+ bool found;
+
+ DP_VERBOSE(dev, QEDR_MSG_INIT,
+ "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n",
+ vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len);
+ if (vma->vm_start & (PAGE_SIZE - 1)) {
+ DP_ERR(dev, "Vma_start not page aligned = %ld\n",
+ vma->vm_start);
+ return -EINVAL;
+ }
+
+ found = qedr_search_mmap(ucontext, vm_page, len);
+ if (!found) {
+ DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n",
+ vma->vm_pgoff);
+ return -EINVAL;
+ }
+
+ DP_VERBOSE(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
+
+ if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
+ dev->db_size))) {
+ DP_VERBOSE(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n");
+ if (vma->vm_flags & VM_READ) {
+ DP_ERR(dev, "Trying to map doorbell bar for read\n");
+ return -EPERM;
+ }
+
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+
+ rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ PAGE_SIZE, vma->vm_page_prot);
+ } else {
+ DP_VERBOSE(dev, QEDR_MSG_INIT, "Mapping chains\n");
+ rc = remap_pfn_range(vma, vma->vm_start,
+ vma->vm_pgoff, len, vma->vm_page_prot);
+ }
+ DP_VERBOSE(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc);
+ return rc;
+}
diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h
new file mode 100644
index 0000000..f731063
--- /dev/null
+++ b/drivers/infiniband/hw/qedr/verbs.h
@@ -0,0 +1,26 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+#ifndef __QEDR_VERBS_H__
+#define __QEDR_VERBS_H__
+
+int qedr_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *attr, struct ib_udata *udata);
+int qedr_query_port(struct ib_device *, u8 port, struct ib_port_attr *props);
+
+int qedr_query_gid(struct ib_device *, u8 port, int index, union ib_gid *gid);
+
+struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *, struct ib_udata *);
+int qedr_dealloc_ucontext(struct ib_ucontext *);
+
+int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma);
+int qedr_del_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, void **context);
+int qedr_add_gid(struct ib_device *device, u8 port_num,
+ unsigned int index, const union ib_gid *gid,
+ const struct ib_gid_attr *attr, void **context);
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 74052f5..a92aa80 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -53,7 +53,7 @@ enum qed_mcp_protocol_type;
static inline u32 qed_db_addr(u32 cid, u32 DEMS)
{
u32 db_addr = FIELD_VALUE(DB_LEGACY_ADDR_DEMS, DEMS) |
- FIELD_VALUE(DB_LEGACY_ADDR_ICID, cid);
+ (cid * QED_PF_DEMS_SIZE);
return db_addr;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index eb6fbfc..3708bdd 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1480,6 +1480,7 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn)
resc_num[QED_ILT] = PXP_NUM_ILT_RECORDS_BB / num_funcs;
resc_num[QED_LL2_QUEUE] = MAX_NUM_LL2_RX_QUEUES / num_funcs;
resc_num[QED_RDMA_CNQ_RAM] = NUM_OF_CMDQS_CQS / num_funcs;
+ resc_num[QED_RDMA_STATS_QUEUE] = RDMA_NUM_STATISTIC_COUNTERS_BB / num_funcs;
for (i = 0; i < QED_MAX_RESC; i++)
resc_start[i] = resc_num[i] * enabled_func_idx;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index e65a130..9cdb472 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -158,7 +158,7 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn,
return rc;
}
- p_rdma_info = p_rdma_info;
+ p_hwfn->p_rdma_info = p_rdma_info;
p_rdma_info->proto = PROTOCOLID_ROCE;
num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, 0);
@@ -664,6 +664,22 @@ int qed_rdma_add_user(void *rdma_cxt,
return rc;
}
+struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt)
+{
+ struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+ struct qed_rdma_port *p_port = p_hwfn->p_rdma_info->port;
+
+ DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA Query port\n");
+
+ /* Link may have changed */
+ p_port->port_state = p_hwfn->mcp_info->link_output.link_up ?
+ QED_RDMA_PORT_UP : QED_RDMA_PORT_DOWN;
+
+ p_port->link_speed = p_hwfn->mcp_info->link_output.speed;
+
+ return p_port;
+}
+
struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt)
{
struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
@@ -872,6 +888,7 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = {
.rdma_add_user = &qed_rdma_add_user,
.rdma_remove_user = &qed_rdma_remove_user,
.rdma_stop = &qed_rdma_stop,
+ .rdma_query_port = &qed_rdma_query_port,
.rdma_query_device = &qed_rdma_query_device,
.rdma_get_start_sb = &qed_rdma_get_sb_start,
.rdma_get_rdma_int = &qed_rdma_get_int,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h
index 46f14b2..c94c39e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h
@@ -102,6 +102,7 @@ int qed_rdma_alloc_tid(void *rdma_cxt, u32 *tid);
int qed_rdma_deregister_tid(void *rdma_cxt, u32 tid);
void qed_rdma_free_tid(void *rdma_cxt, u32 tid);
struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt);
+struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt);
int
qed_rdma_register_tid(void *rdma_cxt,
struct qed_rdma_register_tid_in_params *params);
diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h
index 0f7d527..e99470c 100644
--- a/include/linux/qed/qed_roce_if.h
+++ b/include/linux/qed/qed_roce_if.h
@@ -332,6 +332,7 @@ struct qed_rdma_ops {
void (*rdma_remove_user)(void *rdma_cxt, u16 dpi);
int (*rdma_stop)(void *rdma_cxt);
struct qed_rdma_device* (*rdma_query_device)(void *rdma_cxt);
+ struct qed_rdma_port* (*rdma_query_port)(void *rdma_cxt);
int (*rdma_get_start_sb)(struct qed_dev *cdev);
int (*rdma_get_min_cnq_msix)(struct qed_dev *cdev);
void (*rdma_cnq_prod_update)(void *rdma_cxt, u8 cnq_index, u16 prod);
diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h
index 187991c..7663725 100644
--- a/include/linux/qed/rdma_common.h
+++ b/include/linux/qed/rdma_common.h
@@ -28,6 +28,7 @@
#define RDMA_MAX_PDS (64 * 1024)
#define RDMA_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS
+#define RDMA_NUM_STATISTIC_COUNTERS_BB MAX_NUM_VPORTS_BB
#define RDMA_TASK_TYPE (PROTOCOLID_ROCE)
diff --git a/include/uapi/rdma/providers/qedr-abi.h b/include/uapi/rdma/providers/qedr-abi.h
new file mode 100644
index 0000000..45288dc
--- /dev/null
+++ b/include/uapi/rdma/providers/qedr-abi.h
@@ -0,0 +1,27 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+#ifndef __QEDR_USER_H__
+#define __QEDR_USER_H__
+
+#define QEDR_ABI_VERSION (6)
+
+/* user kernel communication data structures. */
+
+struct qedr_alloc_ucontext_resp {
+ u64 db_pa;
+ u32 db_size;
+
+ u32 max_send_wr;
+ u32 max_recv_wr;
+ u32 max_srq_wr;
+ u32 sges_per_send_wr;
+ u32 sges_per_recv_wr;
+ u32 sges_per_srq_wr;
+ int max_cqes;
+};
+#endif /* __QEDR_USER_H__ */
--
1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [RFC v2 02/12] qedr: Add RoCE driver framework
From: Ram Amrani @ 2016-09-20 10:35 UTC (permalink / raw)
To: davem-fT/PcQaiUtIeIZ0/mPfg9Q, dledford-H+wXaHxf7aLQT0dZR+AlfA
Cc: Ariel.Elior-YGCgFSpz5w/QT0dZR+AlfA,
Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA,
Yuval.Mintz-YGCgFSpz5w/QT0dZR+AlfA,
rajesh.borundia-YGCgFSpz5w/QT0dZR+AlfA,
linux-rdma-u79uwXL29TY76Z2rM5mHXA, netdev-u79uwXL29TY76Z2rM5mHXA,
Ram amrani
In-Reply-To: <1474367764-9555-1-git-send-email-Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
From: Ram amrani <Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
Adds a skeletal implementation of the qed* RoCE driver -
basically the ability to communicate with the qede driver and
receive notifications from it regarding various init/exit events.
Signed-off-by: Rajesh Borundia <rajesh.borundia-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
Signed-off-by: Ram Amrani <Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/Kconfig | 2 +
drivers/infiniband/hw/Makefile | 1 +
drivers/infiniband/hw/qedr/Kconfig | 7 +
drivers/infiniband/hw/qedr/Makefile | 3 +
drivers/infiniband/hw/qedr/main.c | 276 ++++++++++++++++++++++++
drivers/infiniband/hw/qedr/qedr.h | 59 +++++
drivers/net/ethernet/qlogic/qede/Makefile | 1 +
drivers/net/ethernet/qlogic/qede/qede.h | 9 +
drivers/net/ethernet/qlogic/qede/qede_main.c | 35 ++-
drivers/net/ethernet/qlogic/qede/qede_roce.c | 309 +++++++++++++++++++++++++++
include/linux/qed/qed_if.h | 3 +-
include/linux/qed/qede_roce.h | 88 ++++++++
include/uapi/linux/pci_regs.h | 3 +
13 files changed, 785 insertions(+), 11 deletions(-)
create mode 100644 drivers/infiniband/hw/qedr/Kconfig
create mode 100644 drivers/infiniband/hw/qedr/Makefile
create mode 100644 drivers/infiniband/hw/qedr/main.c
create mode 100644 drivers/infiniband/hw/qedr/qedr.h
create mode 100644 drivers/net/ethernet/qlogic/qede/qede_roce.c
create mode 100644 include/linux/qed/qede_roce.h
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index e9b7dc0..77ab0f3 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -88,4 +88,6 @@ source "drivers/infiniband/sw/rxe/Kconfig"
source "drivers/infiniband/hw/hfi1/Kconfig"
+source "drivers/infiniband/hw/qedr/Kconfig"
+
endif # INFINIBAND
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index c0c7cf8..c51e464 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -9,3 +9,4 @@ obj-$(CONFIG_INFINIBAND_NES) += nes/
obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/
obj-$(CONFIG_INFINIBAND_USNIC) += usnic/
obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
+obj-$(CONFIG_INFINIBAND_QEDR) += qedr/
diff --git a/drivers/infiniband/hw/qedr/Kconfig b/drivers/infiniband/hw/qedr/Kconfig
new file mode 100644
index 0000000..58b584f
--- /dev/null
+++ b/drivers/infiniband/hw/qedr/Kconfig
@@ -0,0 +1,7 @@
+config INFINIBAND_QEDR
+ tristate "QLogic RoCE driver"
+ depends on NETDEVICES && ETHERNET && PCI && 64BIT && QEDE
+ select QED_LL2
+ ---help---
+ This driver provides low-level InfiniBand over Ethernet
+ support for QLogic QED host channel adapters (HCAs).
diff --git a/drivers/infiniband/hw/qedr/Makefile b/drivers/infiniband/hw/qedr/Makefile
new file mode 100644
index 0000000..3a5b7a2
--- /dev/null
+++ b/drivers/infiniband/hw/qedr/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_INFINIBAND_QEDR) := qedr.o
+
+qedr-y := main.o
diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
new file mode 100644
index 0000000..f60dc80
--- /dev/null
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -0,0 +1,276 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016 QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and /or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/module.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_addr.h>
+#include <linux/netdevice.h>
+#include <linux/iommu.h>
+#include <net/addrconf.h>
+#include <linux/qed/qede_roce.h>
+#include "qedr.h"
+
+MODULE_DESCRIPTION("QLogic 40G/100G ROCE Driver");
+MODULE_AUTHOR("QLogic Corporation");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(QEDR_MODULE_VERSION);
+
+uint debug;
+module_param(debug, uint, 0);
+MODULE_PARM_DESC(debug, "Default debug msglevel");
+
+static LIST_HEAD(qedr_dev_list);
+
+void qedr_ib_dispatch_event(struct qedr_dev *dev, u8 port_num,
+ enum ib_event_type type)
+{
+ struct ib_event ibev;
+
+ ibev.device = &dev->ibdev;
+ ibev.element.port_num = port_num;
+ ibev.event = type;
+
+ ib_dispatch_event(&ibev);
+}
+
+static enum rdma_link_layer qedr_link_layer(struct ib_device *device,
+ u8 port_num)
+{
+ return IB_LINK_LAYER_ETHERNET;
+}
+
+static int qedr_register_device(struct qedr_dev *dev)
+{
+ strlcpy(dev->ibdev.name, "qedr%d", IB_DEVICE_NAME_MAX);
+
+ memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC));
+ dev->ibdev.owner = THIS_MODULE;
+
+ dev->ibdev.get_link_layer = qedr_link_layer;
+
+ return 0;
+}
+
+/* QEDR sysfs interface */
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct qedr_dev *dev = dev_get_drvdata(device);
+
+ return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor);
+}
+
+static ssize_t show_hca_type(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET");
+}
+
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
+
+static struct device_attribute *qedr_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_hca_type
+};
+
+static void qedr_remove_sysfiles(struct qedr_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
+ device_remove_file(&dev->ibdev.dev, qedr_attributes[i]);
+}
+
+void qedr_config_debug(uint debug, u32 *p_dp_module, u8 *p_dp_level)
+{
+ *p_dp_level = 0;
+ *p_dp_module = 0;
+
+ if (debug & QED_LOG_VERBOSE_MASK) {
+ *p_dp_level = QED_LEVEL_VERBOSE;
+ *p_dp_module = (debug & 0x3FFFFFFF);
+ } else if (debug & QED_LOG_INFO_MASK) {
+ *p_dp_level = QED_LEVEL_INFO;
+ } else if (debug & QED_LOG_NOTICE_MASK) {
+ *p_dp_level = QED_LEVEL_NOTICE;
+ }
+}
+
+static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev)
+{
+ struct pci_dev *bridge;
+ u32 val;
+
+ dev->atomic_cap = IB_ATOMIC_NONE;
+
+ bridge = pdev->bus->self;
+ if (!bridge)
+ return;
+
+ /* Check whether we are connected directly or via a switch */
+ while (bridge && bridge->bus->parent) {
+ DP_NOTICE(dev,
+ "Device is not connected directly to root. bridge->bus->number=%d primary=%d\n",
+ bridge->bus->number, bridge->bus->primary);
+ /* Need to check Atomic Op Routing Supported all the way to
+ * root complex.
+ */
+ pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
+ if (!(val & PCI_EXP_DEVCAP2_ATOMIC_ROUTE)) {
+ pcie_capability_clear_word(pdev,
+ PCI_EXP_DEVCTL2,
+ PCI_EXP_DEVCTL2_ATOMIC_REQ);
+ return;
+ }
+ bridge = bridge->bus->parent->self;
+ }
+ bridge = pdev->bus->self;
+
+ /* according to bridge capability */
+ pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val);
+ if (val & PCI_EXP_DEVCAP2_ATOMIC_COMP64) {
+ pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2,
+ PCI_EXP_DEVCTL2_ATOMIC_REQ);
+ dev->atomic_cap = IB_ATOMIC_GLOB;
+ } else {
+ pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL2,
+ PCI_EXP_DEVCTL2_ATOMIC_REQ);
+ }
+}
+
+static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev,
+ struct net_device *ndev)
+{
+ struct qedr_dev *dev;
+ int rc = 0, i;
+
+ dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev));
+ if (!dev) {
+ pr_err("Unable to allocate ib device\n");
+ return NULL;
+ }
+
+ qedr_config_debug(debug, &dev->dp_module, &dev->dp_level);
+ DP_VERBOSE(dev, QEDR_MSG_INIT, "qedr add device called\n");
+
+ dev->pdev = pdev;
+ dev->ndev = ndev;
+ dev->cdev = cdev;
+
+ qedr_pci_set_atomic(dev, pdev);
+
+ rc = qedr_register_device(dev);
+ if (rc) {
+ DP_ERR(dev, "Unable to allocate register device\n");
+ goto init_err;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++)
+ if (device_create_file(&dev->ibdev.dev, qedr_attributes[i]))
+ goto init_err;
+
+ DP_VERBOSE(dev, QEDR_MSG_INIT, "qedr driver loaded successfully\n");
+ return dev;
+
+init_err:
+ ib_dealloc_device(&dev->ibdev);
+ DP_ERR(dev, "qedr driver load failed rc=%d\n", rc);
+
+ return NULL;
+}
+
+static void qedr_remove(struct qedr_dev *dev)
+{
+ /* First unregister with stack to stop all the active traffic
+ * of the registered clients.
+ */
+ qedr_remove_sysfiles(dev);
+
+ ib_dealloc_device(&dev->ibdev);
+}
+
+static int qedr_close(struct qedr_dev *dev)
+{
+ qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ERR);
+
+ return 0;
+}
+
+static void qedr_shutdown(struct qedr_dev *dev)
+{
+ qedr_close(dev);
+ qedr_remove(dev);
+}
+
+/* event handling via NIC driver ensures that all the NIC specific
+ * initialization done before RoCE driver notifies
+ * event to stack.
+ */
+static void qedr_notify(struct qedr_dev *dev, enum qede_roce_event event)
+{
+ switch (event) {
+ case QEDE_UP:
+ qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
+ break;
+ case QEDE_DOWN:
+ qedr_close(dev);
+ break;
+ case QEDE_CLOSE:
+ qedr_shutdown(dev);
+ break;
+ case QEDE_CHANGE_ADDR:
+ qedr_ib_dispatch_event(dev, 1, IB_EVENT_GID_CHANGE);
+ break;
+ default:
+ pr_err("Event not supported\n");
+ }
+}
+
+static struct qedr_driver qedr_drv = {
+ .name = "qedr_driver",
+ .add = qedr_add,
+ .remove = qedr_remove,
+ .notify = qedr_notify,
+};
+
+static int __init qedr_init_module(void)
+{
+ return qede_roce_register_driver(&qedr_drv);
+}
+
+static void __exit qedr_exit_module(void)
+{
+ qede_roce_unregister_driver(&qedr_drv);
+}
+
+module_init(qedr_init_module);
+module_exit(qedr_exit_module);
diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h
new file mode 100644
index 0000000..c8a1f61
--- /dev/null
+++ b/drivers/infiniband/hw/qedr/qedr.h
@@ -0,0 +1,59 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016 QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and /or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __QEDR_H__
+#define __QEDR_H__
+
+#include <linux/pci.h>
+#include <rdma/ib_addr.h>
+#include <linux/qed/qed_if.h>
+#include <linux/qed/qede_roce.h>
+
+#define QEDR_MODULE_VERSION "8.10.10.0"
+#define QEDR_NODE_DESC "QLogic 579xx RoCE HCA"
+#define DP_NAME(dev) ((dev)->ibdev.name)
+
+enum DP_QEDR_MODULE {
+ QEDR_MSG_INIT = 0x10000,
+};
+
+struct qedr_dev {
+ struct ib_device ibdev;
+ struct qed_dev *cdev;
+ struct pci_dev *pdev;
+ struct net_device *ndev;
+
+ enum ib_atomic_cap atomic_cap;
+
+ u32 dp_module;
+ u8 dp_level;
+};
+#endif
diff --git a/drivers/net/ethernet/qlogic/qede/Makefile b/drivers/net/ethernet/qlogic/qede/Makefile
index 74a4985..28dc589 100644
--- a/drivers/net/ethernet/qlogic/qede/Makefile
+++ b/drivers/net/ethernet/qlogic/qede/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_QEDE) := qede.o
qede-y := qede_main.o qede_ethtool.o
qede-$(CONFIG_DCB) += qede_dcbnl.o
+qede-$(CONFIG_INFINIBAND_QEDR) += qede_roce.o
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index e01adce..28c0e9f 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -106,6 +106,13 @@ struct qede_vlan {
bool configured;
};
+struct qede_rdma_dev {
+ struct qedr_dev *qedr_dev;
+ struct list_head entry;
+ struct list_head roce_event_list;
+ struct workqueue_struct *roce_wq;
+};
+
struct qede_dev {
struct qed_dev *cdev;
struct net_device *ndev;
@@ -185,6 +192,8 @@ struct qede_dev {
unsigned long sp_flags;
u16 vxlan_dst_port;
u16 geneve_dst_port;
+
+ struct qede_rdma_dev rdma_info;
};
enum QEDE_STATE {
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index cd23a29..37f3a77 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -36,7 +36,7 @@
#include <linux/random.h>
#include <net/ip6_checksum.h>
#include <linux/bitops.h>
-
+#include <linux/qed/qede_roce.h>
#include "qede.h"
static char version[] =
@@ -189,8 +189,7 @@ static int qede_netdev_event(struct notifier_block *this, unsigned long event,
struct ethtool_drvinfo drvinfo;
struct qede_dev *edev;
- /* Currently only support name change */
- if (event != NETDEV_CHANGENAME)
+ if (event != NETDEV_CHANGENAME && event != NETDEV_CHANGEADDR)
goto done;
/* Check whether this is a qede device */
@@ -203,11 +202,18 @@ static int qede_netdev_event(struct notifier_block *this, unsigned long event,
goto done;
edev = netdev_priv(ndev);
- /* Notify qed of the name change */
- if (!edev->ops || !edev->ops->common)
- goto done;
- edev->ops->common->set_id(edev->cdev, edev->ndev->name,
- "qede");
+ switch (event) {
+ case NETDEV_CHANGENAME:
+ /* Notify qed of the name change */
+ if (!edev->ops || !edev->ops->common)
+ goto done;
+ edev->ops->common->set_id(edev->cdev, edev->ndev->name, "qede");
+ break;
+ case NETDEV_CHANGEADDR:
+ edev = netdev_priv(ndev);
+ qede_roce_event_changeaddr(edev);
+ break;
+ }
done:
return NOTIFY_DONE;
@@ -2541,10 +2547,14 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
qede_init_ndev(edev);
+ rc = qede_roce_dev_add(edev);
+ if (rc)
+ goto err3;
+
rc = register_netdev(edev->ndev);
if (rc) {
DP_NOTICE(edev, "Cannot register net-device\n");
- goto err3;
+ goto err4;
}
edev->ops->common->set_id(cdev, edev->ndev->name, DRV_MODULE_VERSION);
@@ -2564,6 +2574,8 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level,
return 0;
+err4:
+ qede_roce_dev_remove(edev);
err3:
free_netdev(edev->ndev);
err2:
@@ -2610,8 +2622,11 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
DP_INFO(edev, "Starting qede_remove\n");
cancel_delayed_work_sync(&edev->sp_task);
+
unregister_netdev(ndev);
+ qede_roce_dev_remove(edev);
+
edev->ops->common->set_power_state(cdev, PCI_D0);
pci_set_drvdata(pdev, NULL);
@@ -3508,6 +3523,7 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode)
DP_INFO(edev, "Starting qede unload\n");
+ qede_roce_dev_event_close(edev);
mutex_lock(&edev->qede_lock);
edev->state = QEDE_STATE_CLOSED;
@@ -3608,6 +3624,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
/* Query whether link is already-up */
memset(&link_output, 0, sizeof(link_output));
edev->ops->common->get_link(edev->cdev, &link_output);
+ qede_roce_dev_event_open(edev);
qede_link_update(edev, &link_output);
DP_INFO(edev, "Ending successfully qede load\n");
diff --git a/drivers/net/ethernet/qlogic/qede/qede_roce.c b/drivers/net/ethernet/qlogic/qede/qede_roce.c
new file mode 100644
index 0000000..3d12a44
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qede/qede_roce.c
@@ -0,0 +1,309 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016 QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and /or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/qed/qede_roce.h>
+#include "qede.h"
+
+static struct qedr_driver *qedr_drv;
+static LIST_HEAD(qedr_dev_list);
+static DEFINE_MUTEX(qedr_dev_list_lock);
+
+bool qede_roce_supported(struct qede_dev *dev)
+{
+ return dev->dev_info.common.rdma_supported;
+}
+
+static void _qede_roce_dev_add(struct qede_dev *edev)
+{
+ if (!qedr_drv)
+ return;
+
+ edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev,
+ edev->ndev);
+}
+
+static int qede_roce_create_wq(struct qede_dev *edev)
+{
+ INIT_LIST_HEAD(&edev->rdma_info.roce_event_list);
+ edev->rdma_info.roce_wq = create_singlethread_workqueue("roce_wq");
+ if (!edev->rdma_info.roce_wq) {
+ pr_err("qedr: Could not create workqueue\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void qede_roce_cleanup_event(struct qede_dev *edev)
+{
+ struct list_head *head = &edev->rdma_info.roce_event_list;
+ struct qede_roce_event_work *event_node;
+
+ flush_workqueue(edev->rdma_info.roce_wq);
+ while (!list_empty(head)) {
+ event_node = list_entry(head->next, struct qede_roce_event_work,
+ list);
+ cancel_work_sync(&event_node->work);
+ list_del(&event_node->list);
+ kfree(event_node);
+ }
+}
+
+static void qede_roce_destroy_wq(struct qede_dev *edev)
+{
+ qede_roce_cleanup_event(edev);
+ destroy_workqueue(edev->rdma_info.roce_wq);
+}
+
+int qede_roce_dev_add(struct qede_dev *edev)
+{
+ int rc = 0;
+
+ if (qede_roce_supported(edev)) {
+ rc = qede_roce_create_wq(edev);
+ if (rc)
+ return rc;
+
+ INIT_LIST_HEAD(&edev->rdma_info.entry);
+ mutex_lock(&qedr_dev_list_lock);
+ list_add_tail(&edev->rdma_info.entry, &qedr_dev_list);
+ _qede_roce_dev_add(edev);
+ mutex_unlock(&qedr_dev_list_lock);
+ }
+
+ return rc;
+}
+
+static void _qede_roce_dev_remove(struct qede_dev *edev)
+{
+ if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev)
+ qedr_drv->remove(edev->rdma_info.qedr_dev);
+ edev->rdma_info.qedr_dev = NULL;
+}
+
+void qede_roce_dev_remove(struct qede_dev *edev)
+{
+ if (!qede_roce_supported(edev))
+ return;
+
+ qede_roce_destroy_wq(edev);
+ mutex_lock(&qedr_dev_list_lock);
+ _qede_roce_dev_remove(edev);
+ list_del(&edev->rdma_info.entry);
+ mutex_unlock(&qedr_dev_list_lock);
+}
+
+static void _qede_roce_dev_open(struct qede_dev *edev)
+{
+ if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
+ qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_UP);
+}
+
+void qede_roce_dev_open(struct qede_dev *edev)
+{
+ if (!qede_roce_supported(edev))
+ return;
+
+ mutex_lock(&qedr_dev_list_lock);
+ _qede_roce_dev_open(edev);
+ mutex_unlock(&qedr_dev_list_lock);
+}
+
+static void _qede_roce_dev_close(struct qede_dev *edev)
+{
+ if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
+ qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_DOWN);
+}
+
+void qede_roce_dev_close(struct qede_dev *edev)
+{
+ if (!qede_roce_supported(edev))
+ return;
+
+ mutex_lock(&qedr_dev_list_lock);
+ _qede_roce_dev_close(edev);
+ mutex_unlock(&qedr_dev_list_lock);
+}
+
+void qede_roce_dev_shutdown(struct qede_dev *edev)
+{
+ if (!qede_roce_supported(edev))
+ return;
+
+ mutex_lock(&qedr_dev_list_lock);
+ if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
+ qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CLOSE);
+ mutex_unlock(&qedr_dev_list_lock);
+}
+
+int qede_roce_register_driver(struct qedr_driver *drv)
+{
+ struct qede_dev *edev;
+ u8 qedr_counter = 0;
+
+ mutex_lock(&qedr_dev_list_lock);
+ if (qedr_drv) {
+ mutex_unlock(&qedr_dev_list_lock);
+ return -EINVAL;
+ }
+ qedr_drv = drv;
+
+ list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
+ struct net_device *netdev;
+
+ qedr_counter++;
+ _qede_roce_dev_add(edev);
+ netdev = edev->ndev;
+ if (netif_running(netdev) && netif_oper_up(netdev))
+ _qede_roce_dev_open(edev);
+ }
+ mutex_unlock(&qedr_dev_list_lock);
+
+ pr_err("qedr: discovered and registered %d RoCE funcs\n", qedr_counter);
+
+ return 0;
+}
+EXPORT_SYMBOL(qede_roce_register_driver);
+
+void qede_roce_unregister_driver(struct qedr_driver *drv)
+{
+ struct qede_dev *edev;
+
+ mutex_lock(&qedr_dev_list_lock);
+ list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) {
+ if (edev->rdma_info.qedr_dev)
+ _qede_roce_dev_remove(edev);
+ }
+ qedr_drv = NULL;
+ mutex_unlock(&qedr_dev_list_lock);
+}
+EXPORT_SYMBOL(qede_roce_unregister_driver);
+
+void qede_roce_changeaddr(struct qede_dev *edev)
+{
+ if (!qede_roce_supported(edev))
+ return;
+
+ if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify)
+ qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CHANGE_ADDR);
+}
+
+struct qede_roce_event_work *qede_roce_get_free_event_node(struct qede_dev
+ *edev)
+{
+ struct qede_roce_event_work *event_node = NULL;
+ struct list_head *list_node = NULL;
+ bool found = false;
+
+ list_for_each(list_node, &edev->rdma_info.roce_event_list) {
+ event_node = list_entry(list_node, struct qede_roce_event_work,
+ list);
+ if (!work_pending(&event_node->work)) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ event_node = kzalloc(sizeof(*event_node), GFP_KERNEL);
+ if (!event_node) {
+ pr_err("qedr: Could not allocate memory for roce work\n");
+ return NULL;
+ }
+ list_add_tail(&event_node->list,
+ &edev->rdma_info.roce_event_list);
+ }
+
+ return event_node;
+}
+
+static void qede_roce_handle_event(struct work_struct *work)
+{
+ struct qede_roce_event_work *event_node;
+ enum qede_roce_event event;
+ struct qede_dev *edev;
+
+ event_node = container_of(work, struct qede_roce_event_work, work);
+ event = event_node->event;
+ edev = event_node->ptr;
+
+ switch (event) {
+ case QEDE_UP:
+ qede_roce_dev_open(edev);
+ break;
+ case QEDE_DOWN:
+ qede_roce_dev_close(edev);
+ break;
+ case QEDE_CLOSE:
+ qede_roce_dev_shutdown(edev);
+ break;
+ case QEDE_CHANGE_ADDR:
+ qede_roce_changeaddr(edev);
+ break;
+ default:
+ pr_err("qede: Invalid roce event %d", event);
+ }
+}
+
+static void qede_roce_add_event(struct qede_dev *edev,
+ enum qede_roce_event event)
+{
+ struct qede_roce_event_work *event_node;
+
+ event_node = qede_roce_get_free_event_node(edev);
+ if (!event_node)
+ return;
+
+ event_node->event = event;
+ event_node->ptr = edev;
+
+ INIT_WORK(&event_node->work, qede_roce_handle_event);
+ queue_work(edev->rdma_info.roce_wq, &event_node->work);
+}
+
+void qede_roce_dev_event_open(struct qede_dev *edev)
+{
+ qede_roce_add_event(edev, QEDE_UP);
+}
+
+void qede_roce_dev_event_close(struct qede_dev *edev)
+{
+ qede_roce_add_event(edev, QEDE_DOWN);
+}
+
+void qede_roce_event_changeaddr(struct qede_dev *edev)
+{
+ qede_roce_add_event(edev, QEDE_CHANGE_ADDR);
+}
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index c2d74e8..e313742 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -260,11 +260,10 @@ struct qed_dev_info {
/* MFW version */
u32 mfw_rev;
- bool rdma_supported;
-
u32 flash_size;
u8 mf_mode;
bool tx_switching;
+ bool rdma_supported;
};
enum qed_sb_type {
diff --git a/include/linux/qed/qede_roce.h b/include/linux/qed/qede_roce.h
new file mode 100644
index 0000000..fd7acc1
--- /dev/null
+++ b/include/linux/qed/qede_roce.h
@@ -0,0 +1,88 @@
+/* QLogic qedr NIC Driver
+ * Copyright (c) 2015-2016 QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and /or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef QEDE_ROCE_H
+#define QEDE_ROCE_H
+
+struct qedr_dev;
+struct qed_dev;
+struct qede_dev;
+
+enum qede_roce_event {
+ QEDE_UP,
+ QEDE_DOWN,
+ QEDE_CHANGE_ADDR,
+ QEDE_CLOSE
+};
+
+struct qede_roce_event_work {
+ struct list_head list;
+ struct work_struct work;
+ void *ptr;
+ enum qede_roce_event event;
+};
+
+struct qedr_driver {
+ unsigned char name[32];
+
+ struct qedr_dev* (*add)(struct qed_dev *, struct pci_dev *,
+ struct net_device *);
+
+ void (*remove)(struct qedr_dev *);
+ void (*notify)(struct qedr_dev *, enum qede_roce_event);
+};
+
+/* APIs for RoCE driver to register callback handlers,
+ * which will be invoked when device is added, removed, ifup, ifdown
+ */
+int qede_roce_register_driver(struct qedr_driver *drv);
+void qede_roce_unregister_driver(struct qedr_driver *drv);
+
+bool qede_roce_supported(struct qede_dev *dev);
+
+#if IS_ENABLED(CONFIG_INFINIBAND_QEDR)
+int qede_roce_dev_add(struct qede_dev *dev);
+void qede_roce_dev_event_open(struct qede_dev *dev);
+void qede_roce_dev_event_close(struct qede_dev *dev);
+void qede_roce_dev_remove(struct qede_dev *dev);
+void qede_roce_event_changeaddr(struct qede_dev *qedr);
+#else
+static int qede_roce_dev_add(struct qede_dev *dev)
+{
+ return 0;
+}
+
+static void qede_roce_dev_event_open(struct qede_dev *dev) {}
+static void qede_roce_dev_event_close(struct qede_dev *dev) {}
+static void qede_roce_dev_remove(struct qede_dev *dev) {}
+static void qede_roce_event_changeaddr(struct qede_dev *qedr) {}
+#endif
+#endif
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 4040951..3973000 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -612,6 +612,8 @@
*/
#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */
#define PCI_EXP_DEVCAP2_ARI 0x00000020 /* Alternative Routing-ID */
+#define PCI_EXP_DEVCAP2_ATOMIC_ROUTE 0x00000040 /* Atomic Op routing */
+#define PCI_EXP_DEVCAP2_ATOMIC_COMP64 0x00000100 /* Atomic 64-bit comparison */
#define PCI_EXP_DEVCAP2_LTR 0x00000800 /* Latency tolerance reporting */
#define PCI_EXP_DEVCAP2_OBFF_MASK 0x000c0000 /* OBFF support mechanism */
#define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */
@@ -619,6 +621,7 @@
#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */
#define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */
#define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */
+#define PCI_EXP_DEVCTL2_ATOMIC_REQ 0x0040 /* Set Atomic requests */
#define PCI_EXP_DEVCTL2_IDO_REQ_EN 0x0100 /* Allow IDO for requests */
#define PCI_EXP_DEVCTL2_IDO_CMP_EN 0x0200 /* Allow IDO for completions */
#define PCI_EXP_DEVCTL2_LTR_EN 0x0400 /* Enable LTR mechanism */
--
1.8.3.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* DSA: Suspicious RCU usage (via rtnl_bridge_getlink)
From: Russell King - ARM Linux @ 2016-09-20 10:26 UTC (permalink / raw)
To: netdev, Vivien Didelot, Andrew Lunn; +Cc: Paul E. McKenney
Issuing "bridge vlan show" on clearfog provokes a "suspicious RCU usage"
warning from the kernel (see below).
As it's illegal to schedule while holding the RCU read lock, there's the
possibility for this happening much earlier in the call sequence -
mv88e6xxx_port_vlan_dump() takes a mutex, and if that mutex were already
held, we'd schedule at that point. The RCU read lock was taken by
rtnl_bridge_getlink().
It looks horrible to fix - mvmdio.c as well as DSA locking are involved.
I'm wondering why might_sleep() doesn't detect this illegal RCU state,
rather than detecting it when we actually schedule - Paul?
===============================
[ INFO: suspicious RCU usage. ]
4.7.0+ #98 Not tainted
-------------------------------
include/linux/rcupdate.h:554 Illegal context switch in RCU read-side critical section!
other info that might help us debug this:
rcu_scheduler_active = 1, debug_locks = 0
5 locks held by bridge/31769:
#0: (rtnl_mutex){+.+.+.}, at: [<c058f508>] netlink_dump+0x20/0x268
#1: (rcu_read_lock){......}, at: [<c0571674>] rtnl_bridge_getlink+0x4c/0x25c
#2: (&ps->smi_mutex){+.+.+.}, at: [<c042ee58>] mv88e6xxx_port_vlan_dump+0x50/0x13c
#3: (&bus->mdio_lock/2){+.+...}, at: [<c042a6c4>] mdiobus_read_nested+0x4c/0x78
#4: (&dev->lock){+.+...}, at: [<c043a064>] orion_mdio_read+0x28/0xa4
stack backtrace:
CPU: 0 PID: 31769 Comm: bridge Not tainted 4.7.0+ #98
Hardware name: Marvell Armada 380/385 (Device Tree)
Backtrace:
[<c0013488>] (dump_backtrace) from [<c00137d0>] (show_stack+0x18/0x1c)
r6:60070013 r5:ffffffff r4:00000000 r3:dc8ba500
[<c00137b8>] (show_stack) from [<c02c7b00>] (dump_stack+0xa4/0xdc)
[<c02c7a5c>] (dump_stack) from [<c0076864>] (lockdep_rcu_suspicious+0xbc/0x11c)
r6:0000022a r5:c0822fec r4:ecd78c80 r3:ecd78c80
[<c00767a8>] (lockdep_rcu_suspicious) from [<c06bc4c0>] (__schedule+0x348/0x698)
r7:00000000 r6:e9cc38e8 r5:c0981976 r4:ecd78c80
[<c06bc178>] (__schedule) from [<c06bc940>] (schedule+0x3c/0xa0)
r10:c093d4c8 r9:00000000 r8:00002710 r7:00000000 r6:0000d6d8 r5:00000000
r4:0000afc8
[<c06bc904>] (schedule) from [<c06c1130>] (schedule_hrtimeout_range_clock+0xe8/0x168)
[<c06c1048>] (schedule_hrtimeout_range_clock) from [<c06c11d4>] (schedule_hrtimeout_range+0x24/0x2c)
r10:c093d100 r9:00000000 r8:00000000 r7:e9cc3a30 r6:00000001 r5:ee860c30
r4:ee860be0
[<c06c11b0>] (schedule_hrtimeout_range) from [<c06c0b58>] (usleep_range+0x54/0x5c)
[<c06c0b04>] (usleep_range) from [<c0439f9c>] (orion_mdio_wait_ready+0x114/0x14c)
[<c0439e88>] (orion_mdio_wait_ready) from [<c043a098>] (orion_mdio_read+0x5c/0xa4)
r10:00000000 r9:00000000 r8:04040000 r7:04040000 r6:00000000 r5:ee860800
r4:ee860be0
[<c043a03c>] (orion_mdio_read) from [<c042a6d8>] (mdiobus_read_nested+0x60/0x78)
r8:edc53810 r7:00000000 r6:00000004 r5:ee86082c r4:ee860800 r3:c043a03c
[<c042a678>] (mdiobus_read_nested) from [<c042d1cc>] (mv88e6xxx_reg_wait_ready+0x28/0x50)
r7:00000007 r6:ee860800 r5:00000004 r4:00000010
[<c042d1a4>] (mv88e6xxx_reg_wait_ready) from [<c042d688>] (__mv88e6xxx_reg_read+0x24/0x98)
r6:00000010 r5:ee860800 r4:00000004 r3:00000007
[<c042d664>] (__mv88e6xxx_reg_read) from [<c042d850>] (_mv88e6xxx_reg_read+0x38/0x60)
r7:00000000 r6:e9cc3b38 r5:00000010 r4:edc53810
[<c042d818>] (_mv88e6xxx_reg_read) from [<c042e240>] (_mv88e6xxx_port_pvid+0x28/0x8c)
r5:00000010 r4:edc53810
[<c042e218>] (_mv88e6xxx_port_pvid) from [<c042ee6c>] (mv88e6xxx_port_vlan_dump+0x64/0x13c)
r8:c06b48e8 r7:edc5381c r6:e9cc3bc4 r5:c093d4c8 r4:edc53810 r3:e9cc3b38
[<c042ee08>] (mv88e6xxx_port_vlan_dump) from [<c0685ab4>] (dsa_slave_port_obj_dump+0x54/0x70)
r10:00007c19 r9:00000000 r8:00000002 r7:e9cc3bc4 r6:c06b48e8 r5:c07237e0
r4:e9cc3bc4
[<c0685a60>] (dsa_slave_port_obj_dump) from [<c06b50b8>] (switchdev_port_obj_dump+0x50/0xa8)
r4:ef0e0800 r3:c0685a60
[<c06b5068>] (switchdev_port_obj_dump) from [<c06b51f4>] (switchdev_port_vlan_fill+0x6c/0x98)
r7:ea96c0cc r6:eaa7a180 r5:ea96c074 r4:00000002
[<c06b5188>] (switchdev_port_vlan_fill) from [<c0573f80>] (ndo_dflt_bridge_getlink+0x28c/0x484)
r4:ef0e0800
[<c0573cf4>] (ndo_dflt_bridge_getlink) from [<c06b4c64>] (switchdev_port_bridge_getlink+0xc8/0xec)
r10:57e108bb r9:edefbb18 r8:eaa7a180 r7:00007c19 r6:57e108bb r5:c093d4c8
r4:ef0e0800
[<c06b4b9c>] (switchdev_port_bridge_getlink) from [<c0571744>] (rtnl_bridge_getlink+0x11c/0x25c)
r8:eaa7a180 r7:c09770f0 r6:c07236e4 r5:ef0e0800 r4:00000001
[<c0571628>] (rtnl_bridge_getlink) from [<c058f5d8>] (netlink_dump+0xf0/0x268)
r10:c0571628 r9:00000000 r8:00004000 r7:edefbb18 r6:00000f40 r5:eaa7a180
r4:edefb800
[<c058f4e8>] (netlink_dump) from [<c05904ec>] (__netlink_dump_start+0x13c/0x198)
r8:edefbb18 r7:edc7c200 r6:e9cc3d90 r5:eaa7a300 r4:edefb800
[<c05903b0>] (__netlink_dump_start) from [<c0575b74>] (rtnetlink_rcv_msg+0x118/0x1f8)
r8:c0977000 r7:00000007 r6:eaa7a300 r5:00000818 r4:edc7c200 r3:e9cc3d90
[<c0575a5c>] (rtnetlink_rcv_msg) from [<c0592ad8>] (netlink_rcv_skb+0xb4/0xc8)
r10:00000000 r8:e9cc3e0c r7:eaa7a300 r6:c0575a5c r5:eaa7a300 r4:edc7c200
[<c0592a24>] (netlink_rcv_skb) from [<c0573c08>] (rtnetlink_rcv+0x24/0x2c)
r6:00000028 r5:edefb800 r4:eaa7a300 r3:000041de
[<c0573be4>] (rtnetlink_rcv) from [<c0592484>] (netlink_unicast+0x1a0/0x204)
r4:ef120800 r3:c0573be4
[<c05922e4>] (netlink_unicast) from [<c05928dc>] (netlink_sendmsg+0x348/0x364)
r10:eaa7a300 r8:00000000 r7:00000000 r6:00000028 r5:edefb800 r4:e9cc3eb8
[<c0592594>] (netlink_sendmsg) from [<c053deb0>] (sock_sendmsg+0x1c/0x2c)
r10:00000000 r9:e9cc2000 r8:00000000 r7:00000000 r6:c093d4c8 r5:eed4efc0
r4:00000000
[<c053de94>] (sock_sendmsg) from [<c053f2f4>] (SyS_sendto+0xc4/0x108)
[<c053f230>] (SyS_sendto) from [<c053f358>] (SyS_send+0x20/0x28)
r10:00000000 r8:c0010004 r7:00000121 r6:00000008 r5:57e108bb r4:00000000
[<c053f338>] (SyS_send) from [<c000fe60>] (ret_fast_syscall+0x0/0x1c)
--
RMK's Patch system: http://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line: currently at 9.6Mbps down 400kbps up
according to speedtest.net.
^ permalink raw reply
* Re: [PATCH net-next 0/5] mlx4 misc fixes and improvements
From: Tariq Toukan @ 2016-09-20 8:50 UTC (permalink / raw)
To: Or Gerlitz, David Miller; +Cc: Tariq Toukan, Linux Netdev List, Eran Ben Elisha
In-Reply-To: <CAJ3xEMi-4nNxDf3pBrDwenN50BTFtTT0+cMF62BMHYPK9WvZrg@mail.gmail.com>
On 19/09/2016 8:32 AM, Or Gerlitz wrote:
> On Mon, Sep 19, 2016 at 5:00 AM, David Miller <davem@davemloft.net> wrote:
>> From: Tariq Toukan <ttoukan.linux@gmail.com>
>> Date: Sun, 18 Sep 2016 10:27:23 +0300
>>
>>> Hi Dave,
>>>
>>> On 16/09/2016 2:21 AM, David Miller wrote:
>>>> From: Tariq Toukan <tariqt@mellanox.com>
>>>> Date: Mon, 12 Sep 2016 16:20:11 +0300
>>>>
>>>>> This patchset contains some bug fixes, a cleanup, and small
>>>>> improvements
>>>>> from the team to the mlx4 Eth and core drivers.
>>>>>
>>>>> Series generated against net-next commit:
>>>>> 02154927c115 "net: dsa: bcm_sf2: Get VLAN_PORT_MASK from b53_device"
>>>>>
>>>>> Please push the following patch to -stable >= 4.6 as well:
>>>>> "net/mlx4_core: Fix to clean devlink resources"
>>>> Again, coding style fixes and optimizations like branch prediction
>>>> hints are not bug fixes and therefore not appropriate for 'net'.
>>> Yes, I know. Please notice that it was submitted to net-next this
>>> time.
>> This is completely incompatible with a request for one of the changes
>> to go into -stable.
>>
>> If the change is not in 'net', it can't go to -stable.
> Dave,
>
> So when we're pretty late in the 4.8-rc cycle, a fix for a problem
> which was not introduced in 4.8-rc1 was targeted to net-next (4.9) and
> not net.
>
> This indeed creates a small mess when the fix needs to go to -stable as well.
>
> I guess the correct thing to do next time, would be to either send to
> net and ask you to take it to stable as part of picking the patch --
> or send to net-next, and later send you a request to put it to stable
> -- or, wait a bit and send it to net of the next kernel along with
> stable request... we will pick one of these three way of doings next
> (...) time.
>
> So, at this point, I think it would be just correct to take the series
> to net-next, and on a future point we'll issue a request to push the
> patch into stable.
>
> Or.
>
> Or.
Hi Dave,
I see that the series status is 'Changes requested'.
I agree with Or. If possible, please take the series, and we will issue
a request to push into -stable in the future.
Anyway, I can also do a re-submission if you prefer. Just let me know.
Thanks,
Tariq
^ permalink raw reply
* pull-request: wireless-drivers 2016-09-20
From: Kalle Valo @ 2016-09-20 10:20 UTC (permalink / raw)
To: David Miller; +Cc: linux-wireless, netdev, linux-kernel
Hi Dave,
last pull request for 4.8, unless something really drastic comes up. And
a small one even, just a small fix to iwlwifi to avoid a firmware crash.
Please let me know if there are any problems.
Kalle
The following changes since commit a0714125d11bcf21599b08b25fdaf384c0db6712:
Merge ath-current from ath.git (2016-09-07 20:16:37 +0300)
are available in the git repository at:
git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git tags/wireless-drivers-for-davem-2016-09-20
for you to fetch changes up to db64c5fa590b5d82ac0d5ec39bc498f95094e66b:
Merge tag 'iwlwifi-for-kalle-2016-09-15' of git://git.kernel.org/pub/scm/linux/kernel/git/iwlwifi/iwlwifi-fixes (2016-09-17 17:06:22 +0300)
----------------------------------------------------------------
wireless-drivers fixes for 4.8
iwlwifi
* fix to prevent firmware crash when sending off-channel frames
----------------------------------------------------------------
Beni Lev (1):
iwlwifi: mvm: update TX queue before making a copy of the skb
Kalle Valo (1):
Merge tag 'iwlwifi-for-kalle-2016-09-15' of git://git.kernel.org/.../iwlwifi/iwlwifi-fixes
drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
^ permalink raw reply
* [PATCH iproute2] ip: Use specific slave id
From: Hangbin Liu @ 2016-09-20 10:02 UTC (permalink / raw)
To: netdev
Cc: Stephen Hemminger, Phil Sutter, Nikolay Aleksandrov, David Ahern,
Sabrina Dubroca, Hangbin Liu
The original bond/bridge/vrf and slaves use same id, which make people
confused. Use bond/bridge/vrf_slave as id name will make code more clear.
Acked-by: Phil Sutter <psutter@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
ip/ip_common.h | 2 --
ip/ipaddress.c | 4 +++-
ip/iplink.c | 36 +++++++-----------------------------
ip/iplink_bond_slave.c | 3 +--
ip/iplink_bridge_slave.c | 3 +--
ip/iplink_vrf.c | 3 +--
ip/ipmacsec.c | 1 -
7 files changed, 13 insertions(+), 39 deletions(-)
diff --git a/ip/ip_common.h b/ip/ip_common.h
index 93ff5bc..1c1cbdf 100644
--- a/ip/ip_common.h
+++ b/ip/ip_common.h
@@ -83,11 +83,9 @@ struct link_util {
struct rtattr *);
void (*print_help)(struct link_util *, int, char **,
FILE *);
- bool slave;
};
struct link_util *get_link_kind(const char *kind);
-struct link_util *get_link_slave_kind(const char *slave_kind);
void br_dump_bridge_id(const struct ifla_bridge_id *id, char *buf, size_t len);
diff --git a/ip/ipaddress.c b/ip/ipaddress.c
index 76bd7b3..fcc3c53 100644
--- a/ip/ipaddress.c
+++ b/ip/ipaddress.c
@@ -232,6 +232,7 @@ static void print_linktype(FILE *fp, struct rtattr *tb)
struct rtattr *linkinfo[IFLA_INFO_MAX+1];
struct link_util *lu;
struct link_util *slave_lu;
+ char slave[32];
char *kind;
char *slave_kind;
@@ -265,8 +266,9 @@ static void print_linktype(FILE *fp, struct rtattr *tb)
fprintf(fp, "%s", _SL_);
fprintf(fp, " %s_slave ", slave_kind);
+ snprintf(slave, sizeof(slave), "%s_slave", slave_kind);
- slave_lu = get_link_slave_kind(slave_kind);
+ slave_lu = get_link_kind(slave);
if (slave_lu && slave_lu->print_opt) {
struct rtattr *attr[slave_lu->maxattr+1], **data = NULL;
diff --git a/ip/iplink.c b/ip/iplink.c
index 6b1db18..dec8268 100644
--- a/ip/iplink.c
+++ b/ip/iplink.c
@@ -119,15 +119,14 @@ static int on_off(const char *msg, const char *realval)
static void *BODY; /* cached dlopen(NULL) handle */
static struct link_util *linkutil_list;
-static struct link_util *__get_link_kind(const char *id, bool slave)
+struct link_util *get_link_kind(const char *id)
{
void *dlh;
char buf[256];
struct link_util *l;
for (l = linkutil_list; l; l = l->next)
- if (strcmp(l->id, id) == 0 &&
- l->slave == slave)
+ if (strcmp(l->id, id) == 0)
return l;
snprintf(buf, sizeof(buf), LIBDIR "/ip/link_%s.so", id);
@@ -142,10 +141,7 @@ static struct link_util *__get_link_kind(const char *id, bool slave)
}
}
- if (slave)
- snprintf(buf, sizeof(buf), "%s_slave_link_util", id);
- else
- snprintf(buf, sizeof(buf), "%s_link_util", id);
+ snprintf(buf, sizeof(buf), "%s_link_util", id);
l = dlsym(dlh, buf);
if (l == NULL)
return NULL;
@@ -155,16 +151,6 @@ static struct link_util *__get_link_kind(const char *id, bool slave)
return l;
}
-struct link_util *get_link_kind(const char *id)
-{
- return __get_link_kind(id, false);
-}
-
-struct link_util *get_link_slave_kind(const char *id)
-{
- return __get_link_kind(id, true);
-}
-
static int get_link_mode(const char *mode)
{
if (strcasecmp(mode, "default") == 0)
@@ -872,26 +858,18 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv)
if (type) {
struct rtattr *linkinfo;
- char slavebuf[128], *ulinep = strchr(type, '_');
+ char *ulinep = strchr(type, '_');
int iflatype;
linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type,
strlen(type));
- if (ulinep && !strcmp(ulinep, "_slave")) {
- strncpy(slavebuf, type, sizeof(slavebuf));
- slavebuf[sizeof(slavebuf) - 1] = '\0';
- ulinep = strchr(slavebuf, '_');
- /* check in case it was after sizeof(slavebuf) - 1*/
- if (ulinep)
- *ulinep = '\0';
- lu = get_link_slave_kind(slavebuf);
+ lu = get_link_kind(type);
+ if (ulinep && !strcmp(ulinep, "_slave"))
iflatype = IFLA_INFO_SLAVE_DATA;
- } else {
- lu = get_link_kind(type);
+ else
iflatype = IFLA_INFO_DATA;
- }
if (lu && argc) {
struct rtattr *data = addattr_nest(&req.n,
sizeof(req), iflatype);
diff --git a/ip/iplink_bond_slave.c b/ip/iplink_bond_slave.c
index 9c60dea..877e2d9 100644
--- a/ip/iplink_bond_slave.c
+++ b/ip/iplink_bond_slave.c
@@ -130,10 +130,9 @@ static void bond_slave_print_help(struct link_util *lu, int argc, char **argv,
}
struct link_util bond_slave_link_util = {
- .id = "bond",
+ .id = "bond_slave",
.maxattr = IFLA_BOND_SLAVE_MAX,
.print_opt = bond_slave_print_opt,
.parse_opt = bond_slave_parse_opt,
.print_help = bond_slave_print_help,
- .slave = true,
};
diff --git a/ip/iplink_bridge_slave.c b/ip/iplink_bridge_slave.c
index a44d4e4..6c5c59a 100644
--- a/ip/iplink_bridge_slave.c
+++ b/ip/iplink_bridge_slave.c
@@ -293,10 +293,9 @@ static void bridge_slave_print_help(struct link_util *lu, int argc, char **argv,
}
struct link_util bridge_slave_link_util = {
- .id = "bridge",
+ .id = "bridge_slave",
.maxattr = IFLA_BRPORT_MAX,
.print_opt = bridge_slave_print_opt,
.parse_opt = bridge_slave_parse_opt,
.print_help = bridge_slave_print_help,
- .slave = true,
};
diff --git a/ip/iplink_vrf.c b/ip/iplink_vrf.c
index 015b41e..a238b29 100644
--- a/ip/iplink_vrf.c
+++ b/ip/iplink_vrf.c
@@ -91,10 +91,9 @@ struct link_util vrf_link_util = {
};
struct link_util vrf_slave_link_util = {
- .id = "vrf",
+ .id = "vrf_slave",
.maxattr = IFLA_VRF_PORT_MAX,
.print_opt = vrf_slave_print_opt,
- .slave = true,
};
/* returns table id if name is a VRF device */
diff --git a/ip/ipmacsec.c b/ip/ipmacsec.c
index 2e670e9..afbf67e 100644
--- a/ip/ipmacsec.c
+++ b/ip/ipmacsec.c
@@ -1265,5 +1265,4 @@ struct link_util macsec_link_util = {
.parse_opt = macsec_parse_opt,
.print_help = macsec_print_help,
.print_opt = macsec_print_opt,
- .slave = false,
};
--
2.5.5
^ permalink raw reply related
* Re: skb_splice_bits() and large chunks in pipe (was Re: xfs_file_splice_read: possible circular locking dependency detected
From: Herbert Xu @ 2016-09-20 9:51 UTC (permalink / raw)
To: Al Viro; +Cc: torvalds, axboe, npiggin, linux-fsdevel, netdev, edumazet
In-Reply-To: <20160919002258.GJ2356@ZenIV.linux.org.uk>
Al Viro <viro@zeniv.linux.org.uk> wrote:
>
> * shoved into scatterlist, which gets fed into crypto/*.c machinery.
> No way for a pipe_buffer stuff to get there, fortunately, because I would
> be very surprised if it works correctly with compound pages and large
> ranges in those.
FWIW the crypto API has always been supposed to handle SG entries
that cross page boundaries. There were a couple of bugs in this
area but AFAIK they've all been fixed.
Of course I cannot guarantee that every crypto driver also handles
it correctly, but at least we have a few test vectors which test
the page-crossing case specifically.
Cheers,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* [PATCH v3 net-next 2/2] net: skbuff: Coding: Use eth_type_vlan() instead of open coding it
From: Shmulik Ladkani @ 2016-09-20 9:48 UTC (permalink / raw)
To: David S. Miller
Cc: Jiri Pirko, Daniel Borkmann, Pravin Shelar, Eric Dumazet, netdev,
Shmulik Ladkani
In-Reply-To: <1474364917-31710-1-git-send-email-shmulik.ladkani@gmail.com>
Fix 'skb_vlan_pop' to use eth_type_vlan instead of directly comparing
skb->protocol to ETH_P_8021Q or ETH_P_8021AD.
Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
---
net/core/skbuff.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 93d6c5c..d6d0da5 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4564,8 +4564,7 @@ int skb_vlan_pop(struct sk_buff *skb)
if (likely(skb_vlan_tag_present(skb))) {
skb->vlan_tci = 0;
} else {
- if (unlikely(skb->protocol != htons(ETH_P_8021Q) &&
- skb->protocol != htons(ETH_P_8021AD)))
+ if (unlikely(!eth_type_vlan(skb->protocol)))
return 0;
err = __skb_vlan_pop(skb, &vlan_tci);
@@ -4573,8 +4572,7 @@ int skb_vlan_pop(struct sk_buff *skb)
return err;
}
/* move next vlan tag to hw accel tag */
- if (likely(skb->protocol != htons(ETH_P_8021Q) &&
- skb->protocol != htons(ETH_P_8021AD)))
+ if (likely(!eth_type_vlan(skb->protocol)))
return 0;
vlan_proto = skb->protocol;
--
1.9.1
^ permalink raw reply related
* [PATCH v3 net-next 1/2] net: skbuff: Remove errornous length validation in skb_vlan_pop()
From: Shmulik Ladkani @ 2016-09-20 9:48 UTC (permalink / raw)
To: David S. Miller
Cc: Jiri Pirko, Daniel Borkmann, Pravin Shelar, Eric Dumazet, netdev,
Shmulik Ladkani
In 93515d53b1
"net: move vlan pop/push functions into common code"
skb_vlan_pop was moved from its private location in openvswitch to
skbuff common code.
In case skb has non hw-accel vlan tag, the original 'pop_vlan()' assured
that skb->len is sufficient (if skb->len < VLAN_ETH_HLEN then pop was
considered a no-op).
This validation was moved as is into the new common 'skb_vlan_pop'.
Alas, in its original location (openvswitch), there was a guarantee that
'data' points to the mac_header, therefore the 'skb->len < VLAN_ETH_HLEN'
condition made sense.
However there's no such guarantee in the generic 'skb_vlan_pop'.
For short packets received in rx path going through 'skb_vlan_pop',
this causes 'skb_vlan_pop' to fail pop-ing a valid vlan hdr (in the non
hw-accel case) or to fail moving next tag into hw-accel tag.
Remove the 'skb->len < VLAN_ETH_HLEN' condition entirely:
It is superfluous since inner '__skb_vlan_pop' already verifies there
are VLAN_ETH_HLEN writable bytes at the mac_header.
Note this presents a slight change to skb_vlan_pop() users:
In case total length is smaller than VLAN_ETH_HLEN, skb_vlan_pop() now
returns an error, as opposed to previous "no-op" behavior.
Existing callers (e.g. tc act vlan, ovs) usually drop the packet if
'skb_vlan_pop' fails.
Fixes: 93515d53b1 ("net: move vlan pop/push functions into common code")
Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Cc: Pravin Shelar <pshelar@ovn.org>
---
v3: Elaborate log message to explain the change presented to
'skb_vlan_pop' users for packets smaller than VLAN_ETH_HLEN
v2: Remove 'skb->len < VLAN_ETH_HLEN' condition entirely (instead of
testing skb->mac_len as was in v1), suggested by Pravin Shelar
net/core/skbuff.c | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7bf82a2..93d6c5c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4564,9 +4564,8 @@ int skb_vlan_pop(struct sk_buff *skb)
if (likely(skb_vlan_tag_present(skb))) {
skb->vlan_tci = 0;
} else {
- if (unlikely((skb->protocol != htons(ETH_P_8021Q) &&
- skb->protocol != htons(ETH_P_8021AD)) ||
- skb->len < VLAN_ETH_HLEN))
+ if (unlikely(skb->protocol != htons(ETH_P_8021Q) &&
+ skb->protocol != htons(ETH_P_8021AD)))
return 0;
err = __skb_vlan_pop(skb, &vlan_tci);
@@ -4574,9 +4573,8 @@ int skb_vlan_pop(struct sk_buff *skb)
return err;
}
/* move next vlan tag to hw accel tag */
- if (likely((skb->protocol != htons(ETH_P_8021Q) &&
- skb->protocol != htons(ETH_P_8021AD)) ||
- skb->len < VLAN_ETH_HLEN))
+ if (likely(skb->protocol != htons(ETH_P_8021Q) &&
+ skb->protocol != htons(ETH_P_8021AD)))
return 0;
vlan_proto = skb->protocol;
--
1.9.1
^ permalink raw reply related
* Re: [PATCH v2 net-next 0/7] net: ILA resolver and generic resolver backend
From: Herbert Xu @ 2016-09-20 9:43 UTC (permalink / raw)
To: David Miller; +Cc: tom, netdev, kernel-team, roopa, tgraf
In-Reply-To: <20160918.221024.2210003128839126114.davem@davemloft.net>
David Miller <davem@davemloft.net> wrote:
>
> Can you please repost this series with Herbert Xu properly CC:'d
> since he maintains rhashtable and is making changes to it recently
> which might conflict with what you are proposing here?
There is no need to respost this series as is since I've just found
it :)
But I'd like to hear from Tom regarding
1) Why couldn't jhash be called directly instead of through rhashtable?
2) Making rhashtable parameters constanst since otherwise the inlining
will suck.
Thanks,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [PATCH net-next 6/7] net: Generic resolver backend
From: Herbert Xu @ 2016-09-20 9:40 UTC (permalink / raw)
To: Tom Herbert; +Cc: davem, netdev, tgraf, roopa, kernel-team
In-Reply-To: <1473895376-347096-7-git-send-email-tom@herbertland.com>
Tom Herbert <tom@herbertland.com> wrote:
>
> + nrslv->params.head_offset = offsetof(struct net_rslv_ent, node);
> + nrslv->params.key_offset = offsetof(struct net_rslv_ent, object);
> + nrslv->params.key_len = key_len;
> + nrslv->params.max_size = max_size;
> + nrslv->params.min_size = 256;
> + nrslv->params.automatic_shrinking = true;
> + nrslv->params.obj_cmpfn = cmp_fn ? net_rslv_cmp : NULL;
This completely defeats the rhashtable inlining since that relies
on the parameter being constant.
Looking at your next patch you have exactly one user for this. So
who is going to be the next user and do we really need all these
fields to be variable?
Cheers,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [PATCH net-next 5/7] rhashtable: abstract out function to get hash
From: Herbert Xu @ 2016-09-20 9:36 UTC (permalink / raw)
To: Tom Herbert; +Cc: davem, netdev, tgraf, roopa, kernel-team
In-Reply-To: <1473895376-347096-6-git-send-email-tom@herbertland.com>
Tom Herbert <tom@herbertland.com> wrote:
> Split out most of rht_key_hashfn which is calculating the hash into
> its own function. This way the hash function can be called separately to
> get the hash value.
>
> Acked-by: Thomas Graf <tgraf@suug.ch>
> Signed-off-by: Tom Herbert <tom@herbertland.com>
I don't get this one. You're just using jhash, right? Why not
call jhash directly instead of rht_get_key_hashfn?
Cheers,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [PATCH net-next 3/7] rhashtable: Call library function alloc_bucket_locks
From: Herbert Xu @ 2016-09-20 9:29 UTC (permalink / raw)
To: Tom Herbert; +Cc: davem, netdev, tgraf, roopa, kernel-team
In-Reply-To: <1473895376-347096-4-git-send-email-tom@herbertland.com>
Tom Herbert <tom@herbertland.com> wrote:
> To allocate the array of bucket locks for the hash table we now
> call library function alloc_bucket_spinlocks. This function is
> based on the old alloc_bucket_locks in rhashtable and should
> produce the same effect.
>
> Acked-by: Thomas Graf <tgraf@suug.ch>
> Signed-off-by: Tom Herbert <tom@herbertland.com>
On second thought when I do the nested allocation I could simply
restore this function. So
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Cheers,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [PATCH net-next 8/8] net/mlx5e: XDP TX xmit more
From: Jesper Dangaard Brouer @ 2016-09-20 9:26 UTC (permalink / raw)
To: Tariq Toukan
Cc: brouer, Tariq Toukan, David S. Miller, netdev, Eran Ben Elisha,
Saeed Mahameed, Rana Shahout
In-Reply-To: <0e732cab-ed54-5e09-569b-f75258e98ad4@gmail.com>
On Tue, 20 Sep 2016 11:19:46 +0300
Tariq Toukan <ttoukan.linux@gmail.com> wrote:
> Hi Jesper,
>
> On 20/09/2016 10:46 AM, Jesper Dangaard Brouer wrote:
> > On Mon, 19 Sep 2016 16:58:59 +0300
> > Tariq Toukan <tariqt@mellanox.com> wrote:
> >
> >> From: Saeed Mahameed <saeedm@mellanox.com>
> >>
> >> Previously we rang XDP SQ doorbell on every forwarded XDP packet.
> >>
> >> Here we introduce a xmit more like mechanism that will queue up more
> >> than one packet into SQ (up to RX napi budget) w/o notifying the hardware.
> >>
> >> Once RX napi budget is consumed and we exit napi RX loop, we will
> >> flush (doorbell) all XDP looped packets in case there are such.
> > I've already raised strong concerns with this approach on the RFC
> > patchset. Of not really taking advantage of RX bulking.
> > Please do not ignore this!
>
> Sure. Your approach can fit with our plans to split the RX completion
> poll loop into several stages.
> I will try it when we get there.
>
> > If you can promise, that we/you will also try to other approach I'm
> > suggesting, then I'm fine with this patch.
I'll take the above as a promise that there are plans to work in the
direction I'm requesting. Thanks!
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
--
Best regards,
Jesper Dangaard Brouer
MSc.CS, Principal Kernel Engineer at Red Hat
Author of http://www.iptv-analyzer.org
LinkedIn: http://www.linkedin.com/in/brouer
^ permalink raw reply
* [patch net-next 9/9] mlxsw: spectrum: Implement max rif resource
From: Jiri Pirko @ 2016-09-20 9:16 UTC (permalink / raw)
To: netdev; +Cc: davem, nogahf, idosch, eladr, yotamg, ogerlitz
In-Reply-To: <1474363017-2901-1-git-send-email-jiri@resnulli.us>
From: Nogah Frankel <nogahf@mellanox.com>
Replace max rif const with using the result from resource query.
Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 13 +++++----
drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 9 ++++---
.../net/ethernet/mellanox/mlxsw/spectrum_router.c | 31 ++++++++++++++++++++--
3 files changed, 41 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 359a9dc..c421b46 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3017,7 +3017,6 @@ err_rx_listener_register:
static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
{
struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
- int i;
mlxsw_sp_ports_remove(mlxsw_sp);
mlxsw_sp_span_fini(mlxsw_sp);
@@ -3029,8 +3028,6 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
WARN_ON(!list_empty(&mlxsw_sp->vfids.list));
WARN_ON(!list_empty(&mlxsw_sp->fids));
- for (i = 0; i < MLXSW_SP_RIF_MAX; i++)
- WARN_ON_ONCE(mlxsw_sp->rifs[i]);
}
static struct mlxsw_config_profile mlxsw_sp_config_profile = {
@@ -3172,13 +3169,15 @@ static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r,
static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp)
{
+ struct mlxsw_resources *resources;
int i;
- for (i = 0; i < MLXSW_SP_RIF_MAX; i++)
+ resources = mlxsw_core_resources_get(mlxsw_sp->core);
+ for (i = 0; i < resources->max_rif; i++)
if (!mlxsw_sp->rifs[i])
return i;
- return MLXSW_SP_RIF_MAX;
+ return MLXSW_SP_INVALID_RIF;
}
static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport,
@@ -3258,7 +3257,7 @@ mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport,
int err;
rif = mlxsw_sp_avail_rif_get(mlxsw_sp);
- if (rif == MLXSW_SP_RIF_MAX)
+ if (rif == MLXSW_SP_INVALID_RIF)
return ERR_PTR(-ERANGE);
err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, true);
@@ -3490,7 +3489,7 @@ static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp,
int err;
rif = mlxsw_sp_avail_rif_get(mlxsw_sp);
- if (rif == MLXSW_SP_RIF_MAX)
+ if (rif == MLXSW_SP_INVALID_RIF)
return -ERANGE;
err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 352079e..73cae21 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -54,7 +54,7 @@
#define MLXSW_SP_VFID_MAX 6656 /* Bridged VLAN interfaces */
#define MLXSW_SP_RFID_BASE 15360
-#define MLXSW_SP_RIF_MAX 800
+#define MLXSW_SP_INVALID_RIF 0xffff
#define MLXSW_SP_MID_MAX 7000
@@ -269,7 +269,7 @@ struct mlxsw_sp {
DECLARE_BITMAP(mapped, MLXSW_SP_MID_MAX);
} br_mids;
struct list_head fids; /* VLAN-aware bridge FIDs */
- struct mlxsw_sp_rif *rifs[MLXSW_SP_RIF_MAX];
+ struct mlxsw_sp_rif **rifs;
struct mlxsw_sp_port **ports;
struct mlxsw_core *core;
const struct mlxsw_bus_info *bus_info;
@@ -477,9 +477,12 @@ static inline struct mlxsw_sp_rif *
mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
const struct net_device *dev)
{
+ struct mlxsw_resources *resources;
int i;
- for (i = 0; i < MLXSW_SP_RIF_MAX; i++)
+ resources = mlxsw_core_resources_get(mlxsw_sp->core);
+
+ for (i = 0; i < resources->max_rif; i++)
if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev)
return mlxsw_sp->rifs[i];
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index ded19f0..cc653ac 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -1522,19 +1522,46 @@ static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp,
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
+ struct mlxsw_resources *resources;
char rgcr_pl[MLXSW_REG_RGCR_LEN];
+ int err;
+
+ resources = mlxsw_core_resources_get(mlxsw_sp->core);
+ if (!resources->max_rif_valid)
+ return -EIO;
+
+ mlxsw_sp->rifs = kcalloc(resources->max_rif,
+ sizeof(struct mlxsw_sp_rif *), GFP_KERNEL);
+ if (!mlxsw_sp->rifs)
+ return -ENOMEM;
mlxsw_reg_rgcr_pack(rgcr_pl, true);
- mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, MLXSW_SP_RIF_MAX);
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
+ mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, resources->max_rif);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
+ if (err)
+ goto err_rgcr_fail;
+
+ return 0;
+
+err_rgcr_fail:
+ kfree(mlxsw_sp->rifs);
+ return err;
}
static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
+ struct mlxsw_resources *resources;
char rgcr_pl[MLXSW_REG_RGCR_LEN];
+ int i;
mlxsw_reg_rgcr_pack(rgcr_pl, false);
mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
+
+ resources = mlxsw_core_resources_get(mlxsw_sp->core);
+ for (i = 0; i < resources->max_rif; i++)
+ WARN_ON_ONCE(mlxsw_sp->rifs[i]);
+
+ kfree(mlxsw_sp->rifs);
}
int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
--
2.5.5
^ permalink raw reply related
* [patch net-next 8/9] mlxsw: pci: Add max router interface resource
From: Jiri Pirko @ 2016-09-20 9:16 UTC (permalink / raw)
To: netdev; +Cc: davem, nogahf, idosch, eladr, yotamg, ogerlitz
In-Reply-To: <1474363017-2901-1-git-send-email-jiri@resnulli.us>
From: Nogah Frankel <nogahf@mellanox.com>
Add the max number of rif (router interfaces) to resource query.
Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
drivers/net/ethernet/mellanox/mlxsw/core.h | 4 +++-
drivers/net/ethernet/mellanox/mlxsw/pci.c | 5 +++++
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 097e560..c4f550b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -276,7 +276,8 @@ struct mlxsw_resources {
max_virtual_routers_valid:1,
max_system_ports_valid:1,
max_vlan_groups_valid:1,
- max_regions_valid:1;
+ max_regions_valid:1,
+ max_rif_valid:1;
u8 max_span;
u8 max_lag;
u8 max_ports_in_lag;
@@ -287,6 +288,7 @@ struct mlxsw_resources {
u16 max_system_ports;
u16 max_vlan_groups;
u16 max_regions;
+ u16 max_rif;
/* Internal resources.
* Determined by the SW, not queried from the HW.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 826b502..e742bd4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1165,6 +1165,7 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci,
#define MLXSW_MAX_SYSTEM_PORT_ID 0x2502
#define MLXSW_MAX_VLAN_GROUPS_ID 0x2906
#define MLXSW_MAX_REGIONS_ID 0x2901
+#define MLXSW_MAX_RIF_ID 0x2C02
#define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100
#define MLXSW_RESOURCES_PER_QUERY 32
@@ -1212,6 +1213,10 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val,
resources->max_regions = val;
resources->max_regions_valid = 1;
break;
+ case MLXSW_MAX_RIF_ID:
+ resources->max_rif = val;
+ resources->max_rif_valid = 1;
+ break;
default:
break;
}
--
2.5.5
^ permalink raw reply related
* [patch net-next 7/9] mlxsw: pci: Add some miscellaneous resources
From: Jiri Pirko @ 2016-09-20 9:16 UTC (permalink / raw)
To: netdev; +Cc: davem, nogahf, idosch, eladr, yotamg, ogerlitz
In-Reply-To: <1474363017-2901-1-git-send-email-jiri@resnulli.us>
From: Nogah Frankel <nogahf@mellanox.com>
Add max system ports, max regions and max vlan groups to resource query.
Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
drivers/net/ethernet/mellanox/mlxsw/core.h | 10 ++++++++--
drivers/net/ethernet/mellanox/mlxsw/pci.c | 15 +++++++++++++++
drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 6 ------
3 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index e936dc9..097e560 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -267,13 +267,16 @@ struct mlxsw_driver {
};
struct mlxsw_resources {
- u8 max_span_valid:1,
+ u32 max_span_valid:1,
max_lag_valid:1,
max_ports_in_lag_valid:1,
kvd_size_valid:1,
kvd_single_min_size_valid:1,
kvd_double_min_size_valid:1,
- max_virtual_routers_valid:1;
+ max_virtual_routers_valid:1,
+ max_system_ports_valid:1,
+ max_vlan_groups_valid:1,
+ max_regions_valid:1;
u8 max_span;
u8 max_lag;
u8 max_ports_in_lag;
@@ -281,6 +284,9 @@ struct mlxsw_resources {
u32 kvd_single_min_size;
u32 kvd_double_min_size;
u16 max_virtual_routers;
+ u16 max_system_ports;
+ u16 max_vlan_groups;
+ u16 max_regions;
/* Internal resources.
* Determined by the SW, not queried from the HW.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index cb95f9a..826b502 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1162,6 +1162,9 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci,
#define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002
#define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003
#define MLXSW_MAX_VIRTUAL_ROUTERS_ID 0x2C01
+#define MLXSW_MAX_SYSTEM_PORT_ID 0x2502
+#define MLXSW_MAX_VLAN_GROUPS_ID 0x2906
+#define MLXSW_MAX_REGIONS_ID 0x2901
#define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100
#define MLXSW_RESOURCES_PER_QUERY 32
@@ -1197,6 +1200,18 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val,
resources->max_virtual_routers = val;
resources->max_virtual_routers_valid = 1;
break;
+ case MLXSW_MAX_SYSTEM_PORT_ID:
+ resources->max_system_ports = val;
+ resources->max_system_ports_valid = 1;
+ break;
+ case MLXSW_MAX_VLAN_GROUPS_ID:
+ resources->max_vlan_groups = val;
+ resources->max_vlan_groups_valid = 1;
+ break;
+ case MLXSW_MAX_REGIONS_ID:
+ resources->max_regions = val;
+ resources->max_regions_valid = 1;
+ break;
default:
break;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 29ad6a0..359a9dc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3040,12 +3040,6 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = {
.max_mid = MLXSW_SP_MID_MAX,
.used_max_pgt = 1,
.max_pgt = 0,
- .used_max_system_port = 1,
- .max_system_port = 64,
- .used_max_vlan_groups = 1,
- .max_vlan_groups = 127,
- .used_max_regions = 1,
- .max_regions = 400,
.used_flood_tables = 1,
.used_flood_mode = 1,
.flood_mode = 3,
--
2.5.5
^ permalink raw reply related
* [patch net-next 6/9] mlxsw: spectrum: Implement max virtual routers resource
From: Jiri Pirko @ 2016-09-20 9:16 UTC (permalink / raw)
To: netdev; +Cc: davem, nogahf, idosch, eladr, yotamg, ogerlitz
In-Reply-To: <1474363017-2901-1-git-send-email-jiri@resnulli.us>
From: Nogah Frankel <nogahf@mellanox.com>
Replace max virtual routers const with the result from
the resource query.
Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 4 +-
.../net/ethernet/mellanox/mlxsw/spectrum_router.c | 43 +++++++++++++++++++---
2 files changed, 38 insertions(+), 9 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 208dfee..352079e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -64,8 +64,6 @@
#define MLXSW_SP_LPM_TREE_MAX 22
#define MLXSW_SP_LPM_TREE_COUNT (MLXSW_SP_LPM_TREE_MAX - MLXSW_SP_LPM_TREE_MIN)
-#define MLXSW_SP_VIRTUAL_ROUTER_MAX 256
-
#define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */
#define MLXSW_SP_BYTES_PER_CELL 96
@@ -249,7 +247,7 @@ struct mlxsw_sp_port_mall_tc_entry {
struct mlxsw_sp_router {
struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT];
- struct mlxsw_sp_vr vrs[MLXSW_SP_VIRTUAL_ROUTER_MAX];
+ struct mlxsw_sp_vr *vrs;
struct rhashtable neigh_ht;
struct {
struct delayed_work dw;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 4afb498..ded19f0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -372,10 +372,12 @@ static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
{
+ struct mlxsw_resources *resources;
struct mlxsw_sp_vr *vr;
int i;
- for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) {
+ resources = mlxsw_core_resources_get(mlxsw_sp->core);
+ for (i = 0; i < resources->max_virtual_routers; i++) {
vr = &mlxsw_sp->router.vrs[i];
if (!vr->used)
return vr;
@@ -417,11 +419,14 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
u32 tb_id,
enum mlxsw_sp_l3proto proto)
{
+ struct mlxsw_resources *resources;
struct mlxsw_sp_vr *vr;
int i;
tb_id = mlxsw_sp_fix_tb_id(tb_id);
- for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) {
+
+ resources = mlxsw_core_resources_get(mlxsw_sp->core);
+ for (i = 0; i < resources->max_virtual_routers; i++) {
vr = &mlxsw_sp->router.vrs[i];
if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
return vr;
@@ -555,15 +560,33 @@ static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
&vr->fib->prefix_usage);
}
-static void mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
+static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
{
+ struct mlxsw_resources *resources;
struct mlxsw_sp_vr *vr;
int i;
- for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) {
+ resources = mlxsw_core_resources_get(mlxsw_sp->core);
+ if (!resources->max_virtual_routers_valid)
+ return -EIO;
+
+ mlxsw_sp->router.vrs = kcalloc(resources->max_virtual_routers,
+ sizeof(struct mlxsw_sp_vr),
+ GFP_KERNEL);
+ if (!mlxsw_sp->router.vrs)
+ return -ENOMEM;
+
+ for (i = 0; i < resources->max_virtual_routers; i++) {
vr = &mlxsw_sp->router.vrs[i];
vr->id = i;
}
+
+ return 0;
+}
+
+static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ kfree(mlxsw_sp->router.vrs);
}
struct mlxsw_sp_neigh_key {
@@ -1523,14 +1546,21 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
err = __mlxsw_sp_router_init(mlxsw_sp);
if (err)
return err;
+
mlxsw_sp_lpm_init(mlxsw_sp);
- mlxsw_sp_vrs_init(mlxsw_sp);
- err = mlxsw_sp_neigh_init(mlxsw_sp);
+ err = mlxsw_sp_vrs_init(mlxsw_sp);
+ if (err)
+ goto err_vrs_init;
+
+ err = mlxsw_sp_neigh_init(mlxsw_sp);
if (err)
goto err_neigh_init;
+
return 0;
err_neigh_init:
+ mlxsw_sp_vrs_fini(mlxsw_sp);
+err_vrs_init:
__mlxsw_sp_router_fini(mlxsw_sp);
return err;
}
@@ -1538,6 +1568,7 @@ err_neigh_init:
void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
{
mlxsw_sp_neigh_fini(mlxsw_sp);
+ mlxsw_sp_vrs_fini(mlxsw_sp);
__mlxsw_sp_router_fini(mlxsw_sp);
}
--
2.5.5
^ permalink raw reply related
* [patch net-next 5/9] mlxsw: pci: Add max virtual routers resource
From: Jiri Pirko @ 2016-09-20 9:16 UTC (permalink / raw)
To: netdev; +Cc: davem, nogahf, idosch, eladr, yotamg, ogerlitz
In-Reply-To: <1474363017-2901-1-git-send-email-jiri@resnulli.us>
From: Nogah Frankel <nogahf@mellanox.com>
Add the max number of virtual routers to resource query.
Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
drivers/net/ethernet/mellanox/mlxsw/core.h | 4 +++-
drivers/net/ethernet/mellanox/mlxsw/pci.c | 5 +++++
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 1193bdc..e936dc9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -272,13 +272,15 @@ struct mlxsw_resources {
max_ports_in_lag_valid:1,
kvd_size_valid:1,
kvd_single_min_size_valid:1,
- kvd_double_min_size_valid:1;
+ kvd_double_min_size_valid:1,
+ max_virtual_routers_valid:1;
u8 max_span;
u8 max_lag;
u8 max_ports_in_lag;
u32 kvd_size;
u32 kvd_single_min_size;
u32 kvd_double_min_size;
+ u16 max_virtual_routers;
/* Internal resources.
* Determined by the SW, not queried from the HW.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index c2d2fa1..cb95f9a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1161,6 +1161,7 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci,
#define MLXSW_KVD_SIZE_ID 0x1001
#define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002
#define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003
+#define MLXSW_MAX_VIRTUAL_ROUTERS_ID 0x2C01
#define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100
#define MLXSW_RESOURCES_PER_QUERY 32
@@ -1192,6 +1193,10 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val,
resources->kvd_double_min_size = val;
resources->kvd_double_min_size_valid = 1;
break;
+ case MLXSW_MAX_VIRTUAL_ROUTERS_ID:
+ resources->max_virtual_routers = val;
+ resources->max_virtual_routers_valid = 1;
+ break;
default:
break;
}
--
2.5.5
^ permalink raw reply related
* [patch net-next 4/9] mlxsw: profile: Add KVD resources to profile config
From: Jiri Pirko @ 2016-09-20 9:16 UTC (permalink / raw)
To: netdev; +Cc: davem, nogahf, idosch, eladr, yotamg, ogerlitz
In-Reply-To: <1474363017-2901-1-git-send-email-jiri@resnulli.us>
From: Nogah Frankel <nogahf@mellanox.com>
Use resources from resource query to determine values for
the profile configuration.
Add KVD determined section sizes to the resources struct.
Change the profile struct and value to match this changes.
Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
drivers/net/ethernet/mellanox/mlxsw/core.h | 15 ++++--
drivers/net/ethernet/mellanox/mlxsw/pci.c | 73 +++++++++++++++++++++-----
drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 7 +--
drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 3 +-
4 files changed, 76 insertions(+), 22 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 76ad566..1193bdc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -190,7 +190,8 @@ struct mlxsw_config_profile {
used_max_pkey:1,
used_ar_sec:1,
used_adaptive_routing_group_cap:1,
- used_kvd_sizes:1;
+ used_kvd_split_data:1; /* indicate for the kvd's values */
+
u8 max_vepa_channels;
u16 max_mid;
u16 max_pgt;
@@ -210,8 +211,9 @@ struct mlxsw_config_profile {
u16 adaptive_routing_group_cap;
u8 arn;
u32 kvd_linear_size;
- u32 kvd_hash_single_size;
- u32 kvd_hash_double_size;
+ u16 kvd_hash_granularity;
+ u8 kvd_hash_single_parts;
+ u8 kvd_hash_double_parts;
u8 resource_query_enable;
struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT];
};
@@ -277,6 +279,13 @@ struct mlxsw_resources {
u32 kvd_size;
u32 kvd_single_min_size;
u32 kvd_double_min_size;
+
+ /* Internal resources.
+ * Determined by the SW, not queried from the HW.
+ */
+ u32 kvd_single_size;
+ u32 kvd_double_size;
+ u32 kvd_linear_size;
};
struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 7b2ab1e..c2d2fa1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1234,10 +1234,52 @@ static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox,
return -EIO;
}
+static int mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_config_profile *profile,
+ struct mlxsw_resources *resources)
+{
+ u32 singles_size, doubles_size, linear_size;
+
+ if (!resources->kvd_single_min_size_valid ||
+ !resources->kvd_double_min_size_valid ||
+ !profile->used_kvd_split_data)
+ return -EIO;
+
+ linear_size = profile->kvd_linear_size;
+
+ /* The hash part is what left of the kvd without the
+ * linear part. It is split to the single size and
+ * double size by the parts ratio from the profile.
+ * Both sizes must be a multiplications of the
+ * granularity from the profile.
+ */
+ doubles_size = (resources->kvd_size - linear_size);
+ doubles_size *= profile->kvd_hash_double_parts;
+ doubles_size /= (profile->kvd_hash_double_parts +
+ profile->kvd_hash_single_parts);
+ doubles_size /= profile->kvd_hash_granularity;
+ doubles_size *= profile->kvd_hash_granularity;
+ singles_size = resources->kvd_size - doubles_size -
+ linear_size;
+
+ /* Check results are legal. */
+ if (singles_size < resources->kvd_single_min_size ||
+ doubles_size < resources->kvd_double_min_size ||
+ resources->kvd_size < linear_size)
+ return -EIO;
+
+ resources->kvd_single_size = singles_size;
+ resources->kvd_double_size = doubles_size;
+ resources->kvd_linear_size = linear_size;
+
+ return 0;
+}
+
static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
- const struct mlxsw_config_profile *profile)
+ const struct mlxsw_config_profile *profile,
+ struct mlxsw_resources *resources)
{
int i;
+ int err;
mlxsw_cmd_mbox_zero(mbox);
@@ -1323,19 +1365,22 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set(
mbox, profile->adaptive_routing_group_cap);
}
- if (profile->used_kvd_sizes) {
- mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(
- mbox, 1);
- mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(
- mbox, profile->kvd_linear_size);
- mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(
- mbox, 1);
- mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(
- mbox, profile->kvd_hash_single_size);
+ if (resources->kvd_size_valid) {
+ err = mlxsw_pci_profile_get_kvd_sizes(profile, resources);
+ if (err)
+ return err;
+
+ mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(mbox, 1);
+ mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(mbox,
+ resources->kvd_linear_size);
+ mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(mbox,
+ 1);
+ mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(mbox,
+ resources->kvd_single_size);
mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set(
- mbox, 1);
- mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(
- mbox, profile->kvd_hash_double_size);
+ mbox, 1);
+ mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(mbox,
+ resources->kvd_double_size);
}
for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++)
@@ -1537,7 +1582,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
if (err)
goto err_query_resources;
- err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile);
+ err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, resources);
if (err)
goto err_config_profile;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 1a6d21e..29ad6a0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3057,10 +3057,11 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = {
.max_ib_mc = 0,
.used_max_pkey = 1,
.max_pkey = 0,
- .used_kvd_sizes = 1,
+ .used_kvd_split_data = 1,
+ .kvd_hash_granularity = MLXSW_SP_KVD_GRANULARITY,
+ .kvd_hash_single_parts = 2,
+ .kvd_hash_double_parts = 1,
.kvd_linear_size = MLXSW_SP_KVD_LINEAR_SIZE,
- .kvd_hash_single_size = MLXSW_SP_KVD_HASH_SINGLE_SIZE,
- .kvd_hash_double_size = MLXSW_SP_KVD_HASH_DOUBLE_SIZE,
.swid_config = {
{
.used_type = 1,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index a056aaa..208dfee 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -74,8 +74,7 @@
#define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL)
#define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */
-#define MLXSW_SP_KVD_HASH_SINGLE_SIZE 163840 /* entries */
-#define MLXSW_SP_KVD_HASH_DOUBLE_SIZE 32768 /* entries */
+#define MLXSW_SP_KVD_GRANULARITY 128
/* Maximum delay buffer needed in case of PAUSE frames, in cells.
* Assumes 100m cable and maximum MTU.
--
2.5.5
^ permalink raw reply related
* [patch net-next 3/9] mlxsw: pci: Add KVD size relate resources
From: Jiri Pirko @ 2016-09-20 9:16 UTC (permalink / raw)
To: netdev; +Cc: davem, nogahf, idosch, eladr, yotamg, ogerlitz
In-Reply-To: <1474363017-2901-1-git-send-email-jiri@resnulli.us>
From: Nogah Frankel <nogahf@mellanox.com>
Add KVD size, and minimum sizes for the single and double
sections resources to resources query.
Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
drivers/net/ethernet/mellanox/mlxsw/core.h | 8 +++++++-
drivers/net/ethernet/mellanox/mlxsw/pci.c | 15 +++++++++++++++
2 files changed, 22 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 558d1ce..76ad566 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -267,10 +267,16 @@ struct mlxsw_driver {
struct mlxsw_resources {
u8 max_span_valid:1,
max_lag_valid:1,
- max_ports_in_lag_valid:1;
+ max_ports_in_lag_valid:1,
+ kvd_size_valid:1,
+ kvd_single_min_size_valid:1,
+ kvd_double_min_size_valid:1;
u8 max_span;
u8 max_lag;
u8 max_ports_in_lag;
+ u32 kvd_size;
+ u32 kvd_single_min_size;
+ u32 kvd_double_min_size;
};
struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 57c2d34..7b2ab1e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1158,6 +1158,9 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci,
#define MLXSW_MAX_SPAN_ID 0x2420
#define MLXSW_MAX_LAG_ID 0x2520
#define MLXSW_MAX_PORTS_IN_LAG_ID 0x2521
+#define MLXSW_KVD_SIZE_ID 0x1001
+#define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002
+#define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003
#define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100
#define MLXSW_RESOURCES_PER_QUERY 32
@@ -1177,6 +1180,18 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val,
resources->max_ports_in_lag = val;
resources->max_ports_in_lag_valid = 1;
break;
+ case MLXSW_KVD_SIZE_ID:
+ resources->kvd_size = val;
+ resources->kvd_size_valid = 1;
+ break;
+ case MLXSW_KVD_SINGLE_MIN_SIZE_ID:
+ resources->kvd_single_min_size = val;
+ resources->kvd_single_min_size_valid = 1;
+ break;
+ case MLXSW_KVD_DOUBLE_MIN_SIZE_ID:
+ resources->kvd_double_min_size = val;
+ resources->kvd_double_min_size_valid = 1;
+ break;
default:
break;
}
--
2.5.5
^ permalink raw reply related
* [patch net-next 2/9] mlxsw: spectrum: lag resources- use resources data instead of consts
From: Jiri Pirko @ 2016-09-20 9:16 UTC (permalink / raw)
To: netdev; +Cc: davem, nogahf, idosch, eladr, yotamg, ogerlitz
In-Reply-To: <1474363017-2901-1-git-send-email-jiri@resnulli.us>
From: Nogah Frankel <nogahf@mellanox.com>
Use max lag and max ports in lag resources as the result of resource query
instead of using const to save them.
Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
drivers/net/ethernet/mellanox/mlxsw/core.c | 26 +++++++-------
drivers/net/ethernet/mellanox/mlxsw/core.h | 4 ---
drivers/net/ethernet/mellanox/mlxsw/pci.c | 12 -------
drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 41 +++++++++++++++++-----
drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 5 +--
.../ethernet/mellanox/mlxsw/spectrum_switchdev.c | 4 ++-
drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 4 ---
7 files changed, 50 insertions(+), 46 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index 068ee65a..aa33d58 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -1100,10 +1100,15 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
goto err_alloc_stats;
}
- if (mlxsw_driver->profile->used_max_lag &&
- mlxsw_driver->profile->used_max_port_per_lag) {
- alloc_size = sizeof(u8) * mlxsw_driver->profile->max_lag *
- mlxsw_driver->profile->max_port_per_lag;
+ err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile,
+ &mlxsw_core->resources);
+ if (err)
+ goto err_bus_init;
+
+ if (mlxsw_core->resources.max_lag_valid &&
+ mlxsw_core->resources.max_ports_in_lag_valid) {
+ alloc_size = sizeof(u8) * mlxsw_core->resources.max_lag *
+ mlxsw_core->resources.max_ports_in_lag;
mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL);
if (!mlxsw_core->lag.mapping) {
err = -ENOMEM;
@@ -1111,11 +1116,6 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
}
}
- err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile,
- &mlxsw_core->resources);
- if (err)
- goto err_bus_init;
-
err = mlxsw_emad_init(mlxsw_core);
if (err)
goto err_emad_init;
@@ -1146,10 +1146,10 @@ err_hwmon_init:
err_devlink_register:
mlxsw_emad_fini(mlxsw_core);
err_emad_init:
- mlxsw_bus->fini(bus_priv);
-err_bus_init:
kfree(mlxsw_core->lag.mapping);
err_alloc_lag_mapping:
+ mlxsw_bus->fini(bus_priv);
+err_bus_init:
free_percpu(mlxsw_core->pcpu_stats);
err_alloc_stats:
devlink_free(devlink);
@@ -1615,7 +1615,7 @@ EXPORT_SYMBOL(mlxsw_core_skb_receive);
static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core,
u16 lag_id, u8 port_index)
{
- return mlxsw_core->driver->profile->max_port_per_lag * lag_id +
+ return mlxsw_core->resources.max_ports_in_lag * lag_id +
port_index;
}
@@ -1644,7 +1644,7 @@ void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core,
{
int i;
- for (i = 0; i < mlxsw_core->driver->profile->max_port_per_lag; i++) {
+ for (i = 0; i < mlxsw_core->resources.max_ports_in_lag; i++) {
int index = mlxsw_core_lag_mapping_index(mlxsw_core,
lag_id, i);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index 51f27a3..558d1ce 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -179,8 +179,6 @@ struct mlxsw_swid_config {
struct mlxsw_config_profile {
u16 used_max_vepa_channels:1,
- used_max_lag:1,
- used_max_port_per_lag:1,
used_max_mid:1,
used_max_pgt:1,
used_max_system_port:1,
@@ -194,8 +192,6 @@ struct mlxsw_config_profile {
used_adaptive_routing_group_cap:1,
used_kvd_sizes:1;
u8 max_vepa_channels;
- u16 max_lag;
- u16 max_port_per_lag;
u16 max_mid;
u16 max_pgt;
u16 max_system_port;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index cb284ea..57c2d34 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1232,18 +1232,6 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
mlxsw_cmd_mbox_config_profile_max_vepa_channels_set(
mbox, profile->max_vepa_channels);
}
- if (profile->used_max_lag) {
- mlxsw_cmd_mbox_config_profile_set_max_lag_set(
- mbox, 1);
- mlxsw_cmd_mbox_config_profile_max_lag_set(
- mbox, profile->max_lag);
- }
- if (profile->used_max_port_per_lag) {
- mlxsw_cmd_mbox_config_profile_set_max_port_per_lag_set(
- mbox, 1);
- mlxsw_cmd_mbox_config_profile_max_port_per_lag_set(
- mbox, profile->max_port_per_lag);
- }
if (profile->used_max_mid) {
mlxsw_cmd_mbox_config_profile_set_max_mid_set(
mbox, 1);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index fa31261..1a6d21e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2887,7 +2887,9 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp)
static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
{
+ struct mlxsw_resources *resources;
char slcr_pl[MLXSW_REG_SLCR_LEN];
+ int err;
mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC |
MLXSW_REG_SLCR_LAG_HASH_DMAC |
@@ -2898,7 +2900,26 @@ static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
MLXSW_REG_SLCR_LAG_HASH_SPORT |
MLXSW_REG_SLCR_LAG_HASH_DPORT |
MLXSW_REG_SLCR_LAG_HASH_IPPROTO);
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl);
+ err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl);
+ if (err)
+ return err;
+
+ resources = mlxsw_core_resources_get(mlxsw_sp->core);
+ if (!(resources->max_lag_valid && resources->max_ports_in_lag_valid))
+ return -EIO;
+
+ mlxsw_sp->lags = kcalloc(resources->max_lag,
+ sizeof(struct mlxsw_sp_upper),
+ GFP_KERNEL);
+ if (!mlxsw_sp->lags)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp)
+{
+ kfree(mlxsw_sp->lags);
}
static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
@@ -2982,6 +3003,7 @@ err_span_init:
err_router_init:
mlxsw_sp_switchdev_fini(mlxsw_sp);
err_switchdev_init:
+ mlxsw_sp_lag_fini(mlxsw_sp);
err_lag_init:
mlxsw_sp_buffers_fini(mlxsw_sp);
err_buffers_init:
@@ -3001,6 +3023,7 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
mlxsw_sp_span_fini(mlxsw_sp);
mlxsw_sp_router_fini(mlxsw_sp);
mlxsw_sp_switchdev_fini(mlxsw_sp);
+ mlxsw_sp_lag_fini(mlxsw_sp);
mlxsw_sp_buffers_fini(mlxsw_sp);
mlxsw_sp_traps_fini(mlxsw_sp);
mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
@@ -3013,10 +3036,6 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
static struct mlxsw_config_profile mlxsw_sp_config_profile = {
.used_max_vepa_channels = 1,
.max_vepa_channels = 0,
- .used_max_lag = 1,
- .max_lag = MLXSW_SP_LAG_MAX,
- .used_max_port_per_lag = 1,
- .max_port_per_lag = MLXSW_SP_PORT_PER_LAG_MAX,
.used_max_mid = 1,
.max_mid = MLXSW_SP_MID_MAX,
.used_max_pgt = 1,
@@ -3683,12 +3702,14 @@ static bool mlxsw_sp_port_fdb_should_flush(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
u8 local_port = mlxsw_sp_port->local_port;
u16 lag_id = mlxsw_sp_port->lag_id;
+ struct mlxsw_resources *resources;
int i, count = 0;
if (!mlxsw_sp_port->lagged)
return true;
- for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) {
+ resources = mlxsw_core_resources_get(mlxsw_sp->core);
+ for (i = 0; i < resources->max_ports_in_lag; i++) {
struct mlxsw_sp_port *lag_port;
lag_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i);
@@ -3894,11 +3915,13 @@ static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp,
struct net_device *lag_dev,
u16 *p_lag_id)
{
+ struct mlxsw_resources *resources;
struct mlxsw_sp_upper *lag;
int free_lag_id = -1;
int i;
- for (i = 0; i < MLXSW_SP_LAG_MAX; i++) {
+ resources = mlxsw_core_resources_get(mlxsw_sp->core);
+ for (i = 0; i < resources->max_lag; i++) {
lag = mlxsw_sp_lag_get(mlxsw_sp, i);
if (lag->ref_count) {
if (lag->dev == lag_dev) {
@@ -3932,9 +3955,11 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp,
static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp,
u16 lag_id, u8 *p_port_index)
{
+ struct mlxsw_resources *resources;
int i;
- for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) {
+ resources = mlxsw_core_resources_get(mlxsw_sp->core);
+ for (i = 0; i < resources->max_ports_in_lag; i++) {
if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) {
*p_port_index = i;
return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 49f4caf..a056aaa 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -56,9 +56,6 @@
#define MLXSW_SP_RFID_BASE 15360
#define MLXSW_SP_RIF_MAX 800
-#define MLXSW_SP_LAG_MAX 64
-#define MLXSW_SP_PORT_PER_LAG_MAX 16
-
#define MLXSW_SP_MID_MAX 7000
#define MLXSW_SP_PORTS_PER_CLUSTER_MAX 4
@@ -290,7 +287,7 @@ struct mlxsw_sp {
#define MLXSW_SP_DEFAULT_AGEING_TIME 300
u32 ageing_time;
struct mlxsw_sp_upper master_bridge;
- struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX];
+ struct mlxsw_sp_upper *lags;
u8 port_to_module[MLXSW_PORT_MAX_PORTS];
struct mlxsw_sp_sb sb;
struct mlxsw_sp_router router;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 7186c48..2b04b76 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1205,9 +1205,11 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp,
u16 lag_id)
{
struct mlxsw_sp_port *mlxsw_sp_port;
+ struct mlxsw_resources *resources;
int i;
- for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) {
+ resources = mlxsw_core_resources_get(mlxsw_sp->core);
+ for (i = 0; i < resources->max_ports_in_lag; i++) {
mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i);
if (mlxsw_sp_port)
return mlxsw_sp_port;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 377daa4..8b15bf0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -1512,10 +1512,6 @@ static void mlxsw_sx_fini(struct mlxsw_core *mlxsw_core)
static struct mlxsw_config_profile mlxsw_sx_config_profile = {
.used_max_vepa_channels = 1,
.max_vepa_channels = 0,
- .used_max_lag = 1,
- .max_lag = 64,
- .used_max_port_per_lag = 1,
- .max_port_per_lag = 16,
.used_max_mid = 1,
.max_mid = 7000,
.used_max_pgt = 1,
--
2.5.5
^ permalink raw reply related
* [patch net-next 1/9] mlxsw: pci: Add lag related resources to resources query
From: Jiri Pirko @ 2016-09-20 9:16 UTC (permalink / raw)
To: netdev; +Cc: davem, nogahf, idosch, eladr, yotamg, ogerlitz
In-Reply-To: <1474363017-2901-1-git-send-email-jiri@resnulli.us>
From: Nogah Frankel <nogahf@mellanox.com>
Add max lag and max ports in lag resources to resources query.
Signed-off-by: Nogah Frankel <nogahf@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
---
drivers/net/ethernet/mellanox/mlxsw/core.h | 6 +++++-
drivers/net/ethernet/mellanox/mlxsw/pci.c | 10 ++++++++++
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index d2e3297..51f27a3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -269,8 +269,12 @@ struct mlxsw_driver {
};
struct mlxsw_resources {
- u8 max_span_valid:1;
+ u8 max_span_valid:1,
+ max_lag_valid:1,
+ max_ports_in_lag_valid:1;
u8 max_span;
+ u8 max_lag;
+ u8 max_ports_in_lag;
};
struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 1d1360c..cb284ea 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -1156,6 +1156,8 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci,
#define MLXSW_RESOURCES_TABLE_END_ID 0xffff
#define MLXSW_MAX_SPAN_ID 0x2420
+#define MLXSW_MAX_LAG_ID 0x2520
+#define MLXSW_MAX_PORTS_IN_LAG_ID 0x2521
#define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100
#define MLXSW_RESOURCES_PER_QUERY 32
@@ -1167,6 +1169,14 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val,
resources->max_span = val;
resources->max_span_valid = 1;
break;
+ case MLXSW_MAX_LAG_ID:
+ resources->max_lag = val;
+ resources->max_lag_valid = 1;
+ break;
+ case MLXSW_MAX_PORTS_IN_LAG_ID:
+ resources->max_ports_in_lag = val;
+ resources->max_ports_in_lag_valid = 1;
+ break;
default:
break;
}
--
2.5.5
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox